mirror of
https://github.com/Dictionarry-Hub/database.git
synced 2025-12-10 15:57:00 +00:00
Improvement/split (#5)
* del. Removed legacy bash split script * add. Implemented Python script to split JSON files into separate objects
This commit is contained in:
42
scripts/split.py
Normal file
42
scripts/split.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def sanitize_filename(filename):
    """Return *filename* with every character that is not filename-safe removed.

    A character is kept when it is alphanumeric (per ``str.isalnum``) or is one
    of space, dot, underscore or hyphen. Everything else -- path separators
    included -- is dropped, so the result may be shorter than the input or
    even empty.
    """
    extra_allowed = frozenset((' ', '.', '_', '-'))
    kept = [ch for ch in filename if ch.isalnum() or ch in extra_allowed]
    return ''.join(kept)
|
||||
|
||||
def process_json(input_file, output_dir):
    """Split a JSON array of objects into one JSON file per object.

    Every array element that is an object with a non-empty string ``name`` is
    written to ``<output_dir>/<sanitized name>.json`` using two-space
    indentation. Elements that cannot be given a filename are reported on
    stdout and skipped instead of aborting the whole run.

    Args:
        input_file: Path to a JSON file whose top-level value is an array.
        output_dir: Directory that receives the per-object files; it is
            created when it does not exist yet.

    Raises:
        ValueError: If the top-level JSON value is not an array. Malformed
            JSON raises ``json.JSONDecodeError``, which is a subclass.
        OSError: If the input cannot be read or an output cannot be written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Iterating a top-level object would yield its keys (plain strings) and
    # fail later with an obscure AttributeError, so reject it up front.
    if not isinstance(data, list):
        raise ValueError(
            f"Expected a JSON array at the top level of {input_file}, "
            f"got {type(data).__name__}"
        )

    written = set()
    for obj in data:
        # Only dicts can carry a name; anything else is treated as nameless.
        name = obj.get('name') if isinstance(obj, dict) else None
        if not name or not isinstance(name, str):
            print(f"Skipping object without a name field: {obj}")
            continue

        sanitized_name = sanitize_filename(name)
        if not sanitized_name.strip():
            # Nothing usable survived sanitizing; writing a bare ".json"
            # would create a hidden file shared by every such object.
            print(f"Skipping object whose name has no usable filename characters: {obj}")
            continue

        output_file = os.path.join(output_dir, f"{sanitized_name}.json")
        if output_file in written:
            # Different names can sanitize to the same filename; keep the
            # last-one-wins behaviour but make the data loss visible.
            print(f"Warning: duplicate name, overwriting: {output_file}")
        written.add(output_file)

        with open(output_file, 'w', encoding='utf-8') as out:
            json.dump(obj, out, indent=2)
        print(f"Created file: {output_file}")

    print("Finished processing JSON objects.")
|
||||
|
||||
def main():
    """Command-line entry point: validate the arguments, then split the file.

    Expects exactly two arguments, the input JSON file and the output
    directory. Prints a usage line or a "not found" message and exits with
    status 1 when the arguments are unusable; otherwise hands both paths to
    ``process_json``.
    """
    argv = sys.argv
    if len(argv) != 3:
        print(f"Usage: {argv[0]} <input_json_file> <output_directory>")
        sys.exit(1)

    _, source_path, target_dir = argv

    source_exists = os.path.isfile(source_path)
    if not source_exists:
        print(f"Input file not found: {source_path}")
        sys.exit(1)

    process_json(source_path, target_dir)
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/bin/bash
#
# Split a JSON array into one pretty-printed JSON file per element, named
# after the element's "name" field. Files are written to ../output, relative
# to the current working directory.
#
# Usage: split.sh <input_json_file>
# Requires: jq

# Check if jq is installed
if ! command -v jq &> /dev/null
then
    echo "jq could not be found. Please install jq before running this script."
    exit 1
fi

# Check if input file is provided as an argument
if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <input_json_file>"
    exit 1
fi

# Input JSON file
input_file="$1"

# Check if the input file exists
if [[ ! -f "$input_file" ]]; then
    echo "Input file not found: $input_file"
    exit 1
fi

# Create output directory if it doesn't exist
output_dir="../output"
mkdir -p "$output_dir"

# Function to sanitize filenames by removing slashes
sanitize_filename() {
    # printf instead of echo: a name such as "-n" must not be read as an option.
    printf '%s\n' "$1" | tr -d '/'
}

# Read each object in the array and save to a separate JSON file.
# IFS= keeps leading/trailing whitespace of each compact JSON line intact.
jq -c '.[]' "$input_file" | while IFS= read -r object; do
    # '.name? // empty' prints nothing for a missing or null name and for
    # non-object elements. Plain '.name' prints the literal string "null",
    # which made the emptiness check below always succeed.
    name=$(printf '%s\n' "$object" | jq -r '.name? // empty')
    if [[ -n "$name" ]]; then
        sanitized_name=$(sanitize_filename "$name")
        if [[ -z "$sanitized_name" ]]; then
            # The name consisted only of slashes; do not create a bare ".json".
            echo "Skipping object whose name has no usable filename characters: $object"
            continue
        fi
        printf '%s\n' "$object" | jq '.' > "$output_dir/$sanitized_name.json"
        echo "Created file: $output_dir/$sanitized_name.json"
    else
        echo "Skipping object without a name field: $object"
    fi
done

echo "Finished processing JSON objects."
|
||||
Reference in New Issue
Block a user