mirror of
https://github.com/Dictionarry-Hub/database.git
synced 2025-12-10 15:57:00 +00:00
tweak(script): improve markdown and YAML file processing with error handling and default metadata
This commit is contained in:
@@ -4,7 +4,6 @@ from pathlib import Path
|
||||
from datetime import datetime, timezone, date
|
||||
|
||||
|
||||
# Add this class at the top with the imports
|
||||
class DateTimeEncoder(json.JSONEncoder):
|
||||
|
||||
def default(self, obj):
|
||||
@@ -34,38 +33,56 @@ def bundle_markdown(folder_name):
|
||||
folder_path = Path(folder_name)
|
||||
|
||||
if folder_path.exists():
|
||||
for md_file in folder_path.glob("*.md"):
|
||||
with open(md_file) as f:
|
||||
content = f.read()
|
||||
# Sort files to ensure consistent ordering
|
||||
for md_file in sorted(folder_path.glob("*.md")):
|
||||
try:
|
||||
with open(md_file, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
meta, content = parse_frontmatter(content)
|
||||
|
||||
meta, content = parse_frontmatter(content)
|
||||
item = {
|
||||
"_id":
|
||||
md_file.stem,
|
||||
"content":
|
||||
content,
|
||||
"last_modified":
|
||||
datetime.fromtimestamp(md_file.stat().st_mtime,
|
||||
tz=timezone.utc).isoformat(),
|
||||
**meta
|
||||
}
|
||||
data.append(item)
|
||||
# Ensure required fields exist
|
||||
if 'title' not in meta:
|
||||
meta['title'] = md_file.stem.replace('-', ' ').title()
|
||||
if 'author' not in meta:
|
||||
meta['author'] = 'Unknown'
|
||||
|
||||
item = {
|
||||
"_id":
|
||||
md_file.stem,
|
||||
"content":
|
||||
content,
|
||||
"last_modified":
|
||||
datetime.fromtimestamp(md_file.stat().st_mtime,
|
||||
tz=timezone.utc).isoformat(),
|
||||
**meta
|
||||
}
|
||||
data.append(item)
|
||||
except Exception as e:
|
||||
print(f"Error processing {md_file}: {e}")
|
||||
return data
|
||||
|
||||
|
||||
def bundle_folder(folder_name):
|
||||
"""Bundle files based on type"""
|
||||
if folder_name == "wiki":
|
||||
# List of folders that contain markdown files
|
||||
markdown_folders = ["wiki", "dev_logs"]
|
||||
|
||||
if folder_name in markdown_folders:
|
||||
return bundle_markdown(folder_name)
|
||||
else:
|
||||
data = []
|
||||
folder_path = Path(folder_name)
|
||||
|
||||
if folder_path.exists():
|
||||
for yml_file in folder_path.glob("*.yml"):
|
||||
with open(yml_file) as f:
|
||||
item = yaml.safe_load(f)
|
||||
item["_id"] = yml_file.stem
|
||||
data.append(item)
|
||||
# Sort files to ensure consistent ordering
|
||||
for yml_file in sorted(folder_path.glob("*.yml")):
|
||||
try:
|
||||
with open(yml_file, encoding='utf-8') as f:
|
||||
item = yaml.safe_load(f)
|
||||
item["_id"] = yml_file.stem
|
||||
data.append(item)
|
||||
except Exception as e:
|
||||
print(f"Error processing {yml_file}: {e}")
|
||||
return data
|
||||
|
||||
|
||||
@@ -80,16 +97,21 @@ folders = [
|
||||
|
||||
# Bundle each folder
|
||||
for folder in folders:
|
||||
print(f"Processing {folder}...")
|
||||
data = bundle_folder(folder)
|
||||
bundle_path = f"bundles/{folder}.json"
|
||||
# Here's where we use the encoder
|
||||
with open(bundle_path, "w") as f:
|
||||
|
||||
with open(bundle_path, "w", encoding='utf-8') as f:
|
||||
json.dump(data, f, indent=2, cls=DateTimeEncoder)
|
||||
print(f"Created {bundle_path} with {len(data)} items")
|
||||
|
||||
# Create version file
|
||||
version = {
|
||||
"updated_at": datetime.now(timezone.utc).isoformat(),
|
||||
"folders": folders
|
||||
}
|
||||
with open("bundles/version.json", "w") as f:
|
||||
|
||||
with open("bundles/version.json", "w", encoding='utf-8') as f:
|
||||
json.dump(version, f, indent=2, cls=DateTimeEncoder)
|
||||
|
||||
print("Bundle creation complete!")
|
||||
|
||||
Reference in New Issue
Block a user