tweak(script): improve markdown and YAML file processing with error handling and default metadata

This commit is contained in:
Sam Chau
2025-01-21 17:07:11 +10:30
parent aa13fe7308
commit 35d57fbcb7

View File

@@ -4,7 +4,6 @@ from pathlib import Path
from datetime import datetime, timezone, date from datetime import datetime, timezone, date
# Add this class at the top with the imports
class DateTimeEncoder(json.JSONEncoder): class DateTimeEncoder(json.JSONEncoder):
def default(self, obj): def default(self, obj):
@@ -34,38 +33,56 @@ def bundle_markdown(folder_name):
folder_path = Path(folder_name) folder_path = Path(folder_name)
if folder_path.exists(): if folder_path.exists():
for md_file in folder_path.glob("*.md"): # Sort files to ensure consistent ordering
with open(md_file) as f: for md_file in sorted(folder_path.glob("*.md")):
content = f.read() try:
with open(md_file, encoding='utf-8') as f:
content = f.read()
meta, content = parse_frontmatter(content)
meta, content = parse_frontmatter(content) # Ensure required fields exist
item = { if 'title' not in meta:
"_id": meta['title'] = md_file.stem.replace('-', ' ').title()
md_file.stem, if 'author' not in meta:
"content": meta['author'] = 'Unknown'
content,
"last_modified": item = {
datetime.fromtimestamp(md_file.stat().st_mtime, "_id":
tz=timezone.utc).isoformat(), md_file.stem,
**meta "content":
} content,
data.append(item) "last_modified":
datetime.fromtimestamp(md_file.stat().st_mtime,
tz=timezone.utc).isoformat(),
**meta
}
data.append(item)
except Exception as e:
print(f"Error processing {md_file}: {e}")
return data return data
def bundle_folder(folder_name): def bundle_folder(folder_name):
"""Bundle files based on type""" """Bundle files based on type"""
if folder_name == "wiki": # List of folders that contain markdown files
markdown_folders = ["wiki", "dev_logs"]
if folder_name in markdown_folders:
return bundle_markdown(folder_name) return bundle_markdown(folder_name)
else: else:
data = [] data = []
folder_path = Path(folder_name) folder_path = Path(folder_name)
if folder_path.exists(): if folder_path.exists():
for yml_file in folder_path.glob("*.yml"): # Sort files to ensure consistent ordering
with open(yml_file) as f: for yml_file in sorted(folder_path.glob("*.yml")):
item = yaml.safe_load(f) try:
item["_id"] = yml_file.stem with open(yml_file, encoding='utf-8') as f:
data.append(item) item = yaml.safe_load(f)
item["_id"] = yml_file.stem
data.append(item)
except Exception as e:
print(f"Error processing {yml_file}: {e}")
return data return data
@@ -80,16 +97,21 @@ folders = [
# Bundle each folder # Bundle each folder
for folder in folders: for folder in folders:
print(f"Processing {folder}...")
data = bundle_folder(folder) data = bundle_folder(folder)
bundle_path = f"bundles/{folder}.json" bundle_path = f"bundles/{folder}.json"
# Here's where we use the encoder
with open(bundle_path, "w") as f: with open(bundle_path, "w", encoding='utf-8') as f:
json.dump(data, f, indent=2, cls=DateTimeEncoder) json.dump(data, f, indent=2, cls=DateTimeEncoder)
print(f"Created {bundle_path} with {len(data)} items")
# Create version file # Create version file
version = { version = {
"updated_at": datetime.now(timezone.utc).isoformat(), "updated_at": datetime.now(timezone.utc).isoformat(),
"folders": folders "folders": folders
} }
with open("bundles/version.json", "w") as f:
with open("bundles/version.json", "w", encoding='utf-8') as f:
json.dump(version, f, indent=2, cls=DateTimeEncoder) json.dump(version, f, indent=2, cls=DateTimeEncoder)
print("Bundle creation complete!")