#!/usr/bin/env python3
"""
Fix internal archive links that point to /legacy/weblog/archive/ instead of Hugo posts
"""

import re
from pathlib import Path
from datetime import datetime

POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')

# Every archive URL shape that gets rewritten, as ONE pattern:
#   /legacy/weblog/archive/YYYY_MM_DD_archive.html
#   /legacy/weblog/archives/YYYY_MM_DD_archive.html
#   /weblog/archive/YYYY_MM_DD_archive.html   (in case some weren't updated)
# each optionally followed by a numeric "#anchor". A single alternation means
# the leftmost match wins, so the bare /weblog/archive/ form can never
# re-match inside a /legacy/weblog/archive/ link.
_ARCHIVE_LINK_RE = re.compile(
    r'(?:/legacy/weblog/archives?|/weblog/archive)'
    r'/(\d{4})_(\d{2})_(\d{2})_archive\.html(?:#\d+)?'
)

# Anything that still points below /legacy/ (stops at ")", whitespace or a quote).
_LEGACY_LINK_RE = re.compile(r'/legacy/[^)\s"]+')

_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png')


def _resolve_posts_dir(posts_dir):
    """Return *posts_dir* as a Path, falling back to the module-level POSTS_DIR."""
    # Resolved at call time so a reassigned POSTS_DIR is still honoured.
    return POSTS_DIR if posts_dir is None else Path(posts_dir)


def find_posts_by_date(year, month, day=None, posts_dir=None):
    """Find posts from a specific date.

    Args:
        year: Four-digit year as an int.
        month: Month number as an int (zero-padded to two digits in the glob).
        day: Optional day of month as an int. When falsy, every post of the
            month is returned.
        posts_dir: Directory to search; defaults to POSTS_DIR.

    Returns:
        A list of matching ``*.md`` paths, sorted by path so the "first" post
        for a date does not depend on filesystem enumeration order.
    """
    if day:
        pattern = f"{year}-{month:02d}-{day:02d}*.md"
    else:
        pattern = f"{year}-{month:02d}-*.md"
    return sorted(_resolve_posts_dir(posts_dir).glob(pattern))


def _rewrite_archive_links(content, post_name, resolve_target):
    """Rewrite every archive link in *content*; return (new_content, fix_count).

    *resolve_target(year, month, day)* returns the Path of the post to link to,
    or None when no post exists for that date (the link is then left alone and
    a warning is printed).
    """
    fixes = 0

    def swap(match):
        nonlocal fixes
        year, month, day = match.groups()
        old_url = match.group(0)
        target = resolve_target(int(year), int(month), int(day))
        if target is None:
            print(f" Warning: No post found for date {year}-{month}-{day} (from {old_url} in {post_name})")
            return old_url
        new_url = f'/posts/{target.stem}/'
        print(f" Fixed: {old_url} -> {new_url} in {post_name}")
        fixes += 1
        return new_url

    # A single substitution pass replaces exactly the matched span (including
    # any "#anchor"). The previous str.replace() approach could rewrite the
    # anchor-less prefix of an anchored link and leave a dangling "#12345".
    return _ARCHIVE_LINK_RE.sub(swap, content), fixes


def fix_internal_archive_links(posts_dir=None):
    """Fix archive links that now point to /legacy/weblog/archive/.

    Each archive link is replaced with ``/posts/<stem>/`` of the first post
    (in sorted order) published on the linked date. Files are rewritten only
    when their content actually changed.

    Args:
        posts_dir: Directory containing the ``*.md`` posts; defaults to POSTS_DIR.

    Returns:
        A ``(posts_changed, links_fixed)`` tuple.
    """
    base = _resolve_posts_dir(posts_dir)
    fixed_count = 0
    total_fixes = 0

    # Post file names do not change during a run, so each date is globbed once.
    first_post_for_date = {}

    def resolve_target(year, month, day):
        key = (year, month, day)
        if key not in first_post_for_date:
            candidates = find_posts_by_date(year, month, day, posts_dir=base)
            first_post_for_date[key] = candidates[0] if candidates else None
        return first_post_for_date[key]

    for post in sorted(base.glob('*.md')):
        original_content = post.read_text(encoding='utf-8')
        content, fixes = _rewrite_archive_links(original_content, post.name, resolve_target)
        total_fixes += fixes

        if content != original_content:
            post.write_text(content, encoding='utf-8')
            fixed_count += 1

    return fixed_count, total_fixes


def check_remaining_legacy_links(posts_dir=None):
    """Check for any remaining /legacy/ links that might be broken.

    Args:
        posts_dir: Directory containing the ``*.md`` posts; defaults to POSTS_DIR.

    Returns:
        The set of unique ``/legacy/...`` link strings found across all posts.
    """
    legacy_links = set()
    for post in _resolve_posts_dir(posts_dir).glob('*.md'):
        legacy_links.update(_LEGACY_LINK_RE.findall(post.read_text(encoding='utf-8')))
    return legacy_links


def _print_link_group(title, links, limit=5):
    """Print a titled sample (at most *limit* entries) of *links*; no-op when empty."""
    if not links:
        return
    print(f"\n {title} ({len(links)}):")
    for link in links[:limit]:
        print(f" {link}")
    if len(links) > limit:
        print(f" ... and {len(links) - limit} more")


def main():
    """Rewrite archive links in every post, then report remaining /legacy/ links."""
    print("Fixing internal archive links...")

    # Fix the archive links
    posts_fixed, links_fixed = fix_internal_archive_links()
    # The emoji are written as escapes: the literal characters had been
    # corrupted into mojibake by a wrong-codepage round trip.
    print(f"\n\u2705 Fixed {links_fixed} archive links in {posts_fixed} posts")

    # Check what legacy links remain
    legacy_links = check_remaining_legacy_links()
    if not legacy_links:
        return

    print(f"\n\U0001F4CA Remaining /legacy/ links to verify ({len(legacy_links)} unique):")

    # Group by type
    archives = []
    images = []
    other = []
    for link in sorted(legacy_links):
        if '/archive' in link:
            archives.append(link)
        elif any(ext in link for ext in _IMAGE_EXTENSIONS):
            images.append(link)
        else:
            other.append(link)

    _print_link_group("Archive links", archives)
    _print_link_group("Image links", images)
    _print_link_group("Other links", other)


if __name__ == "__main__":
    main()