Import WordPress posts and migrate standalone content to Hugo

- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-23 16:23:40 -04:00
parent c1b41472ac
commit eddd9d2a80
2423 changed files with 36062 additions and 3 deletions
--- a/fix_internal_archive_links.py
+++ b/fix_internal_archive_links.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""
+Fix internal archive links that point to /legacy/weblog/archive/ instead of Hugo posts
+"""
+
+import re
+from pathlib import Path
+from datetime import datetime
+
+POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')  # directory whose *.md posts are scanned and rewritten
+
+def find_posts_by_date(year, month, day=None):
+    """Return the posts in POSTS_DIR dated year-month(-day), sorted by path.
+
+    Globs ``YYYY-MM-DD*.md`` when ``day`` is truthy, otherwise ``YYYY-MM-*.md``.
+    The result is sorted because ``Path.glob`` yields entries in arbitrary
+    order, so a caller's "first matching post" could differ between machines.
+    """
+    return sorted(POSTS_DIR.glob(f"{year}-{month:02d}-" + (f"{day:02d}*.md" if day else "*.md")))
+
+def fix_internal_archive_links():
+    """Rewrite legacy day-archive links in every post to Hugo post URLs.
+
+    Handles ``/legacy/weblog/archive/``, ``/legacy/weblog/archives/`` and bare
+    ``/weblog/archive(s)/`` links named ``YYYY_MM_DD_archive.html`` (optionally
+    followed by ``#<digits>``). Each link becomes ``/posts/<stem>/`` for the
+    post from that day whose path sorts first; links with no post for their
+    date are left as they are and reported. Changed posts are rewritten in
+    place.
+
+    Returns:
+        tuple: (number of posts modified, number of links rewritten).
+    """
+    # One pattern for every variant: the optional /legacy prefix is consumed
+    # with the link, so an unresolved /legacy/... link is warned about once
+    # rather than again by a separate bare /weblog/archive/ pattern.
+    link_re = re.compile(r'(?:/legacy)?/weblog/archives?/(\d{4})_(\d{2})_(\d{2})_archive\.html(?:#\d+)?')
+    fixed_count = 0
+    total_fixes = 0
+
+    for post in POSTS_DIR.glob('*.md'):
+        content = post.read_text(encoding='utf-8')
+        fixes_in_post = 0
+
+        def remap(match):
+            """Return the Hugo URL for one link, or the link itself if unresolved."""
+            nonlocal fixes_in_post
+            year, month, day = match.groups()
+            old_url = match.group(0)
+            matching_posts = find_posts_by_date(int(year), int(month), int(day))
+            if not matching_posts:
+                print(f"  Warning: No post found for date {year}-{month}-{day} (from {old_url} in {post.name})")
+                return old_url
+            # min() keeps the choice stable; glob order is not guaranteed.
+            new_url = f'/posts/{min(matching_posts).stem}/'
+            print(f"  Fixed: {old_url} -> {new_url} in {post.name}")
+            fixes_in_post += 1
+            return new_url
+
+        # re.sub rewrites each occurrence exactly once, so an un-anchored link
+        # can no longer clobber the prefix of an anchored link to the same day.
+        new_content = link_re.sub(remap, content)
+        if new_content != content:
+            post.write_text(new_content, encoding='utf-8')
+            fixed_count += 1
+        total_fixes += fixes_in_post
+
+    return fixed_count, total_fixes
+
+def check_remaining_legacy_links():
+    """Collect every distinct /legacy/ link still present in the posts.
+
+    Scans each ``*.md`` file in POSTS_DIR; a link runs from ``/legacy/`` up to
+    the next ``)``, double quote or whitespace character.
+
+    Returns:
+        set: the unique link strings found across all posts.
+    """
+    link_re = r'/legacy/[^)\s"]+'
+    found = set()
+    for md_file in POSTS_DIR.glob('*.md'):
+        found |= set(re.findall(link_re, md_file.read_text(encoding='utf-8')))
+    return found
+
+def main():
+    """Run the archive-link fix, then summarise the /legacy/ links that remain.
+
+    Remaining links are grouped as archive links (containing ``/archive``),
+    image links (containing ``.jpg``, ``.jpeg``, ``.gif`` or ``.png``) or other.
+    For each non-empty group the total and the first five links, in sorted
+    order, are printed.
+    """
+    print("Fixing internal archive links...")
+
+    # Fix the archive links
+    posts_fixed, links_fixed = fix_internal_archive_links()
+    print(f"\n✅ Fixed {links_fixed} archive links in {posts_fixed} posts")
+
+    # Check what legacy links remain
+    legacy_links = check_remaining_legacy_links()
+    if not legacy_links:
+        return
+
+    print(f"\n📊 Remaining /legacy/ links to verify ({len(legacy_links)} unique):")
+
+    # Group by type. The tests are substring checks, so a link containing
+    # both "/archive" and an image extension is counted as an archive link.
+    archives = []
+    images = []
+    other = []
+    image_exts = ('.jpg', '.jpeg', '.gif', '.png')
+    for link in sorted(legacy_links):
+        if '/archive' in link:
+            archives.append(link)
+        elif any(ext in link for ext in image_exts):
+            images.append(link)
+        else:
+            other.append(link)
+
+    # One reporting loop replaces three copies of the same print logic; the
+    # tuple order is the order the groups are printed in.
+    sample_size = 5
+    for label, links in (('Archive', archives), ('Image', images), ('Other', other)):
+        if not links:
+            continue
+        print(f"\n  {label} links ({len(links)}):")
+        for link in links[:sample_size]:
+            print(f"    {link}")
+        if len(links) > sample_size:
+            print(f"    ... and {len(links) - sample_size} more")
+
+if __name__ == "__main__":  # run the fixer only when executed as a script
+    main()