Import WordPress posts and migrate standalone content to Hugo
- Successfully imported 1731 WordPress posts to Hugo markdown format - Migrated 204+ images from archive to static directory - Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday) - Fixed all internal links to use /legacy prefix for archived content - Remapped archive links to point to correct Hugo posts - Fixed Louisville Georgia Cemetery post rendering issue 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
148
remap_archive_links.py
Normal file
148
remap_archive_links.py
Normal file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
"""
Remap old WordPress archive links to new Hugo post URLs.

Scans the Markdown posts under POSTS_DIR, builds a mapping from the old
WordPress archive URL formats to the new Hugo permalinks, and rewrites
the links inside each post in place.
"""

import re
from pathlib import Path
from datetime import datetime

# Directory holding the converted Hugo posts, one Markdown file per post,
# named "YYYY-MM-DD-slug[-wordpressid].md".
POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')
||||
def build_post_mapping(posts_dir=None):
    """Build a mapping from old WordPress archive URLs to new Hugo URLs.

    Two families of old URLs are mapped for each post:

    * ID-based permalinks, from the trailing numeric ID in the filename:
      ``/weblog/archives/000123.html`` (zero-padded) and
      ``/weblog/archives/123.html`` (raw).
    * Date-based archives, from the ``YYYY-MM-DD`` filename prefix:
      ``/weblog/archive/YYYY_MM_DD_archive.html`` for the post's own date,
      and the same format for the Sunday that starts the post's week
      (WordPress named weekly archives after the week's first day).

    Args:
        posts_dir: Directory of Hugo Markdown posts. Defaults to POSTS_DIR;
            accepts a str or Path (backward-compatible addition).

    Returns:
        dict mapping old site-relative URL -> new Hugo URL
        (``/posts/<stem>/``).
    """
    # Imported locally so this fix is self-contained.
    from datetime import timedelta

    posts_dir = POSTS_DIR if posts_dir is None else Path(posts_dir)
    mapping = {}

    # Single pass over the posts covers both URL families.
    for post in posts_dir.glob('*.md'):
        hugo_url = f'/posts/{post.stem}/'

        # WordPress post ID from the filename,
        # e.g. "2003-11-18-first-image-1234.md" -> 1234.
        id_match = re.search(r'-(\d+)\.md$', post.name)
        if id_match:
            post_id = id_match.group(1)
            # WordPress archive format: /archives/000123.html
            mapping[f'/weblog/archives/{post_id.zfill(6)}.html'] = hugo_url
            mapping[f'/weblog/archives/{post_id}.html'] = hugo_url

        # Date prefix from the filename, e.g. "2003-11-18-...".
        # NOTE: the previous version tried to parse the front-matter date but
        # truncated "2003-11-18 ..." to "2003" before fromisoformat(), which
        # always raised and was silently swallowed — so no date-based URLs
        # were ever mapped.  The filename prefix carries the same date.
        date_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', post.name)
        if not date_match:
            continue
        year, month, day = date_match.groups()

        # Daily archive format: /archive/YYYY_MM_DD_archive.html
        mapping[f'/weblog/archive/{year}_{month}_{day}_archive.html'] = hugo_url

        try:
            post_date = datetime(int(year), int(month), int(day))
        except ValueError:
            # Malformed date prefix (e.g. month 13): skip only the weekly key.
            continue

        # Weekly archives are named after the Sunday starting the week.
        # Monday is weekday() == 0, so Sunday is (weekday + 1) % 7 days back.
        week_start = post_date - timedelta(days=(post_date.weekday() + 1) % 7)
        week_key = week_start.strftime('%Y_%m_%d')
        mapping[f'/weblog/archive/{week_key}_archive.html'] = hugo_url

    return mapping
|
||||
|
||||
def update_archive_links():
    """Rewrite old WordPress archive links in every post under POSTS_DIR.

    Three passes per post, in order:

    1. Exact replacement of URLs found in the build_post_mapping() table.
    2. Regex replacement of date-based archive links (with an optional
       ``#12345`` entry anchor) by the first Hugo post from that date.
    3. Stripping the old domain from any remaining ``/weblog`` links so
       they become site-relative.

    Posts are rewritten on disk only when their content changed.

    Returns:
        (updated_posts, total_replacements) counters.
    """
    mapping = build_post_mapping()
    print(f"Built mapping for {len(mapping)} archive URLs")

    updated_posts = 0
    total_replacements = 0

    # Date-based archive links with an optional entry anchor,
    # e.g. /archive/1999_10_31_archive.html#10460.
    # Compiled once and the replacement helper defined once, instead of
    # rebuilding both on every iteration of the per-post loop below.
    archive_re = re.compile(
        r'http://www\.ericwagoner\.com/weblog/archive/'
        r'(\d{4}_\d{2}_\d{2})_archive\.html(?:#\d+)?'
    )

    def replace_archive_link(match):
        """Map one dated archive link to the first Hugo post of that date."""
        year, month, day = match.group(1).split('_')
        candidates = list(POSTS_DIR.glob(f"{year}-{month}-{day}*.md"))
        if candidates:
            return f'/posts/{candidates[0].stem}/'
        return match.group(0)  # keep the original link if no post matches

    for post in POSTS_DIR.glob('*.md'):
        with open(post, 'r', encoding='utf-8') as f:
            content = f.read()

        original_content = content
        replacements = 0

        # Pass 1: exact matches against the precomputed mapping.
        for old_url, new_url in mapping.items():
            full_old_url = f'http://www.ericwagoner.com{old_url}'
            if full_old_url in content:
                content = content.replace(full_old_url, new_url)
                replacements += 1
                print(f"  Mapped: {old_url} -> {new_url}")

        # Pass 2: date-based archive links (anchors included).
        content = archive_re.sub(replace_archive_link, content)
        if content != original_content:
            # Recount from scratch so regex-substituted links are included.
            replacements = content.count('/posts/') - original_content.count('/posts/')

        # Pass 3: make remaining old-domain weblog links site-relative.
        content = content.replace('http://www.ericwagoner.com/weblog/', '/')
        content = content.replace('http://www.ericwagoner.com/weblog', '/')

        if content != original_content:
            with open(post, 'w', encoding='utf-8') as f:
                f.write(content)
            updated_posts += 1
            total_replacements += replacements
            if replacements > 0:
                print(f"Updated {post.name}: {replacements} archive links")

    return updated_posts, total_replacements
|
||||
|
||||
def main():
    """Entry point: show sample links, remap them, then summarize."""
    print("Remapping WordPress archive links to Hugo posts...")

    # Peek at the first 100 posts to show a sample of the archive links
    # that are about to be rewritten.
    link_re = re.compile(r'http://www\.ericwagoner\.com/weblog/archive/[^)\s"]+')
    sample_links = set()
    for post in list(POSTS_DIR.glob('*.md'))[:100]:
        text = post.read_text(encoding='utf-8')
        sample_links.update(link_re.findall(text))

    if sample_links:
        print("\nSample archive links found:")
        for link in list(sample_links)[:10]:
            print(f"  {link}")

    # Rewrite the links in place.
    updated, total = update_archive_links()

    print(f"\n✅ Remapping complete!")
    print(f"  Updated {updated} posts")
    print(f"  Remapped {total} archive links")

    # Count whatever old-domain links survived the remapping.
    remaining = sum(
        post.read_text(encoding='utf-8').count('http://www.ericwagoner.com')
        for post in POSTS_DIR.glob('*.md')
    )

    print(f"\n📊 Remaining external links: {remaining}")
|
||||
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user