Import WordPress posts and migrate standalone content to Hugo
- Successfully imported 1731 WordPress posts to Hugo markdown format - Migrated 204+ images from archive to static directory - Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday) - Fixed all internal links to use /legacy prefix for archived content - Remapped archive links to point to correct Hugo posts - Fixed Louisville Georgia Cemetery post rendering issue 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
148
remap_archive_links.py
Normal file
148
remap_archive_links.py
Normal file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
"""
Remap old WordPress archive links to new Hugo post URLs.

Scans the Markdown posts under POSTS_DIR, builds a mapping from the old
WordPress archive URL formats to the new Hugo permalinks, and rewrites
the links inside each post in place.
"""

import re
from pathlib import Path
from datetime import datetime

# Directory holding the converted Hugo posts, one Markdown file per post,
# named "YYYY-MM-DD-slug[-wordpressid].md".
POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')
||||
def build_post_mapping(posts_dir=None):
    """Build a mapping from old WordPress archive URLs to new Hugo URLs.

    Two families of old URLs are mapped for each post:

    * ID-based permalinks, from the trailing numeric ID in the filename:
      ``/weblog/archives/000123.html`` (zero-padded) and
      ``/weblog/archives/123.html`` (raw).
    * Date-based archives, from the ``YYYY-MM-DD`` filename prefix:
      ``/weblog/archive/YYYY_MM_DD_archive.html`` for the post's own date,
      and the same format for the Sunday that starts the post's week
      (WordPress named weekly archives after the week's first day).

    Args:
        posts_dir: Directory of Hugo Markdown posts. Defaults to POSTS_DIR;
            accepts a str or Path (backward-compatible addition).

    Returns:
        dict mapping old site-relative URL -> new Hugo URL
        (``/posts/<stem>/``).
    """
    # Imported locally so this fix is self-contained.
    from datetime import timedelta

    posts_dir = POSTS_DIR if posts_dir is None else Path(posts_dir)
    mapping = {}

    # Single pass over the posts covers both URL families.
    for post in posts_dir.glob('*.md'):
        hugo_url = f'/posts/{post.stem}/'

        # WordPress post ID from the filename,
        # e.g. "2003-11-18-first-image-1234.md" -> 1234.
        id_match = re.search(r'-(\d+)\.md$', post.name)
        if id_match:
            post_id = id_match.group(1)
            # WordPress archive format: /archives/000123.html
            mapping[f'/weblog/archives/{post_id.zfill(6)}.html'] = hugo_url
            mapping[f'/weblog/archives/{post_id}.html'] = hugo_url

        # Date prefix from the filename, e.g. "2003-11-18-...".
        # NOTE: the previous version tried to parse the front-matter date but
        # truncated "2003-11-18 ..." to "2003" before fromisoformat(), which
        # always raised and was silently swallowed — so no date-based URLs
        # were ever mapped.  The filename prefix carries the same date.
        date_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', post.name)
        if not date_match:
            continue
        year, month, day = date_match.groups()

        # Daily archive format: /archive/YYYY_MM_DD_archive.html
        mapping[f'/weblog/archive/{year}_{month}_{day}_archive.html'] = hugo_url

        try:
            post_date = datetime(int(year), int(month), int(day))
        except ValueError:
            # Malformed date prefix (e.g. month 13): skip only the weekly key.
            continue

        # Weekly archives are named after the Sunday starting the week.
        # Monday is weekday() == 0, so Sunday is (weekday + 1) % 7 days back.
        week_start = post_date - timedelta(days=(post_date.weekday() + 1) % 7)
        week_key = week_start.strftime('%Y_%m_%d')
        mapping[f'/weblog/archive/{week_key}_archive.html'] = hugo_url

    return mapping
|
||||
|
||||
def update_archive_links():
    """Rewrite old WordPress archive links in every post under POSTS_DIR.

    Three passes per post, in order:

    1. Exact replacement of URLs found in the build_post_mapping() table.
    2. Regex replacement of date-based archive links (with an optional
       ``#12345`` entry anchor) by the first Hugo post from that date.
    3. Stripping the old domain from any remaining ``/weblog`` links so
       they become site-relative.

    Posts are rewritten on disk only when their content changed.

    Returns:
        (updated_posts, total_replacements) counters.
    """
    mapping = build_post_mapping()
    print(f"Built mapping for {len(mapping)} archive URLs")

    updated_posts = 0
    total_replacements = 0

    # Date-based archive links with an optional entry anchor,
    # e.g. /archive/1999_10_31_archive.html#10460.
    # Compiled once and the replacement helper defined once, instead of
    # rebuilding both on every iteration of the per-post loop below.
    archive_re = re.compile(
        r'http://www\.ericwagoner\.com/weblog/archive/'
        r'(\d{4}_\d{2}_\d{2})_archive\.html(?:#\d+)?'
    )

    def replace_archive_link(match):
        """Map one dated archive link to the first Hugo post of that date."""
        year, month, day = match.group(1).split('_')
        candidates = list(POSTS_DIR.glob(f"{year}-{month}-{day}*.md"))
        if candidates:
            return f'/posts/{candidates[0].stem}/'
        return match.group(0)  # keep the original link if no post matches

    for post in POSTS_DIR.glob('*.md'):
        with open(post, 'r', encoding='utf-8') as f:
            content = f.read()

        original_content = content
        replacements = 0

        # Pass 1: exact matches against the precomputed mapping.
        for old_url, new_url in mapping.items():
            full_old_url = f'http://www.ericwagoner.com{old_url}'
            if full_old_url in content:
                content = content.replace(full_old_url, new_url)
                replacements += 1
                print(f"  Mapped: {old_url} -> {new_url}")

        # Pass 2: date-based archive links (anchors included).
        content = archive_re.sub(replace_archive_link, content)
        if content != original_content:
            # Recount from scratch so regex-substituted links are included.
            replacements = content.count('/posts/') - original_content.count('/posts/')

        # Pass 3: make remaining old-domain weblog links site-relative.
        content = content.replace('http://www.ericwagoner.com/weblog/', '/')
        content = content.replace('http://www.ericwagoner.com/weblog', '/')

        if content != original_content:
            with open(post, 'w', encoding='utf-8') as f:
                f.write(content)
            updated_posts += 1
            total_replacements += replacements
            if replacements > 0:
                print(f"Updated {post.name}: {replacements} archive links")

    return updated_posts, total_replacements
|
||||
|
||||
def main():
    """Entry point: show sample links, remap them, then summarize."""
    print("Remapping WordPress archive links to Hugo posts...")

    # Peek at the first 100 posts to show a sample of the archive links
    # that are about to be rewritten.
    link_re = re.compile(r'http://www\.ericwagoner\.com/weblog/archive/[^)\s"]+')
    sample_links = set()
    for post in list(POSTS_DIR.glob('*.md'))[:100]:
        text = post.read_text(encoding='utf-8')
        sample_links.update(link_re.findall(text))

    if sample_links:
        print("\nSample archive links found:")
        for link in list(sample_links)[:10]:
            print(f"  {link}")

    # Rewrite the links in place.
    updated, total = update_archive_links()

    print(f"\n✅ Remapping complete!")
    print(f"  Updated {updated} posts")
    print(f"  Remapped {total} archive links")

    # Count whatever old-domain links survived the remapping.
    remaining = sum(
        post.read_text(encoding='utf-8').count('http://www.ericwagoner.com')
        for post in POSTS_DIR.glob('*.md')
    )

    print(f"\n📊 Remaining external links: {remaining}")
|
||||
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user