Import WordPress posts and migrate standalone content to Hugo
- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
133
fix_internal_archive_links.py
Normal file
133
fix_internal_archive_links.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fix internal archive links that point to /legacy/weblog/archive/ instead of Hugo posts
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Directory of Hugo post markdown files that this script scans and rewrites.
# NOTE(review): hard-coded absolute path for one machine — adjust before
# running anywhere else.
POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')
|
||||
|
||||
def find_posts_by_date(year, month, day=None, posts_dir=None):
    """Return the post files whose filename starts with the given date.

    Filenames are matched against ``YYYY-MM-DD*.md``, or ``YYYY-MM-*.md``
    when *day* is omitted (or falsy).

    Args:
        year: Year as an int; used as-is in the filename pattern.
        month: Month number as an int; zero-padded to two digits.
        day: Optional day of month as an int; zero-padded to two digits.
            ``None`` (or 0) widens the search to the whole month.
        posts_dir: Directory to search. Defaults to the module-level
            ``POSTS_DIR``.

    Returns:
        A list of ``Path`` objects sorted by path. ``glob`` yields entries in
        arbitrary order, so sorting makes "the first match" the same on
        every run.
    """
    # Resolve the default lazily so an explicit directory never needs POSTS_DIR.
    root = POSTS_DIR if posts_dir is None else Path(posts_dir)
    day_part = f"{day:02d}" if day else ""
    pattern = f"{year}-{month:02d}-{day_part}*.md"
    return sorted(root.glob(pattern))
|
||||
|
||||
# One pattern for every internal archive URL shape handled by this script:
#   /legacy/weblog/archive/YYYY_MM_DD_archive.html
#   /legacy/weblog/archives/YYYY_MM_DD_archive.html
#   /weblog/archive/YYYY_MM_DD_archive.html  (links that never got the prefix)
# A trailing numeric "#anchor" is part of the match, so it is removed together
# with the old URL instead of being left dangling on the new one.
_ARCHIVE_LINK_RE = re.compile(
    r'(?:/legacy/weblog/archives?|/weblog/archive)'
    r'/(\d{4})_(\d{2})_(\d{2})_archive\.html(?:#\d+)?'
)


def _resolve_archive_date(year, month, day, cache):
    """Return the /posts/ URL for a date's post, or None if there is none."""
    key = (year, month, day)
    if key not in cache:
        candidates = find_posts_by_date(int(year), int(month), int(day))
        # min() rather than [0]: glob order is arbitrary, so pick the
        # lexicographically smallest path to get the same target every run.
        cache[key] = f'/posts/{min(candidates).stem}/' if candidates else None
    return cache[key]


def _rewrite_archive_links(content, post_name, cache):
    """Rewrite archive links in *content*; return (new_content, fix_count)."""
    fixes = 0

    def replace(match):
        nonlocal fixes
        old_url = match.group(0)
        year, month, day = match.groups()
        new_url = _resolve_archive_date(year, month, day, cache)
        if new_url is None:
            print(f" Warning: No post found for date {year}-{month}-{day} (from {old_url} in {post_name})")
            # Leave unresolved links untouched.
            return old_url
        print(f" Fixed: {old_url} -> {new_url} in {post_name}")
        fixes += 1
        return new_url

    return _ARCHIVE_LINK_RE.sub(replace, content), fixes


def fix_internal_archive_links():
    """Point old dated archive links at the matching Hugo post.

    Every ``*.md`` file in ``POSTS_DIR`` is scanned for archive URLs of the
    form ``.../YYYY_MM_DD_archive.html`` (optionally followed by ``#digits``).
    Each one is replaced with ``/posts/<stem>/`` of a post whose filename
    starts with that date. Links with no matching post are left unchanged and
    reported once as a warning.

    Each link is rewritten exactly once in a single substitution pass, so a
    URL with an anchor is never half-rewritten by the replacement of its
    anchor-less twin, and the reported fix count matches what was changed.

    Returns:
        A ``(posts_changed, links_fixed)`` tuple: the number of files
        rewritten on disk and the number of individual links replaced.
    """
    fixed_count = 0
    total_fixes = 0
    # Filenames do not change while we run (only file contents do), so each
    # date needs to be looked up at most once.
    cache = {}

    for post in sorted(POSTS_DIR.glob('*.md')):
        with open(post, 'r', encoding='utf-8') as f:
            content = f.read()

        new_content, fixes = _rewrite_archive_links(content, post.name, cache)
        total_fixes += fixes

        # Only touch files that actually changed.
        if new_content != content:
            with open(post, 'w', encoding='utf-8') as f:
                f.write(new_content)
            fixed_count += 1

    return fixed_count, total_fixes
|
||||
|
||||
def check_remaining_legacy_links():
    """Collect every distinct /legacy/ URL still present in the posts.

    Each ``*.md`` file in ``POSTS_DIR`` is read as UTF-8 and searched for
    ``/legacy/`` followed by a run of characters that ends at the first
    ``)``, whitespace character or double quote.

    Returns:
        A set of the matched URL strings across all posts.
    """
    link_re = re.compile(r'/legacy/[^)\s"]+')
    found = set()

    for md_file in POSTS_DIR.glob('*.md'):
        text = md_file.read_text(encoding='utf-8')
        found |= set(link_re.findall(text))

    return found
|
||||
|
||||
def _print_link_sample(title, links, limit=5):
    """Print a headed sample of *links* and note how many were left out."""
    if not links:
        return
    print(f"\n {title} ({len(links)}):")
    for link in links[:limit]:
        print(f" {link}")
    if len(links) > limit:
        print(f" ... and {len(links) - limit} more")


def main():
    """Rewrite archive links, then summarize the /legacy/ links that remain.

    Remaining links are grouped for the report as archive links (containing
    ``/archive``), image links (containing a known image extension, compared
    case-insensitively so ``.JPG`` counts as an image) and everything else.
    At most five links are printed per group.
    """
    print("Fixing internal archive links...")

    # Fix the archive links
    posts_fixed, links_fixed = fix_internal_archive_links()
    print(f"\n✅ Fixed {links_fixed} archive links in {posts_fixed} posts")

    # Check what legacy links remain
    legacy_links = check_remaining_legacy_links()
    if not legacy_links:
        return

    print(f"\n📊 Remaining /legacy/ links to verify ({len(legacy_links)} unique):")

    image_exts = ('.jpg', '.jpeg', '.gif', '.png')
    archives = []
    images = []
    other = []

    # Group by type; the archive check takes precedence over the image check.
    for link in sorted(legacy_links):
        if '/archive' in link:
            archives.append(link)
        elif any(ext in link.lower() for ext in image_exts):
            images.append(link)
        else:
            other.append(link)

    _print_link_sample("Archive links", archives)
    _print_link_sample("Image links", images)
    _print_link_sample("Other links", other)
|
||||
|
||||
# Run the link fixer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user