- Successfully imported 1731 WordPress posts to Hugo markdown format - Migrated 204+ images from archive to static directory - Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday) - Fixed all internal links to use /legacy prefix for archived content - Remapped archive links to point to correct Hugo posts - Fixed Louisville Georgia Cemetery post rendering issue 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
134 lines
4.0 KiB
Python
134 lines
4.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Find and migrate images from old WordPress site to Hugo
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
# Directories
#
# Locations of the Hugo posts to scan/rewrite, the local copy of the old site,
# and the Hugo static tree that receives the copied images.  The defaults are
# the original absolute paths; each one can be overridden with an environment
# variable so the script can run against another checkout or machine.
POSTS_DIR = Path(os.environ.get(
    'HUGO_POSTS_DIR', '/Users/ericwagoner/Sites/blog/content/posts'))
ARCHIVE_DIR = Path(os.environ.get(
    'LEGACY_ARCHIVE_DIR', '/Users/ericwagoner/Sites/ericwagoner.com'))
STATIC_DIR = Path(os.environ.get(
    'HUGO_STATIC_DIR', '/Users/ericwagoner/Sites/blog/static'))
|
|
|
|
def extract_image_urls(posts_dir=None):
    """Collect every legacy-site image URL referenced by the posts.

    Each ``*.md`` file directly inside the posts directory is scanned for
    markdown images (``![alt](url)``) and HTML ``src="url"`` attributes whose
    URL starts with ``http://www.ericwagoner.com/`` and ends in ``.jpg``,
    ``.jpeg``, ``.gif`` or ``.png``.  Matching is case-insensitive so that
    upper-case extensions such as ``.JPG`` are picked up too.

    Args:
        posts_dir: Directory holding the markdown posts.  Defaults to the
            module-level ``POSTS_DIR``.

    Returns:
        Sorted list of the unique URLs, spelled exactly as in the posts.
    """
    if posts_dir is None:
        posts_dir = POSTS_DIR

    # The extension must follow a literal dot; without it a URL such as
    # ".../logjpg" would be treated as an image.
    extension = r'\.(?:jpe?g|gif|png)'
    # Built once here instead of once per post.
    md_image_re = re.compile(
        r'!\[.*?\]\((http://www\.ericwagoner\.com/[^)]+' + extension + r')\)',
        re.IGNORECASE,
    )
    html_image_re = re.compile(
        r'src="(http://www\.ericwagoner\.com/[^"]+' + extension + r')"',
        re.IGNORECASE,
    )

    image_urls = set()
    for post in posts_dir.glob('*.md'):
        with open(post, 'r', encoding='utf-8') as f:
            content = f.read()

        # Markdown image links
        image_urls.update(md_image_re.findall(content))
        # HTML img tags
        image_urls.update(html_image_re.findall(content))

    return sorted(image_urls)
|
|
|
|
def find_image_in_archive(url, archive_dir=None):
    """Locate the archived file that a legacy image URL refers to.

    The URL's path is resolved relative to the archive root.  If no file is
    found there and the path contains ``/weblog/``, the first ``/weblog/``
    is collapsed to ``/`` and the lookup is retried, because the URL may
    include ``weblog`` while the archive does not.

    Args:
        url: Image URL taken from a post.
        archive_dir: Root directory of the archived site.  Defaults to the
            module-level ``ARCHIVE_DIR``.

    Returns:
        ``Path`` of the matching regular file, or ``None`` if no candidate
        exists.
    """
    root = Path(ARCHIVE_DIR if archive_dir is None else archive_dir)
    url_path = urlparse(url).path

    # Direct path first, then the variant without the "weblog" segment.
    candidates = [url_path]
    if '/weblog/' in url_path:
        candidates.append(url_path.replace('/weblog/', '/', 1))

    for candidate in candidates:
        full_path = root / candidate.lstrip('/')
        # is_file() rather than exists(): a directory (for example the
        # archive root itself when the URL has an empty path) is never a
        # usable image source.
        if full_path.is_file():
            return full_path

    return None
|
|
|
|
def copy_image_to_hugo(src_path, url, static_dir=None):
    """Copy an archived image into the Hugo static tree.

    The destination mirrors the URL's path underneath
    ``<static_dir>/images/legacy/``; missing parent directories are created.

    Args:
        src_path: Path of the image file inside the archive.
        url: Original image URL; its path determines the destination.
        static_dir: Hugo static directory.  Defaults to the module-level
            ``STATIC_DIR``.

    Returns:
        The new site-relative URL (``/images/legacy/<url path>``), or
        ``None`` if the directory could not be created or the copy failed.
        Failures are reported on stdout rather than raised.
    """
    root = Path(STATIC_DIR if static_dir is None else static_dir)

    # Mirror the URL's directory structure under static/images/legacy.
    relative_path = urlparse(url).path.lstrip('/')
    dest_path = root / 'images' / 'legacy' / relative_path

    try:
        # Directory creation is inside the try so that a filesystem problem
        # with one image is reported like a copy failure instead of aborting
        # the whole migration.
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_path, dest_path)
    except OSError as e:
        # Only filesystem errors are expected here; anything else is a
        # programming error and should propagate.
        print(f"Error copying {src_path}: {e}")
        return None

    # Return the new URL path
    return f'/images/legacy/{relative_path}'
|
|
|
|
def update_posts_with_new_urls(url_mapping, posts_dir=None):
    """Rewrite old image URLs in every post to their new locations.

    Each ``*.md`` file directly inside the posts directory is read, every
    occurrence of a key of ``url_mapping`` is replaced by its value, and the
    file is written back only if its content changed.  The name of each
    rewritten post is printed.

    Args:
        url_mapping: Dict mapping old URL -> new URL.  Both are treated as
            literal text.
        posts_dir: Directory holding the markdown posts.  Defaults to the
            module-level ``POSTS_DIR``.
    """
    root = Path(POSTS_DIR if posts_dir is None else posts_dir)

    # An empty key would match at every position of every post; ignore it.
    mapping = {old: new for old, new in url_mapping.items() if old}
    if not mapping:
        return

    # One alternation of all old URLs, longest first, so a URL that is a
    # prefix of another can never shadow the longer one and the result does
    # not depend on dict order.
    pattern = re.compile(
        '|'.join(re.escape(old) for old in sorted(mapping, key=len, reverse=True))
    )

    def replacement(match):
        # A callable replacement inserts the new URL verbatim; a plain
        # replacement string would have backslashes interpreted as regex
        # escapes / group references.
        return mapping[match.group(0)]

    for post in root.glob('*.md'):
        with open(post, 'r', encoding='utf-8') as f:
            content = f.read()

        updated = pattern.sub(replacement, content)
        if updated != content:
            with open(post, 'w', encoding='utf-8') as f:
                f.write(updated)
            print(f"Updated {post.name}")
|
|
|
|
def main():
    """Run the image migration end to end, reporting progress on stdout.

    Steps: gather the image URLs from the posts, look each one up in the
    archive, copy the ones that are found into the Hugo static tree, then
    rewrite the posts to use the new URLs.

    Returns:
        Tuple ``(found_count, missing_count)``: how many images were copied
        and how many URLs had no file in the archive.
    """
    print("Extracting image URLs from posts...")
    image_urls = extract_image_urls()
    print(f"Found {len(image_urls)} unique image URLs")

    not_found = []
    url_mapping = {}
    found_count = 0

    print("\nSearching for images in archive...")
    for url in image_urls:
        src_path = find_image_in_archive(url)
        if not src_path:
            # No file in the archive for this URL.
            not_found.append(url)
            continue

        new_url = copy_image_to_hugo(src_path, url)
        if not new_url:
            # The copy did not produce a new URL; leave this one unmapped.
            continue

        url_mapping[url] = new_url
        found_count += 1
        # Progress line after every tenth successful copy.
        if found_count % 10 == 0:
            print(f"Copied {found_count} images...")

    print(f"\n✅ Successfully copied {found_count} images")

    if not_found:
        print(f"⚠️ Could not find {len(not_found)} images:")
        # Show only the first five to keep the summary short.
        for url in not_found[:5]:
            print(f" - {url}")
        if len(not_found) > 5:
            print(f" ... and {len(not_found) - 5} more")

    print("\nUpdating posts with new image URLs...")
    update_posts_with_new_urls(url_mapping)

    print(f"\n✅ Migration complete: {found_count} images migrated")

    return found_count, len(not_found)


if __name__ == "__main__":
    # Script entry point: run the migration and keep the two counts in
    # module-level names.
    found, missing = main()