Files
kestrelsnest-blog/wordpress_to_hugo.py
Eric Wagoner eddd9d2a80 Import WordPress posts and migrate standalone content to Hugo
- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-23 16:23:40 -04:00

188 lines
5.8 KiB
Python

#!/usr/bin/env python3
"""
Convert WordPress posts from MySQL database to Hugo markdown files
"""
import os
import re
import mysql.connector
from datetime import datetime
import html2text
import yaml
from pathlib import Path
import unicodedata
def slugify(value):
    """Convert a string to a lowercase, hyphen-separated filename/slug.

    The text is NFKD-normalized, which splits accented letters into a base
    letter plus a combining mark; the character filter then drops the mark
    (so "Café" becomes "cafe"). Anything that is not a word character,
    whitespace or a hyphen is removed, each run of whitespace/hyphens
    collapses to a single "-", and the result is capped at 100 characters.

    Args:
        value: The text to convert (e.g. a post title).

    Returns:
        The slug, with no leading or trailing hyphens. It is an empty string
        when ``value`` contains no usable characters, so callers that need a
        non-empty name must supply their own fallback.
    """
    value = unicodedata.normalize('NFKD', value)
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[-\s]+', '-', value)
    # Trim separators left at the edges (e.g. from "- Title -"), and trim
    # again after truncating so the 100-char cut cannot end on a "-".
    return value.strip('-')[:100].rstrip('-')
def clean_content(content):
    """Convert WordPress HTML post content to Markdown.

    Args:
        content: The raw ``post_content`` HTML. May be ``None`` or empty.

    Returns:
        The Markdown text with surrounding whitespace stripped, or ``""``
        when there is no content to convert.
    """
    if not content:
        return ""

    # Configure the HTML -> Markdown converter
    converter = html2text.HTML2Text()
    converter.body_width = 0  # Don't wrap lines
    converter.unicode_snob = True  # Use unicode characters
    converter.images_as_html = False  # Convert images to markdown
    converter.links_each_paragraph = False

    markdown = converter.handle(content)

    # Unwrap WordPress [caption ...]...[/caption] shortcodes, keeping only
    # what they enclose. DOTALL is required so that a caption whose body
    # spans more than one line is unwrapped too.
    markdown = re.sub(
        r'\[caption[^\]]*\](.*?)\[/caption\]', r'\1', markdown, flags=re.DOTALL
    )
    # Drop any HTML comments that are still present in the converted text
    markdown = re.sub(r'<!--.*?-->', '', markdown, flags=re.DOTALL)

    return markdown.strip()
def get_categories_and_tags(post_id, cursor):
    """Look up the category and tag names attached to a post.

    The lookup is best-effort: taxonomy data is optional for the import, so
    any failure is reported as a warning and the names collected so far
    (normally none) are returned instead of raising.

    Args:
        post_id: ID of the post whose terms are wanted.
        cursor: Database cursor used to run the lookup queries.

    Returns:
        A ``(categories, tags)`` tuple of two lists of term names. The
        "uncategorized" category (compared case-insensitively) is left out.
    """
    categories = []
    tags = []
    try:
        # Only query the taxonomy tables if wp_terms is present
        cursor.execute("SHOW TABLES LIKE 'wp_terms'")
        if cursor.fetchone():
            query = """
                SELECT t.name, tt.taxonomy
                FROM wp_terms t
                JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
                JOIN wp_term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
                WHERE tr.object_id = %s
                AND tt.taxonomy IN ('category', 'post_tag')
            """
            cursor.execute(query, (post_id,))
            for name, taxonomy in cursor.fetchall():
                if taxonomy == 'category' and name.lower() != 'uncategorized':
                    categories.append(name)
                elif taxonomy == 'post_tag':
                    tags.append(name)
    except Exception as e:
        # Keep converting the post without its terms, but report the reason
        # instead of discarding the error silently.
        print(f"Warning: could not load categories/tags for post {post_id}: {e}")
    return categories, tags
def _post_slug(post_id, title, post_name):
    """Choose the filename slug: post_name, else slugified title, else post-<id>."""
    if post_name:
        return post_name[:100]
    # slugify() returns "" for a title made only of punctuation/symbols, so
    # the ID-based fallback must cover that case as well as a missing title.
    return (slugify(title) if title else "") or f"post-{post_id}"


def _post_front_matter(post_id, title, post_date, excerpt, categories, tags):
    """Build the Hugo front matter mapping for one post."""
    front_matter = {
        'title': title or f"Post {post_id}",
        'date': post_date.isoformat(),
        'draft': False,
        'author': 'Eric Wagoner',
    }
    # Optional keys are omitted rather than written out as empty values;
    # this includes an excerpt that is nothing but whitespace.
    description = excerpt.strip() if excerpt else ""
    if description:
        front_matter['description'] = description
    if categories:
        front_matter['categories'] = categories
    if tags:
        front_matter['tags'] = tags
    return front_matter


def _write_post(post, cursor, output_dir):
    """Write one post row as a Markdown file; return False if it was skipped."""
    post_id, title, content, post_date, post_name, excerpt = post

    # Skip if no title and no content
    if not title and not content:
        return False

    slug = _post_slug(post_id, title, post_name)
    categories, tags = get_categories_and_tags(post_id, cursor)

    # Create filename with date
    date_str = post_date.strftime('%Y-%m-%d')
    filepath = output_dir / f"{date_str}-{slug}.md"
    if filepath.exists():
        # Another file already has this date and slug: add the post ID so
        # both are kept.
        # NOTE(review): this also fires for files left by an earlier run, so
        # re-running into a populated directory creates "-<id>" copies.
        filepath = output_dir / f"{date_str}-{slug}-{post_id}.md"

    front_matter = _post_front_matter(
        post_id, title, post_date, excerpt, categories, tags
    )
    markdown_content = clean_content(content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write('---\n')
        f.write(yaml.dump(front_matter, default_flow_style=False, allow_unicode=True))
        f.write('---\n\n')
        f.write(markdown_content)
    return True


def convert_posts():
    """Export every published WordPress post to a Hugo Markdown file.

    Connects to the local ``wordpress_import`` MySQL database, reads the
    published posts in ascending ``post_date`` order and writes one
    ``YYYY-MM-DD-<slug>.md`` file per post (YAML front matter followed by
    the Markdown body) into the output directory, creating that directory
    if needed. A post that fails to convert is recorded and reported but
    does not stop the run. The cursor and connection are closed even if
    the run aborts.

    Returns:
        A ``(converted_count, error_count)`` tuple.
    """
    # Database connection
    conn = mysql.connector.connect(
        host='localhost',
        user='root',
        password='',
        database='wordpress_import',
        charset='utf8mb4',
        collation='utf8mb4_unicode_ci'
    )
    converted_count = 0
    errors = []
    try:
        cursor = conn.cursor()
        try:
            # Create output directory
            output_dir = Path('/Users/ericwagoner/Sites/blog/content/posts')
            output_dir.mkdir(parents=True, exist_ok=True)

            # Fetch all published posts
            query = """
                SELECT ID, post_title, post_content, post_date, post_name, post_excerpt
                FROM wp_posts
                WHERE post_status = 'publish'
                AND (post_type = 'post' OR post_type = '')
                ORDER BY post_date ASC
            """
            cursor.execute(query)
            # All rows are fetched up front, so the same cursor is free to
            # be reused for the per-post taxonomy lookups.
            posts = cursor.fetchall()
            print(f"Found {len(posts)} posts to convert")

            for post in posts:
                post_id, title = post[0], post[1]
                try:
                    if _write_post(post, cursor, output_dir):
                        converted_count += 1
                        if converted_count % 100 == 0:
                            print(f"Converted {converted_count} posts...")
                except Exception as e:
                    # Per-post boundary: record the failure and carry on
                    # with the remaining posts.
                    error_msg = f"Error converting post {post_id} ('{title}'): {str(e)}"
                    errors.append(error_msg)
                    print(error_msg)
        finally:
            cursor.close()
    finally:
        conn.close()

    print(f"\n✅ Successfully converted {converted_count} posts")
    if errors:
        print(f"⚠️ {len(errors)} posts had errors:")
        for error in errors[:10]:  # Show first 10 errors
            print(f" - {error}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")
    return converted_count, len(errors)
if __name__ == "__main__":
    # Script entry point: run the whole import, then echo the totals that
    # convert_posts() reports back.
    print("Starting WordPress to Hugo conversion...")
    post_total, error_total = convert_posts()
    print(f"\nConversion complete: {post_total} posts converted, {error_total} errors")