- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
188 lines
5.8 KiB
Python
188 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Convert WordPress posts from MySQL database to Hugo markdown files
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import mysql.connector
|
|
from datetime import datetime
|
|
import html2text
|
|
import yaml
|
|
from pathlib import Path
|
|
import unicodedata
|
|
|
|
def slugify(value):
    """Convert a string to a valid filename/slug.

    The text is NFKD-normalized, stripped of every character that is not a
    word character, whitespace or hyphen, lower-cased, and has each run of
    whitespace/hyphens collapsed to a single hyphen. The result is capped at
    100 characters and never starts or ends with a hyphen.

    Args:
        value: The text (for example a post title) to turn into a slug.

    Returns:
        The slug. It may be an empty string when ``value`` contains no word
        characters at all, so callers should be prepared to fall back.
    """
    # NFKD splits accented letters into base letter + combining mark; the
    # marks are not word characters, so the next substitution drops them.
    value = unicodedata.normalize('NFKD', value)
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[-\s]+', '-', value)
    # Trim hyphens only after truncating: the cut at 100 characters can itself
    # land right after a separator and leave a dangling "-".
    return value[:100].strip('-')
|
|
|
|
def clean_content(content):
    """Turn a WordPress HTML post body into trimmed Markdown text.

    Falsy input (``None`` or an empty string) yields an empty string.
    Otherwise the HTML is run through ``html2text`` and two leftover
    WordPress artifacts are removed from the result: ``[caption]`` shortcode
    wrappers (their inner text is kept) and HTML comments.
    """
    if not content:
        return ""

    converter = html2text.HTML2Text()
    converter.body_width = 0  # 0 disables line wrapping
    converter.unicode_snob = True  # keep unicode characters in the output
    converter.images_as_html = False  # emit images as markdown, not <img>
    converter.links_each_paragraph = False

    text = converter.handle(content)

    # (pattern, replacement, flags) applied in order to the converted text.
    cleanup_rules = (
        # Unwrap [caption ...]...[/caption], keeping what is inside.
        (r'\[caption[^\]]*\](.*?)\[/caption\]', r'\1', 0),
        # Drop HTML comments, including ones that span several lines.
        (r'<!--.*?-->', '', re.DOTALL),
    )
    for pattern, replacement, flags in cleanup_rules:
        text = re.sub(pattern, replacement, text, flags=flags)

    return text.strip()
|
|
|
|
def get_categories_and_tags(post_id, cursor):
    """Get the category and tag names attached to a post.

    This lookup is best-effort: a post is still usable without taxonomy
    data, so any failure is reported as a warning and treated as "no
    categories, no tags" instead of being raised.

    Args:
        post_id: ID of the post; matched against
            ``wp_term_relationships.object_id``.
        cursor: An open DB-API cursor on the WordPress database. It is used
            for two statements and left open.

    Returns:
        A ``(categories, tags)`` tuple of lists of term names. Terms in the
        ``category`` taxonomy named "uncategorized" (any letter case) are
        left out. Both lists are empty when the ``wp_terms`` table is
        missing or when the lookup fails.
    """
    categories = []
    tags = []

    try:
        # Only run the join when the wp_terms table is present.
        cursor.execute("SHOW TABLES LIKE 'wp_terms'")
        if not cursor.fetchone():
            return categories, tags

        query = """
            SELECT t.name, tt.taxonomy
            FROM wp_terms t
            JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
            JOIN wp_term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
            WHERE tr.object_id = %s
            AND tt.taxonomy IN ('category', 'post_tag')
        """

        cursor.execute(query, (post_id,))
        for name, taxonomy in cursor.fetchall():
            if taxonomy == 'category' and name.lower() != 'uncategorized':
                categories.append(name)
            elif taxonomy == 'post_tag':
                tags.append(name)
    except Exception as exc:
        # Stay best-effort, but say so: a silent pass here makes posts lose
        # their taxonomy with no trace. Return fresh empty lists so a failure
        # part-way through the rows cannot leak a half-built result.
        print(f"Warning: could not read categories/tags for post {post_id}: {exc}")
        return [], []

    return categories, tags
|
|
|
|
def _choose_slug(post_id, title, post_name):
    """Return the filename slug for a post; never an empty string."""
    if post_name:
        slug = post_name[:100]
    elif title:
        slug = slugify(title)
    else:
        slug = ""
    # A title with no word characters slugifies to "", which would give a
    # file named "<date>-.md"; fall back to the post ID instead.
    return slug or f"post-{post_id}"


def _build_front_matter(post_id, title, post_date, excerpt, categories, tags):
    """Assemble the Hugo front matter mapping for one post."""
    front_matter = {
        'title': title or f"Post {post_id}",
        'date': post_date.isoformat(),
        'draft': False,
        'author': 'Eric Wagoner',
    }

    # Strip before testing so a whitespace-only excerpt does not produce an
    # empty description entry.
    description = excerpt.strip() if excerpt else ""
    if description:
        front_matter['description'] = description

    if categories:
        front_matter['categories'] = categories

    if tags:
        front_matter['tags'] = tags

    return front_matter


def convert_posts():
    """Convert every published WordPress post into a Hugo markdown file.

    Connects to the local ``wordpress_import`` MySQL database, reads all
    published rows of ``wp_posts`` whose type is ``post`` (or empty), oldest
    first, and writes one ``<YYYY-MM-DD>-<slug>.md`` file per post into the
    Hugo ``content/posts`` directory. Each file holds YAML front matter
    between ``---`` lines followed by the post body converted to Markdown.

    Posts with neither a title nor content are skipped. A failure on a single
    post is recorded and printed, and the run continues with the next post.
    When two posts in the same run map to the same filename, the later one
    gets ``-<post id>`` appended. Files left by a previous run are overwritten
    rather than duplicated, so the script can be re-run safely.

    Returns:
        A ``(converted_count, error_count)`` tuple.

    Raises:
        Whatever the database driver or filesystem raises outside the
        per-post handling (connecting, the main query, creating the output
        directory). The cursor and connection are closed in that case too.
    """
    conn = mysql.connector.connect(
        host='localhost',
        user='root',
        password='',
        database='wordpress_import',
        charset='utf8mb4',
        collation='utf8mb4_unicode_ci'
    )

    converted_count = 0
    errors = []

    # try/finally so the connection and cursor are released even when the
    # query or directory creation fails.
    try:
        cursor = conn.cursor()
        try:
            # Create output directory
            output_dir = Path('/Users/ericwagoner/Sites/blog/content/posts')
            output_dir.mkdir(parents=True, exist_ok=True)

            # Fetch all published posts
            query = """
                SELECT ID, post_title, post_content, post_date, post_name, post_excerpt
                FROM wp_posts
                WHERE post_status = 'publish'
                AND (post_type = 'post' OR post_type = '')
                ORDER BY post_date ASC
            """

            cursor.execute(query)
            posts = cursor.fetchall()

            print(f"Found {len(posts)} posts to convert")

            # Filenames written during THIS run. Checking the disk instead
            # would treat output of an earlier run as a collision and write a
            # second "-<id>" copy of every post on each re-run.
            used_filenames = set()

            for post_id, title, content, post_date, post_name, excerpt in posts:
                try:
                    # Skip if no title and no content
                    if not title and not content:
                        continue

                    slug = _choose_slug(post_id, title, post_name)

                    # Get categories and tags
                    categories, tags = get_categories_and_tags(post_id, cursor)

                    # Create filename with date
                    date_str = post_date.strftime('%Y-%m-%d')
                    filename = f"{date_str}-{slug}.md"
                    if filename in used_filenames:
                        # Add post ID to make unique
                        filename = f"{date_str}-{slug}-{post_id}.md"
                    filepath = output_dir / filename

                    # Convert content to markdown
                    markdown_content = clean_content(content)

                    front_matter = _build_front_matter(
                        post_id, title, post_date, excerpt, categories, tags
                    )

                    # Write the file
                    with open(filepath, 'w', encoding='utf-8') as f:
                        f.write('---\n')
                        f.write(yaml.dump(front_matter, default_flow_style=False, allow_unicode=True))
                        f.write('---\n\n')
                        f.write(markdown_content)

                    # Reserve the name only once the file really exists.
                    used_filenames.add(filename)

                    converted_count += 1
                    if converted_count % 100 == 0:
                        print(f"Converted {converted_count} posts...")

                except Exception as e:
                    # One bad post must not abort the whole migration.
                    error_msg = f"Error converting post {post_id} ('{title}'): {str(e)}"
                    errors.append(error_msg)
                    print(error_msg)
        finally:
            cursor.close()
    finally:
        conn.close()

    print(f"\n✅ Successfully converted {converted_count} posts")
    if errors:
        print(f"⚠️ {len(errors)} posts had errors:")
        for error in errors[:10]:  # Show first 10 errors
            print(f" - {error}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")

    return converted_count, len(errors)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce the run, convert everything, then report
    # the two totals that convert_posts() returns.
    print("Starting WordPress to Hugo conversion...")
    post_total, error_total = convert_posts()
    summary = f"\nConversion complete: {post_total} posts converted, {error_total} errors"
    print(summary)