Import WordPress posts and migrate standalone content to Hugo
- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
188
wordpress_to_hugo.py
Normal file
188
wordpress_to_hugo.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert WordPress posts from MySQL database to Hugo markdown files
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import mysql.connector
|
||||
from datetime import datetime
|
||||
import html2text
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import unicodedata
|
||||
|
||||
def slugify(value):
    """Convert a string to a valid filename/slug.

    The text is NFKD-normalized, stripped of every character that is not a
    word character, whitespace or a hyphen, lower-cased, and each run of
    whitespace/hyphens is collapsed into a single hyphen. The result is
    capped at 100 characters.

    Args:
        value: The text to turn into a slug (for example a post title).

    Returns:
        The slug, with no leading or trailing hyphen. It is an empty string
        when ``value`` contains no usable characters.
    """
    # NFKD splits accented letters into a base letter plus a combining mark;
    # the marks are not matched by \w, so the substitution below drops them.
    value = unicodedata.normalize('NFKD', value)
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[-\s]+', '-', value)
    # Trim hyphens only after truncating: the 100-character cut can land
    # right after a separator, and the input itself may start or end with one.
    return value[:100].strip('-')
|
||||
|
||||
def clean_content(content):
    """Clean and convert WordPress HTML content to Markdown.

    Args:
        content: Raw post body as stored by WordPress (HTML, possibly with
            shortcodes). ``None`` and the empty string are accepted.

    Returns:
        The Markdown text with ``[caption]`` shortcode wrappers and HTML
        comments removed and surrounding whitespace stripped, or ``""``
        when ``content`` is empty.
    """
    if not content:
        return ""

    # Configure the HTML -> Markdown converter.
    converter = html2text.HTML2Text()
    converter.body_width = 0  # Don't wrap lines
    converter.unicode_snob = True  # Use unicode characters
    converter.images_as_html = False  # Convert images to markdown
    converter.links_each_paragraph = False

    markdown = converter.handle(content)

    # Unwrap [caption ...]...[/caption] shortcodes, keeping only the inner
    # text. DOTALL is required so that a caption whose body spans several
    # lines is still matched ('.' does not match newlines by default).
    markdown = re.sub(
        r'\[caption[^\]]*\](.*?)\[/caption\]',
        r'\1',
        markdown,
        flags=re.DOTALL,
    )
    # Drop any HTML comments that are still present in the converted text.
    markdown = re.sub(r'<!--.*?-->', '', markdown, flags=re.DOTALL)

    return markdown.strip()
|
||||
|
||||
def get_categories_and_tags(post_id, cursor):
    """Get categories and tags for a post.

    The lookup is best-effort: when the ``wp_terms`` table is missing the
    function quietly returns empty lists, and when a query fails it prints
    a warning and returns whatever was collected up to that point.

    Args:
        post_id: ID of the post whose terms should be fetched.
        cursor: An open DB-API cursor on the WordPress database.

    Returns:
        A ``(categories, tags)`` tuple of lists of term names. The
        "Uncategorized" category (compared case-insensitively) is omitted.
    """
    categories = []
    tags = []

    try:
        # Check if taxonomy tables exist
        cursor.execute("SHOW TABLES LIKE 'wp_terms'")
        if cursor.fetchone():
            query = """
                SELECT t.name, tt.taxonomy
                FROM wp_terms t
                JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
                JOIN wp_term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
                WHERE tr.object_id = %s
                AND tt.taxonomy IN ('category', 'post_tag')
            """

            cursor.execute(query, (post_id,))
            for name, taxonomy in cursor.fetchall():
                if taxonomy == 'category' and name.lower() != 'uncategorized':
                    categories.append(name)
                elif taxonomy == 'post_tag':
                    tags.append(name)
    except Exception as exc:
        # Taxonomy data is optional, so a failure must not abort the post
        # conversion; report it instead of silently dropping the terms.
        print(f"Warning: could not load categories/tags for post {post_id}: {exc}")

    return categories, tags
|
||||
|
||||
def convert_posts():
    """Convert all published WordPress posts into Hugo markdown files.

    Connects to the local ``wordpress_import`` MySQL database, reads every
    published row of ``wp_posts`` whose type is ``'post'`` or empty, and
    writes one ``YYYY-MM-DD-<slug>.md`` file per post (YAML front matter
    followed by the Markdown body) into the Hugo ``content/posts``
    directory. A failure on one post is recorded and reported but does not
    stop the run.

    Returns:
        A ``(converted_count, error_count)`` tuple.

    Raises:
        Whatever the database driver or the filesystem raises while
        connecting, creating the output directory, or fetching the post
        list; only per-post failures are caught.
    """
    from contextlib import closing

    # NOTE(review): connection settings and the output path are hard-coded
    # for the original author's machine.
    conn = mysql.connector.connect(
        host='localhost',
        user='root',
        password='',
        database='wordpress_import',
        charset='utf8mb4',
        collation='utf8mb4_unicode_ci'
    )

    converted_count = 0
    errors = []

    # closing() guarantees the cursor and then the connection are closed
    # even when the setup, the SELECT, or the fetch raises.
    with closing(conn), closing(conn.cursor()) as cursor:
        # Create output directory
        output_dir = Path('/Users/ericwagoner/Sites/blog/content/posts')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Fetch all published posts
        query = """
            SELECT ID, post_title, post_content, post_date, post_name, post_excerpt
            FROM wp_posts
            WHERE post_status = 'publish'
            AND (post_type = 'post' OR post_type = '')
            ORDER BY post_date ASC
        """

        cursor.execute(query)
        posts = cursor.fetchall()

        print(f"Found {len(posts)} posts to convert")

        for post_id, title, content, post_date, post_name, excerpt in posts:
            try:
                # Skip if no title and no content
                if not title and not content:
                    continue

                # Use post_name as slug if available, otherwise create from title
                if post_name:
                    slug = post_name[:100]
                else:
                    slug = slugify(title) if title else ""
                if not slug:
                    # slugify() returns "" for titles without any usable
                    # characters; fall back to the ID so the filename never
                    # ends in a bare "-".
                    slug = f"post-{post_id}"

                # Get categories and tags
                categories, tags = get_categories_and_tags(post_id, cursor)

                # Create filename with date
                date_str = post_date.strftime('%Y-%m-%d')
                filename = f"{date_str}-{slug}.md"
                filepath = output_dir / filename

                # Check if file already exists; add post ID to make unique.
                # NOTE(review): because of this check, re-running the script
                # against an already populated directory writes a second
                # "-<id>" copy of every post instead of overwriting.
                if filepath.exists():
                    filename = f"{date_str}-{slug}-{post_id}.md"
                    filepath = output_dir / filename

                # Convert content to markdown
                markdown_content = clean_content(content)

                # Create front matter
                front_matter = {
                    'title': title or f"Post {post_id}",
                    'date': post_date.isoformat(),
                    'draft': False,
                    'author': 'Eric Wagoner'
                }

                if excerpt:
                    front_matter['description'] = excerpt.strip()

                if categories:
                    front_matter['categories'] = categories

                if tags:
                    front_matter['tags'] = tags

                # Write the file
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write('---\n')
                    f.write(yaml.dump(front_matter, default_flow_style=False, allow_unicode=True))
                    f.write('---\n\n')
                    f.write(markdown_content)

                converted_count += 1
                if converted_count % 100 == 0:
                    print(f"Converted {converted_count} posts...")

            except Exception as e:
                # Per-post boundary: record the failure and move on to the
                # next post rather than aborting the whole import.
                error_msg = f"Error converting post {post_id} ('{title}'): {str(e)}"
                errors.append(error_msg)
                print(error_msg)

    print(f"\n✅ Successfully converted {converted_count} posts")
    if errors:
        print(f"⚠️ {len(errors)} posts had errors:")
        for error in errors[:10]:  # Show first 10 errors
            print(f" - {error}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")

    return converted_count, len(errors)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the conversion and echo the totals it returns.
    print("Starting WordPress to Hugo conversion...")
    post_total, error_total = convert_posts()
    print(f"\nConversion complete: {post_total} posts converted, {error_total} errors")
|
Reference in New Issue
Block a user