Import WordPress posts and migrate standalone content to Hugo

- Successfully imported 1731 WordPress posts to Hugo markdown format
- Migrated 204+ images from archive to static directory
- Copied standalone directories (curtain, farm, gobbler, house, images, party, revcemetery, railsday, birthday)
- Fixed all internal links to use /legacy prefix for archived content
- Remapped archive links to point to correct Hugo posts
- Fixed Louisville Georgia Cemetery post rendering issue

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-23 16:23:40 -04:00
parent c1b41472ac
commit eddd9d2a80
2423 changed files with 36062 additions and 3 deletions
--- a/wordpress_to_hugo.py
+++ b/wordpress_to_hugo.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+"""
+Convert WordPress posts from MySQL database to Hugo markdown files
+"""
+
+import os
+import re
+import mysql.connector
+from datetime import datetime
+import html2text
+import yaml
+from pathlib import Path
+import unicodedata
+
+def slugify(value):
+    """Convert a string to a valid filename/slug.
+
+    The text is NFKD-normalised, every character that is not a word
+    character, whitespace or a hyphen is removed, the result is lower-cased
+    and each run of whitespace/hyphens collapses into a single hyphen.  The
+    slug is capped at 100 characters and never starts or ends with a hyphen.
+
+    Args:
+        value: Text to convert (for example a post title).
+
+    Returns:
+        The slug, or an empty string when no usable character remains.
+    """
+    value = unicodedata.normalize('NFKD', value)
+    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
+    value = re.sub(r'[-\s]+', '-', value)
+    # Trim hyphens before AND after the length cap: input such as "- title -"
+    # would otherwise yield "-title-", and the 100-character cut can land
+    # right after a separator and leave a dangling "-".
+    return value.strip('-')[:100].rstrip('-')
+
+def clean_content(content):
+    """Clean and convert WordPress HTML content to Markdown.
+
+    Args:
+        content: Raw post HTML; may be empty or ``None``.
+
+    Returns:
+        The Markdown text with leading/trailing whitespace stripped, or an
+        empty string when ``content`` is empty.
+    """
+    if not content:
+        return ""
+
+    # Configure the HTML -> Markdown converter.
+    h = html2text.HTML2Text()
+    h.body_width = 0  # Don't wrap lines
+    h.unicode_snob = True  # Use unicode characters
+    h.images_as_html = False  # Convert images to markdown
+    h.links_each_paragraph = False
+
+    # NOTE(review): WordPress content often separates paragraphs with blank
+    # lines instead of <p> tags; presumably the converter folds those into
+    # one paragraph -- confirm on a sample post before relying on the output.
+    markdown = h.handle(content)
+
+    # Unwrap [caption ...]...[/caption] shortcodes, keeping only the inner
+    # text.  DOTALL lets "." cross newlines so a caption whose body spans
+    # several lines is unwrapped as well instead of being left in place.
+    markdown = re.sub(
+        r'\[caption[^\]]*\](.*?)\[/caption\]',
+        r'\1',
+        markdown,
+        flags=re.DOTALL,
+    )
+    # Drop any HTML comments that are still present in the converted text.
+    markdown = re.sub(r'<!--.*?-->', '', markdown, flags=re.DOTALL)
+
+    return markdown.strip()
+
+def get_categories_and_tags(post_id, cursor):
+    """Get categories and tags for a post.
+
+    The lookup is best effort: when the ``wp_terms`` table is absent, or any
+    step of the lookup fails, the post is simply reported with whatever was
+    collected so far (normally two empty lists).
+
+    Args:
+        post_id: ID of the post whose terms are wanted.
+        cursor: Open database cursor; it is used for two queries and any
+            pending result set on it is consumed.
+
+    Returns:
+        A ``(categories, tags)`` tuple of lists of term names.  The
+        "uncategorized" category (compared case-insensitively) is left out.
+    """
+    categories = []
+    tags = []
+
+    try:
+        # Check if taxonomy tables exist
+        cursor.execute("SHOW TABLES LIKE 'wp_terms'")
+        if cursor.fetchone():
+            query = """
+            SELECT t.name, tt.taxonomy
+            FROM wp_terms t
+            JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
+            JOIN wp_term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
+            WHERE tr.object_id = %s
+            AND tt.taxonomy IN ('category', 'post_tag')
+            """
+
+            cursor.execute(query, (post_id,))
+            for name, taxonomy in cursor.fetchall():
+                if taxonomy == 'category' and name.lower() != 'uncategorized':
+                    categories.append(name)
+                elif taxonomy == 'post_tag':
+                    tags.append(name)
+    except Exception as exc:
+        # Deliberately non-fatal: a post without taxonomy data is still worth
+        # converting.  Report the problem instead of hiding it so missing
+        # categories/tags can be traced back to their cause.
+        print(f"Warning: could not load categories/tags for post {post_id}: {exc}")
+
+    return categories, tags
+
+def _write_post(post, cursor, output_dir):
+    """Write one ``wp_posts`` row as a Hugo markdown file.
+
+    Args:
+        post: Row tuple ``(ID, post_title, post_content, post_date,
+            post_name, post_excerpt)``.
+        cursor: Open database cursor, used to look up categories and tags.
+        output_dir: ``Path`` of the directory that receives the file.
+
+    Returns:
+        ``True`` when a file was written, ``False`` when the row was skipped
+        because it has neither a title nor content.
+    """
+    post_id, title, content, post_date, post_name, excerpt = post
+
+    # Skip if no title and no content
+    if not title and not content:
+        return False
+
+    # Use post_name as slug if available, otherwise create from title.  A
+    # title made only of punctuation slugifies to "", so fall back to the
+    # post ID rather than producing a "<date>-.md" filename.
+    if post_name:
+        slug = post_name[:100]
+    else:
+        slug = (slugify(title) if title else "") or f"post-{post_id}"
+
+    categories, tags = get_categories_and_tags(post_id, cursor)
+
+    # Create filename with date
+    date_str = post_date.strftime('%Y-%m-%d')
+    filepath = output_dir / f"{date_str}-{slug}.md"
+
+    # Never overwrite an existing file: add the post ID to make the name
+    # unique.  NOTE: because this checks the disk, re-running the conversion
+    # into an already populated directory writes "-<id>" duplicates.
+    if filepath.exists():
+        filepath = output_dir / f"{date_str}-{slug}-{post_id}.md"
+
+    markdown_content = clean_content(content)
+
+    front_matter = {
+        'title': title or f"Post {post_id}",
+        'date': post_date.isoformat(),
+        'draft': False,
+        'author': 'Eric Wagoner'
+    }
+    # Optional keys are only emitted when they carry a value.
+    if excerpt:
+        front_matter['description'] = excerpt.strip()
+    if categories:
+        front_matter['categories'] = categories
+    if tags:
+        front_matter['tags'] = tags
+
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write('---\n')
+        f.write(yaml.dump(front_matter, default_flow_style=False, allow_unicode=True))
+        f.write('---\n\n')
+        f.write(markdown_content)
+
+    return True
+
+
+def convert_posts(output_dir='/Users/ericwagoner/Sites/blog/content/posts',
+                  db_config=None):
+    """Main conversion function.
+
+    Reads every published post from the WordPress database and writes each
+    one as ``<YYYY-MM-DD>-<slug>.md`` with YAML front matter.  A failure on
+    one post is recorded and printed, and the run continues with the next.
+
+    Args:
+        output_dir: Directory that receives the markdown files; created if
+            it does not exist.  Defaults to the original hard-coded path.
+        db_config: Optional dict of keyword arguments for
+            ``mysql.connector.connect`` that override the built-in defaults
+            (local ``wordpress_import`` database, user ``root``, no password).
+
+    Returns:
+        A ``(converted_count, error_count)`` tuple.
+    """
+    connect_args = {
+        'host': 'localhost',
+        'user': 'root',
+        'password': '',
+        'database': 'wordpress_import',
+        'charset': 'utf8mb4',
+        'collation': 'utf8mb4_unicode_ci',
+    }
+    if db_config:
+        connect_args.update(db_config)
+
+    converted_count = 0
+    errors = []
+
+    conn = mysql.connector.connect(**connect_args)
+    try:
+        cursor = conn.cursor()
+        try:
+            # Create output directory
+            output_dir = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Fetch all published posts
+            query = """
+    SELECT ID, post_title, post_content, post_date, post_name, post_excerpt
+    FROM wp_posts
+    WHERE post_status = 'publish'
+    AND (post_type = 'post' OR post_type = '')
+    ORDER BY post_date ASC
+    """
+
+            cursor.execute(query)
+            posts = cursor.fetchall()
+
+            print(f"Found {len(posts)} posts to convert")
+
+            for post in posts:
+                post_id, title = post[0], post[1]
+                try:
+                    if not _write_post(post, cursor, output_dir):
+                        continue
+
+                    converted_count += 1
+                    if converted_count % 100 == 0:
+                        print(f"Converted {converted_count} posts...")
+
+                except Exception as e:
+                    # Per-post boundary: log the failure and keep going so
+                    # one bad row cannot abort the whole migration.
+                    error_msg = f"Error converting post {post_id} ('{title}'): {str(e)}"
+                    errors.append(error_msg)
+                    print(error_msg)
+        finally:
+            cursor.close()
+    finally:
+        # Release the connection even when the query or directory setup
+        # raises, not only on the success path.
+        conn.close()
+
+    print(f"\n✅ Successfully converted {converted_count} posts")
+    if errors:
+        print(f"⚠️  {len(errors)} posts had errors:")
+        for error in errors[:10]:  # Show first 10 errors
+            print(f"  - {error}")
+        if len(errors) > 10:
+            print(f"  ... and {len(errors) - 10} more")
+
+    return converted_count, len(errors)
+
+if __name__ == "__main__":
+    # Script entry point: run the migration, then echo the totals that
+    # convert_posts() handed back.
+    print("Starting WordPress to Hugo conversion...")
+    totals = convert_posts()
+    converted = totals[0]
+    errors = totals[1]
+    print(f"\nConversion complete: {converted} posts converted, {errors} errors")