#!/usr/bin/env python3
"""
Convert WordPress posts from MySQL database to Hugo markdown files

Each published post is written as ``<date>-<slug>.md`` with YAML front
matter (title, date, draft, author, and optionally description,
categories and tags) followed by the post body converted to Markdown.
"""

import os
import re
import unicodedata
from datetime import datetime
from pathlib import Path

import html2text
import mysql.connector
import yaml

# Longest slug (in characters) used when building a filename.
MAX_SLUG_LENGTH = 100

# Defaults used by convert_posts() when no overrides are passed.
DEFAULT_OUTPUT_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')
DEFAULT_DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': '',
    'database': 'wordpress_import',
    'charset': 'utf8mb4',
    'collation': 'utf8mb4_unicode_ci',
}

# [caption ...]...[/caption] shortcodes; DOTALL so a caption whose body
# spans several lines is still unwrapped.
_CAPTION_RE = re.compile(r'\[caption[^\]]*\](.*?)\[/caption\]', re.DOTALL)

# NOTE(review): the pattern of the second clean-up substitution was empty in
# the file as received (a no-op). It was used with re.DOTALL, so it is
# presumably an HTML-comment stripper (e.g. WordPress "<!--more-->" markers)
# whose pattern was lost -- confirm against the original script.
_HTML_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)

_POSTS_QUERY = """
    SELECT ID, post_title, post_content, post_date, post_name, post_excerpt
    FROM wp_posts
    WHERE post_status = 'publish'
      AND (post_type = 'post' OR post_type = '')
    ORDER BY post_date ASC
"""

_TAXONOMY_QUERY = """
    SELECT t.name, tt.taxonomy
    FROM wp_terms t
    JOIN wp_term_taxonomy tt ON t.term_id = tt.term_id
    JOIN wp_term_relationships tr
      ON tt.term_taxonomy_id = tr.term_taxonomy_id
    WHERE tr.object_id = %s
      AND tt.taxonomy IN ('category', 'post_tag')
"""


def slugify(value):
    """Convert a string to a valid filename/slug.

    The text is NFKD-normalised, every character that is not a word
    character, whitespace or a hyphen is dropped, the result is lower-cased,
    and runs of whitespace/hyphens collapse to a single hyphen. The slug is
    limited to MAX_SLUG_LENGTH characters and never starts or ends with a
    hyphen. It may be empty (e.g. for a punctuation-only title).
    """
    value = unicodedata.normalize('NFKD', value)
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    value = re.sub(r'[-\s]+', '-', value).strip('-')
    # Truncation can leave a hyphen at the cut point; trim it again.
    return value[:MAX_SLUG_LENGTH].rstrip('-')


def clean_content(content):
    """Clean and convert WordPress HTML content to Markdown.

    Returns an empty string for empty/None content; otherwise the html2text
    conversion of *content* with caption shortcodes unwrapped, HTML comments
    removed and surrounding whitespace stripped.
    """
    if not content:
        return ""

    converter = html2text.HTML2Text()
    converter.body_width = 0  # Don't wrap lines
    converter.unicode_snob = True  # Use unicode characters
    converter.images_as_html = False  # Convert images to markdown
    converter.links_each_paragraph = False

    markdown = converter.handle(content)

    # Clean up common WordPress artifacts
    markdown = _CAPTION_RE.sub(r'\1', markdown)
    markdown = _HTML_COMMENT_RE.sub('', markdown)

    return markdown.strip()


def get_categories_and_tags(post_id, cursor):
    """Get categories and tags for a post.

    Args:
        post_id: ID of the post (matched against wp_term_relationships.object_id).
        cursor: open database cursor used to run the lookup.

    Returns:
        A ``(categories, tags)`` tuple of name lists. The 'uncategorized'
        category (any casing) is left out. Both lists are empty when the
        lookup fails, e.g. because the taxonomy tables do not exist.
    """
    categories = []
    tags = []

    try:
        cursor.execute(_TAXONOMY_QUERY, (post_id,))
        rows = cursor.fetchall()
    except mysql.connector.Error:
        # Best effort: taxonomy data is optional, so a missing table or a
        # failed query just means the post gets no categories/tags. Relying
        # on the error also avoids a per-post "SHOW TABLES" round trip.
        return categories, tags

    for name, taxonomy in rows:
        if not name:
            continue
        if taxonomy == 'category':
            if name.lower() != 'uncategorized':
                categories.append(name)
        elif taxonomy == 'post_tag':
            tags.append(name)

    return categories, tags


def _choose_slug(post_id, title, post_name):
    """Pick the filename slug: post_name, else slugified title, else post-<id>."""
    if post_name:
        slug = post_name[:MAX_SLUG_LENGTH]
    elif title:
        slug = slugify(title)
    else:
        slug = ''
    # A title made only of punctuation slugifies to '', which would produce
    # a file named "<date>-.md"; fall back to the post ID instead.
    return slug or f"post-{post_id}"


def _build_front_matter(post_id, title, post_date, excerpt, categories, tags):
    """Build the front matter mapping for one post."""
    front_matter = {
        'title': title or f"Post {post_id}",
        'date': post_date.isoformat(),
        'draft': False,
        'author': 'Eric Wagoner',
    }
    if excerpt:
        front_matter['description'] = excerpt.strip()
    if categories:
        front_matter['categories'] = categories
    if tags:
        front_matter['tags'] = tags
    return front_matter


def convert_posts(output_dir=None, db_config=None):
    """Main conversion function.

    Args:
        output_dir: directory the markdown files are written to; defaults to
            DEFAULT_OUTPUT_DIR. Created if it does not exist.
        db_config: keyword arguments passed to ``mysql.connector.connect``;
            defaults to DEFAULT_DB_CONFIG.

    Returns:
        A ``(converted_count, error_count)`` tuple.

    A failure while converting a single post is printed, recorded and the
    run continues with the next post. Errors connecting to the database or
    fetching the post list propagate to the caller.
    """
    target_dir = DEFAULT_OUTPUT_DIR if output_dir is None else Path(output_dir)
    connect_args = dict(DEFAULT_DB_CONFIG if db_config is None else db_config)

    converted_count = 0
    errors = []

    # Database connection
    conn = mysql.connector.connect(**connect_args)
    try:
        cursor = conn.cursor()
        try:
            # Create output directory
            target_dir.mkdir(parents=True, exist_ok=True)

            # Fetch all published posts
            cursor.execute(_POSTS_QUERY)
            posts = cursor.fetchall()

            print(f"Found {len(posts)} posts to convert")

            for post_id, title, content, post_date, post_name, excerpt in posts:
                try:
                    # Skip if no title and no content
                    if not title and not content:
                        continue

                    slug = _choose_slug(post_id, title, post_name)
                    categories, tags = get_categories_and_tags(post_id, cursor)

                    # Create filename with date
                    date_str = post_date.strftime('%Y-%m-%d')
                    filepath = target_dir / f"{date_str}-{slug}.md"

                    # NOTE(review): collisions are detected on disk, so
                    # re-running into a populated directory writes
                    # "-<post_id>" copies instead of overwriting.
                    if filepath.exists():
                        # Add post ID to make unique
                        filepath = target_dir / f"{date_str}-{slug}-{post_id}.md"

                    markdown_content = clean_content(content)
                    front_matter = _build_front_matter(
                        post_id, title, post_date, excerpt, categories, tags
                    )

                    # Write the file
                    with open(filepath, 'w', encoding='utf-8') as f:
                        f.write('---\n')
                        f.write(yaml.dump(front_matter,
                                          default_flow_style=False,
                                          allow_unicode=True))
                        f.write('---\n\n')
                        f.write(markdown_content)

                    converted_count += 1

                    if converted_count % 100 == 0:
                        print(f"Converted {converted_count} posts...")

                except Exception as e:
                    # Per-post boundary: report the failure and keep going so
                    # one bad row does not abort the whole import.
                    error_msg = f"Error converting post {post_id} ('{title}'): {e}"
                    errors.append(error_msg)
                    print(error_msg)
        finally:
            cursor.close()
    finally:
        conn.close()

    print(f"\n✅ Successfully converted {converted_count} posts")
    if errors:
        print(f"⚠️ {len(errors)} posts had errors:")
        for error in errors[:10]:  # Show first 10 errors
            print(f" - {error}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")

    return converted_count, len(errors)


if __name__ == "__main__":
    print("Starting WordPress to Hugo conversion...")
    converted, errors = convert_posts()
    print(f"\nConversion complete: {converted} posts converted, {errors} errors")