#!/usr/bin/env python3
"""
Quick check of external links

Scans a sample of Markdown posts for absolute ``http://www.ericwagoner.com``
links, buckets them into images, HTML pages and everything else, and prints
the counts along with a few example URLs from each bucket.
"""

import re
from collections import defaultdict
from pathlib import Path

POSTS_DIR = Path('/Users/ericwagoner/Sites/blog/content/posts')

# Just check a sample of posts first
SAMPLE_SIZE = 50

# How many example URLs to print per category.
EXAMPLES_SHOWN = 5

# A link runs until whitespace or a character that delimits it in Markdown
# or inline HTML: ")" for [text](url), quotes for attribute values, and
# angle brackets for <url> autolinks and tag boundaries.
LINK_RE = re.compile(r'http://www\.ericwagoner\.com[^\s)"\'<>]+')

# Sentence punctuation that directly follows a URL in prose is not part of it.
TRAILING_PUNCTUATION = '.,;:!?'

IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.gif', '.png')
HTML_SUFFIXES = ('.html', '.htm')


def find_links(text: str) -> set:
    """Return the set of ericwagoner.com links found in *text*.

    Trailing sentence punctuation is stripped from each match so that
    "see http://www.ericwagoner.com/a.html." yields the URL without the
    final full stop.
    """
    return {match.rstrip(TRAILING_PUNCTUATION) for match in LINK_RE.findall(text)}


def categorize_links(urls) -> tuple:
    """Split *urls* into ``(images, html_pages, other)`` lists.

    The decision is made on the URL with any ``?query`` / ``#fragment``
    removed and compared case-insensitively, so ``page.html#top`` and
    ``PHOTO.JPG`` land in the expected bucket.  URLs are processed in sorted
    order, which makes every returned list deterministic.
    """
    images, html_pages, other = [], [], []
    for url in sorted(urls):
        # Drop the fragment first, then the query, leaving only the path part.
        path = url.partition('#')[0].partition('?')[0].lower()
        if path.endswith(IMAGE_SUFFIXES):
            images.append(url)
        elif path.endswith(HTML_SUFFIXES):
            html_pages.append(url)
        else:
            other.append(url)
    return images, html_pages, other


def collect_links(posts) -> set:
    """Return every unique ericwagoner.com link found in the files *posts*."""
    all_links = set()
    for post in posts:
        with open(post, 'r', encoding='utf-8') as f:
            all_links.update(find_links(f.read()))
    return all_links


def main(posts_dir: Path = POSTS_DIR, sample_size: int = SAMPLE_SIZE) -> None:
    """Print a link summary for the first *sample_size* posts in *posts_dir*.

    Posts are taken in sorted filename order; ``Path.glob`` alone yields them
    in arbitrary order, which would make "first N" vary between runs.
    """
    sample_posts = sorted(Path(posts_dir).glob('*.md'))[:sample_size]
    all_links = collect_links(sample_posts)
    images, html_pages, other = categorize_links(all_links)

    print(f"Sample from first {sample_size} posts:")
    print(f" Total unique links: {len(all_links)}")
    print(f" Images: {len(images)}")
    print(f" HTML pages: {len(html_pages)}")
    print(f" Other: {len(other)}")

    sections = (
        ("\nSample image links:", images),
        ("\nSample HTML page links:", html_pages),
        ("\nSample other links:", other),
    )
    for heading, urls in sections:
        print(heading)
        for url in urls[:EXAMPLES_SHOWN]:
            print(f" {url}")


if __name__ == "__main__":
    main()