And we're back

After setting up my Bluesky account to use a custom @johnpaulett handle (learn how to set up your own domain handle), a friend pointed out my website was dead! Apparently, sometime in the past 15 years (🤯) of my not touching it, my custom Django blog app, written in Django v1.3, got disconnected from its Heroku Postgres.

Figured I should simplify the setup: there is no need to have a database and app server (that costs $10/mo) to serve a blog that probably gets a few pageviews a month; a statically generated site will work.

The Process:

All in a productive Sunday!

blog-convert.py:

#!/usr/bin/env python3
import csv
import os
import subprocess
from datetime import datetime
import json
from collections import defaultdict
from zoneinfo import ZoneInfo


def convert_rst_to_md(content):
    """Convert reStructuredText to Markdown using pandoc.

    The RST text is piped to pandoc on stdin as UTF-8, so no scratch file is
    written to the working directory and nothing is left behind when pandoc
    is missing or fails.

    Args:
        content: reStructuredText source to convert.

    Returns:
        The Markdown emitted by pandoc with surrounding whitespace stripped.
        If ``content`` is empty, pandoc cannot be started, or pandoc exits
        with a non-zero status, ``content`` is returned unchanged.
    """
    if not content:
        # Nothing to convert. This also keeps None away from subprocess.run,
        # where input=None would leave pandoc reading the parent's stdin.
        return content

    try:
        result = subprocess.run(
            ["pandoc", "-f", "rst", "-t", "markdown"],
            input=content,
            capture_output=True,
            text=True,
            encoding="utf-8",
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Best effort: keep the original markup rather than abort the run.
        print(f"Error during conversion: {e}")
        return content

    if result.returncode != 0:
        print(f"Error converting RST to Markdown: {result.stderr}")
        return content

    return result.stdout.strip()


def format_tags(tags_str):
    """Split a raw tag string into a list of unique tag names.

    Args:
        tags_str: Tags separated by commas, or by whitespace when the string
            contains no comma. May be empty or None.

    Returns:
        A list of non-empty, whitespace-stripped tags with duplicates
        removed, in the order they first appear in ``tags_str``.
    """
    if not tags_str:
        return []

    # A comma anywhere means comma-separated (tags may then contain spaces);
    # otherwise split(None) splits on runs of whitespace.
    separator = "," if "," in tags_str else None
    pieces = (piece.strip() for piece in tags_str.split(separator))

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # generated frontmatter is identical from one run to the next (a set
    # would order the tags arbitrarily).
    return list(dict.fromkeys(piece for piece in pieces if piece))


def format_date(date_str):
    """Convert a ``YYYY-MM-DD HH:MM:SS<offset>`` string to ISO 8601.

    Args:
        date_str: Timestamp with a UTC offset written as ``+HH``, ``+HHMM``
            or ``+HH:MM`` (either sign).

    Returns:
        The timestamp as ``YYYY-MM-DDTHH:MM:SS+HH:MM``. If ``date_str``
        cannot be parsed, an error is printed and ``date_str`` is returned
        unchanged.
    """
    normalized = date_str.strip()
    # strptime's %z needs minutes as well as hours, so pad an hour-only
    # offset such as "+00" or "-05" to "+0000" / "-0500".
    if len(normalized) >= 3 and normalized[-3] in "+-" and normalized[-2:].isdigit():
        normalized += "00"

    try:
        dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S%z")
    except ValueError as e:
        print(f"Error parsing date {date_str}: {e}")
        return date_str

    # isoformat() writes the offset with a colon for every timezone, not
    # just UTC.
    return dt.isoformat(timespec="seconds")


def create_frontmatter(row):
    """Create Hugo frontmatter from CSV row.

    Args:
        row: Mapping with the keys ``title``, ``pub_date``, ``public``,
            ``slug`` and ``tags``. ``pub_date`` is a
            ``YYYY-MM-DD HH:MM:SS<offset>`` string.

    Returns:
        A YAML frontmatter block (including both ``---`` delimiter lines)
        with title, date, draft flag, slug, tags and one legacy-URL alias
        of the form ``/YYYY/MM/DD/<slug>/`` dated in US Central time.

    Raises:
        ValueError: If ``row["pub_date"]`` cannot be parsed.
    """
    # Normalise into a local that is always bound, whatever the offset.
    # strptime's %z needs minutes, so pad an hour-only offset ("+00", "-05").
    pub_date = row["pub_date"].strip()
    if len(pub_date) >= 3 and pub_date[-3] in "+-" and pub_date[-2:].isdigit():
        pub_date += "00"
    dt = datetime.strptime(pub_date, "%Y-%m-%d %H:%M:%S%z")

    # dt is timezone-aware, so convert it directly. Overwriting tzinfo with
    # UTC first would shift any timestamp that carries a non-UTC offset.
    central_dt = dt.astimezone(ZoneInfo("America/Chicago"))

    # Create legacy URL alias using Central time
    legacy_url = (
        f"/{central_dt.year}/{central_dt.month:02d}/{central_dt.day:02d}/{row['slug']}/"
    )

    frontmatter = {
        "title": row["title"],
        "date": format_date(row["pub_date"]),
        "draft": row["public"] != "TRUE",
        "slug": row["slug"],
        "tags": format_tags(row["tags"]),
        "aliases": [legacy_url],
    }

    # Render as YAML. json.dumps yields double-quoted scalars, which are
    # valid YAML; quoting list items too stops a tag such as "true", "2010"
    # or "a: b" from being read as a boolean, number or mapping.
    lines = ["---"]
    for key, value in frontmatter.items():
        if isinstance(value, list):
            lines.append(f"{key}:")
            lines.extend(f"  - {json.dumps(item)}" for item in value)
        else:
            lines.append(f"{key}: {json.dumps(value)}")
    lines.append("---")

    return "\n".join(lines)


def create_year_index(year):
    """Build the text of a year directory's ``_index.md`` file.

    Args:
        year: Year to show in the title and body; it is interpolated as-is.

    Returns:
        Frontmatter with a quoted ``title`` and the ``yearly-archive`` type,
        followed by a blank line and a one-line body, ending in a newline.
    """
    index_lines = [
        "---",
        f'title: "{year}"',
        'type: "yearly-archive"',
        "---",
        "",
        f"Posts from {year}",
    ]
    # The file ends with a single trailing newline.
    return "\n".join(index_lines) + "\n"


def get_post_year(pub_date):
    """Extract the year from a publication date.

    Args:
        pub_date: Timestamp as ``YYYY-MM-DD HH:MM:SS<offset>``, where the
            offset is ``+HH``, ``+HHMM`` or ``+HH:MM`` (either sign).

    Returns:
        The four-digit year as a string, taken from the timestamp as
        written (no timezone conversion). If ``pub_date`` cannot be parsed,
        an error is printed and ``"unknown"`` is returned.
    """
    normalized = pub_date.strip()
    # strptime's %z needs minutes as well as hours, so pad an hour-only
    # offset such as "+00" or "-05" to "+0000" / "-0500".
    if len(normalized) >= 3 and normalized[-3] in "+-" and normalized[-2:].isdigit():
        normalized += "00"

    try:
        dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S%z")
    except ValueError as e:
        print(f"Error parsing date {pub_date}: {e}")
        return "unknown"

    return str(dt.year)


def create_directory(path):
    """Ensure ``path`` exists as a directory, creating missing parents.

    A directory that already exists is left as it is.
    """
    os.makedirs(name=path, exist_ok=True)


def convert_csv_to_hugo(csv_path, published_dir, unpublished_dir):
    """Convert CSV entries to Hugo markdown files.

    Each row becomes one ``<slug>.md`` file made of generated frontmatter
    plus the row's ``description`` converted from RST to Markdown. Rows
    whose ``public`` column is ``"TRUE"`` are written to
    ``published_dir/<year>/``; all other rows are written directly into
    ``unpublished_dir``. After all rows are processed, an ``_index.md`` is
    written into every year directory that received a post.

    Args:
        csv_path: Path of the UTF-8 CSV export. It must have the columns
            ``title``, ``pub_date``, ``public``, ``slug`` and ``tags``;
            ``description`` is optional.
        published_dir: Root directory for published posts.
        unpublished_dir: Directory for draft posts.
    """
    # Create main directories
    create_directory(published_dir)
    create_directory(unpublished_dir)

    # Keep track of years for index files
    years_used = set()

    # newline="" leaves newline handling to the csv module, which it needs
    # in order to read quoted fields containing line breaks correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            is_draft = row["public"] != "TRUE"
            year = get_post_year(row["pub_date"])

            # Published posts are grouped by year; drafts share one flat
            # directory.
            if is_draft:
                target_dir = unpublished_dir
            else:
                target_dir = os.path.join(published_dir, year)
                create_directory(target_dir)
                years_used.add(year)

            # Name the file after the slug, falling back to the title.
            stem = row["slug"] or row["title"].lower().replace(" ", "-")
            # A path separator in the name (e.g. a title like "TCP/IP")
            # would point into a directory that does not exist.
            for separator in ("/", os.sep):
                stem = stem.replace(separator, "-")
            output_path = os.path.join(target_dir, f"{stem}.md")

            frontmatter = create_frontmatter(row)
            content = convert_rst_to_md(row.get("description", ""))

            with open(output_path, "w", encoding="utf-8") as f:
                f.write(f"{frontmatter}\n\n{content}")

            print(f"Created {output_path}")

    # Create _index.md files for each year
    for year in sorted(years_used):
        index_path = os.path.join(published_dir, year, "_index.md")
        with open(index_path, "w", encoding="utf-8") as f:
            f.write(create_year_index(year))
        print(f"Created year index {index_path}")


if __name__ == "__main__":
    # Input export and output locations, relative to the working directory
    CSV_PATH = "legacy/johnpaulettcom_blog_entry.csv"
    PUBLISHED_DIR = "content/posts"
    UNPUBLISHED_DIR = "legacy/unpublished"

    # Run the conversion, then report that it finished
    convert_csv_to_hugo(
        csv_path=CSV_PATH,
        published_dir=PUBLISHED_DIR,
        unpublished_dir=UNPUBLISHED_DIR,
    )
    print("Conversion completed!")