And we're back
After setting up my Bluesky account to use a custom @johnpaulett handle (learn how to set up your own domain handle), a friend pointed out my website was dead! Apparently sometime in the past 15 years (🤯) without me touching it, my custom Django blog app, written in Django v1.3, got disconnected from its Heroku Postgres database.
Figured I should simplify the setup: there is no need for a database and app server (which costs $10/mo) to serve a blog that probably gets a few pageviews a month, so a statically generated site will work.
The Process:
- Grabbed the `blog_entry` table as CSV from Heroku
- Went to Claude AI to create a Python script to convert the CSV with reStructuredText to Markdown (see `blog-convert.py` below). After a few successive prompts, had it working well! I had some timezone errors in my original blog code in URL permalinks.
- Grabbed images from archive.org (my S3 account had long been disabled)
- Minor cleanup of content
- Tweaks to the hyde theme.
- GitHub Action to deploy as a GitHub Pages static site
All in a productive Sunday!
`blog-convert.py`:
#!/usr/bin/env python3
import csv
import os
import subprocess
from datetime import datetime
import json
from collections import defaultdict
from zoneinfo import ZoneInfo
def convert_rst_to_md(content):
    """Convert reStructuredText to Markdown using pandoc.

    The text is piped to pandoc over stdin, so no scratch file is written to
    the working directory and nothing can be left behind when pandoc is
    missing or fails.

    Args:
        content: reStructuredText source to convert.

    Returns:
        The converted Markdown with surrounding whitespace stripped. The
        input is returned unchanged (best effort) when it is empty, when
        pandoc cannot be run, or when pandoc exits with a non-zero status.
    """
    if not content:
        # Nothing to convert. This also avoids calling pandoc with
        # input=None, which would make it read the inherited stdin instead.
        return content

    try:
        result = subprocess.run(
            ["pandoc", "-f", "rst", "-t", "markdown"],
            input=content,
            capture_output=True,
            # Exchange UTF-8 with pandoc regardless of the local locale.
            encoding="utf-8",
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError covers a missing pandoc binary; ValueError covers
        # encoding/decoding failures.
        print(f"Error during conversion: {e}")
        return content

    if result.returncode != 0:
        print(f"Error converting RST to Markdown: {result.stderr}")
        return content
    return result.stdout.strip()
def format_tags(tags_str):
    """Split a raw tags string into a list of unique tags.

    Args:
        tags_str: Tags separated by commas or, when the string contains no
            comma, by whitespace. May be empty or None.

    Returns:
        A list of the non-empty, whitespace-stripped tags with duplicates
        removed, in the order they first appear. Empty or missing input
        yields an empty list.
    """
    if not tags_str:
        return []

    # A comma anywhere makes commas the delimiter; otherwise split on
    # whitespace (str.split(None)).
    separator = "," if "," in tags_str else None
    candidates = (piece.strip() for piece in tags_str.split(separator))

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is the same on every run; a set's iteration order for strings
    # varies with hash randomization.
    return list(dict.fromkeys(tag for tag in candidates if tag))
def format_date(date_str):
    """Convert a "YYYY-MM-DD HH:MM:SS<offset>" timestamp to ISO 8601.

    Args:
        date_str: Timestamp with a UTC offset written as "+HH", "+HHMM" or
            "+HH:MM" (either sign).

    Returns:
        The timestamp as "YYYY-MM-DDTHH:MM:SS+HH:MM". If the input cannot be
        parsed, an error is printed and the input is returned unchanged.
    """
    normalized = date_str
    # strptime's %z needs at least +-HHMM, so pad an hour-only offset such as
    # "+00". Only a trailing sign-plus-two-digits is touched, which leaves
    # offsets that are already complete ("+0000", "+00:00") alone.
    if len(date_str) >= 3 and date_str[-3] in "+-" and date_str[-2:].isdigit():
        normalized = date_str + "00"

    try:
        dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S%z")
    except ValueError as e:
        print(f"Error parsing date {date_str}: {e}")
        return date_str

    # isoformat() writes the offset with a colon for every zone, not just UTC.
    return dt.isoformat()
def create_frontmatter(row):
    """Create Hugo front matter from a CSV row.

    Args:
        row: Mapping with the keys "title", "pub_date", "public", "slug" and
            "tags". "pub_date" is a "YYYY-MM-DD HH:MM:SS<offset>" timestamp.

    Returns:
        The front matter as a string: YAML-style "key: value" lines between
        two "---" delimiter lines. Scalars and list items are JSON-encoded,
        which is valid YAML and keeps special characters safe.

    Raises:
        ValueError: If "pub_date" cannot be parsed.
    """
    pub_date = row["pub_date"]
    # strptime's %z needs at least +-HHMM, so pad an hour-only offset such as
    # "+00". Offsets that are already complete are left untouched.
    if len(pub_date) >= 3 and pub_date[-3] in "+-" and pub_date[-2:].isdigit():
        pub_date += "00"
    dt = datetime.strptime(pub_date, "%Y-%m-%d %H:%M:%S%z")

    # The legacy permalinks were built from the US/Central calendar date, so
    # convert the aware timestamp (whatever its offset) before reading
    # year/month/day.
    central_dt = dt.astimezone(ZoneInfo("America/Chicago"))
    legacy_url = (
        f"/{central_dt.year}/{central_dt.month:02d}/{central_dt.day:02d}/{row['slug']}/"
    )

    frontmatter = {
        "title": row["title"],
        "date": format_date(row["pub_date"]),
        "draft": row["public"] != "TRUE",
        "slug": row["slug"],
        "tags": format_tags(row["tags"]),
        "aliases": [legacy_url],
    }

    # Convert frontmatter to a YAML-style string.
    yaml = ["---"]
    for key, value in frontmatter.items():
        if isinstance(value, list):
            if not value:
                # A bare "key:" would be read as null rather than an empty list.
                yaml.append(f"{key}: []")
                continue
            yaml.append(f"{key}:")
            # Quote items so values like "yes", "2010" or "a: b" stay strings.
            yaml.extend(f" - {json.dumps(item)}" for item in value)
        else:
            yaml.append(f"{key}: {json.dumps(value)}")
    yaml.append("---")
    return "\n".join(yaml)
def create_year_index(year):
    """Return the text of the _index.md page for one year's directory.

    The page is a small front matter block (title and the
    "yearly-archive" type) followed by a one-line body.
    """
    lines = [
        "---",
        f'title: "{year}"',
        'type: "yearly-archive"',
        "---",
        f"Posts from {year}",
        "",  # makes the joined text end with a newline
    ]
    return "\n".join(lines)
def get_post_year(pub_date):
    """Extract the year from a publication timestamp.

    Args:
        pub_date: "YYYY-MM-DD HH:MM:SS<offset>" timestamp whose UTC offset is
            written as "+HH", "+HHMM" or "+HH:MM" (either sign).

    Returns:
        The year as a string, taken from the timestamp as written (no
        timezone conversion). If the input cannot be parsed, an error is
        printed and "unknown" is returned.
    """
    normalized = pub_date
    # strptime's %z needs at least +-HHMM, so pad an hour-only offset such as
    # "+00". Only a trailing sign-plus-two-digits is touched, which leaves
    # offsets that are already complete ("+0000", "+00:00") alone.
    if len(pub_date) >= 3 and pub_date[-3] in "+-" and pub_date[-2:].isdigit():
        normalized = pub_date + "00"

    try:
        dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S%z")
    except ValueError as e:
        print(f"Error parsing date {pub_date}: {e}")
        return "unknown"
    return str(dt.year)
def create_directory(path):
    """Ensure that the directory `path` exists.

    Missing parent directories are created as well; a directory that is
    already present is left as it is.
    """
    os.makedirs(name=path, exist_ok=True)
def convert_csv_to_hugo(csv_path, published_dir, unpublished_dir):
    """Convert CSV entries to Hugo markdown files.

    Each row becomes one "<slug>.md" file holding the generated front matter
    followed by the row's "description" converted from reStructuredText to
    Markdown. Rows whose "public" column is "TRUE" are written to
    "<published_dir>/<year>/"; all other rows are treated as drafts and
    written directly into `unpublished_dir`. Every year directory that
    received a post also gets an "_index.md".

    Args:
        csv_path: Path of the UTF-8 CSV export. It must have a header row
            with the columns "title", "pub_date", "public", "slug" and
            "tags"; "description" is optional.
        published_dir: Output directory for published posts.
        unpublished_dir: Output directory for drafts.
    """
    # Create main directories
    create_directory(published_dir)
    create_directory(unpublished_dir)

    # Keep track of years for index files
    years_used = set()

    # newline="" hands newline handling to the csv module, which it needs in
    # order to read quoted fields containing line breaks correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            is_draft = row["public"] != "TRUE"
            year = get_post_year(row["pub_date"])

            if is_draft:
                # Drafts are kept flat, without per-year directories.
                target_dir = unpublished_dir
            else:
                target_dir = os.path.join(published_dir, year)
                create_directory(target_dir)
                years_used.add(year)

            # Create filename from slug or fall back to a sanitized title
            stem = row["slug"] if row["slug"] else row["title"].lower().replace(" ", "-")
            output_path = os.path.join(target_dir, f"{stem}.md")

            frontmatter = create_frontmatter(row)
            # A missing or short row yields None here; treat it as an empty
            # body so the literal text "None" is never written to the post.
            content = convert_rst_to_md(row.get("description") or "")

            with open(output_path, "w", encoding="utf-8") as f:
                f.write(f"{frontmatter}\n\n{content}")
            print(f"Created {output_path}")

    # Create _index.md files for each year
    for year in sorted(years_used):
        index_path = os.path.join(published_dir, year, "_index.md")
        with open(index_path, "w", encoding="utf-8") as f:
            f.write(create_year_index(year))
        print(f"Created year index {index_path}")
if __name__ == "__main__":
    # Where the exported blog table lives and where the results are written.
    CSV_PATH = "legacy/johnpaulettcom_blog_entry.csv"
    PUBLISHED_DIR = "content/posts"
    UNPUBLISHED_DIR = "legacy/unpublished"

    # Run the whole CSV -> Hugo markdown conversion, then report success.
    convert_csv_to_hugo(
        csv_path=CSV_PATH,
        published_dir=PUBLISHED_DIR,
        unpublished_dir=UNPUBLISHED_DIR,
    )
    print("Conversion completed!")