nightshift/utils/textdoc-compile/compile.py
2026-05-23 17:34:57 -07:00

685 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Compile a NightShift-style story folder into:
- story/build/manuscript.md
- story/build/manuscript.html
- story/build/manuscript.pdf
Expected layout:
root/
story/
TITLE.md optional
metadata.json optional
cover.png optional, currently only copied/referenced
chapters/
chapter-000/
scene-001.md
chapter-001/
scene-001.md
scene-002.md
.nightshift/
tasks.md
Install:
pip install markdown reportlab
Example:
python compile_story.py --root .
python compile_story.py --root . --chapter-format word
python compile_story.py --root . --toc off
python compile_story.py --root . --pdf-style manuscript
"""
from __future__ import annotations
import argparse
import html
import json
import re
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
# ----------------------------
# Models
# ----------------------------
@dataclass
class Metadata:
title: str | None = None
subtitle: str | None = None
author: str | None = None
language: str | None = None
@dataclass
class BuildOptions:
root: Path
chapter_format: str
toc: str
pdf_style: str
scene_headings: bool
output_name: str
# ----------------------------
# Natural sorting
# ----------------------------
def natural_key(path: Path) -> list[object]:
"""
Sorts scene-003a.md after scene-003.md and before scene-004.md.
"""
text = path.name.lower()
parts = re.split(r"(\d+)", text)
return [int(p) if p.isdigit() else p for p in parts]
def chapter_number(chapter_dir: Path) -> int | None:
match = re.search(r"chapter-(\d+)", chapter_dir.name, re.I)
if not match:
return None
return int(match.group(1))
# ----------------------------
# Metadata / title / acts
# ----------------------------
def load_metadata(story_dir: Path) -> Metadata:
metadata_path = story_dir / "metadata.json"
if not metadata_path.exists():
return Metadata()
data = json.loads(metadata_path.read_text(encoding="utf-8"))
return Metadata(
title=data.get("title"),
subtitle=data.get("subtitle"),
author=data.get("author"),
language=data.get("language"),
)
def read_title_page(story_dir: Path) -> str:
title_path = story_dir / "TITLE.md"
if not title_path.exists():
return ""
return title_path.read_text(encoding="utf-8").strip()
def parse_act_headings(tasks_path: Path) -> list[str]:
"""
Reads only headings like:
# ACT 1 - LOW HEAT
# ACT 2 - WHATEVER
Ignores task entries, descriptions, acceptance criteria, etc.
"""
if not tasks_path.exists():
return []
acts: list[str] = []
for line in tasks_path.read_text(encoding="utf-8").splitlines():
line = line.strip()
match = re.match(r"^#\s+(ACT\s+\d+\s+-\s+.+)$", line, re.I)
if match:
acts.append(match.group(1).strip())
return acts
# ----------------------------
# Chapter / scene rendering
# ----------------------------
def format_chapter_heading(chapter_dir: Path, fmt: str) -> str | None:
num = chapter_number(chapter_dir)
if num == 0:
return None
if fmt == "none":
return None
if fmt == "folder":
return chapter_dir.name
if fmt == "number":
if num is None:
return chapter_dir.name
return f"{num:03d}"
if fmt == "word":
if num is None:
return chapter_dir.name
return f"Chapter {num}"
if fmt == "chapter-dash":
if num is None:
return chapter_dir.name
return f"Chapter-{num:03d}"
raise ValueError(f"Unknown chapter format: {fmt}")
def first_heading(markdown_text: str) -> str | None:
for line in markdown_text.splitlines():
match = re.match(r"^#\s+(.+)$", line.strip())
if match:
return match.group(1).strip()
return None
def strip_top_heading(markdown_text: str) -> str:
"""
Removes the first top-level heading only.
Useful if scene headings are being generated separately.
"""
lines = markdown_text.splitlines()
output: list[str] = []
removed = False
for line in lines:
if not removed and re.match(r"^#\s+.+$", line.strip()):
removed = True
continue
output.append(line)
return "\n".join(output).strip()
def build_scene_markdown(scene_path: Path, include_scene_heading: bool) -> str:
raw = scene_path.read_text(encoding="utf-8").strip()
if not include_scene_heading:
return raw
heading = first_heading(raw)
if heading:
body = strip_top_heading(raw)
return f"### {heading}\n\n{body}".strip()
fallback = scene_path.stem.replace("-", " ").title()
return f"### {fallback}\n\n{raw}".strip()
def chapter_dirs(chapters_dir: Path) -> list[Path]:
dirs = [p for p in chapters_dir.iterdir() if p.is_dir() and p.name.lower().startswith("chapter-")]
return sorted(dirs, key=natural_key)
def scene_files(chapter_dir: Path) -> list[Path]:
files = [p for p in chapter_dir.iterdir() if p.is_file() and p.suffix.lower() == ".md"]
return sorted(files, key=natural_key)
# ----------------------------
# TOC
# ----------------------------
def make_toc(chapter_map: list[tuple[Path, list[Path]]], opts: BuildOptions) -> str:
if opts.toc == "off":
return ""
lines = ["# Contents", ""]
for chapter_dir, scenes in chapter_map:
ch_num = chapter_number(chapter_dir)
if ch_num == 0:
label = "Front Matter"
else:
label = format_chapter_heading(chapter_dir, opts.chapter_format) or chapter_dir.name
if opts.toc == "acts":
# Act-only TOC is handled elsewhere poorly without explicit mapping.
# For now, treat as compact chapter-only.
lines.append(f"- {label}")
elif opts.toc == "chapters":
lines.append(f"- {label}")
elif opts.toc == "full":
lines.append(f"- {label}")
for scene in scenes:
raw = scene.read_text(encoding="utf-8")
heading = first_heading(raw) or scene.stem
lines.append(f" - {heading}")
else:
raise ValueError(f"Unknown TOC style: {opts.toc}")
return "\n".join(lines).strip()
# ----------------------------
# Markdown assembly
# ----------------------------
def assemble_markdown(opts: BuildOptions) -> str:
story_dir = opts.root / "story"
chapters_dir = story_dir / "chapters"
tasks_path = opts.root / ".nightshift" / "tasks.md"
if not story_dir.exists():
raise FileNotFoundError(f"Missing story directory: {story_dir}")
if not chapters_dir.exists():
raise FileNotFoundError(f"Missing chapters directory: {chapters_dir}")
metadata = load_metadata(story_dir)
title_page = read_title_page(story_dir)
acts = parse_act_headings(tasks_path)
all_chapters = chapter_dirs(chapters_dir)
chapter_map = [(chapter, scene_files(chapter)) for chapter in all_chapters]
parts: list[str] = []
# Optional cover reference for markdown/html.
cover_path = story_dir / "cover.png"
if cover_path.exists():
parts.append("![Cover](../cover.png)")
parts.append(r"\newpage")
# TITLE.md wins over metadata title page.
if title_page:
parts.append(title_page)
parts.append(r"\newpage")
elif metadata.title or metadata.author:
title_bits = []
if metadata.title:
title_bits.append(f"# {metadata.title}")
if metadata.subtitle:
title_bits.append(f"## {metadata.subtitle}")
if metadata.author:
title_bits.append(f"### {metadata.author}")
parts.append("\n\n".join(title_bits))
parts.append(r"\newpage")
toc_md = make_toc(chapter_map, opts)
if toc_md:
parts.append(toc_md)
parts.append(r"\newpage")
act_index = 0
for chapter_dir, scenes in chapter_map:
ch_num = chapter_number(chapter_dir)
# chapter-000 is front matter, no act divider, no chapter numbering.
is_front_matter = ch_num == 0
# Insert act divider before chapter-001, chapter-002, chapter-003, etc.
# This assumes ACT 1 maps to chapter-001, ACT 2 maps to chapter-002, etc.
if not is_front_matter and ch_num is not None:
expected_act_number = ch_num
if expected_act_number - 1 < len(acts):
act_heading = acts[expected_act_number - 1]
parts.append(f"# {act_heading}")
parts.append(r"\newpage")
chapter_heading = None if is_front_matter else format_chapter_heading(chapter_dir, opts.chapter_format)
if chapter_heading:
parts.append(f"# {chapter_heading}")
parts.append("")
for scene in scenes:
scene_md = build_scene_markdown(scene, opts.scene_headings)
if scene_md:
parts.append(scene_md)
parts.append("")
parts.append(r"\newpage")
return "\n\n".join(p for p in parts if p is not None).strip() + "\n"
# ----------------------------
# HTML
# ----------------------------
def markdown_to_html(md: str, metadata: Metadata) -> str:
try:
import markdown
except ImportError as exc:
raise RuntimeError("Missing dependency: pip install markdown") from exc
body = markdown.markdown(
md,
extensions=[
"extra",
"toc",
"sane_lists",
],
)
title = metadata.title or "Manuscript"
return f"""<!doctype html>
<html lang="{html.escape(metadata.language or "en")}">
<head>
<meta charset="utf-8">
<title>{html.escape(title)}</title>
<style>
body {{
max-width: 720px;
margin: 3rem auto;
padding: 0 1.5rem;
font-family: Georgia, "Times New Roman", serif;
font-size: 18px;
line-height: 1.65;
color: #111;
}}
h1, h2, h3 {{
font-weight: normal;
text-align: center;
margin-top: 3rem;
}}
h1 {{
font-size: 2.1rem;
page-break-before: always;
}}
h2 {{
font-size: 1.5rem;
}}
h3 {{
font-size: 1.2rem;
margin-top: 2rem;
}}
p {{
text-indent: 1.5em;
margin: 0 0 0.4rem 0;
}}
h1 + p,
h2 + p,
h3 + p {{
text-indent: 0;
}}
ul, ol {{
margin-left: 2rem;
}}
img {{
max-width: 100%;
display: block;
margin: 2rem auto;
}}
code {{
font-family: Consolas, monospace;
font-size: 0.9em;
}}
</style>
</head>
<body>
{body}
</body>
</html>
"""
# ----------------------------
# PDF via ReportLab
# ----------------------------
def write_pdf(md: str, output_path: Path, metadata: Metadata, pdf_style: str) -> None:
try:
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.lib.pagesizes import A5, LETTER
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
PageBreak,
)
except ImportError as exc:
raise RuntimeError("Missing dependency: pip install reportlab") from exc
if pdf_style == "paperback":
pagesize = A5
margins = dict(
leftMargin=0.65 * inch,
rightMargin=0.65 * inch,
topMargin=0.7 * inch,
bottomMargin=0.7 * inch,
)
body_size = 10.5
leading = 15
elif pdf_style == "manuscript":
pagesize = LETTER
margins = dict(
leftMargin=1 * inch,
rightMargin=1 * inch,
topMargin=1 * inch,
bottomMargin=1 * inch,
)
body_size = 12
leading = 24
else:
raise ValueError(f"Unknown PDF style: {pdf_style}")
doc = SimpleDocTemplate(
str(output_path),
pagesize=pagesize,
title=metadata.title or "Manuscript",
author=metadata.author or "",
**margins,
)
styles = getSampleStyleSheet()
body = ParagraphStyle(
"NovelBody",
parent=styles["BodyText"],
fontName="Times-Roman",
fontSize=body_size,
leading=leading,
firstLineIndent=18,
spaceAfter=4,
alignment=TA_LEFT,
)
h1 = ParagraphStyle(
"NovelH1",
parent=styles["Heading1"],
fontName="Times-Roman",
fontSize=18,
leading=24,
alignment=TA_CENTER,
spaceBefore=36,
spaceAfter=24,
)
h2 = ParagraphStyle(
"NovelH2",
parent=styles["Heading2"],
fontName="Times-Roman",
fontSize=15,
leading=20,
alignment=TA_CENTER,
spaceBefore=28,
spaceAfter=18,
)
h3 = ParagraphStyle(
"NovelH3",
parent=styles["Heading3"],
fontName="Times-Roman",
fontSize=13,
leading=18,
alignment=TA_CENTER,
spaceBefore=20,
spaceAfter=14,
)
story = []
paragraphs = md.splitlines()
buffer: list[str] = []
def flush_paragraph():
nonlocal buffer
text = " ".join(x.strip() for x in buffer).strip()
buffer = []
if not text:
return
safe = html.escape(text)
story.append(Paragraph(safe, body))
for line in paragraphs:
stripped = line.strip()
if stripped == r"\newpage":
flush_paragraph()
story.append(PageBreak())
continue
if not stripped:
flush_paragraph()
story.append(Spacer(1, 6))
continue
if stripped.startswith("# "):
flush_paragraph()
story.append(Paragraph(html.escape(stripped[2:].strip()), h1))
continue
if stripped.startswith("## "):
flush_paragraph()
story.append(Paragraph(html.escape(stripped[3:].strip()), h2))
continue
if stripped.startswith("### "):
flush_paragraph()
story.append(Paragraph(html.escape(stripped[4:].strip()), h3))
continue
# crude markdown list support
if re.match(r"^[-*]\s+", stripped):
flush_paragraph()
item = re.sub(r"^[-*]\s+", "", stripped)
story.append(Paragraph(html.escape(item), body))
continue
# skip images in ReportLab for now
if stripped.startswith("!["):
flush_paragraph()
continue
buffer.append(stripped)
flush_paragraph()
doc.build(story)
# ----------------------------
# Build
# ----------------------------
def build(opts: BuildOptions) -> None:
story_dir = opts.root / "story"
build_dir = story_dir / "build"
build_dir.mkdir(parents=True, exist_ok=True)
metadata = load_metadata(story_dir)
md = assemble_markdown(opts)
md_path = build_dir / f"{opts.output_name}.md"
html_path = build_dir / f"{opts.output_name}.html"
pdf_path = build_dir / f"{opts.output_name}.pdf"
md_path.write_text(md, encoding="utf-8")
html_doc = markdown_to_html(md, metadata)
html_path.write_text(html_doc, encoding="utf-8")
write_pdf(md, pdf_path, metadata, opts.pdf_style)
cover = story_dir / "cover.png"
if cover.exists():
shutil.copy2(cover, build_dir / "cover.png")
print("Built:")
print(f" {md_path}")
print(f" {html_path}")
print(f" {pdf_path}")
# ----------------------------
# CLI
# ----------------------------
def parse_args() -> BuildOptions:
parser = argparse.ArgumentParser(description="Compile story markdown into a novel build.")
parser.add_argument("--root", default=".", help="Project root containing story/ and .nightshift/")
parser.add_argument(
"--chapter-format",
default="folder",
choices=["folder", "number", "word", "chapter-dash", "none"],
help="How numbered chapters should be titled.",
)
parser.add_argument(
"--toc",
default="full",
choices=["off", "chapters", "full", "acts"],
help="Table of contents style. Default: full.",
)
parser.add_argument(
"--pdf-style",
default="paperback",
choices=["paperback", "manuscript"],
help="PDF formatting style.",
)
parser.add_argument(
"--no-scene-headings",
action="store_true",
help="Do not generate scene headings from scene markdown.",
)
parser.add_argument(
"--output-name",
default="manuscript",
help="Base output filename.",
)
args = parser.parse_args()
return BuildOptions(
root=Path(args.root).resolve(),
chapter_format=args.chapter_format,
toc=args.toc,
pdf_style=args.pdf_style,
scene_headings=not args.no_scene_headings,
output_name=args.output_name,
)
def main() -> None:
opts = parse_args()
build(opts)
if __name__ == "__main__":
main()