diff --git a/utils/textdoc-compile/README.md b/utils/textdoc-compile/README.md new file mode 100644 index 0000000..27cadd3 --- /dev/null +++ b/utils/textdoc-compile/README.md @@ -0,0 +1,420 @@ +# Textdoc Compiler + +# NightShift Story Compiler + +Compile structured markdown fiction projects into novel-style builds. + +Generates: + +* paperback-style PDF +* assembled markdown manuscript +* HTML preview +* optional cover support +* front matter +* act divider pages +* table of contents + +Designed for AI-assisted longform fiction pipelines. + +--- + +# Features + +* Pure Python +* Windows-friendly +* Natural scene sorting: + + * `scene-003.md` + * `scene-003a.md` + * `scene-003b.md` +* Front matter support via `chapter-000` +* Act dividers parsed from `.nightshift/tasks.md` +* Multiple chapter naming styles +* Optional metadata/title pages +* Paperback or manuscript formatting +* Scene heading extraction +* TOC generation +* Clean build output folder + +--- + +# Example Project Structure + +```text +project-root/ +│ +├── compile_story.py +│ +├── .nightshift/ +│ └── tasks.md +│ +└── story/ + ├── TITLE.md + ├── metadata.json + ├── cover.png + │ + ├── chapters/ + │ ├── chapter-000/ + │ │ ├── scene-001.md + │ │ └── scene-002.md + │ │ + │ ├── chapter-001/ + │ │ ├── scene-001.md + │ │ ├── scene-002.md + │ │ └── scene-003a.md + │ │ + │ ├── chapter-002/ + │ └── chapter-003/ + │ + └── build/ +``` + +--- + +# Install + +```powershell +pip install markdown reportlab +``` + +--- + +# Quick Start + +```powershell +python compile_story.py --root . +``` + +Outputs: + +```text +story/build/ + manuscript.md + manuscript.html + manuscript.pdf +``` + +--- + +# Title Pages + +## TITLE.md + +If present: + +```text +story/TITLE.md +``` + +Its contents are inserted as the title page. + +Example: + +```md +# NightShift + +## A Novel + +KHodges42 +``` + +--- + +## metadata.json + +Optional metadata fallback if `TITLE.md` is missing. 
+ +Example: + +```json +{ + "title": "NightShift", + "subtitle": "A Novel", + "author": "KHodges42", + "language": "en" +} +``` + +--- + +# Cover Support + +Optional: + +```text +story/cover.png +``` + +Currently: + +* included in markdown/html +* copied into build folder +* ignored in ReportLab PDF for now + +Future versions can embed directly into PDF. + +--- + +# Front Matter + +`chapter-000` is treated specially. + +Example: + +```text +story/chapters/chapter-000/ +``` + +Use for: + +* foreword +* acknowledgements +* author notes +* epigraphs +* dedication + +No chapter numbering is applied. + +--- + +# Act Dividers + +Acts are parsed from: + +```text +.nightshift/tasks.md +``` + +Example: + +```md +# ACT 1 - LOW HEAT +# ACT 2 - STATIC BODIES +# ACT 3 - RECURSIVE CONTAMINATION +``` + +Each act becomes a standalone divider page. + +Only the ACT headings are parsed. + +Everything else in `tasks.md` is ignored. + +--- + +# Chapter Naming + +Default: + +```powershell +--chapter-format folder +``` + +Results in: + +```text +chapter-001 +chapter-002 +``` + +Other options: + +```powershell +--chapter-format number +``` + +```text +001 +002 +``` + +```powershell +--chapter-format word +``` + +```text +Chapter 1 +Chapter 2 +``` + +```powershell +--chapter-format chapter-dash +``` + +```text +Chapter-001 +Chapter-002 +``` + +```powershell +--chapter-format none +``` + +No chapter headings. + +--- + +# Table of Contents + +Default: + +```powershell +--toc full +``` + +Options: + +## Full + +```powershell +--toc full +``` + +Chapters + scenes. 
+ +## Chapters Only + +```powershell +--toc chapters +``` + +## Compact Acts + +```powershell +--toc acts +``` + +## Disable + +```powershell +--toc off +``` + +--- + +# PDF Styles + +## Paperback (default) + +```powershell +--pdf-style paperback +``` + +* compact trim size +* tighter margins +* novel-like formatting + +## Manuscript + +```powershell +--pdf-style manuscript +``` + +* wider margins +* larger spacing +* draft/review friendly + +--- + +# Scene Headings + +By default: + +* first `# Heading` in each scene file becomes scene title +* heading is normalized into manuscript structure + +Disable: + +```powershell +--no-scene-headings +``` + +--- + +# Example Commands + +## Default Build + +```powershell +python compile_story.py --root . +``` + +## Paperback Build + +```powershell +python compile_story.py --root . --pdf-style paperback +``` + +## Manuscript Draft + +```powershell +python compile_story.py --root . --pdf-style manuscript +``` + +## No TOC + +```powershell +python compile_story.py --root . --toc off +``` + +## Word Chapter Format + +```powershell +python compile_story.py --root . --chapter-format word +``` + +--- + +# Notes + +## Natural Sorting + +Scene files are sorted naturally. + +Example: + +```text +scene-001.md +scene-002.md +scene-003.md +scene-003a.md +scene-003b.md +scene-004.md +``` + +--- + +## EPUB + +Not currently implemented. 
+ +Can be added later using: + +* ebooklib +* pandoc +* markdown-it-py pipelines + +--- + +# Planned Features + +* EPUB export +* embedded cover art in PDF +* page numbers +* running headers +* chapter drop caps +* better typography +* custom fonts +* widow/orphan control +* scene separators +* theme presets +* print-ready trim sizes +* LaTeX backend +* AI-generated glossary/index support + + + +--- + +# License + +GPL3v2 diff --git a/utils/textdoc-compile/compile.py b/utils/textdoc-compile/compile.py new file mode 100644 index 0000000..5de56ec --- /dev/null +++ b/utils/textdoc-compile/compile.py @@ -0,0 +1,685 @@ +#!/usr/bin/env python3 +""" +Compile a NightShift-style story folder into: +- story/build/manuscript.md +- story/build/manuscript.html +- story/build/manuscript.pdf + +Expected layout: + +root/ + story/ + TITLE.md optional + metadata.json optional + cover.png optional, currently only copied/referenced + chapters/ + chapter-000/ + scene-001.md + chapter-001/ + scene-001.md + scene-002.md + .nightshift/ + tasks.md + +Install: + pip install markdown reportlab + +Example: + python compile_story.py --root . + python compile_story.py --root . --chapter-format word + python compile_story.py --root . --toc off + python compile_story.py --root . 
# Usage (last example from the module docstring):
#     python compile_story.py --root . --pdf-style manuscript

from __future__ import annotations

import argparse
import html
import json
import re
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable


# ----------------------------
# Shared constants
# ----------------------------

# "utf-8-sig" drops a leading UTF-8 byte-order mark and reads BOM-less UTF-8
# unchanged.  Without it a BOM makes json.loads() fail and hides a heading on
# the first line of a file, because str.strip() does not remove U+FEFF.
_TEXT_ENCODING = "utf-8-sig"

# Page-break marker placed between major sections of the manuscript.
_PAGE_BREAK = r"\newpage"

_DIGIT_RUN = re.compile(r"(\d+)")
_CHAPTER_DIR = re.compile(r"chapter-(\d+)", re.I)
_ACT_HEADING = re.compile(r"^#\s+(ACT\s+\d+\s+-\s+.+)$", re.I)
_TOP_HEADING = re.compile(r"^#\s+(.+)$")

# Chapter formats that need the parsed chapter number.
_NUMBERED_CHAPTER_FORMATS = {
    "number": "{:03d}",
    "word": "Chapter {}",
    "chapter-dash": "Chapter-{:03d}",
}

_TOC_STYLES = ("off", "acts", "chapters", "full")


# ----------------------------
# Models
# ----------------------------

@dataclass
class Metadata:
    """Optional book metadata read from ``story/metadata.json``."""

    title: str | None = None
    subtitle: str | None = None
    author: str | None = None
    language: str | None = None


@dataclass
class BuildOptions:
    """Settings for one build run."""

    root: Path  # project root; contains story/ and .nightshift/
    chapter_format: str  # "folder", "number", "word", "chapter-dash" or "none"
    toc: str  # "full", "chapters", "acts" or "off"
    pdf_style: str  # not read by the functions in this section
    scene_headings: bool  # emit a "###" heading for every scene
    output_name: str  # not read by the functions in this section


# ----------------------------
# Natural sorting
# ----------------------------

def natural_key(path: Path) -> list[object]:
    """Return a sort key that compares digit runs in ``path.name`` as numbers.

    The lower-cased file name is split into alternating text and digit runs
    and the digit runs are converted to ``int``.  ``scene-003a.md`` therefore
    sorts after ``scene-003.md`` and before ``scene-004.md``.
    """
    pieces = _DIGIT_RUN.split(path.name.lower())
    return [int(piece) if piece.isdigit() else piece for piece in pieces]


def chapter_number(chapter_dir: Path) -> int | None:
    """Return the number in a ``chapter-NNN`` directory name.

    Matching is case-insensitive.  Returns ``None`` when the name contains no
    ``chapter-<digits>`` part.
    """
    match = _CHAPTER_DIR.search(chapter_dir.name)
    return int(match.group(1)) if match else None


# ----------------------------
# Metadata / title / acts
# ----------------------------

def load_metadata(story_dir: Path) -> Metadata:
    """Load ``metadata.json`` from *story_dir*.

    Returns an empty :class:`Metadata` when the file does not exist.  Keys
    missing from the JSON object become ``None``.

    Raises:
        ValueError: if the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or its top level is not an object.
    """
    metadata_path = story_dir / "metadata.json"
    if not metadata_path.exists():
        return Metadata()

    data = json.loads(metadata_path.read_text(encoding=_TEXT_ENCODING))
    if not isinstance(data, dict):
        raise ValueError(
            f"{metadata_path} must contain a JSON object, "
            f"got {type(data).__name__}"
        )

    return Metadata(
        title=data.get("title"),
        subtitle=data.get("subtitle"),
        author=data.get("author"),
        language=data.get("language"),
    )


def read_title_page(story_dir: Path) -> str:
    """Return the stripped contents of ``TITLE.md``, or ``""`` if absent."""
    title_path = story_dir / "TITLE.md"
    if not title_path.exists():
        return ""
    return title_path.read_text(encoding=_TEXT_ENCODING).strip()


def parse_act_headings(tasks_path: Path) -> list[str]:
    """Collect the act headings from a tasks file, in file order.

    Only lines shaped like ``# ACT 1 - LOW HEAT`` are kept (case-insensitive,
    returned without the leading ``#``).  Every other line is ignored.
    Returns an empty list when *tasks_path* does not exist.
    """
    if not tasks_path.exists():
        return []

    acts: list[str] = []
    for line in tasks_path.read_text(encoding=_TEXT_ENCODING).splitlines():
        match = _ACT_HEADING.match(line.strip())
        if match:
            acts.append(match.group(1).strip())
    return acts


# ----------------------------
# Chapter / scene rendering
# ----------------------------

def format_chapter_heading(chapter_dir: Path, fmt: str) -> str | None:
    """Return the heading text for a chapter directory.

    Returns ``None`` for ``chapter-000`` (front matter) and for
    ``fmt == "none"``.  ``"folder"`` yields the directory name.  The numbered
    formats fall back to the directory name when no chapter number can be
    parsed from it.

    Raises:
        ValueError: if *fmt* is not a known chapter format.
    """
    num = chapter_number(chapter_dir)

    if num == 0 or fmt == "none":
        return None
    if fmt == "folder":
        return chapter_dir.name

    template = _NUMBERED_CHAPTER_FORMATS.get(fmt)
    if template is None:
        raise ValueError(f"Unknown chapter format: {fmt}")
    if num is None:
        return chapter_dir.name
    return template.format(num)


def first_heading(markdown_text: str) -> str | None:
    """Return the text of the first ``# `` heading, or ``None`` if none."""
    for line in markdown_text.splitlines():
        match = _TOP_HEADING.match(line.strip())
        if match:
            return match.group(1).strip()
    return None


def strip_top_heading(markdown_text: str) -> str:
    """Return *markdown_text* without its first ``# `` heading line.

    Later top-level headings are kept.  The result is stripped of leading and
    trailing whitespace.
    """
    kept: list[str] = []
    removed = False
    for line in markdown_text.splitlines():
        if not removed and _TOP_HEADING.match(line.strip()):
            removed = True
            continue
        kept.append(line)
    return "\n".join(kept).strip()


def _scene_fallback_title(scene_path: Path) -> str:
    """Return a title derived from the file name, e.g. ``Scene 003A``."""
    return scene_path.stem.replace("-", " ").title()


def build_scene_markdown(scene_path: Path, include_scene_heading: bool) -> str:
    """Return the markdown for one scene file.

    With *include_scene_heading* false the stripped file content is returned
    unchanged.  Otherwise the scene's first ``# `` heading is re-emitted as a
    ``###`` heading above the remaining text; a scene without such a heading
    gets a title derived from its file name.
    """
    raw = scene_path.read_text(encoding=_TEXT_ENCODING).strip()

    if not include_scene_heading:
        return raw

    heading = first_heading(raw)
    if heading:
        body = strip_top_heading(raw)
        return f"### {heading}\n\n{body}".strip()

    return f"### {_scene_fallback_title(scene_path)}\n\n{raw}".strip()


def chapter_dirs(chapters_dir: Path) -> list[Path]:
    """Return the ``chapter-*`` sub-directories in natural order."""
    return sorted(
        (
            entry
            for entry in chapters_dir.iterdir()
            if entry.is_dir() and entry.name.lower().startswith("chapter-")
        ),
        key=natural_key,
    )


def scene_files(chapter_dir: Path) -> list[Path]:
    """Return the ``.md`` files directly inside *chapter_dir*, naturally sorted."""
    return sorted(
        (
            entry
            for entry in chapter_dir.iterdir()
            if entry.is_file() and entry.suffix.lower() == ".md"
        ),
        key=natural_key,
    )


# ----------------------------
# TOC
# ----------------------------

def make_toc(chapter_map: list[tuple[Path, list[Path]]], opts: BuildOptions) -> str:
    """Build the table-of-contents markdown for ``opts.toc``.

    Returns ``""`` for ``"off"``.  ``"chapters"`` and ``"acts"`` list one
    bullet per chapter; ``"full"`` adds one nested bullet per scene.
    ``chapter-000`` is labelled "Front Matter".

    Raises:
        ValueError: if ``opts.toc`` is not a known style.
    """
    # Validate before the loop so a bad style fails even with no chapters.
    if opts.toc not in _TOC_STYLES:
        raise ValueError(f"Unknown TOC style: {opts.toc}")
    if opts.toc == "off":
        return ""

    lines = ["# Contents", ""]

    for chapter_dir, scenes in chapter_map:
        if chapter_number(chapter_dir) == 0:
            label = "Front Matter"
        else:
            label = (
                format_chapter_heading(chapter_dir, opts.chapter_format)
                or chapter_dir.name
            )

        # "acts" has no explicit act-to-chapter mapping yet, so it is treated
        # as a compact chapter-only listing, the same as "chapters".
        lines.append(f"- {label}")

        if opts.toc != "full":
            continue

        for scene in scenes:
            raw = scene.read_text(encoding=_TEXT_ENCODING)
            # Same fallback as build_scene_markdown(), so a TOC entry matches
            # the heading printed above the scene.
            heading = first_heading(raw) or _scene_fallback_title(scene)
            # Python-Markdown only nests a list item indented by four spaces.
            lines.append(f"    - {heading}")

    return "\n".join(lines).strip()


# ----------------------------
# Markdown assembly
# ----------------------------

def _title_page_markdown(title_page: str, metadata: Metadata) -> str:
    """Return the title-page markdown, or ``""`` when there is none.

    TITLE.md content wins.  Otherwise a page is built from the metadata, but
    only when it has a title or an author.
    """
    if title_page:
        return title_page
    if not (metadata.title or metadata.author):
        return ""

    bits: list[str] = []
    if metadata.title:
        bits.append(f"# {metadata.title}")
    if metadata.subtitle:
        bits.append(f"## {metadata.subtitle}")
    if metadata.author:
        bits.append(f"### {metadata.author}")
    return "\n\n".join(bits)


def assemble_markdown(opts: BuildOptions) -> str:
    """Assemble the complete manuscript markdown for the project at ``opts.root``.

    Order of the output: optional cover reference, title page, table of
    contents, then every chapter with its scenes.  Major sections are
    separated by a ``\\newpage`` marker.  The result ends with one newline.

    Raises:
        FileNotFoundError: if ``story/`` or ``story/chapters/`` is missing.
        ValueError: for an unknown chapter format or TOC style, or for an
            invalid ``metadata.json``.
    """
    story_dir = opts.root / "story"
    chapters_dir = story_dir / "chapters"
    tasks_path = opts.root / ".nightshift" / "tasks.md"

    if not story_dir.exists():
        raise FileNotFoundError(f"Missing story directory: {story_dir}")
    if not chapters_dir.exists():
        raise FileNotFoundError(f"Missing chapters directory: {chapters_dir}")

    metadata = load_metadata(story_dir)
    title_page = read_title_page(story_dir)
    acts = parse_act_headings(tasks_path)
    chapter_map = [
        (chapter, scene_files(chapter)) for chapter in chapter_dirs(chapters_dir)
    ]

    parts: list[str] = []

    # Optional cover reference for markdown/html.
    # NOTE(review): the reference string was empty in the reviewed source, so
    # the cover page came out blank.  Restored as a plain Markdown image using
    # the bare file name; this assumes the cover is copied next to the built
    # manuscript. Confirm the alt text and path against the build step.
    cover_path = story_dir / "cover.png"
    if cover_path.exists():
        parts.append(f"![Cover]({cover_path.name})")
        parts.append(_PAGE_BREAK)

    title_md = _title_page_markdown(title_page, metadata)
    if title_md:
        parts.append(title_md)
        parts.append(_PAGE_BREAK)

    toc_md = make_toc(chapter_map, opts)
    if toc_md:
        parts.append(toc_md)
        parts.append(_PAGE_BREAK)

    for chapter_dir, scenes in chapter_map:
        ch_num = chapter_number(chapter_dir)

        # Act dividers are matched by position: the first ACT heading goes
        # before chapter-001, the second before chapter-002, and so on.
        # chapter-000 (front matter) and unnumbered folders get no divider.
        if ch_num and ch_num <= len(acts):
            parts.append(f"# {acts[ch_num - 1]}")
            parts.append(_PAGE_BREAK)

        # Returns None for front matter and for the "none" format.
        chapter_heading = format_chapter_heading(chapter_dir, opts.chapter_format)
        if chapter_heading:
            parts.append(f"# {chapter_heading}")
            parts.append("")

        for scene in scenes:
            scene_md = build_scene_markdown(scene, opts.scene_headings)
            if scene_md:
                parts.append(scene_md)
                parts.append("")

        parts.append(_PAGE_BREAK)

    return "\n\n".join(parts).strip() + "\n"


# ----------------------------
# HTML
# ----------------------------
# NOTE(review): markdown_to_html(md, metadata) follows here in the original
# file.  Its HTML template string runs past the end of the reviewed excerpt,
# so it is intentionally not reproduced or modified in this block.
+ +