Commit 9a2555ad authored by Nick Mathewson 🥔
Browse files

start grinding

parents
Loading
Loading
Loading
Loading

grinder.py

0 → 100755
+182 −0
Original line number Original line Diff line number Diff line
#!/usr/bin/env python3
#
# Goal: Read tor specs, output mdbook.
#

import re
from collections import Counter
from enum import Enum

# Master switch for the debug() helper below.
DEBUG = True


def debug(m):
    """Print *m* wrapped in double braces, but only while DEBUG is truthy."""
    if not DEBUG:
        return
    print("{{", m, "}}")


class FileCfg:
    """Per-file settings used to turn the lines of a text spec into a Spec.

    Attributes:
        base_indent: number of leading spaces carried by ordinary body text,
            or None to have parse() infer it from the input.
    """

    def __init__(self, base_indent=None):
        self.base_indent = base_indent

    def parse(self, lines):
        """Split *lines* into paragraphs, classify each, and return a Spec.

        Args:
            lines: iterable of text lines (a list, a generator or an open
                file all work).

        Returns:
            A Spec holding the classified paragraphs in input order.
        """
        # The input is walked twice (indent statistics, then paragraph
        # splitting), so a one-shot iterator must be materialized first.
        lines = list(lines)

        if self.base_indent is None:
            # Use the same blank-line test as paragraphs() so that empty
            # strings are not counted as indent-0 text.
            indents = [
                indent_of_line(line) for line in lines if line.strip() != ""
            ]
            # With no non-blank lines there is nothing to infer; leave
            # base_indent as None instead of failing inside mode().
            if indents:
                self.base_indent = mode(indents)

        spec = Spec(self)
        for graf in paragraphs(lines):
            spec.push_graf(self.classify_paragraph(graf))
        return spec

    def classify_paragraph(self, para):
        """Wrap the list of lines *para* in the matching Item subclass.

        * one line matching HEADING_PATTERN          -> Heading
        * several lines, all indented by base_indent -> Body
        * any other multi-line paragraph             -> Verbatim
        * any other single line                      -> Ambiguous
        """
        if len(para) == 1 and (m := HEADING_PATTERN.match(para[0])):
            return Heading(m.group(1), m.group(2))
        if len(para) > 1:
            if uniform_indent(para) == self.base_indent:
                return Body(para)
            return Verbatim(para)
        assert len(para) == 1
        return Ambiguous(para)

class Spec:
    """Ordered list of classified paragraphs, plus the config that made them."""

    def __init__(self, cfg):
        self.grafs = []
        self.cfg = cfg

    def push_graf(self, graf):
        """Store *graf*, unless the most recent paragraph absorbs it.

        The last stored paragraph gets the first chance to swallow the
        newcomer through its merge_from(); only when it declines (or when
        nothing has been stored yet) is *graf* appended.
        """
        absorbed = bool(self.grafs) and self.grafs[-1].merge_from(graf)
        if not absorbed:
            self.grafs.append(graf)

    def dump_md(self, out):
        """Write each paragraph to *out*, following each with a newline."""
        for item in self.grafs:
            item.dump_md(out, self.cfg)
            out.write("\n")

def paragraphs(f):
    """Yield each run of consecutive non-blank lines in *f* as a list.

    A line counts as blank when nothing is left after strip(). Blank lines
    only separate runs; they are never part of a yielded list, and several
    blank lines in a row do not produce empty lists.
    """
    current = []
    for raw in f:
        if raw.strip() != "":
            current.append(raw)
            continue
        # Blank line: close the paragraph in progress, if there is one.
        if current:
            yield current
            current = []
    # Input may end without a trailing blank line.
    if current:
        yield current


def indent_of_line(line):
    """Return how many space characters *line* starts with.

    Only the space character counts; a leading tab gives 0.
    """
    for position, char in enumerate(line):
        if char != " ":
            return position
    # Empty, or made up of spaces only.
    return len(line)

def uniform_indent(para):
    """Return the indentation shared by all lines of *para*.

    Returns None when the lines disagree, and also when *para* is empty.
    """
    widths = {indent_of_line(line) for line in para}
    if len(widths) != 1:
        return None
    (only_width,) = widths
    return only_width

def mode(lst):
    """Return the most frequent item of the iterable *lst*.

    When several items share the highest count, the largest of them is
    returned (the same tie-break the earlier sort-based version used).

    Returns:
        The most common item, or None when *lst* is empty.
    """
    counts = Counter(lst)
    if not counts:
        # Nothing to count; previously this raised IndexError.
        return None
    # Rank by (count, item) so ties on count fall back to the item itself.
    best_item, _ = max(counts.items(), key=lambda pair: (pair[1], pair[0]))
    return best_item

class Item:
    """Base class for one classified paragraph of a spec.

    Subclasses supply kind() and may override merge_from().
    """

    def __init__(self):
        pass

    def get_text(self):
        """Return self.body (only set by subclasses that store one)."""
        return self.body

    def kind(self):
        """Return a one-letter tag naming the paragraph type.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception meant for abstract methods.
        raise NotImplementedError

    def merge_from(self, next_item):
        """Try to absorb *next_item*; return True if it was absorbed.

        The base implementation never merges.
        """
        return False

class Heading(Item):
    """A numbered section heading such as "1.2. Title"."""

    def __init__(self, number, title):
        super().__init__()
        # Drop trailing dots so "1.2." and "1.2" are stored the same way.
        trimmed = number.rstrip(".")
        self.number = trimmed
        # Each remaining dot adds one nesting level: "1" -> 1, "1.2" -> 2.
        self.depth = trimmed.count(".") + 1
        self.title = title

    def kind(self):
        """Return the tag for headings."""
        return "H"

    def dump_md(self, out, cfg):
        """Write the heading to *out* with one '#' per nesting level."""
        marks = "#" * self.depth
        print(f"{marks} {self.number} -- {self.title}\n", file=out)

class Body(Item):
    """A paragraph of ordinary text, held as a list of lines."""

    def __init__(self, body):
        super().__init__()
        self.body = body

    def kind(self):
        """Return the tag for body paragraphs."""
        return "B"

    def dump_md(self, out, cfg):
        """Write every line to *out* with its leading spaces removed."""
        for text_line in self.body:
            flush_left = text_line.lstrip(" ")
            out.write(flush_left)

class Verbatim(Item):
    """A paragraph emitted unchanged inside a fenced code block."""

    def __init__(self, body):
        super().__init__()
        self.body = body

    def kind(self):
        """Return the tag for verbatim paragraphs."""
        return "V"

    def dump_md(self, out, cfg):
        """Write the lines to *out* between a pair of ``` fences."""
        out.write("```\n")
        for line in self.body:
            out.write(line)
        # A last line without a newline (input that does not end in one)
        # would otherwise put the closing fence on the same line as text.
        if self.body and not self.body[-1].endswith("\n"):
            out.write("\n")
        out.write("```\n")

    def merge_from(self, next_item):
        """Absorb a following Verbatim ("V") or Ambiguous ("A") item.

        The absorbed lines are appended after a blank separator line.

        Returns:
            True if *next_item* was absorbed, False otherwise.
        """
        # Compare against whole tags; `in "VA"` was a substring test and
        # would also have accepted "" or "VA".
        if next_item.kind() not in ("V", "A"):
            return False
        self.body.append("\n")
        self.body.extend(next_item.body)
        return True

class Ambiguous(Item):
    """A paragraph whose type is undecided, held as a list of lines."""

    def __init__(self, body):
        super().__init__()
        self.body = body

    def kind(self):
        """Return the tag for ambiguous paragraphs."""
        return "A"

    def dump_md(self, out, cfg):
        """Write every line to *out* with all leading whitespace removed."""
        for text_line in self.body:
            trimmed = text_line.lstrip()
            out.write(trimmed)



# Recognizes a section-heading line.
#
#   group 1: the section number -- one or more runs of uppercase letters or
#            digits, each followed by a dot, optionally followed by a final
#            run with no dot (e.g. "1.", "1.2", "A.1.").
#   group 2: the rest of the line after the separating whitespace.
#
# re.VERBOSE lets the pattern be laid out with whitespace that is ignored.
# re.MULTILINE lets "^" match at the start of every line, though a caller
# using .match() only ever tries the start of the string.
HEADING_PATTERN = re.compile(
   r'''
    ^(
         (?:[A-Z0-9]+\.)+
         (?:[A-Z0-9]+)?
      )\s+(.*)''',
    re.MULTILINE|re.VERBOSE
)


# Path of the spec file that is converted when this script runs.
TORSPEC = "/home/nickm/src/torspec/tor-spec.txt"

import sys

fc = FileCfg()

# Read through a context manager so the file handle is closed as soon as the
# lines are loaded, instead of being left for the garbage collector.
# NOTE(review): the explicit encoding assumes the spec is UTF-8 (or plain
# ASCII) -- confirm; it stops decoding from depending on the locale.
with open(TORSPEC, encoding="utf-8") as spec_file:
    spec = fc.parse(spec_file.readlines())

# The converted document goes to standard output.
spec.dump_md(sys.stdout)