#!/usr/bin/env python3
#
# Goal: Read tor specs, output mdbook.
#
"""Convert a plain-text tor specification into Markdown.

The input is split into blank-line-separated paragraphs.  Each paragraph
is classified as a numbered heading, ordinary body text, a verbatim
(preformatted) block, or an ambiguous one-liner, and is then written out
as Markdown.
"""

from collections import Counter
from enum import Enum
import re
import sys
from typing import Iterable, Iterator, List, Optional, TextIO

DEBUG = True

# Default input used when no path is given on the command line.
TORSPEC = "/home/nickm/src/torspec/tor-spec.txt"

# A heading is a dotted section number ("1.", "1.2", "A.1.") at the very
# start of the line, followed by whitespace and the title text.
HEADING_PATTERN = re.compile(
    r'''
    ^(
        (?:[A-Z0-9]+\.)+
        (?:[A-Z0-9]+)?
    )\s+(.*)''',
    re.MULTILINE | re.VERBOSE,
)


def debug(m):
    """Print the debugging message `m` when DEBUG is enabled."""
    if DEBUG:
        print("{{", m, "}}")


def paragraphs(f: Iterable[str]) -> Iterator[List[str]]:
    """Yield each run of consecutive non-blank lines in `f` as a list.

    Blank (whitespace-only) lines separate paragraphs and are never
    included in the yielded lists.
    """
    lines: List[str] = []
    for line in f:
        if line.strip() == "":
            if lines:
                yield lines
                lines = []
        else:
            lines.append(line)
    if lines:
        yield lines


def indent_of_line(line: str) -> int:
    """Return the number of leading space characters on `line`."""
    return len(line) - len(line.lstrip(" "))


def uniform_indent(para: Iterable[str]) -> Optional[int]:
    """Return the indentation shared by every line of `para`.

    Returns None when the lines do not all have the same indentation
    (or when `para` is empty).
    """
    indents = {indent_of_line(line) for line in para}
    if len(indents) == 1:
        return next(iter(indents))
    return None


def mode(lst):
    """Return the most frequent item of the iterable `lst`.

    Ties are broken in favour of the largest item.  Raises IndexError if
    `lst` is empty.
    """
    counts = Counter(lst)
    if not counts:
        raise IndexError("mode() of an empty sequence")
    # Compare by (count, item) so that ties go to the largest item.
    item, _count = max(counts.items(), key=lambda kv: (kv[1], kv[0]))
    return item


class Item:
    """Base class for one classified paragraph of a spec."""

    def __init__(self):
        pass

    def get_text(self):
        """Return the lines of this item (subclasses that store `body`)."""
        return self.body

    def kind(self) -> str:
        """Return a one-letter code identifying the paragraph type."""
        raise NotImplementedError()

    def dump_md(self, out: TextIO, cfg) -> None:
        """Write this item to `out` as Markdown."""
        raise NotImplementedError()

    def merge_from(self, next_item: "Item") -> bool:
        """Try to absorb `next_item` into this item.

        Returns True if `next_item` was merged (and so must not be stored
        separately), False otherwise.  The default is never to merge.
        """
        return False


class Heading(Item):
    """A numbered section heading such as "1.2. Title"."""

    def __init__(self, number: str, title: str):
        super().__init__()
        self.number = number.rstrip(".")
        # "1" -> depth 1, "1.2" -> depth 2, and so on.
        self.depth = self.number.count(".") + 1
        self.title = title

    def dump_md(self, out: TextIO, cfg) -> None:
        hdr = "#" * self.depth
        print(f"{hdr} {self.number} -- {self.title}\n", file=out)

    def kind(self) -> str:
        return "H"


class Body(Item):
    """A multi-line paragraph of ordinary text at the base indentation."""

    def __init__(self, body: List[str]):
        super().__init__()
        self.body = body

    def dump_md(self, out: TextIO, cfg) -> None:
        for line in self.body:
            out.write(line.lstrip(" "))

    def kind(self) -> str:
        return "B"


class Verbatim(Item):
    """A preformatted block, emitted inside a Markdown code fence."""

    def __init__(self, body: List[str]):
        super().__init__()
        self.body = body

    def kind(self) -> str:
        return "V"

    def dump_md(self, out: TextIO, cfg) -> None:
        out.write("```\n")
        for line in self.body:
            out.write(line)
        # The final line of a file may lack a newline; make sure the
        # closing fence still starts on a line of its own.
        if self.body and not self.body[-1].endswith("\n"):
            out.write("\n")
        out.write("```\n")

    def merge_from(self, next_item: Item) -> bool:
        """Absorb a following verbatim or ambiguous paragraph.

        The paragraphs were separated by a blank line in the input, so a
        blank line is re-inserted between them.
        """
        if next_item.kind() in ("V", "A"):
            self.body.append("\n")
            self.body.extend(next_item.body)
            return True
        return False


class Ambiguous(Item):
    """A single-line paragraph that is not a heading.

    One line is not enough to tell body text from verbatim text by
    indentation; a preceding Verbatim item will absorb it.
    """

    def __init__(self, body: List[str]):
        super().__init__()
        self.body = body

    def dump_md(self, out: TextIO, cfg) -> None:
        for line in self.body:
            out.write(line.lstrip())

    def kind(self) -> str:
        return "A"


class FileCfg:
    """Per-file parsing configuration.

    `base_indent` is the indentation of ordinary body text.  When it is
    None, parse() sets it to the most common indentation in the input.
    """

    def __init__(self, base_indent: Optional[int] = None):
        self.base_indent = base_indent

    def parse(self, lines: List[str]) -> "Spec":
        """Parse `lines` into a Spec.

        `lines` is traversed more than once, so it must be a re-iterable
        sequence (such as a list), not a one-shot iterator.
        """
        if self.base_indent is None:
            indents = [indent_of_line(line) for line in lines if line.strip()]
            # With no non-blank lines there is nothing to classify, so the
            # indentation can stay undetermined.
            if indents:
                self.base_indent = mode(indents)

        spec = Spec(self)
        for graf in paragraphs(lines):
            spec.push_graf(self.classify_paragraph(graf))
        return spec

    def classify_paragraph(self, para: List[str]) -> Item:
        """Wrap the paragraph `para` in the Item subclass that fits it."""
        if len(para) == 1:
            m = HEADING_PATTERN.match(para[0])
            if m:
                return Heading(m.group(1), m.group(2))
            return Ambiguous(para)
        assert len(para) > 1, "paragraphs must contain at least one line"
        if uniform_indent(para) == self.base_indent:
            return Body(para)
        return Verbatim(para)


class Spec:
    """An ordered list of classified paragraphs making up one document."""

    def __init__(self, cfg: FileCfg):
        self.cfg = cfg
        self.grafs: List[Item] = []

    def push_graf(self, graf: Item) -> None:
        """Append `graf`, unless the previous item absorbs it."""
        if self.grafs and self.grafs[-1].merge_from(graf):
            return
        self.grafs.append(graf)

    def dump_md(self, out: TextIO) -> None:
        """Write every paragraph to `out` as Markdown, one blank line apart."""
        for graf in self.grafs:
            graf.dump_md(out, self.cfg)
            out.write("\n")


def main(argv: Optional[List[str]] = None) -> None:
    """Convert a spec file to Markdown on standard output.

    The input path is the first command-line argument, or TORSPEC when no
    argument is given.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else TORSPEC
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()
    spec = FileCfg().parse(lines)
    spec.dump_md(sys.stdout)


if __name__ == "__main__":
    main()