git.reasoningtechnology.com Git - SubU/blob

   1 import railroad
   2 import pyparsing
   3 import typing
   4 from typing import (
   5     List,
   6     NamedTuple,
   7     Generic,
   8     TypeVar,
   9     Dict,
  10     Callable,
  11     Set,
  12     Iterable,
  13 )
  14 from jinja2 import Template
  15 from io import StringIO
  16 import inspect
  17
  18
# Jinja2 source for the standalone HTML page that hosts the generated diagrams.
# Variables referenced by the template:
#   head     - optional markup placed inside <head>; when it is falsy a default
#              <style> block for the diagram headings is emitted instead
#   body     - markup placed at the very top of <body>
#   diagrams - iterable of entries exposing ``title``, ``text`` and ``svg``;
#              one "railroad-group" <div> is emitted per entry
# ``head`` and ``body`` are piped through the ``safe`` filter.
jinja2_template_source = """\
<!DOCTYPE html>
<html>
<head>
    {% if not head %}
        <style type="text/css">
            .railroad-heading {
                font-family: monospace;
            }
        </style>
    {% else %}
        {{ head | safe }}
    {% endif %}
</head>
<body>
{{ body | safe }}
{% for diagram in diagrams %}
    <div class="railroad-group">
        <h1 class="railroad-heading">{{ diagram.title }}</h1>
        <div class="railroad-description">{{ diagram.text }}</div>
        <div class="railroad-svg">
            {{ diagram.svg }}
        </div>
    </div>
{% endfor %}
</body>
</html>
"""

# Compiled once at import time; railroad_to_html() renders through this object
template = Template(jinja2_template_source)
  49
  50 # Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
  51 NamedDiagram = NamedTuple(
  52     "NamedDiagram",
  53     [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)],
  54 )
  55 """
  56 A simple structure for associating a name with a railroad diagram
  57 """
  58
  59 T = TypeVar("T")
  60
  61
  62 class EachItem(railroad.Group):
  63     """
  64     Custom railroad item to compose a:
  65     - Group containing a
  66       - OneOrMore containing a
  67         - Choice of the elements in the Each
  68     with the group label indicating that all must be matched
  69     """
  70
  71     all_label = "[ALL]"
  72
  73     def __init__(self, *items):
  74         choice_item = railroad.Choice(len(items) - 1, *items)
  75         one_or_more_item = railroad.OneOrMore(item=choice_item)
  76         super().__init__(one_or_more_item, label=self.all_label)
  77
  78
  79 class AnnotatedItem(railroad.Group):
  80     """
  81     Simple subclass of Group that creates an annotation label
  82     """
  83
  84     def __init__(self, label: str, item):
  85         super().__init__(item=item, label="[{}]".format(label) if label else label)
  86
  87
  88 class EditablePartial(Generic[T]):
  89     """
  90     Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
  91     constructed.
  92     """
  93
  94     # We need this here because the railroad constructors actually transform the data, so can't be called until the
  95     # entire tree is assembled
  96
  97     def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
  98         self.func = func
  99         self.args = args
 100         self.kwargs = kwargs
 101
 102     @classmethod
 103     def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
 104         """
 105         If you call this function in the same way that you would call the constructor, it will store the arguments
 106         as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
 107         """
 108         return EditablePartial(func=func, args=list(args), kwargs=kwargs)
 109
 110     @property
 111     def name(self):
 112         return self.kwargs["name"]
 113
 114     def __call__(self) -> T:
 115         """
 116         Evaluate the partial and return the result
 117         """
 118         args = self.args.copy()
 119         kwargs = self.kwargs.copy()
 120
 121         # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
 122         # args=['list', 'of', 'things'])
 123         arg_spec = inspect.getfullargspec(self.func)
 124         if arg_spec.varargs in self.kwargs:
 125             args += kwargs.pop(arg_spec.varargs)
 126
 127         return self.func(*args, **kwargs)
 128
 129
 130 def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
 131     """
 132     Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams
 133     :params kwargs: kwargs to be passed in to the template
 134     """
 135     data = []
 136     for diagram in diagrams:
 137         if diagram.diagram is None:
 138             continue
 139         io = StringIO()
 140         diagram.diagram.writeSvg(io.write)
 141         title = diagram.name
 142         if diagram.index == 0:
 143             title += " (root)"
 144         data.append({"title": title, "text": "", "svg": io.getvalue()})
 145
 146     return template.render(diagrams=data, **kwargs)
 147
 148
 149 def resolve_partial(partial: "EditablePartial[T]") -> T:
 150     """
 151     Recursively resolves a collection of Partials into whatever type they are
 152     """
 153     if isinstance(partial, EditablePartial):
 154         partial.args = resolve_partial(partial.args)
 155         partial.kwargs = resolve_partial(partial.kwargs)
 156         return partial()
 157     elif isinstance(partial, list):
 158         return [resolve_partial(x) for x in partial]
 159     elif isinstance(partial, dict):
 160         return {key: resolve_partial(x) for key, x in partial.items()}
 161     else:
 162         return partial
 163
 164
 165 def to_railroad(
 166     element: pyparsing.ParserElement,
 167     diagram_kwargs: typing.Optional[dict] = None,
 168     vertical: int = 3,
 169     show_results_names: bool = False,
 170     show_groups: bool = False,
 171 ) -> List[NamedDiagram]:
 172     """
 173     Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
 174     creation if you want to access the Railroad tree before it is converted to HTML
 175     :param element: base element of the parser being diagrammed
 176     :param diagram_kwargs: kwargs to pass to the Diagram() constructor
 177     :param vertical: (optional) - int - limit at which number of alternatives should be
 178        shown vertically instead of horizontally
 179     :param show_results_names - bool to indicate whether results name annotations should be
 180        included in the diagram
 181     :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled
 182        surrounding box
 183     """
 184     # Convert the whole tree underneath the root
 185     lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
 186     _to_diagram_element(
 187         element,
 188         lookup=lookup,
 189         parent=None,
 190         vertical=vertical,
 191         show_results_names=show_results_names,
 192         show_groups=show_groups,
 193     )
 194
 195     root_id = id(element)
 196     # Convert the root if it hasn't been already
 197     if root_id in lookup:
 198         if not element.customName:
 199             lookup[root_id].name = ""
 200         lookup[root_id].mark_for_extraction(root_id, lookup, force=True)
 201
 202     # Now that we're finished, we can convert from intermediate structures into Railroad elements
 203     diags = list(lookup.diagrams.values())
 204     if len(diags) > 1:
 205         # collapse out duplicate diags with the same name
 206         seen = set()
 207         deduped_diags = []
 208         for d in diags:
 209             # don't extract SkipTo elements, they are uninformative as subdiagrams
 210             if d.name == "...":
 211                 continue
 212             if d.name is not None and d.name not in seen:
 213                 seen.add(d.name)
 214                 deduped_diags.append(d)
 215         resolved = [resolve_partial(partial) for partial in deduped_diags]
 216     else:
 217         # special case - if just one diagram, always display it, even if
 218         # it has no name
 219         resolved = [resolve_partial(partial) for partial in diags]
 220     return sorted(resolved, key=lambda diag: diag.index)
 221
 222
 223 def _should_vertical(
 224     specification: int, exprs: Iterable[pyparsing.ParserElement]
 225 ) -> bool:
 226     """
 227     Returns true if we should return a vertical list of elements
 228     """
 229     if specification is None:
 230         return False
 231     else:
 232         return len(_visible_exprs(exprs)) >= specification
 233
 234
 235 class ElementState:
 236     """
 237     State recorded for an individual pyparsing Element
 238     """
 239
 240     # Note: this should be a dataclass, but we have to support Python 3.5
 241     def __init__(
 242         self,
 243         element: pyparsing.ParserElement,
 244         converted: EditablePartial,
 245         parent: EditablePartial,
 246         number: int,
 247         name: str = None,
 248         parent_index: typing.Optional[int] = None,
 249     ):
 250         #: The pyparsing element that this represents
 251         self.element: pyparsing.ParserElement = element
 252         #: The name of the element
 253         self.name: typing.Optional[str] = name
 254         #: The output Railroad element in an unconverted state
 255         self.converted: EditablePartial = converted
 256         #: The parent Railroad element, which we store so that we can extract this if it's duplicated
 257         self.parent: EditablePartial = parent
 258         #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
 259         self.number: int = number
 260         #: The index of this inside its parent
 261         self.parent_index: typing.Optional[int] = parent_index
 262         #: If true, we should extract this out into a subdiagram
 263         self.extract: bool = False
 264         #: If true, all of this element's children have been filled out
 265         self.complete: bool = False
 266
 267     def mark_for_extraction(
 268         self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
 269     ):
 270         """
 271         Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram
 272         :param el_id: id of the element
 273         :param state: element/diagram state tracker
 274         :param name: name to use for this element's text
 275         :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
 276         root element when we know we're finished
 277         """
 278         self.extract = True
 279
 280         # Set the name
 281         if not self.name:
 282             if name:
 283                 # Allow forcing a custom name
 284                 self.name = name
 285             elif self.element.customName:
 286                 self.name = self.element.customName
 287             else:
 288                 self.name = ""
 289
 290         # Just because this is marked for extraction doesn't mean we can do it yet. We may have to wait for children
 291         # to be added
 292         # Also, if this is just a string literal etc, don't bother extracting it
 293         if force or (self.complete and _worth_extracting(self.element)):
 294             state.extract_into_diagram(el_id)
 295
 296
 297 class ConverterState:
 298     """
 299     Stores some state that persists between recursions into the element tree
 300     """
 301
 302     def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
 303         #: A dictionary mapping ParserElements to state relating to them
 304         self._element_diagram_states: Dict[int, ElementState] = {}
 305         #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
 306         self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
 307         #: The index of the next unnamed element
 308         self.unnamed_index: int = 1
 309         #: The index of the next element. This is used for sorting
 310         self.index: int = 0
 311         #: Shared kwargs that are used to customize the construction of diagrams
 312         self.diagram_kwargs: dict = diagram_kwargs or {}
 313         self.extracted_diagram_names: Set[str] = set()
 314
 315     def __setitem__(self, key: int, value: ElementState):
 316         self._element_diagram_states[key] = value
 317
 318     def __getitem__(self, key: int) -> ElementState:
 319         return self._element_diagram_states[key]
 320
 321     def __delitem__(self, key: int):
 322         del self._element_diagram_states[key]
 323
 324     def __contains__(self, key: int):
 325         return key in self._element_diagram_states
 326
 327     def generate_unnamed(self) -> int:
 328         """
 329         Generate a number used in the name of an otherwise unnamed diagram
 330         """
 331         self.unnamed_index += 1
 332         return self.unnamed_index
 333
 334     def generate_index(self) -> int:
 335         """
 336         Generate a number used to index a diagram
 337         """
 338         self.index += 1
 339         return self.index
 340
 341     def extract_into_diagram(self, el_id: int):
 342         """
 343         Used when we encounter the same token twice in the same tree. When this
 344         happens, we replace all instances of that token with a terminal, and
 345         create a new subdiagram for the token
 346         """
 347         position = self[el_id]
 348
 349         # Replace the original definition of this element with a regular block
 350         if position.parent:
 351             ret = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
 352             if "item" in position.parent.kwargs:
 353                 position.parent.kwargs["item"] = ret
 354             elif "items" in position.parent.kwargs:
 355                 position.parent.kwargs["items"][position.parent_index] = ret
 356
 357         # If the element we're extracting is a group, skip to its content but keep the title
 358         if position.converted.func == railroad.Group:
 359             content = position.converted.kwargs["item"]
 360         else:
 361             content = position.converted
 362
 363         self.diagrams[el_id] = EditablePartial.from_call(
 364             NamedDiagram,
 365             name=position.name,
 366             diagram=EditablePartial.from_call(
 367                 railroad.Diagram, content, **self.diagram_kwargs
 368             ),
 369             index=position.number,
 370         )
 371
 372         del self[el_id]
 373
 374
 375 def _worth_extracting(element: pyparsing.ParserElement) -> bool:
 376     """
 377     Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
 378     themselves have children, then its complex enough to extract
 379     """
 380     children = element.recurse()
 381     return any(child.recurse() for child in children)
 382
 383
 384 def _apply_diagram_item_enhancements(fn):
 385     """
 386     decorator to ensure enhancements to a diagram item (such as results name annotations)
 387     get applied on return from _to_diagram_element (we do this since there are several
 388     returns in _to_diagram_element)
 389     """
 390
 391     def _inner(
 392         element: pyparsing.ParserElement,
 393         parent: typing.Optional[EditablePartial],
 394         lookup: ConverterState = None,
 395         vertical: int = None,
 396         index: int = 0,
 397         name_hint: str = None,
 398         show_results_names: bool = False,
 399         show_groups: bool = False,
 400     ) -> typing.Optional[EditablePartial]:
 401
 402         ret = fn(
 403             element,
 404             parent,
 405             lookup,
 406             vertical,
 407             index,
 408             name_hint,
 409             show_results_names,
 410             show_groups,
 411         )
 412
 413         # apply annotation for results name, if present
 414         if show_results_names and ret is not None:
 415             element_results_name = element.resultsName
 416             if element_results_name:
 417                 # add "*" to indicate if this is a "list all results" name
 418                 element_results_name += "" if element.modalResults else "*"
 419                 ret = EditablePartial.from_call(
 420                     railroad.Group, item=ret, label=element_results_name
 421                 )
 422
 423         return ret
 424
 425     return _inner
 426
 427
 428 def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
 429     non_diagramming_exprs = (
 430         pyparsing.ParseElementEnhance,
 431         pyparsing.PositionToken,
 432         pyparsing.And._ErrorStop,
 433     )
 434     return [
 435         e
 436         for e in exprs
 437         if not (e.customName or e.resultsName or isinstance(e, non_diagramming_exprs))
 438     ]
 439
 440
 441 @_apply_diagram_item_enhancements
 442 def _to_diagram_element(
 443     element: pyparsing.ParserElement,
 444     parent: typing.Optional[EditablePartial],
 445     lookup: ConverterState = None,
 446     vertical: int = None,
 447     index: int = 0,
 448     name_hint: str = None,
 449     show_results_names: bool = False,
 450     show_groups: bool = False,
 451 ) -> typing.Optional[EditablePartial]:
 452     """
 453     Recursively converts a PyParsing Element to a railroad Element
 454     :param lookup: The shared converter state that keeps track of useful things
 455     :param index: The index of this element within the parent
 456     :param parent: The parent of this element in the output tree
 457     :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
 458     it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
 459     do so
 460     :param name_hint: If provided, this will override the generated name
 461     :param show_results_names: bool flag indicating whether to add annotations for results names
 462     :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed
 463     :param show_groups: bool flag indicating whether to show groups using bounding box
 464     """
 465     exprs = element.recurse()
 466     name = name_hint or element.customName or element.__class__.__name__
 467
 468     # Python's id() is used to provide a unique identifier for elements
 469     el_id = id(element)
 470
 471     element_results_name = element.resultsName
 472
 473     # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
 474     if not element.customName:
 475         if isinstance(
 476             element,
 477             (
 478                 # pyparsing.TokenConverter,
 479                 # pyparsing.Forward,
 480                 pyparsing.Located,
 481             ),
 482         ):
 483             # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
 484             if exprs:
 485                 if not exprs[0].customName:
 486                     propagated_name = name
 487                 else:
 488                     propagated_name = None
 489
 490                 return _to_diagram_element(
 491                     element.expr,
 492                     parent=parent,
 493                     lookup=lookup,
 494                     vertical=vertical,
 495                     index=index,
 496                     name_hint=propagated_name,
 497                     show_results_names=show_results_names,
 498                     show_groups=show_groups,
 499                 )
 500
 501     # If the element isn't worth extracting, we always treat it as the first time we say it
 502     if _worth_extracting(element):
 503         if el_id in lookup:
 504             # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
 505             # so we have to extract it into a new diagram.
 506             looked_up = lookup[el_id]
 507             looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
 508             ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
 509             return ret
 510
 511         elif el_id in lookup.diagrams:
 512             # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
 513             # just put in a marker element that refers to the sub-diagram
 514             ret = EditablePartial.from_call(
 515                 railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
 516             )
 517             return ret
 518
 519     # Recursively convert child elements
 520     # Here we find the most relevant Railroad element for matching pyparsing Element
 521     # We use ``items=[]`` here to hold the place for where the child elements will go once created
 522     if isinstance(element, pyparsing.And):
 523         # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
 524         # (all will have the same name, and resultsName)
 525         if not exprs:
 526             return None
 527         if len(set((e.name, e.resultsName) for e in exprs)) == 1:
 528             ret = EditablePartial.from_call(
 529                 railroad.OneOrMore, item="", repeat=str(len(exprs))
 530             )
 531         elif _should_vertical(vertical, exprs):
 532             ret = EditablePartial.from_call(railroad.Stack, items=[])
 533         else:
 534             ret = EditablePartial.from_call(railroad.Sequence, items=[])
 535     elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
 536         if not exprs:
 537             return None
 538         if _should_vertical(vertical, exprs):
 539             ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
 540         else:
 541             ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
 542     elif isinstance(element, pyparsing.Each):
 543         if not exprs:
 544             return None
 545         ret = EditablePartial.from_call(EachItem, items=[])
 546     elif isinstance(element, pyparsing.NotAny):
 547         ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
 548     elif isinstance(element, pyparsing.FollowedBy):
 549         ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
 550     elif isinstance(element, pyparsing.PrecededBy):
 551         ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
 552     elif isinstance(element, pyparsing.Group):
 553         if show_groups:
 554             ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
 555         else:
 556             ret = EditablePartial.from_call(railroad.Group, label="", item="")
 557     elif isinstance(element, pyparsing.TokenConverter):
 558         ret = EditablePartial.from_call(
 559             AnnotatedItem, label=type(element).__name__.lower(), item=""
 560         )
 561     elif isinstance(element, pyparsing.Opt):
 562         ret = EditablePartial.from_call(railroad.Optional, item="")
 563     elif isinstance(element, pyparsing.OneOrMore):
 564         ret = EditablePartial.from_call(railroad.OneOrMore, item="")
 565     elif isinstance(element, pyparsing.ZeroOrMore):
 566         ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
 567     elif isinstance(element, pyparsing.Group):
 568         ret = EditablePartial.from_call(
 569             railroad.Group, item=None, label=element_results_name
 570         )
 571     elif isinstance(element, pyparsing.Empty) and not element.customName:
 572         # Skip unnamed "Empty" elements
 573         ret = None
 574     elif len(exprs) > 1:
 575         ret = EditablePartial.from_call(railroad.Sequence, items=[])
 576     elif len(exprs) > 0 and not element_results_name:
 577         ret = EditablePartial.from_call(railroad.Group, item="", label=name)
 578     else:
 579         terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
 580         ret = terminal
 581
 582     if ret is None:
 583         return
 584
 585     # Indicate this element's position in the tree so we can extract it if necessary
 586     lookup[el_id] = ElementState(
 587         element=element,
 588         converted=ret,
 589         parent=parent,
 590         parent_index=index,
 591         number=lookup.generate_index(),
 592     )
 593     if element.customName:
 594         lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)
 595
 596     i = 0
 597     for expr in exprs:
 598         # Add a placeholder index in case we have to extract the child before we even add it to the parent
 599         if "items" in ret.kwargs:
 600             ret.kwargs["items"].insert(i, None)
 601
 602         item = _to_diagram_element(
 603             expr,
 604             parent=ret,
 605             lookup=lookup,
 606             vertical=vertical,
 607             index=i,
 608             show_results_names=show_results_names,
 609             show_groups=show_groups,
 610         )
 611
 612         # Some elements don't need to be shown in the diagram
 613         if item is not None:
 614             if "item" in ret.kwargs:
 615                 ret.kwargs["item"] = item
 616             elif "items" in ret.kwargs:
 617                 # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
 618                 ret.kwargs["items"][i] = item
 619                 i += 1
 620         elif "items" in ret.kwargs:
 621             # If we're supposed to skip this element, remove it from the parent
 622             del ret.kwargs["items"][i]
 623
 624     # If all this items children are none, skip this item
 625     if ret and (
 626         ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
 627         or ("item" in ret.kwargs and ret.kwargs["item"] is None)
 628     ):
 629         ret = EditablePartial.from_call(railroad.Terminal, name)
 630
 631     # Mark this element as "complete", ie it has all of its children
 632     if el_id in lookup:
 633         lookup[el_id].complete = True
 634
 635     if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
 636         lookup.extract_into_diagram(el_id)
 637         if ret is not None:
 638             ret = EditablePartial.from_call(
 639                 railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
 640             )
 641
 642     return ret