git.reasoningtechnology.com Git - SubU/blob

   1 """
   2     pygments.formatters.latex
   3     ~~~~~~~~~~~~~~~~~~~~~~~~~
   4
   5     Formatter for LaTeX fancyvrb output.
   6
   7     :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
   8     :license: BSD, see LICENSE for details.
   9 """
  10
  11 from io import StringIO
  12
  13 from pip._vendor.pygments.formatter import Formatter
  14 from pip._vendor.pygments.lexer import Lexer, do_insertions
  15 from pip._vendor.pygments.token import Token, STANDARD_TYPES
  16 from pip._vendor.pygments.util import get_bool_opt, get_int_opt
  17
  18
# Names exported by ``from <module> import *``.  Only the formatter is
# listed; LatexEmbeddedLexer (defined further down in this module) has to
# be imported by name.
__all__ = ['LatexFormatter']
  20
  21
  22 def escape_tex(text, commandprefix):
  23     return text.replace('\\', '\x00'). \
  24                 replace('{', '\x01'). \
  25                 replace('}', '\x02'). \
  26                 replace('\x00', r'\%sZbs{}' % commandprefix). \
  27                 replace('\x01', r'\%sZob{}' % commandprefix). \
  28                 replace('\x02', r'\%sZcb{}' % commandprefix). \
  29                 replace('^', r'\%sZca{}' % commandprefix). \
  30                 replace('_', r'\%sZus{}' % commandprefix). \
  31                 replace('&', r'\%sZam{}' % commandprefix). \
  32                 replace('<', r'\%sZlt{}' % commandprefix). \
  33                 replace('>', r'\%sZgt{}' % commandprefix). \
  34                 replace('#', r'\%sZsh{}' % commandprefix). \
  35                 replace('%', r'\%sZpc{}' % commandprefix). \
  36                 replace('$', r'\%sZdl{}' % commandprefix). \
  37                 replace('-', r'\%sZhy{}' % commandprefix). \
  38                 replace("'", r'\%sZsq{}' % commandprefix). \
  39                 replace('"', r'\%sZdq{}' % commandprefix). \
  40                 replace('~', r'\%sZti{}' % commandprefix)
  41
  42
# Skeleton of the standalone LaTeX document that
# LatexFormatter.format_unencoded writes when the `full` option is set.
# It is filled by %-formatting with a dict providing the keys `docclass`,
# `encoding`, `preamble`, `styledefs`, `title` and `code`.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
  59
## Small explanation of the mess below :)
#
# The previous version of the LaTeX formatter just assigned a command to
# each token type defined in the current style.  That obviously is
# problematic if the highlighted code is produced for a different style
# than the style commands themselves.
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined.  Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style.  The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
#   to render in \PY@do.  Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
#   keyval package (but modified to use plusses instead of commas
#   because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
#   if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
#   for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})
# (both keys are required: the template references %(cp)s and %(styles)s).

STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
 133
 134
 135 def _get_ttype_name(ttype):
 136     fname = STANDARD_TYPES.get(ttype)
 137     if fname:
 138         return fname
 139     aname = ''
 140     while fname is None:
 141         aname = ttype[-1] + aname
 142         ttype = ttype.parent
 143         fname = STANDARD_TYPES.get(ttype)
 144     return fname + aname
 145
 146
 147 class LatexFormatter(Formatter):
 148     r"""
 149     Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
 150     standard packages.
 151
 152     Without the `full` option, code is formatted as one ``Verbatim``
 153     environment, like this:
 154
 155     .. sourcecode:: latex
 156
 157         \begin{Verbatim}[commandchars=\\\{\}]
 158         \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
 159             \PY{k}{pass}
 160         \end{Verbatim}
 161
 162     Wrapping can be disabled using the `nowrap` option.
 163
 164     The special command used here (``\PY``) and all the other macros it needs
 165     are output by the `get_style_defs` method.
 166
 167     With the `full` option, a complete LaTeX document is output, including
 168     the command definitions in the preamble.
 169
 170     The `get_style_defs()` method of a `LatexFormatter` returns a string
 171     containing ``\def`` commands defining the macros needed inside the
 172     ``Verbatim`` environments.
 173
 174     Additional options accepted:
 175
 176     `nowrap`
 177         If set to ``True``, don't wrap the tokens at all, not even inside a
 178         ``\begin{Verbatim}`` environment. This disables most other options
 179         (default: ``False``).
 180
 181     `style`
 182         The style to use, can be a string or a Style subclass (default:
 183         ``'default'``).
 184
 185     `full`
 186         Tells the formatter to output a "full" document, i.e. a complete
 187         self-contained document (default: ``False``).
 188
 189     `title`
 190         If `full` is true, the title that should be used to caption the
 191         document (default: ``''``).
 192
 193     `docclass`
 194         If the `full` option is enabled, this is the document class to use
 195         (default: ``'article'``).
 196
 197     `preamble`
 198         If the `full` option is enabled, this can be further preamble commands,
 199         e.g. ``\usepackage`` (default: ``''``).
 200
 201     `linenos`
 202         If set to ``True``, output line numbers (default: ``False``).
 203
 204     `linenostart`
 205         The line number for the first line (default: ``1``).
 206
 207     `linenostep`
 208         If set to a number n > 1, only every nth line number is printed.
 209
 210     `verboptions`
 211         Additional options given to the Verbatim environment (see the *fancyvrb*
 212         docs for possible values) (default: ``''``).
 213
 214     `commandprefix`
 215         The LaTeX commands used to produce colored output are constructed
 216         using this prefix and some letters (default: ``'PY'``).
 217
 218         .. versionadded:: 0.7
 219         .. versionchanged:: 0.10
 220            The default is now ``'PY'`` instead of ``'C'``.
 221
 222     `texcomments`
 223         If set to ``True``, enables LaTeX comment lines.  That is, LaTex markup
 224         in comment tokens is not escaped so that LaTeX can render it (default:
 225         ``False``).
 226
 227         .. versionadded:: 1.2
 228
 229     `mathescape`
 230         If set to ``True``, enables LaTeX math mode escape in comments. That
 231         is, ``'$...$'`` inside a comment will trigger math mode (default:
 232         ``False``).
 233
 234         .. versionadded:: 1.2
 235
 236     `escapeinside`
 237         If set to a string of length 2, enables escaping to LaTeX. Text
 238         delimited by these 2 characters is read as LaTeX code and
 239         typeset accordingly. It has no effect in string literals. It has
 240         no effect in comments if `texcomments` or `mathescape` is
 241         set. (default: ``''``).
 242
 243         .. versionadded:: 2.0
 244
 245     `envname`
 246         Allows you to pick an alternative environment name replacing Verbatim.
 247         The alternate environment still has to support Verbatim's option syntax.
 248         (default: ``'Verbatim'``).
 249
 250         .. versionadded:: 2.0
 251     """
 252     name = 'LaTeX'
 253     aliases = ['latex', 'tex']
 254     filenames = ['*.tex']
 255
 256     def __init__(self, **options):
 257         Formatter.__init__(self, **options)
 258         self.nowrap = get_bool_opt(options, 'nowrap', False)
 259         self.docclass = options.get('docclass', 'article')
 260         self.preamble = options.get('preamble', '')
 261         self.linenos = get_bool_opt(options, 'linenos', False)
 262         self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
 263         self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
 264         self.verboptions = options.get('verboptions', '')
 265         self.nobackground = get_bool_opt(options, 'nobackground', False)
 266         self.commandprefix = options.get('commandprefix', 'PY')
 267         self.texcomments = get_bool_opt(options, 'texcomments', False)
 268         self.mathescape = get_bool_opt(options, 'mathescape', False)
 269         self.escapeinside = options.get('escapeinside', '')
 270         if len(self.escapeinside) == 2:
 271             self.left = self.escapeinside[0]
 272             self.right = self.escapeinside[1]
 273         else:
 274             self.escapeinside = ''
 275         self.envname = options.get('envname', 'Verbatim')
 276
 277         self._create_stylesheet()
 278
 279     def _create_stylesheet(self):
 280         t2n = self.ttype2name = {Token: ''}
 281         c2d = self.cmd2def = {}
 282         cp = self.commandprefix
 283
 284         def rgbcolor(col):
 285             if col:
 286                 return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
 287                                  for i in (0, 2, 4)])
 288             else:
 289                 return '1,1,1'
 290
 291         for ttype, ndef in self.style:
 292             name = _get_ttype_name(ttype)
 293             cmndef = ''
 294             if ndef['bold']:
 295                 cmndef += r'\let\$$@bf=\textbf'
 296             if ndef['italic']:
 297                 cmndef += r'\let\$$@it=\textit'
 298             if ndef['underline']:
 299                 cmndef += r'\let\$$@ul=\underline'
 300             if ndef['roman']:
 301                 cmndef += r'\let\$$@ff=\textrm'
 302             if ndef['sans']:
 303                 cmndef += r'\let\$$@ff=\textsf'
 304             if ndef['mono']:
 305                 cmndef += r'\let\$$@ff=\textsf'
 306             if ndef['color']:
 307                 cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
 308                            rgbcolor(ndef['color']))
 309             if ndef['border']:
 310                 cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
 311                            r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
 312                            (rgbcolor(ndef['border']),
 313                             rgbcolor(ndef['bgcolor'])))
 314             elif ndef['bgcolor']:
 315                 cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
 316                            r'\colorbox[rgb]{%s}{\strut ##1}}}' %
 317                            rgbcolor(ndef['bgcolor']))
 318             if cmndef == '':
 319                 continue
 320             cmndef = cmndef.replace('$$', cp)
 321             t2n[ttype] = name
 322             c2d[name] = cmndef
 323
 324     def get_style_defs(self, arg=''):
 325         """
 326         Return the command sequences needed to define the commands
 327         used to format text in the verbatim environment. ``arg`` is ignored.
 328         """
 329         cp = self.commandprefix
 330         styles = []
 331         for name, definition in self.cmd2def.items():
 332             styles.append(r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition))
 333         return STYLE_TEMPLATE % {'cp': self.commandprefix,
 334                                  'styles': '\n'.join(styles)}
 335
 336     def format_unencoded(self, tokensource, outfile):
 337         # TODO: add support for background colors
 338         t2n = self.ttype2name
 339         cp = self.commandprefix
 340
 341         if self.full:
 342             realoutfile = outfile
 343             outfile = StringIO()
 344
 345         if not self.nowrap:
 346             outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
 347             if self.linenos:
 348                 start, step = self.linenostart, self.linenostep
 349                 outfile.write(',numbers=left' +
 350                               (start and ',firstnumber=%d' % start or '') +
 351                               (step and ',stepnumber=%d' % step or ''))
 352             if self.mathescape or self.texcomments or self.escapeinside:
 353                 outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
 354                               '\\catcode`\\_=8\\relax}')
 355             if self.verboptions:
 356                 outfile.write(',' + self.verboptions)
 357             outfile.write(']\n')
 358
 359         for ttype, value in tokensource:
 360             if ttype in Token.Comment:
 361                 if self.texcomments:
 362                     # Try to guess comment starting lexeme and escape it ...
 363                     start = value[0:1]
 364                     for i in range(1, len(value)):
 365                         if start[0] != value[i]:
 366                             break
 367                         start += value[i]
 368
 369                     value = value[len(start):]
 370                     start = escape_tex(start, cp)
 371
 372                     # ... but do not escape inside comment.
 373                     value = start + value
 374                 elif self.mathescape:
 375                     # Only escape parts not inside a math environment.
 376                     parts = value.split('$')
 377                     in_math = False
 378                     for i, part in enumerate(parts):
 379                         if not in_math:
 380                             parts[i] = escape_tex(part, cp)
 381                         in_math = not in_math
 382                     value = '$'.join(parts)
 383                 elif self.escapeinside:
 384                     text = value
 385                     value = ''
 386                     while text:
 387                         a, sep1, text = text.partition(self.left)
 388                         if sep1:
 389                             b, sep2, text = text.partition(self.right)
 390                             if sep2:
 391                                 value += escape_tex(a, cp) + b
 392                             else:
 393                                 value += escape_tex(a + sep1 + b, cp)
 394                         else:
 395                             value += escape_tex(a, cp)
 396                 else:
 397                     value = escape_tex(value, cp)
 398             elif ttype not in Token.Escape:
 399                 value = escape_tex(value, cp)
 400             styles = []
 401             while ttype is not Token:
 402                 try:
 403                     styles.append(t2n[ttype])
 404                 except KeyError:
 405                     # not in current style
 406                     styles.append(_get_ttype_name(ttype))
 407                 ttype = ttype.parent
 408             styleval = '+'.join(reversed(styles))
 409             if styleval:
 410                 spl = value.split('\n')
 411                 for line in spl[:-1]:
 412                     if line:
 413                         outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
 414                     outfile.write('\n')
 415                 if spl[-1]:
 416                     outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
 417             else:
 418                 outfile.write(value)
 419
 420         if not self.nowrap:
 421             outfile.write('\\end{' + self.envname + '}\n')
 422
 423         if self.full:
 424             encoding = self.encoding or 'utf8'
 425             # map known existings encodings from LaTeX distribution
 426             encoding = {
 427                 'utf_8': 'utf8',
 428                 'latin_1': 'latin1',
 429                 'iso_8859_1': 'latin1',
 430             }.get(encoding.replace('-', '_'), encoding)
 431             realoutfile.write(DOC_TEMPLATE %
 432                 dict(docclass  = self.docclass,
 433                      preamble  = self.preamble,
 434                      title     = self.title,
 435                      encoding  = encoding,
 436                      styledefs = self.get_style_defs(),
 437                      code      = outfile.getvalue()))
 438
 439
 440 class LatexEmbeddedLexer(Lexer):
 441     """
 442     This lexer takes one lexer as argument, the lexer for the language
 443     being formatted, and the left and right delimiters for escaped text.
 444
 445     First everything is scanned using the language lexer to obtain
 446     strings and comments. All other consecutive tokens are merged and
 447     the resulting text is scanned for escaped segments, which are given
 448     the Token.Escape type. Finally text that is not escaped is scanned
 449     again with the language lexer.
 450     """
 451     def __init__(self, left, right, lang, **options):
 452         self.left = left
 453         self.right = right
 454         self.lang = lang
 455         Lexer.__init__(self, **options)
 456
 457     def get_tokens_unprocessed(self, text):
 458         # find and remove all the escape tokens (replace with an empty string)
 459         # this is very similar to DelegatingLexer.get_tokens_unprocessed.
 460         buffered = ''
 461         insertions = []
 462         insertion_buf = []
 463         for i, t, v in self._find_safe_escape_tokens(text):
 464             if t is None:
 465                 if insertion_buf:
 466                     insertions.append((len(buffered), insertion_buf))
 467                     insertion_buf = []
 468                 buffered += v
 469             else:
 470                 insertion_buf.append((i, t, v))
 471         if insertion_buf:
 472             insertions.append((len(buffered), insertion_buf))
 473         return do_insertions(insertions,
 474                              self.lang.get_tokens_unprocessed(buffered))
 475
 476     def _find_safe_escape_tokens(self, text):
 477         """ find escape tokens that are not in strings or comments """
 478         for i, t, v in self._filter_to(
 479             self.lang.get_tokens_unprocessed(text),
 480             lambda t: t in Token.Comment or t in Token.String
 481         ):
 482             if t is None:
 483                 for i2, t2, v2 in self._find_escape_tokens(v):
 484                     yield i + i2, t2, v2
 485             else:
 486                 yield i, None, v
 487
 488     def _filter_to(self, it, pred):
 489         """ Keep only the tokens that match `pred`, merge the others together """
 490         buf = ''
 491         idx = 0
 492         for i, t, v in it:
 493             if pred(t):
 494                 if buf:
 495                     yield idx, None, buf
 496                     buf = ''
 497                 yield i, t, v
 498             else:
 499                 if not buf:
 500                     idx = i
 501                 buf += v
 502         if buf:
 503             yield idx, None, buf
 504
 505     def _find_escape_tokens(self, text):
 506         """ Find escape tokens within text, give token=None otherwise """
 507         index = 0
 508         while text:
 509             a, sep1, text = text.partition(self.left)
 510             if a:
 511                 yield index, None, a
 512                 index += len(a)
 513             if sep1:
 514                 b, sep2, text = text.partition(self.right)
 515                 if sep2:
 516                     yield index + len(sep1), Token.Escape, b
 517                     index += len(sep1) + len(b) + len(sep2)
 518                 else:
 519                     yield index, Token.Error, sep1
 520                     index += len(sep1)
 521                     text = b