git.reasoningtechnology.com Git - SubU/blob

   1 import re
   2 import itertools
   3 import textwrap
   4 import functools
   5
   6 try:
   7     from importlib.resources import files  # type: ignore
   8 except ImportError:  # pragma: nocover
   9     from pkg_resources.extern.importlib_resources import files  # type: ignore
  10
  11 from pkg_resources.extern.jaraco.functools import compose, method_cache
  12 from pkg_resources.extern.jaraco.context import ExceptionTrap
  13
  14
  15 def substitution(old, new):
  16     """
  17     Return a function that will perform a substitution on a string
  18     """
  19     return lambda s: s.replace(old, new)
  20
  21
  22 def multi_substitution(*substitutions):
  23     """
  24     Take a sequence of pairs specifying substitutions, and create
  25     a function that performs those substitutions.
  26
  27     >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo')
  28     'baz'
  29     """
  30     substitutions = itertools.starmap(substitution, substitutions)
  31     # compose function applies last function first, so reverse the
  32     #  substitutions to get the expected order.
  33     substitutions = reversed(tuple(substitutions))
  34     return compose(*substitutions)
  35
  36
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use ``in_``:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False
    """

    # All comparisons below work on the lower-cased form of both operands.
    # ``other`` must therefore provide a ``lower()`` method.

    def __lt__(self, other):
        """Return True if self sorts before other, ignoring case."""
        return self.lower() < other.lower()

    def __gt__(self, other):
        """Return True if self sorts after other, ignoring case."""
        return self.lower() > other.lower()

    def __eq__(self, other):
        """Return True if self and other differ at most by case."""
        return self.lower() == other.lower()

    def __ne__(self, other):
        """Return True if self and other differ by more than case."""
        return self.lower() != other.lower()

    def __hash__(self):
        # Hash the lower-cased form so that values which compare equal
        # under __eq__ also hash equal.
        return hash(self.lower())

    def __contains__(self, other):
        """Return True if other occurs in self, ignoring case."""
        # Calls str.lower directly (via super) rather than the cached
        # override defined below.
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        # Wrap ``other`` so the case-insensitive __contains__ above is used.
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        """Return the lower-cased form of this string."""
        return super().lower()

    def index(self, sub):
        """Return the position of the first case-insensitive match of sub."""
        return self.lower().index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        """
        Split on ``splitter``, matching it literally but ignoring case.

        ``maxsplit`` is passed to ``re.split``, where 0 means no limit.
        """
        # re.escape makes the splitter match literally; re.I ignores case.
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)
 130
 131
# Python 3.8 compatibility
# Trap for UnicodeDecodeError; its ``passes`` decorator is applied to
# is_decodable below.
_unicode_trap = ExceptionTrap(UnicodeDecodeError)


@_unicode_trap.passes
def is_decodable(value):
    r"""
    Return True if the supplied value is decodable (using the default
    encoding).

    >>> is_decodable(b'\xff')
    False
    >>> is_decodable(b'\x32')
    True
    """
    # The body only attempts the decode and returns nothing itself; the
    # boolean shown in the doctests comes from the ``passes`` decorator
    # (presumably True when no UnicodeDecodeError is raised -- confirm
    # against jaraco.context.ExceptionTrap).
    value.decode()
 148
 149
 150 def is_binary(value):
 151     r"""
 152     Return True if the value appears to be binary (that is, it's a byte
 153     string and isn't decodable).
 154
 155     >>> is_binary(b'\xff')
 156     True
 157     >>> is_binary('\xff')
 158     False
 159     """
 160     return isinstance(value, bytes) and not is_decodable(value)
 161
 162
 163 def trim(s):
 164     r"""
 165     Trim something like a docstring to remove the whitespace that
 166     is common due to indentation and formatting.
 167
 168     >>> trim("\n\tfoo = bar\n\t\tbar = baz\n")
 169     'foo = bar\n\tbar = baz'
 170     """
 171     return textwrap.dedent(s).strip()
 172
 173
 174 def wrap(s):
 175     """
 176     Wrap lines of text, retaining existing newlines as
 177     paragraph markers.
 178
 179     >>> print(wrap(lorem_ipsum))
 180     Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
 181     eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
 182     minim veniam, quis nostrud exercitation ullamco laboris nisi ut
 183     aliquip ex ea commodo consequat. Duis aute irure dolor in
 184     reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla
 185     pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
 186     culpa qui officia deserunt mollit anim id est laborum.
 187     <BLANKLINE>
 188     Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam
 189     varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus
 190     magna felis sollicitudin mauris. Integer in mauris eu nibh euismod
 191     gravida. Duis ac tellus et risus vulputate vehicula. Donec lobortis
 192     risus a elit. Etiam tempor. Ut ullamcorper, ligula eu tempor congue,
 193     eros est euismod turpis, id tincidunt sapien risus a quam. Maecenas
 194     fermentum consequat mi. Donec fermentum. Pellentesque malesuada nulla
 195     a mi. Duis sapien sem, aliquet nec, commodo eget, consequat quis,
 196     neque. Aliquam faucibus, elit ut dictum aliquet, felis nisl adipiscing
 197     sapien, sed malesuada diam lacus eget erat. Cras mollis scelerisque
 198     nunc. Nullam arcu. Aliquam consequat. Curabitur augue lorem, dapibus
 199     quis, laoreet et, pretium ac, nisi. Aenean magna nisl, mollis quis,
 200     molestie eu, feugiat in, orci. In hac habitasse platea dictumst.
 201     """
 202     paragraphs = s.splitlines()
 203     wrapped = ('\n'.join(textwrap.wrap(para)) for para in paragraphs)
 204     return '\n\n'.join(wrapped)
 205
 206
 207 def unwrap(s):
 208     r"""
 209     Given a multi-line string, return an unwrapped version.
 210
 211     >>> wrapped = wrap(lorem_ipsum)
 212     >>> wrapped.count('\n')
 213     20
 214     >>> unwrapped = unwrap(wrapped)
 215     >>> unwrapped.count('\n')
 216     1
 217     >>> print(unwrapped)
 218     Lorem ipsum dolor sit amet, consectetur adipiscing ...
 219     Curabitur pretium tincidunt lacus. Nulla gravida orci ...
 220
 221     """
 222     paragraphs = re.split(r'\n\n+', s)
 223     cleaned = (para.replace('\n', ' ') for para in paragraphs)
 224     return '\n'.join(cleaned)
 225
 226
 227
 228
 229 class Splitter(object):
 230     """object that will split a string with the given arguments for each call
 231
 232     >>> s = Splitter(',')
 233     >>> s('hello, world, this is your, master calling')
 234     ['hello', ' world', ' this is your', ' master calling']
 235     """
 236
 237     def __init__(self, *args):
 238         self.args = args
 239
 240     def __call__(self, s):
 241         return s.split(*self.args)
 242
 243
 244 def indent(string, prefix=' ' * 4):
 245     """
 246     >>> indent('foo')
 247     '    foo'
 248     """
 249     return prefix + string
 250
 251
 252 class WordSet(tuple):
 253     """
 254     Given an identifier, return the words that identifier represents,
 255     whether in camel case, underscore-separated, etc.
 256
 257     >>> WordSet.parse("camelCase")
 258     ('camel', 'Case')
 259
 260     >>> WordSet.parse("under_sep")
 261     ('under', 'sep')
 262
 263     Acronyms should be retained
 264
 265     >>> WordSet.parse("firstSNL")
 266     ('first', 'SNL')
 267
 268     >>> WordSet.parse("you_and_I")
 269     ('you', 'and', 'I')
 270
 271     >>> WordSet.parse("A simple test")
 272     ('A', 'simple', 'test')
 273
 274     Multiple caps should not interfere with the first cap of another word.
 275
 276     >>> WordSet.parse("myABCClass")
 277     ('my', 'ABC', 'Class')
 278
 279     The result is a WordSet, so you can get the form you need.
 280
 281     >>> WordSet.parse("myABCClass").underscore_separated()
 282     'my_ABC_Class'
 283
 284     >>> WordSet.parse('a-command').camel_case()
 285     'ACommand'
 286
 287     >>> WordSet.parse('someIdentifier').lowered().space_separated()
 288     'some identifier'
 289
 290     Slices of the result should return another WordSet.
 291
 292     >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated()
 293     'out_of_context'
 294
 295     >>> WordSet.from_class_name(WordSet()).lowered().space_separated()
 296     'word set'
 297
 298     >>> example = WordSet.parse('figured it out')
 299     >>> example.headless_camel_case()
 300     'figuredItOut'
 301     >>> example.dash_separated()
 302     'figured-it-out'
 303
 304     """
 305
 306     _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))')
 307
 308     def capitalized(self):
 309         return WordSet(word.capitalize() for word in self)
 310
 311     def lowered(self):
 312         return WordSet(word.lower() for word in self)
 313
 314     def camel_case(self):
 315         return ''.join(self.capitalized())
 316
 317     def headless_camel_case(self):
 318         words = iter(self)
 319         first = next(words).lower()
 320         new_words = itertools.chain((first,), WordSet(words).camel_case())
 321         return ''.join(new_words)
 322
 323     def underscore_separated(self):
 324         return '_'.join(self)
 325
 326     def dash_separated(self):
 327         return '-'.join(self)
 328
 329     def space_separated(self):
 330         return ' '.join(self)
 331
 332     def trim_right(self, item):
 333         """
 334         Remove the item from the end of the set.
 335
 336         >>> WordSet.parse('foo bar').trim_right('foo')
 337         ('foo', 'bar')
 338         >>> WordSet.parse('foo bar').trim_right('bar')
 339         ('foo',)
 340         >>> WordSet.parse('').trim_right('bar')
 341         ()
 342         """
 343         return self[:-1] if self and self[-1] == item else self
 344
 345     def trim_left(self, item):
 346         """
 347         Remove the item from the beginning of the set.
 348
 349         >>> WordSet.parse('foo bar').trim_left('foo')
 350         ('bar',)
 351         >>> WordSet.parse('foo bar').trim_left('bar')
 352         ('foo', 'bar')
 353         >>> WordSet.parse('').trim_left('bar')
 354         ()
 355         """
 356         return self[1:] if self and self[0] == item else self
 357
 358     def trim(self, item):
 359         """
 360         >>> WordSet.parse('foo bar').trim('foo')
 361         ('bar',)
 362         """
 363         return self.trim_left(item).trim_right(item)
 364
 365     def __getitem__(self, item):
 366         result = super(WordSet, self).__getitem__(item)
 367         if isinstance(item, slice):
 368             result = WordSet(result)
 369         return result
 370
 371     @classmethod
 372     def parse(cls, identifier):
 373         matches = cls._pattern.finditer(identifier)
 374         return WordSet(match.group(0) for match in matches)
 375
 376     @classmethod
 377     def from_class_name(cls, subject):
 378         return cls.parse(subject.__class__.__name__)
 379
 380
# for backward compatibility: keep the module-level name ``words`` as an
# alias of ``WordSet.parse``.
words = WordSet.parse
 383
 384
 385 def simple_html_strip(s):
 386     r"""
 387     Remove HTML from the string `s`.
 388
 389     >>> str(simple_html_strip(''))
 390     ''
 391
 392     >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise'))
 393     A stormy day in paradise
 394
 395     >>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.'))
 396     Somebody  tell the truth.
 397
 398     >>> print(simple_html_strip('What about<br/>\nmultiple lines?'))
 399     What about
 400     multiple lines?
 401     """
 402     html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL)
 403     texts = (match.group(3) or '' for match in html_stripper.finditer(s))
 404     return ''.join(texts)
 405
 406
 407 class SeparatedValues(str):
 408     """
 409     A string separated by a separator. Overrides __iter__ for getting
 410     the values.
 411
 412     >>> list(SeparatedValues('a,b,c'))
 413     ['a', 'b', 'c']
 414
 415     Whitespace is stripped and empty values are discarded.
 416
 417     >>> list(SeparatedValues(' a,   b   , c,  '))
 418     ['a', 'b', 'c']
 419     """
 420
 421     separator = ','
 422
 423     def __iter__(self):
 424         parts = self.split(self.separator)
 425         return filter(None, (part.strip() for part in parts))
 426
 427
 428 class Stripper:
 429     r"""
 430     Given a series of lines, find the common prefix and strip it from them.
 431
 432     >>> lines = [
 433     ...     'abcdefg\n',
 434     ...     'abc\n',
 435     ...     'abcde\n',
 436     ... ]
 437     >>> res = Stripper.strip_prefix(lines)
 438     >>> res.prefix
 439     'abc'
 440     >>> list(res.lines)
 441     ['defg\n', '\n', 'de\n']
 442
 443     If no prefix is common, nothing should be stripped.
 444
 445     >>> lines = [
 446     ...     'abcd\n',
 447     ...     '1234\n',
 448     ... ]
 449     >>> res = Stripper.strip_prefix(lines)
 450     >>> res.prefix = ''
 451     >>> list(res.lines)
 452     ['abcd\n', '1234\n']
 453     """
 454
 455     def __init__(self, prefix, lines):
 456         self.prefix = prefix
 457         self.lines = map(self, lines)
 458
 459     @classmethod
 460     def strip_prefix(cls, lines):
 461         prefix_lines, lines = itertools.tee(lines)
 462         prefix = functools.reduce(cls.common_prefix, prefix_lines)
 463         return cls(prefix, lines)
 464
 465     def __call__(self, line):
 466         if not self.prefix:
 467             return line
 468         null, prefix, rest = line.partition(self.prefix)
 469         return rest
 470
 471     @staticmethod
 472     def common_prefix(s1, s2):
 473         """
 474         Return the common prefix of two lines.
 475         """
 476         index = min(len(s1), len(s2))
 477         while s1[:index] != s2[:index]:
 478             index -= 1
 479         return s1[:index]
 480
 481
 482 def remove_prefix(text, prefix):
 483     """
 484     Remove the prefix from the text if it exists.
 485
 486     >>> remove_prefix('underwhelming performance', 'underwhelming ')
 487     'performance'
 488
 489     >>> remove_prefix('something special', 'sample')
 490     'something special'
 491     """
 492     null, prefix, rest = text.rpartition(prefix)
 493     return rest
 494
 495
 496 def remove_suffix(text, suffix):
 497     """
 498     Remove the suffix from the text if it exists.
 499
 500     >>> remove_suffix('name.git', '.git')
 501     'name'
 502
 503     >>> remove_suffix('something special', 'sample')
 504     'something special'
 505     """
 506     rest, suffix, null = text.partition(suffix)
 507     return rest
 508
 509
 510 def normalize_newlines(text):
 511     r"""
 512     Replace alternate newlines with the canonical newline.
 513
 514     >>> normalize_newlines('Lorem Ipsum\u2029')
 515     'Lorem Ipsum\n'
 516     >>> normalize_newlines('Lorem Ipsum\r\n')
 517     'Lorem Ipsum\n'
 518     >>> normalize_newlines('Lorem Ipsum\x85')
 519     'Lorem Ipsum\n'
 520     """
 521     newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
 522     pattern = '|'.join(newlines)
 523     return re.sub(pattern, '\n', text)
 524
 525
 526 def _nonblank(str):
 527     return str and not str.startswith('#')
 528
 529
 530 @functools.singledispatch
 531 def yield_lines(iterable):
 532     r"""
 533     Yield valid lines of a string or iterable.
 534
 535     >>> list(yield_lines(''))
 536     []
 537     >>> list(yield_lines(['foo', 'bar']))
 538     ['foo', 'bar']
 539     >>> list(yield_lines('foo\nbar'))
 540     ['foo', 'bar']
 541     >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
 542     ['foo', 'baz #comment']
 543     >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
 544     ['foo', 'bar', 'baz', 'bing']
 545     """
 546     return itertools.chain.from_iterable(map(yield_lines, iterable))
 547
 548
 549 @yield_lines.register(str)
 550 def _(text):
 551     return filter(_nonblank, map(str.strip, text.splitlines()))
 552
 553
 554 def drop_comment(line):
 555     """
 556     Drop comments.
 557
 558     >>> drop_comment('foo # bar')
 559     'foo'
 560
 561     A hash without a space may be in a URL.
 562
 563     >>> drop_comment('http://example.com/foo#bar')
 564     'http://example.com/foo#bar'
 565     """
 566     return line.partition(' #')[0]
 567
 568
 569 def join_continuation(lines):
 570     r"""
 571     Join lines continued by a trailing backslash.
 572
 573     >>> list(join_continuation(['foo \\', 'bar', 'baz']))
 574     ['foobar', 'baz']
 575     >>> list(join_continuation(['foo \\', 'bar', 'baz']))
 576     ['foobar', 'baz']
 577     >>> list(join_continuation(['foo \\', 'bar \\', 'baz']))
 578     ['foobarbaz']
 579
 580     Not sure why, but...
 581     The character preceeding the backslash is also elided.
 582
 583     >>> list(join_continuation(['goo\\', 'dly']))
 584     ['godly']
 585
 586     A terrible idea, but...
 587     If no line is available to continue, suppress the lines.
 588
 589     >>> list(join_continuation(['foo', 'bar\\', 'baz\\']))
 590     ['foo']
 591     """
 592     lines = iter(lines)
 593     for item in lines:
 594         while item.endswith('\\'):
 595             try:
 596                 item = item[:-2].strip() + next(lines)
 597             except StopIteration:
 598                 return
 599         yield item