3 from collections import Counter, defaultdict, deque, abc
4 from collections.abc import Sequence
5 from functools import partial, reduce, wraps
6 from heapq import heapify, heapreplace, heappop
7 from itertools import (
21 from math import exp, factorial, floor, log
22 from queue import Empty, Queue
23 from random import random, randrange, uniform
24 from operator import itemgetter, mul, sub, gt, lt, ge, le
25 from sys import hexversion, maxsize
26 from time import monotonic
28 from .recipes import (
31 UnequalIterablesError,
44 'UnequalIterablesError',
57 'constrained_batches',
62 'distinct_combinations',
63 'distinct_permutations',
66 'duplicates_everseen',
67 'duplicates_justseen',
84 'longest_common_prefix',
126 'substrings_indexes',
141 def chunked(iterable, n, strict=False):
142 """Break *iterable* into lists of length *n*:
144 >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
145 [[1, 2, 3], [4, 5, 6]]
147 By the default, the last yielded list will have fewer than *n* elements
148 if the length of *iterable* is not divisible by *n*:
150 >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
151 [[1, 2, 3], [4, 5, 6], [7, 8]]
153 To use a fill-in value instead, see the :func:`grouper` recipe.
155 If the length of *iterable* is not divisible by *n* and *strict* is
156 ``True``, then ``ValueError`` will be raised before the last
160 iterator = iter(partial(take, n, iter(iterable)), [])
163 raise ValueError('n must not be None when using strict mode.')
166 for chunk in iterator:
168 raise ValueError('iterable is not divisible by n.')
176 def first(iterable, default=_marker):
177 """Return the first item of *iterable*, or *default* if *iterable* is
180 >>> first([0, 1, 2, 3])
182 >>> first([], 'some default')
185 If *default* is not provided and there are no items in the iterable,
186 raise ``ValueError``.
188 :func:`first` is useful when you have a generator of expensive-to-retrieve
189 values and want any arbitrary one. It is marginally shorter than
190 ``next(iter(iterable), default)``.
194 return next(iter(iterable))
195 except StopIteration as e:
196 if default is _marker:
198 'first() was called on an empty iterable, and no '
199 'default value was provided.'
204 def last(iterable, default=_marker):
205 """Return the last item of *iterable*, or *default* if *iterable* is
208 >>> last([0, 1, 2, 3])
210 >>> last([], 'some default')
213 If *default* is not provided and there are no items in the iterable,
214 raise ``ValueError``.
217 if isinstance(iterable, Sequence):
219 # Work around https://bugs.python.org/issue38525
220 elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0):
221 return next(reversed(iterable))
223 return deque(iterable, maxlen=1)[-1]
224 except (IndexError, TypeError, StopIteration):
225 if default is _marker:
227 'last() was called on an empty iterable, and no default was '
233 def nth_or_last(iterable, n, default=_marker):
234 """Return the nth or the last item of *iterable*,
235 or *default* if *iterable* is empty.
237 >>> nth_or_last([0, 1, 2, 3], 2)
239 >>> nth_or_last([0, 1], 2)
241 >>> nth_or_last([], 0, 'some default')
244 If *default* is not provided and there are no items in the iterable,
245 raise ``ValueError``.
247 return last(islice(iterable, n + 1), default=default)
251 """Wrap an iterator to allow lookahead and prepending elements.
253 Call :meth:`peek` on the result to get the value that will be returned
254 by :func:`next`. This won't advance the iterator:
256 >>> p = peekable(['a', 'b'])
262 Pass :meth:`peek` a default value to return that instead of raising
263 ``StopIteration`` when the iterator is exhausted.
269 peekables also offer a :meth:`prepend` method, which "inserts" items
270 at the head of the iterable:
272 >>> p = peekable([1, 2, 3])
273 >>> p.prepend(10, 11, 12)
281 peekables can be indexed. Index 0 is the item that will be returned by
282 :func:`next`, index 1 is the item after that, and so on:
283 The values up to the given index will be cached.
285 >>> p = peekable(['a', 'b', 'c', 'd'])
293 Negative indexes are supported, but be aware that they will cache the
294 remaining items in the source iterator, which may require significant
297 To check whether a peekable is exhausted, check its truth value:
299 >>> p = peekable(['a', 'b'])
300 >>> if p: # peekable has items
303 >>> if not p: # peekable is exhausted
309 def __init__(self, iterable):
310 self._it = iter(iterable)
311 self._cache = deque()
319 except StopIteration:
323 def peek(self, default=_marker):
324 """Return the item that will be next returned from ``next()``.
326 Return ``default`` if there are no items left. If ``default`` is not
327 provided, raise ``StopIteration``.
332 self._cache.append(next(self._it))
333 except StopIteration:
334 if default is _marker:
337 return self._cache[0]
339 def prepend(self, *items):
340 """Stack up items to be the next ones returned from ``next()`` or
341 ``self.peek()``. The items will be returned in
342 first in, first out order::
344 >>> p = peekable([1, 2, 3])
345 >>> p.prepend(10, 11, 12)
351 It is possible, by prepending items, to "resurrect" a peekable that
352 previously raised ``StopIteration``.
356 Traceback (most recent call last):
363 Traceback (most recent call last):
368 self._cache.extendleft(reversed(items))
372 return self._cache.popleft()
374 return next(self._it)
376 def _get_slice(self, index):
377 # Normalize the slice's arguments
378 step = 1 if (index.step is None) else index.step
380 start = 0 if (index.start is None) else index.start
381 stop = maxsize if (index.stop is None) else index.stop
383 start = -1 if (index.start is None) else index.start
384 stop = (-maxsize - 1) if (index.stop is None) else index.stop
386 raise ValueError('slice step cannot be zero')
388 # If either the start or stop index is negative, we'll need to cache
389 # the rest of the iterable in order to slice from the right side.
390 if (start < 0) or (stop < 0):
391 self._cache.extend(self._it)
392 # Otherwise we'll need to find the rightmost index and cache to that
395 n = min(max(start, stop) + 1, maxsize)
396 cache_len = len(self._cache)
398 self._cache.extend(islice(self._it, n - cache_len))
400 return list(self._cache)[index]
402 def __getitem__(self, index):
403 if isinstance(index, slice):
404 return self._get_slice(index)
406 cache_len = len(self._cache)
408 self._cache.extend(self._it)
409 elif index >= cache_len:
410 self._cache.extend(islice(self._it, index + 1 - cache_len))
412 return self._cache[index]
416 """Decorator that automatically advances a PEP-342-style "reverse iterator"
417 to its first yield point so you don't have to call ``next()`` on it
424 ... print('Thing number %s is %s.' % (i, (yield)))
429 Thing number 0 is red.
431 Thing number 1 is fish.
433 Without the decorator, you would have to call ``next(t)`` before
434 ``t.send()`` could be used.
439 def wrapper(*args, **kwargs):
440 gen = func(*args, **kwargs)
448 """Return the number of items in *iterable*.
450 >>> ilen(x for x in range(1000000) if x % 3 == 0)
453 This consumes the iterable, so handle with care.
456 # This approach was selected because benchmarks showed it's likely the
457 # fastest of the known implementations at the time of writing.
458 # See GitHub tracker: #236, #230.
460 deque(zip(iterable, counter), maxlen=0)
464 def iterate(func, start):
465 """Return ``start``, ``func(start)``, ``func(func(start))``, ...
467 >>> from itertools import islice
468 >>> list(islice(iterate(lambda x: 2*x, 1), 10))
469 [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
477 def with_iter(context_manager):
478 """Wrap an iterable in a ``with`` statement, so it closes once exhausted.
480 For example, this will close the file when the iterator is exhausted::
482 upper_lines = (line.upper() for line in with_iter(open('foo')))
484 Any context manager which returns an iterable is a candidate for
488 with context_manager as iterable:
492 def one(iterable, too_short=None, too_long=None):
493 """Return the first item from *iterable*, which is expected to contain only
494 that item. Raise an exception if *iterable* is empty or has more than one
497 :func:`one` is useful for ensuring that an iterable contains only one item.
498 For example, it can be used to retrieve the result of a database query
499 that is expected to return a single row.
501 If *iterable* is empty, ``ValueError`` will be raised. You may specify a
502 different exception with the *too_short* keyword:
505 >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
506 Traceback (most recent call last):
508 ValueError: too many items in iterable (expected 1)'
509 >>> too_short = IndexError('too few items')
510 >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL
511 Traceback (most recent call last):
513 IndexError: too few items
515 Similarly, if *iterable* contains more than one item, ``ValueError`` will
516 be raised. You may specify a different exception with the *too_long*
519 >>> it = ['too', 'many']
520 >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
521 Traceback (most recent call last):
523 ValueError: Expected exactly one item in iterable, but got 'too',
524 'many', and perhaps more.
525 >>> too_long = RuntimeError
526 >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL
527 Traceback (most recent call last):
531 Note that :func:`one` attempts to advance *iterable* twice to ensure there
532 is only one item. See :func:`spy` or :func:`peekable` to check iterable
533 contents less destructively.
539 first_value = next(it)
540 except StopIteration as e:
542 too_short or ValueError('too few items in iterable (expected 1)')
546 second_value = next(it)
547 except StopIteration:
551 'Expected exactly one item in iterable, but got {!r}, {!r}, '
552 'and perhaps more.'.format(first_value, second_value)
554 raise too_long or ValueError(msg)
559 def raise_(exception, *args):
560 raise exception(*args)
563 def strictly_n(iterable, n, too_short=None, too_long=None):
564 """Validate that *iterable* has exactly *n* items and return them if
565 it does. If it has fewer than *n* items, call function *too_short*
566 with those items. If it has more than *n* items, call function
567 *too_long* with the first ``n + 1`` items.
569 >>> iterable = ['a', 'b', 'c', 'd']
571 >>> list(strictly_n(iterable, n))
574 By default, *too_short* and *too_long* are functions that raise
577 >>> list(strictly_n('ab', 3)) # doctest: +IGNORE_EXCEPTION_DETAIL
578 Traceback (most recent call last):
580 ValueError: too few items in iterable (got 2)
582 >>> list(strictly_n('abc', 2)) # doctest: +IGNORE_EXCEPTION_DETAIL
583 Traceback (most recent call last):
585 ValueError: too many items in iterable (got at least 3)
587 You can instead supply functions that do something else.
588 *too_short* will be called with the number of items in *iterable*.
589 *too_long* will be called with `n + 1`.
591 >>> def too_short(item_count):
592 ... raise RuntimeError
593 >>> it = strictly_n('abcd', 6, too_short=too_short)
594 >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL
595 Traceback (most recent call last):
599 >>> def too_long(item_count):
600 ... print('The boss is going to hear about this')
601 >>> it = strictly_n('abcdef', 4, too_long=too_long)
603 The boss is going to hear about this
607 if too_short is None:
608 too_short = lambda item_count: raise_(
610 'Too few items in iterable (got {})'.format(item_count),
614 too_long = lambda item_count: raise_(
616 'Too many items in iterable (got at least {})'.format(item_count),
623 except StopIteration:
631 except StopIteration:
637 def distinct_permutations(iterable, r=None):
638 """Yield successive distinct permutations of the elements in *iterable*.
640 >>> sorted(distinct_permutations([1, 0, 1]))
641 [(0, 1, 1), (1, 0, 1), (1, 1, 0)]
643 Equivalent to ``set(permutations(iterable))``, except duplicates are not
644 generated and thrown away. For larger input sequences this is much more
647 Duplicate permutations arise when there are duplicated elements in the
648 input iterable. The number of items returned is
649 `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
650 items input, and each `x_i` is the count of a distinct item in the input
653 If *r* is given, only the *r*-length permutations are yielded.
655 >>> sorted(distinct_permutations([1, 0, 1], r=2))
656 [(0, 1), (1, 0), (1, 1)]
657 >>> sorted(distinct_permutations(range(3), r=2))
658 [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
661 # Algorithm: https://w.wiki/Qai
664 # Yield the permutation we have
667 # Find the largest index i such that A[i] < A[i + 1]
668 for i in range(size - 2, -1, -1):
671 # If no such index exists, this permutation is the last one
675 # Find the largest index j greater than j such that A[i] < A[j]
676 for j in range(size - 1, i, -1):
680 # Swap the value of A[i] with that of A[j], then reverse the
681 # sequence from A[i + 1] to form the new permutation
682 A[i], A[j] = A[j], A[i]
683 A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1]
685 # Algorithm: modified from the above
687 # Split A into the first r items and the last r items
688 head, tail = A[:r], A[r:]
689 right_head_indexes = range(r - 1, -1, -1)
690 left_tail_indexes = range(len(tail))
693 # Yield the permutation we have
696 # Starting from the right, find the first index of the head with
697 # value smaller than the maximum value of the tail - call it i.
699 for i in right_head_indexes:
706 # Starting from the left, find the first value of the tail
707 # with a value greater than head[i] and swap.
708 for j in left_tail_indexes:
709 if tail[j] > head[i]:
710 head[i], tail[j] = tail[j], head[i]
712 # If we didn't find one, start from the right and find the first
713 # index of the head with a value greater than head[i] and swap.
715 for j in right_head_indexes:
716 if head[j] > head[i]:
717 head[i], head[j] = head[j], head[i]
720 # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
721 tail += head[: i - r : -1] # head[i + 1:][::-1]
723 head[i:], tail[:] = tail[: r - i], tail[r - i :]
725 items = sorted(iterable)
732 return _full(items) if (r == size) else _partial(items, r)
734 return iter(() if r else ((),))
737 def intersperse(e, iterable, n=1):
738 """Intersperse filler element *e* among the items in *iterable*, leaving
739 *n* items between each filler element.
741 >>> list(intersperse('!', [1, 2, 3, 4, 5]))
742 [1, '!', 2, '!', 3, '!', 4, '!', 5]
744 >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
745 [1, 2, None, 3, 4, None, 5]
749 raise ValueError('n must be > 0')
751 # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2...
752 # islice(..., 1, None) -> x_0, e, x_1, e, x_2...
753 return islice(interleave(repeat(e), iterable), 1, None)
755 # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]...
756 # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]...
757 # flatten(...) -> x_0, x_1, e, x_2, x_3...
759 chunks = chunked(iterable, n)
760 return flatten(islice(interleave(filler, chunks), 1, None))
763 def unique_to_each(*iterables):
764 """Return the elements from each of the input iterables that aren't in the
765 other input iterables.
767 For example, suppose you have a set of packages, each with a set of
770 {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}
772 If you remove one package, which dependencies can also be removed?
774 If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
775 associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for
776 ``pkg_2``, and ``D`` is only needed for ``pkg_3``::
778 >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
779 [['A'], ['C'], ['D']]
781 If there are duplicates in one input iterable that aren't in the others
782 they will be duplicated in the output. Input order is preserved::
784 >>> unique_to_each("mississippi", "missouri")
785 [['p', 'p'], ['o', 'u', 'r']]
787 It is assumed that the elements of each iterable are hashable.
790 pool = [list(it) for it in iterables]
791 counts = Counter(chain.from_iterable(map(set, pool)))
792 uniques = {element for element in counts if counts[element] == 1}
793 return [list(filter(uniques.__contains__, it)) for it in pool]
796 def windowed(seq, n, fillvalue=None, step=1):
797 """Return a sliding window of width *n* over the given iterable.
799 >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
800 >>> list(all_windows)
801 [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
803 When the window is larger than the iterable, *fillvalue* is used in place
806 >>> list(windowed([1, 2, 3], 4))
809 Each window will advance in increments of *step*:
811 >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
812 [(1, 2, 3), (3, 4, 5), (5, 6, '!')]
814 To slide into the iterable's items, use :func:`chain` to add filler items
817 >>> iterable = [1, 2, 3, 4]
819 >>> padding = [None] * (n - 1)
820 >>> list(windowed(chain(padding, iterable), 3))
821 [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]
824 raise ValueError('n must be >= 0')
829 raise ValueError('step must be >= 1')
831 window = deque(maxlen=n)
833 for _ in map(window.append, seq):
843 yield tuple(chain(window, repeat(fillvalue, n - size)))
844 elif 0 < i < min(step, n):
845 window += (fillvalue,) * i
849 def substrings(iterable):
850 """Yield all of the substrings of *iterable*.
852 >>> [''.join(s) for s in substrings('more')]
853 ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']
855 Note that non-string iterables can also be subdivided.
857 >>> list(substrings([0, 1, 2]))
858 [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]
861 # The length-1 substrings
863 for item in iter(iterable):
867 item_count = len(seq)
870 for n in range(2, item_count + 1):
871 for i in range(item_count - n + 1):
875 def substrings_indexes(seq, reverse=False):
876 """Yield all substrings and their positions in *seq*
878 The items yielded will be a tuple of the form ``(substr, i, j)``, where
879 ``substr == seq[i:j]``.
881 This function only works for iterables that support slicing, such as
884 >>> for item in substrings_indexes('more'):
897 Set *reverse* to ``True`` to yield the same items in the opposite order.
901 r = range(1, len(seq) + 1)
905 (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1)
910 """Wrap *iterable* and return an object that buckets it iterable into
911 child iterables based on a *key* function.
913 >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
914 >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character
915 >>> sorted(list(s)) # Get the keys
917 >>> a_iterable = s['a']
925 The original iterable will be advanced and its items will be cached until
926 they are used by the child iterables. This may require significant storage.
928 By default, attempting to select a bucket to which no items belong will
929 exhaust the iterable and cache all values.
930 If you specify a *validator* function, selected buckets will instead be
933 >>> from itertools import count
934 >>> it = count(1, 2) # Infinite sequence of odd numbers
935 >>> key = lambda x: x % 10 # Bucket by last digit
936 >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only
937 >>> s = bucket(it, key=key, validator=validator)
945 def __init__(self, iterable, key, validator=None):
946 self._it = iter(iterable)
948 self._cache = defaultdict(deque)
949 self._validator = validator or (lambda x: True)
951 def __contains__(self, value):
952 if not self._validator(value):
956 item = next(self[value])
957 except StopIteration:
960 self._cache[value].appendleft(item)
964 def _get_values(self, value):
966 Helper to yield items from the parent iterator that match *value*.
967 Items that don't match are stored in the local cache as they
971 # If we've cached some items that match the target value, emit
972 # the first one and evict it from the cache.
973 if self._cache[value]:
974 yield self._cache[value].popleft()
975 # Otherwise we need to advance the parent iterator to search for
976 # a matching item, caching the rest.
980 item = next(self._it)
981 except StopIteration:
983 item_value = self._key(item)
984 if item_value == value:
987 elif self._validator(item_value):
988 self._cache[item_value].append(item)
991 for item in self._it:
992 item_value = self._key(item)
993 if self._validator(item_value):
994 self._cache[item_value].append(item)
996 yield from self._cache.keys()
998 def __getitem__(self, value):
999 if not self._validator(value):
1002 return self._get_values(value)
1005 def spy(iterable, n=1):
1006 """Return a 2-tuple with a list containing the first *n* elements of
1007 *iterable*, and an iterator with the same items as *iterable*.
1008 This allows you to "look ahead" at the items in the iterable without
1011 There is one item in the list by default:
1013 >>> iterable = 'abcdefg'
1014 >>> head, iterable = spy(iterable)
1018 ['a', 'b', 'c', 'd', 'e', 'f', 'g']
1020 You may use unpacking to retrieve items instead of lists:
1022 >>> (head,), iterable = spy('abcdefg')
1025 >>> (first, second), iterable = spy('abcdefg', 2)
1031 The number of items requested can be larger than the number of items in
1034 >>> iterable = [1, 2, 3, 4, 5]
1035 >>> head, iterable = spy(iterable, 10)
1045 return head.copy(), chain(head, it)
1048 def interleave(*iterables):
1049 """Return a new iterable yielding from each iterable in turn,
1050 until the shortest is exhausted.
1052 >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
1055 For a version that doesn't terminate after the shortest iterable is
1056 exhausted, see :func:`interleave_longest`.
1059 return chain.from_iterable(zip(*iterables))
1062 def interleave_longest(*iterables):
1063 """Return a new iterable yielding from each iterable in turn,
1064 skipping any that are exhausted.
1066 >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
1067 [1, 4, 6, 2, 5, 7, 3, 8]
1069 This function produces the same output as :func:`roundrobin`, but may
1070 perform better for some inputs (in particular when the number of iterables
1074 i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker))
1075 return (x for x in i if x is not _marker)
1078 def interleave_evenly(iterables, lengths=None):
1080 Interleave multiple iterables so that their elements are evenly distributed
1081 throughout the output sequence.
1083 >>> iterables = [1, 2, 3, 4, 5], ['a', 'b']
1084 >>> list(interleave_evenly(iterables))
1085 [1, 2, 'a', 3, 4, 'b', 5]
1087 >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]]
1088 >>> list(interleave_evenly(iterables))
1089 [1, 6, 4, 2, 7, 3, 8, 5]
1091 This function requires iterables of known length. Iterables without
1092 ``__len__()`` can be used by manually specifying lengths with *lengths*:
1094 >>> from itertools import combinations, repeat
1095 >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']]
1096 >>> lengths = [4 * (4 - 1) // 2, 3]
1097 >>> list(interleave_evenly(iterables, lengths=lengths))
1098 [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c']
1100 Based on Bresenham's algorithm.
1104 lengths = [len(it) for it in iterables]
1107 'Iterable lengths could not be determined automatically. '
1108 'Specify them with the lengths keyword.'
1110 elif len(iterables) != len(lengths):
1111 raise ValueError('Mismatching number of iterables and lengths.')
1115 # sort iterables by length, descending
1116 lengths_permute = sorted(
1117 range(dims), key=lambda i: lengths[i], reverse=True
1119 lengths_desc = [lengths[i] for i in lengths_permute]
1120 iters_desc = [iter(iterables[i]) for i in lengths_permute]
1122 # the longest iterable is the primary one (Bresenham: the longest
1123 # distance along an axis)
1124 delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:]
1125 iter_primary, iters_secondary = iters_desc[0], iters_desc[1:]
1126 errors = [delta_primary // dims] * len(deltas_secondary)
1128 to_yield = sum(lengths)
1130 yield next(iter_primary)
1132 # update errors for each secondary iterable
1133 errors = [e - delta for e, delta in zip(errors, deltas_secondary)]
1135 # those iterables for which the error is negative are yielded
1136 # ("diagonal step" in Bresenham)
1137 for i, e in enumerate(errors):
1139 yield next(iters_secondary[i])
1141 errors[i] += delta_primary
1144 def collapse(iterable, base_type=None, levels=None):
1145 """Flatten an iterable with multiple levels of nesting (e.g., a list of
1146 lists of tuples) into non-iterable types.
1148 >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
1149 >>> list(collapse(iterable))
1152 Binary and text strings are not considered iterable and
1153 will not be collapsed.
1155 To avoid collapsing other types, specify *base_type*:
1157 >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
1158 >>> list(collapse(iterable, base_type=tuple))
1159 ['ab', ('cd', 'ef'), 'gh', 'ij']
1161 Specify *levels* to stop flattening after a certain level:
1163 >>> iterable = [('a', ['b']), ('c', ['d'])]
1164 >>> list(collapse(iterable)) # Fully flattened
1165 ['a', 'b', 'c', 'd']
1166 >>> list(collapse(iterable, levels=1)) # Only one level flattened
1167 ['a', ['b'], 'c', ['d']]
1171 def walk(node, level):
1173 ((levels is not None) and (level > levels))
1174 or isinstance(node, (str, bytes))
1175 or ((base_type is not None) and isinstance(node, base_type))
1187 yield from walk(child, level + 1)
1189 yield from walk(iterable, 0)
1192 def side_effect(func, iterable, chunk_size=None, before=None, after=None):
1193 """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
1194 of items) before yielding the item.
1196 `func` must be a function that takes a single argument. Its return value
1199 *before* and *after* are optional functions that take no arguments. They
1200 will be executed before iteration starts and after it ends, respectively.
1202 `side_effect` can be used for logging, updating progress bars, or anything
1203 that is not functionally "pure."
1205 Emitting a status message:
1207 >>> from more_itertools import consume
1208 >>> func = lambda item: print('Received {}'.format(item))
1209 >>> consume(side_effect(func, range(2)))
1213 Operating on chunks of items:
1216 >>> func = lambda chunk: pair_sums.append(sum(chunk))
1217 >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
1222 Writing to a file-like object:
1224 >>> from io import StringIO
1225 >>> from more_itertools import consume
1227 >>> func = lambda x: print(x, file=f)
1228 >>> before = lambda: print(u'HEADER', file=f)
1230 >>> it = [u'a', u'b', u'c']
1231 >>> consume(side_effect(func, it, before=before, after=after))
1237 if before is not None:
1240 if chunk_size is None:
1241 for item in iterable:
1245 for chunk in chunked(iterable, chunk_size):
1249 if after is not None:
1253 def sliced(seq, n, strict=False):
1254 """Yield slices of length *n* from the sequence *seq*.
1256 >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
1257 [(1, 2, 3), (4, 5, 6)]
1259 By the default, the last yielded slice will have fewer than *n* elements
1260 if the length of *seq* is not divisible by *n*:
1262 >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
1263 [(1, 2, 3), (4, 5, 6), (7, 8)]
1265 If the length of *seq* is not divisible by *n* and *strict* is
1266 ``True``, then ``ValueError`` will be raised before the last
1269 This function will only work for iterables that support slicing.
1270 For non-sliceable iterables, see :func:`chunked`.
1273 iterator = takewhile(len, (seq[i : i + n] for i in count(0, n)))
1277 for _slice in iterator:
1278 if len(_slice) != n:
1279 raise ValueError("seq is not divisible by n.")
1287 def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
1288 """Yield lists of items from *iterable*, where each list is delimited by
1289 an item where callable *pred* returns ``True``.
1291 >>> list(split_at('abcdcba', lambda x: x == 'b'))
1292 [['a'], ['c', 'd', 'c'], ['a']]
1294 >>> list(split_at(range(10), lambda n: n % 2 == 1))
1295 [[0], [2], [4], [6], [8], []]
1297 At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
1298 then there is no limit on the number of splits:
1300 >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
1301 [[0], [2], [4, 5, 6, 7, 8, 9]]
1303 By default, the delimiting items are not included in the output.
1304 The include them, set *keep_separator* to ``True``.
1306 >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
1307 [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
1311 yield list(iterable)
1331 def split_before(iterable, pred, maxsplit=-1):
1332 """Yield lists of items from *iterable*, where each list ends just before
1333 an item for which callable *pred* returns ``True``:
1335 >>> list(split_before('OneTwo', lambda s: s.isupper()))
1336 [['O', 'n', 'e'], ['T', 'w', 'o']]
1338 >>> list(split_before(range(10), lambda n: n % 3 == 0))
1339 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
1341 At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
1342 then there is no limit on the number of splits:
1344 >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
1345 [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
1348 yield list(iterable)
1354 if pred(item) and buf:
1357 yield [item] + list(it)
1366 def split_after(iterable, pred, maxsplit=-1):
1367 """Yield lists of items from *iterable*, where each list ends with an
1368 item where callable *pred* returns ``True``:
1370 >>> list(split_after('one1two2', lambda s: s.isdigit()))
1371 [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
1373 >>> list(split_after(range(10), lambda n: n % 3 == 0))
1374 [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
1376 At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
1377 then there is no limit on the number of splits:
1379 >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
1380 [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
1384 yield list(iterable)
1391 if pred(item) and buf:
1402 def split_when(iterable, pred, maxsplit=-1):
1403 """Split *iterable* into pieces based on the output of *pred*.
1404 *pred* should be a function that takes successive pairs of items and
1405 returns ``True`` if the iterable should be split in between them.
1407 For example, to find runs of increasing numbers, split the iterable when
1408 element ``i`` is larger than element ``i + 1``:
1410 >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
1411 [[1, 2, 3, 3], [2, 5], [2, 4], [2]]
1413 At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
1414 then there is no limit on the number of splits:
1416 >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
1417 ... lambda x, y: x > y, maxsplit=2))
1418 [[1, 2, 3, 3], [2, 5], [2, 4, 2]]
1422 yield list(iterable)
1428 except StopIteration:
1432 for next_item in it:
1433 if pred(cur_item, next_item):
1436 yield [next_item] + list(it)
1441 buf.append(next_item)
1442 cur_item = next_item
1447 def split_into(iterable, sizes):
1448 """Yield a list of sequential items from *iterable* of length 'n' for each
1449 integer 'n' in *sizes*.
1451 >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
1452 [[1], [2, 3], [4, 5, 6]]
1454 If the sum of *sizes* is smaller than the length of *iterable*, then the
1455 remaining items of *iterable* will not be returned.
1457 >>> list(split_into([1,2,3,4,5,6], [2,3]))
1460 If the sum of *sizes* is larger than the length of *iterable*, fewer items
1461 will be returned in the iteration that overruns *iterable* and further
1462 lists will be empty:
1464 >>> list(split_into([1,2,3,4], [1,2,3,4]))
1465 [[1], [2, 3], [4], []]
1467 When a ``None`` object is encountered in *sizes*, the returned list will
1468 contain items up to the end of *iterable* the same way that itertools.slice
1471 >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
1472 [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]
1474 :func:`split_into` can be useful for grouping a series of items where the
1475 sizes of the groups are not uniform. An example would be where in a row
1476 from a table, multiple columns represent elements of the same feature
1477 (e.g. a point represented by x,y,z) but, the format is not the same for
1480 # convert the iterable argument into an iterator so its contents can
1481 # be consumed by islice in case it is a generator
1489 yield list(islice(it, size))
1492 def padded(iterable, fillvalue=None, n=None, next_multiple=False):
1493 """Yield the elements from *iterable*, followed by *fillvalue*, such that
1494 at least *n* items are emitted.
1496 >>> list(padded([1, 2, 3], '?', 5))
1499 If *next_multiple* is ``True``, *fillvalue* will be emitted until the
1500 number of items emitted is a multiple of *n*::
1502 >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
1503 [1, 2, 3, 4, None, None]
1505 If *n* is ``None``, *fillvalue* will be emitted indefinitely.
1510 yield from chain(it, repeat(fillvalue))
1512 raise ValueError('n must be at least 1')
1519 remaining = (n - item_count) % n if next_multiple else n - item_count
1520 for _ in range(remaining):
1524 def repeat_each(iterable, n=2):
1525 """Repeat each element in *iterable* *n* times.
1527 >>> list(repeat_each('ABC', 3))
1528 ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
1530 return chain.from_iterable(map(repeat, iterable, repeat(n)))
1533 def repeat_last(iterable, default=None):
1534 """After the *iterable* is exhausted, keep yielding its last element.
1536 >>> list(islice(repeat_last(range(3)), 5))
1539 If the iterable is empty, yield *default* forever::
1541 >>> list(islice(repeat_last(range(0), 42), 5))
1542 [42, 42, 42, 42, 42]
1546 for item in iterable:
1548 final = default if item is _marker else item
1549 yield from repeat(final)
1552 def distribute(n, iterable):
1553 """Distribute the items from *iterable* among *n* smaller iterables.
1555 >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
1561 If the length of *iterable* is not evenly divisible by *n*, then the
1562 length of the returned iterables will not be identical:
1564 >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
1565 >>> [list(c) for c in children]
1566 [[1, 4, 7], [2, 5], [3, 6]]
1568 If the length of *iterable* is smaller than *n*, then the last returned
1569 iterables will be empty:
1571 >>> children = distribute(5, [1, 2, 3])
1572 >>> [list(c) for c in children]
1573 [[1], [2], [3], [], []]
1575 This function uses :func:`itertools.tee` and may require significant
1576 storage. If you need the order items in the smaller iterables to match the
1577 original iterable, see :func:`divide`.
1581 raise ValueError('n must be at least 1')
1583 children = tee(iterable, n)
1584 return [islice(it, index, None, n) for index, it in enumerate(children)]
1587 def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
1588 """Yield tuples whose elements are offset from *iterable*.
1589 The amount by which the `i`-th item in each tuple is offset is given by
1590 the `i`-th item in *offsets*.
1592 >>> list(stagger([0, 1, 2, 3]))
1593 [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
1594 >>> list(stagger(range(8), offsets=(0, 2, 4)))
1595 [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]
1597 By default, the sequence will end when the final element of a tuple is the
1598 last item in the iterable. To continue until the first element of a tuple
1599 is the last item in the iterable, set *longest* to ``True``::
1601 >>> list(stagger([0, 1, 2, 3], longest=True))
1602 [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
1604 By default, ``None`` will be used to replace offsets beyond the end of the
1605 sequence. Specify *fillvalue* to use some other value.
1608 children = tee(iterable, len(offsets))
1611 *children, offsets=offsets, longest=longest, fillvalue=fillvalue
1615 def zip_equal(*iterables):
1616 """``zip`` the input *iterables* together, but raise
1617 ``UnequalIterablesError`` if they aren't all the same length.
1620 >>> it_2 = iter('abc')
1621 >>> list(zip_equal(it_1, it_2))
1622 [(0, 'a'), (1, 'b'), (2, 'c')]
1625 >>> it_2 = iter('abcd')
1626 >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL
1627 Traceback (most recent call last):
1629 more_itertools.more.UnequalIterablesError: Iterables have different
1633 if hexversion >= 0x30A00A6:
1636 'zip_equal will be removed in a future version of '
1637 'more-itertools. Use the builtin zip function with '
1638 'strict=True instead.'
1643 return _zip_equal(*iterables)
1646 def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
1647 """``zip`` the input *iterables* together, but offset the `i`-th iterable
1648 by the `i`-th item in *offsets*.
1650 >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
1651 [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]
1653 This can be used as a lightweight alternative to SciPy or pandas to analyze
1654 data sets in which some series have a lead or lag relationship.
1656 By default, the sequence will end when the shortest iterable is exhausted.
1657 To continue until the longest iterable is exhausted, set *longest* to
1660 >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
1661 [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
1663 By default, ``None`` will be used to replace offsets beyond the end of the
1664 sequence. Specify *fillvalue* to use some other value.
1667 if len(iterables) != len(offsets):
1668 raise ValueError("Number of iterables and offsets didn't match")
1671 for it, n in zip(iterables, offsets):
1673 staggered.append(chain(repeat(fillvalue, -n), it))
1675 staggered.append(islice(it, n, None))
1677 staggered.append(it)
1680 return zip_longest(*staggered, fillvalue=fillvalue)
1682 return zip(*staggered)
1685 def sort_together(iterables, key_list=(0,), key=None, reverse=False):
1686 """Return the input iterables sorted together, with *key_list* as the
1687 priority for sorting. All iterables are trimmed to the length of the
1690 This can be used like the sorting function in a spreadsheet. If each
1691 iterable represents a column of data, the key list determines which
1692 columns are used for sorting.
1694 By default, all iterables are sorted using the ``0``-th iterable::
1696 >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
1697 >>> sort_together(iterables)
1698 [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]
1700 Set a different key list to sort according to another iterable.
1701 Specifying multiple keys dictates how ties are broken::
1703 >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
1704 >>> sort_together(iterables, key_list=(1, 2))
1705 [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]
1707 To sort by a function of the elements of the iterable, pass a *key*
1708 function. Its arguments are the elements of the iterables corresponding to
1711 >>> names = ('a', 'b', 'c')
1712 >>> lengths = (1, 2, 3)
1713 >>> widths = (5, 2, 1)
1714 >>> def area(length, width):
1715 ... return length * width
1716 >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
1717 [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]
1719 Set *reverse* to ``True`` to sort in descending order.
1721 >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
1722 [(3, 2, 1), ('a', 'b', 'c')]
1726 # if there is no key function, the key argument to sorted is an
1728 key_argument = itemgetter(*key_list)
1730 # if there is a key function, call it with the items at the offsets
1731 # specified by the key function as arguments
1732 key_list = list(key_list)
1733 if len(key_list) == 1:
1734 # if key_list contains a single item, pass the item at that offset
1735 # as the only argument to the key function
1736 key_offset = key_list[0]
1737 key_argument = lambda zipped_items: key(zipped_items[key_offset])
1739 # if key_list contains multiple items, use itemgetter to return a
1740 # tuple of items, which we pass as *args to the key function
1741 get_key_items = itemgetter(*key_list)
1742 key_argument = lambda zipped_items: key(
1743 *get_key_items(zipped_items)
1747 zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse))
1751 def unzip(iterable):
1752 """The inverse of :func:`zip`, this function disaggregates the elements
1753 of the zipped *iterable*.
1755 The ``i``-th iterable contains the ``i``-th element from each element
1756 of the zipped iterable. The first element is used to determine the
1757 length of the remaining elements.
1759 >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
1760 >>> letters, numbers = unzip(iterable)
1762 ['a', 'b', 'c', 'd']
1766 This is similar to using ``zip(*iterable)``, but it avoids reading
1767 *iterable* into memory. Note, however, that this function uses
1768 :func:`itertools.tee` and thus may require significant storage.
1771 head, iterable = spy(iter(iterable))
1773 # empty iterable, e.g. zip([], [], [])
1775 # spy returns a one-length iterable as head
1777 iterables = tee(iterable, len(head))
1784 # basically if we have an iterable like
1785 # iter([(1, 2, 3), (4, 5), (6,)])
1786 # the second unzipped iterable would fail at the third tuple
1787 # since it would try to access tup[1]
1788 # same with the third unzipped iterable and the second tuple
1789 # to support these "improperly zipped" iterables,
1790 # we create a custom itemgetter
1791 # which just stops the unzipped iterables
1792 # at first length mismatch
1797 return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables))
1800 def divide(n, iterable):
1801 """Divide the elements from *iterable* into *n* parts, maintaining
1804 >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
1810 If the length of *iterable* is not evenly divisible by *n*, then the
1811 length of the returned iterables will not be identical:
1813 >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
1814 >>> [list(c) for c in children]
1815 [[1, 2, 3], [4, 5], [6, 7]]
1817 If the length of the iterable is smaller than n, then the last returned
1818 iterables will be empty:
1820 >>> children = divide(5, [1, 2, 3])
1821 >>> [list(c) for c in children]
1822 [[1], [2], [3], [], []]
1824 This function will exhaust the iterable before returning and may require
1825 significant storage. If order is not important, see :func:`distribute`,
1826 which does not first pull the iterable into memory.
1830 raise ValueError('n must be at least 1')
1835 seq = tuple(iterable)
1839 q, r = divmod(len(seq), n)
1843 for i in range(1, n + 1):
1845 stop += q + 1 if i <= r else q
1846 ret.append(iter(seq[start:stop]))
1851 def always_iterable(obj, base_type=(str, bytes)):
1852 """If *obj* is iterable, return an iterator over its items::
1855 >>> list(always_iterable(obj))
1858 If *obj* is not iterable, return a one-item iterable containing *obj*::
1861 >>> list(always_iterable(obj))
1864 If *obj* is ``None``, return an empty iterable:
1867 >>> list(always_iterable(None))
1870 By default, binary and text strings are not considered iterable::
1873 >>> list(always_iterable(obj))
1876 If *base_type* is set, objects for which ``isinstance(obj, base_type)``
1877 returns ``True`` won't be considered iterable.
1880 >>> list(always_iterable(obj)) # Iterate over the dict's keys
1882 >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit
1885 Set *base_type* to ``None`` to avoid any special handling and treat objects
1886 Python considers iterable as iterable:
1889 >>> list(always_iterable(obj, base_type=None))
1895 if (base_type is not None) and isinstance(obj, base_type):
1904 def adjacent(predicate, iterable, distance=1):
1905 """Return an iterable over `(bool, item)` tuples where the `item` is
1906 drawn from *iterable* and the `bool` indicates whether
1907 that item satisfies the *predicate* or is adjacent to an item that does.
1909 For example, to find whether items are adjacent to a ``3``::
1911 >>> list(adjacent(lambda x: x == 3, range(6)))
1912 [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]
1914 Set *distance* to change what counts as adjacent. For example, to find
1915 whether items are two places away from a ``3``:
1917 >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
1918 [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]
1920 This is useful for contextualizing the results of a search function.
1921 For example, a code comparison tool might want to identify lines that
1922 have changed, but also surrounding lines to give the viewer of the diff
1925 The predicate function will only be called once for each item in the
1928 See also :func:`groupby_transform`, which can be used with this function
1929 to group ranges of items with the same `bool` value.
1932 # Allow distance=0 mainly for testing that it reproduces results with map()
1934 raise ValueError('distance must be at least 0')
1936 i1, i2 = tee(iterable)
1937 padding = [False] * distance
1938 selected = chain(padding, map(predicate, i1), padding)
1939 adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1))
1940 return zip(adjacent_to_selected, i2)
1943 def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
1944 """An extension of :func:`itertools.groupby` that can apply transformations
1945 to the grouped data.
1947 * *keyfunc* is a function computing a key value for each item in *iterable*
1948 * *valuefunc* is a function that transforms the individual items from
1949 *iterable* after grouping
1950 * *reducefunc* is a function that transforms each group of items
1952 >>> iterable = 'aAAbBBcCC'
1953 >>> keyfunc = lambda k: k.upper()
1954 >>> valuefunc = lambda v: v.lower()
1955 >>> reducefunc = lambda g: ''.join(g)
1956 >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
1957 [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]
1959 Each optional argument defaults to an identity function if not specified.
1961 :func:`groupby_transform` is useful when grouping elements of an iterable
1962 using a separate iterable as the key. To do this, :func:`zip` the iterables
1963 and pass a *keyfunc* that extracts the first element and a *valuefunc*
1964 that extracts the second element::
1966 >>> from operator import itemgetter
1967 >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
1968 >>> values = 'abcdefghi'
1969 >>> iterable = zip(keys, values)
1970 >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
1971 >>> [(k, ''.join(g)) for k, g in grouper]
1972 [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]
1974 Note that the order of items in the iterable is significant.
1975 Only adjacent items are grouped together, so if you don't want any
1976 duplicate groups, you should sort the iterable by the key function.
1979 ret = groupby(iterable, keyfunc)
1981 ret = ((k, map(valuefunc, g)) for k, g in ret)
1983 ret = ((k, reducefunc(g)) for k, g in ret)
1988 class numeric_range(abc.Sequence, abc.Hashable):
1989 """An extension of the built-in ``range()`` function whose arguments can
1990 be any orderable numeric type.
1992 With only *stop* specified, *start* defaults to ``0`` and *step*
1993 defaults to ``1``. The output items will match the type of *stop*:
1995 >>> list(numeric_range(3.5))
1996 [0.0, 1.0, 2.0, 3.0]
1998 With only *start* and *stop* specified, *step* defaults to ``1``. The
1999 output items will match the type of *start*:
2001 >>> from decimal import Decimal
2002 >>> start = Decimal('2.1')
2003 >>> stop = Decimal('5.1')
2004 >>> list(numeric_range(start, stop))
2005 [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]
2007 With *start*, *stop*, and *step* specified the output items will match
2008 the type of ``start + step``:
2010 >>> from fractions import Fraction
2011 >>> start = Fraction(1, 2) # Start at 1/2
2012 >>> stop = Fraction(5, 2) # End at 5/2
2013 >>> step = Fraction(1, 2) # Count by 1/2
2014 >>> list(numeric_range(start, stop, step))
2015 [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]
2017 If *step* is zero, ``ValueError`` is raised. Negative steps are supported:
2019 >>> list(numeric_range(3, -1, -1.0))
2020 [3.0, 2.0, 1.0, 0.0]
2022 Be aware of the limitations of floating point numbers; the representation
2023 of the yielded numbers may be surprising.
2025 ``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
2026 is a ``datetime.timedelta`` object:
2029 >>> start = datetime.datetime(2019, 1, 1)
2030 >>> stop = datetime.datetime(2019, 1, 3)
2031 >>> step = datetime.timedelta(days=1)
2032 >>> items = iter(numeric_range(start, stop, step))
2034 datetime.datetime(2019, 1, 1, 0, 0)
2036 datetime.datetime(2019, 1, 2, 0, 0)
2040 _EMPTY_HASH = hash(range(0, 0))
2042 def __init__(self, *args):
2045 (self._stop,) = args
2046 self._start = type(self._stop)(0)
2047 self._step = type(self._stop - self._start)(1)
2049 self._start, self._stop = args
2050 self._step = type(self._stop - self._start)(1)
2052 self._start, self._stop, self._step = args
2055 'numeric_range expected at least '
2056 '1 argument, got {}'.format(argc)
2060 'numeric_range expected at most '
2061 '3 arguments, got {}'.format(argc)
2064 self._zero = type(self._step)(0)
2065 if self._step == self._zero:
2066 raise ValueError('numeric_range() arg 3 must not be zero')
2067 self._growing = self._step > self._zero
2072 return self._start < self._stop
2074 return self._start > self._stop
2076 def __contains__(self, elem):
2078 if self._start <= elem < self._stop:
2079 return (elem - self._start) % self._step == self._zero
2081 if self._start >= elem > self._stop:
2082 return (self._start - elem) % (-self._step) == self._zero
2086 def __eq__(self, other):
2087 if isinstance(other, numeric_range):
2088 empty_self = not bool(self)
2089 empty_other = not bool(other)
2090 if empty_self or empty_other:
2091 return empty_self and empty_other # True if both empty
2094 self._start == other._start
2095 and self._step == other._step
2096 and self._get_by_index(-1) == other._get_by_index(-1)
2101 def __getitem__(self, key):
2102 if isinstance(key, int):
2103 return self._get_by_index(key)
2104 elif isinstance(key, slice):
2105 step = self._step if key.step is None else key.step * self._step
2107 if key.start is None or key.start <= -self._len:
2109 elif key.start >= self._len:
2111 else: # -self._len < key.start < self._len
2112 start = self._get_by_index(key.start)
2114 if key.stop is None or key.stop >= self._len:
2116 elif key.stop <= -self._len:
2118 else: # -self._len < key.stop < self._len
2119 stop = self._get_by_index(key.stop)
2121 return numeric_range(start, stop, step)
2124 'numeric range indices must be '
2125 'integers or slices, not {}'.format(type(key).__name__)
2130 return hash((self._start, self._get_by_index(-1), self._step))
2132 return self._EMPTY_HASH
2135 values = (self._start + (n * self._step) for n in count())
2137 return takewhile(partial(gt, self._stop), values)
2139 return takewhile(partial(lt, self._stop), values)
2144 def _init_len(self):
2153 distance = stop - start
2154 if distance <= self._zero:
2156 else: # distance > 0 and step > 0: regular euclidean division
2157 q, r = divmod(distance, step)
2158 self._len = int(q) + int(r != self._zero)
2160 def __reduce__(self):
2161 return numeric_range, (self._start, self._stop, self._step)
2165 return "numeric_range({}, {})".format(
2166 repr(self._start), repr(self._stop)
2169 return "numeric_range({}, {}, {})".format(
2170 repr(self._start), repr(self._stop), repr(self._step)
2173 def __reversed__(self):
2176 self._get_by_index(-1), self._start - self._step, -self._step
2180 def count(self, value):
2181 return int(value in self)
2183 def index(self, value):
2185 if self._start <= value < self._stop:
2186 q, r = divmod(value - self._start, self._step)
2190 if self._start >= value > self._stop:
2191 q, r = divmod(self._start - value, -self._step)
2195 raise ValueError("{} is not in numeric range".format(value))
2197 def _get_by_index(self, i):
2200 if i < 0 or i >= self._len:
2201 raise IndexError("numeric range object index out of range")
2202 return self._start + i * self._step
2205 def count_cycle(iterable, n=None):
2206 """Cycle through the items from *iterable* up to *n* times, yielding
2207 the number of completed cycles along with each item. If *n* is omitted the
2208 process repeats indefinitely.
2210 >>> list(count_cycle('AB', 3))
2211 [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]
2214 iterable = tuple(iterable)
2217 counter = count() if n is None else range(n)
2218 return ((i, item) for i in counter for item in iterable)
2221 def mark_ends(iterable):
2222 """Yield 3-tuples of the form ``(is_first, is_last, item)``.
2224 >>> list(mark_ends('ABC'))
2225 [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]
2227 Use this when looping over an iterable to take special action on its first
2230 >>> iterable = ['Header', 100, 200, 'Footer']
2232 >>> for is_first, is_last, item in mark_ends(iterable):
2234 ... continue # Skip the header
2236 ... continue # Skip the footer
2245 except StopIteration:
2252 yield i == 0, False, a
2254 except StopIteration:
2255 yield i == 0, True, a
2258 def locate(iterable, pred=bool, window_size=None):
2259 """Yield the index of each item in *iterable* for which *pred* returns
2262 *pred* defaults to :func:`bool`, which will select truthy items:
2264 >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
2267 Set *pred* to a custom function to, e.g., find the indexes for a particular
2270 >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
2273 If *window_size* is given, then the *pred* function will be called with
2274 that many items. This enables searching for sub-sequences:
2276 >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
2277 >>> pred = lambda *args: args == (1, 2, 3)
2278 >>> list(locate(iterable, pred=pred, window_size=3))
2281 Use with :func:`seekable` to find indexes and then retrieve the associated
2284 >>> from itertools import count
2285 >>> from more_itertools import seekable
2286 >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
2287 >>> it = seekable(source)
2288 >>> pred = lambda x: x > 100
2289 >>> indexes = locate(it, pred=pred)
2290 >>> i = next(indexes)
2296 if window_size is None:
2297 return compress(count(), map(pred, iterable))
2300 raise ValueError('window size must be at least 1')
2302 it = windowed(iterable, window_size, fillvalue=_marker)
2303 return compress(count(), starmap(pred, it))
2306 def longest_common_prefix(iterables):
2307 """Yield elements of the longest common prefix amongst given *iterables*.
2309 >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
2313 return (c[0] for c in takewhile(all_equal, zip(*iterables)))
2316 def lstrip(iterable, pred):
2317 """Yield the items from *iterable*, but strip any from the beginning
2318 for which *pred* returns ``True``.
2320 For example, to remove a set of items from the start of an iterable:
2322 >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
2323 >>> pred = lambda x: x in {None, False, ''}
2324 >>> list(lstrip(iterable, pred))
2325 [1, 2, None, 3, False, None]
2327 This function is analogous to to :func:`str.lstrip`, and is essentially
2328 an wrapper for :func:`itertools.dropwhile`.
2331 return dropwhile(pred, iterable)
2334 def rstrip(iterable, pred):
2335 """Yield the items from *iterable*, but strip any from the end
2336 for which *pred* returns ``True``.
2338 For example, to remove a set of items from the end of an iterable:
2340 >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
2341 >>> pred = lambda x: x in {None, False, ''}
2342 >>> list(rstrip(iterable, pred))
2343 [None, False, None, 1, 2, None, 3]
2345 This function is analogous to :func:`str.rstrip`.
2349 cache_append = cache.append
2350 cache_clear = cache.clear
2360 def strip(iterable, pred):
2361 """Yield the items from *iterable*, but strip any from the
2362 beginning and end for which *pred* returns ``True``.
2364 For example, to remove a set of items from both ends of an iterable:
2366 >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
2367 >>> pred = lambda x: x in {None, False, ''}
2368 >>> list(strip(iterable, pred))
2371 This function is analogous to :func:`str.strip`.
2374 return rstrip(lstrip(iterable, pred), pred)
2377 class islice_extended:
2378 """An extension of :func:`itertools.islice` that supports negative values
2379 for *stop*, *start*, and *step*.
2381 >>> iterable = iter('abcdefgh')
2382 >>> list(islice_extended(iterable, -4, -1))
2385 Slices with negative values require some caching of *iterable*, but this
2386 function takes care to minimize the amount of memory required.
2388 For example, you can use a negative step with an infinite iterator:
2390 >>> from itertools import count
2391 >>> list(islice_extended(count(), 110, 99, -2))
2392 [110, 108, 106, 104, 102, 100]
2394 You can also use slice notation directly:
2396 >>> iterable = map(str, count())
2397 >>> it = islice_extended(iterable)[10:20:2]
2399 ['10', '12', '14', '16', '18']
2403 def __init__(self, iterable, *args):
2406 self._iterable = _islice_helper(it, slice(*args))
2414 return next(self._iterable)
2416 def __getitem__(self, key):
2417 if isinstance(key, slice):
2418 return islice_extended(_islice_helper(self._iterable, key))
2420 raise TypeError('islice_extended.__getitem__ argument must be a slice')
2423 def _islice_helper(it, s):
2427 raise ValueError('step argument must be a non-zero integer or None.')
2431 start = 0 if (start is None) else start
2434 # Consume all but the last -start items
2435 cache = deque(enumerate(it, 1), maxlen=-start)
2436 len_iter = cache[-1][0] if cache else 0
2438 # Adjust start to be positive
2439 i = max(len_iter + start, 0)
2441 # Adjust stop to be positive
2445 j = min(stop, len_iter)
2447 j = max(len_iter + stop, 0)
2454 for index, item in islice(cache, 0, n, step):
2456 elif (stop is not None) and (stop < 0):
2457 # Advance to the start position
2458 next(islice(it, start, start), None)
2460 # When stop is negative, we have to carry -stop items while
2462 cache = deque(islice(it, -stop), maxlen=-stop)
2464 for index, item in enumerate(it):
2465 cached_item = cache.popleft()
2466 if index % step == 0:
2470 # When both start and stop are positive we have the normal case
2471 yield from islice(it, start, stop, step)
2473 start = -1 if (start is None) else start
2475 if (stop is not None) and (stop < 0):
2476 # Consume all but the last items
2478 cache = deque(enumerate(it, 1), maxlen=n)
2479 len_iter = cache[-1][0] if cache else 0
2481 # If start and stop are both negative they are comparable and
2482 # we can just slice. Otherwise we can adjust start to be negative
2487 i, j = min(start - len_iter, -1), None
2489 for index, item in list(cache)[i:j:step]:
2492 # Advance to the stop position
2493 if stop is not None:
2495 next(islice(it, m, m), None)
2497 # stop is positive, so if start is negative they are not comparable
2498 # and we need the rest of the items.
2502 # stop is None and start is positive, so we just need items up to
2507 # Both stop and start are positive, so they are comparable.
2514 cache = list(islice(it, n))
2516 yield from cache[i::step]
2519 def always_reversible(iterable):
2520 """An extension of :func:`reversed` that supports all iterables, not
2521 just those which implement the ``Reversible`` or ``Sequence`` protocols.
2523 >>> print(*always_reversible(x for x in range(3)))
2526 If the iterable is already reversible, this function returns the
2527 result of :func:`reversed()`. If the iterable is not reversible,
2528 this function will cache the remaining items in the iterable and
2529 yield them in reverse order, which may require significant storage.
2532 return reversed(iterable)
2534 return reversed(list(iterable))
2537 def consecutive_groups(iterable, ordering=lambda x: x):
2538 """Yield groups of consecutive items using :func:`itertools.groupby`.
2539 The *ordering* function determines whether two items are adjacent by
2540 returning their position.
2542 By default, the ordering function is the identity function. This is
2543 suitable for finding runs of numbers:
2545 >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
2546 >>> for group in consecutive_groups(iterable):
2547 ... print(list(group))
2554 For finding runs of adjacent letters, try using the :meth:`index` method
2555 of a string of letters:
2557 >>> from string import ascii_lowercase
2558 >>> iterable = 'abcdfgilmnop'
2559 >>> ordering = ascii_lowercase.index
2560 >>> for group in consecutive_groups(iterable, ordering):
2561 ... print(list(group))
2562 ['a', 'b', 'c', 'd']
2565 ['l', 'm', 'n', 'o', 'p']
2567 Each group of consecutive items is an iterator that shares it source with
2568 *iterable*. When an an output group is advanced, the previous group is
2569 no longer available unless its elements are copied (e.g., into a ``list``).
2571 >>> iterable = [1, 2, 11, 12, 21, 22]
2572 >>> saved_groups = []
2573 >>> for group in consecutive_groups(iterable):
2574 ... saved_groups.append(list(group)) # Copy group elements
2576 [[1, 2], [11, 12], [21, 22]]
2579 for k, g in groupby(
2580 enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
2582 yield map(itemgetter(1), g)
2585 def difference(iterable, func=sub, *, initial=None):
2586 """This function is the inverse of :func:`itertools.accumulate`. By default
2587 it will compute the first difference of *iterable* using
2588 :func:`operator.sub`:
2590 >>> from itertools import accumulate
2591 >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10
2592 >>> list(difference(iterable))
2595 *func* defaults to :func:`operator.sub`, but other functions can be
2596 specified. They will be applied as follows::
2598 A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...
2600 For example, to do progressive division:
2602 >>> iterable = [1, 2, 6, 24, 120]
2603 >>> func = lambda x, y: x // y
2604 >>> list(difference(iterable, func))
2607 If the *initial* keyword is set, the first element will be skipped when
2608 computing successive differences.
2610 >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10)
2611 >>> list(difference(it, initial=10))
2615 a, b = tee(iterable)
2618 except StopIteration:
2621 if initial is not None:
2624 return chain(first, map(func, b, a))
2627 class SequenceView(Sequence):
2628 """Return a read-only view of the sequence object *target*.
2630 :class:`SequenceView` objects are analogous to Python's built-in
2631 "dictionary view" types. They provide a dynamic view of a sequence's items,
2632 meaning that when the sequence updates, so does the view.
2634 >>> seq = ['0', '1', '2']
2635 >>> view = SequenceView(seq)
2637 SequenceView(['0', '1', '2'])
2640 SequenceView(['0', '1', '2', '3'])
2642 Sequence views support indexing, slicing, and length queries. They act
2643 like the underlying sequence, except they don't allow assignment:
2652 Sequence views are useful as an alternative to copying, as they don't
2653 require (much) extra storage.
2657 def __init__(self, target):
2658 if not isinstance(target, Sequence):
2660 self._target = target
2662 def __getitem__(self, index):
2663 return self._target[index]
2666 return len(self._target)
2669 return '{}({})'.format(self.__class__.__name__, repr(self._target))
2673 """Wrap an iterator to allow for seeking backward and forward. This
2674 progressively caches the items in the source iterable so they can be
2677 Call :meth:`seek` with an index to seek to that position in the source
2680 To "reset" an iterator, seek to ``0``:
2682 >>> from itertools import count
2683 >>> it = seekable((str(n) for n in count()))
2684 >>> next(it), next(it), next(it)
2687 >>> next(it), next(it), next(it)
2692 You can also seek forward:
2694 >>> it = seekable((str(n) for n in range(20)))
2698 >>> it.seek(20) # Seeking past the end of the source isn't a problem
2701 >>> it.seek(0) # Resetting works even after hitting the end
2702 >>> next(it), next(it), next(it)
2705 Call :meth:`peek` to look ahead one item without advancing the iterator:
2707 >>> it = seekable('1234')
2711 ['1', '2', '3', '4']
2712 >>> it.peek(default='empty')
2715 Before the iterator is at its end, calling :func:`bool` on it will return
2716 ``True``. After it will return ``False``:
2718 >>> it = seekable('5678')
2722 ['5', '6', '7', '8']
2726 You may view the contents of the cache with the :meth:`elements` method.
2727 That returns a :class:`SequenceView`, a view that updates automatically:
2729 >>> it = seekable((str(n) for n in range(10)))
2730 >>> next(it), next(it), next(it)
2732 >>> elements = it.elements()
2734 SequenceView(['0', '1', '2'])
2738 SequenceView(['0', '1', '2', '3'])
2740 By default, the cache grows as the source iterable progresses, so beware of
2741 wrapping very large or infinite iterables. Supply *maxlen* to limit the
2742 size of the cache (this of course limits how far back you can seek).
2744 >>> from itertools import count
2745 >>> it = seekable((str(n) for n in count()), maxlen=2)
2746 >>> next(it), next(it), next(it), next(it)
2747 ('0', '1', '2', '3')
2748 >>> list(it.elements())
2751 >>> next(it), next(it), next(it), next(it)
2752 ('2', '3', '4', '5')
2758 def __init__(self, iterable, maxlen=None):
2759 self._source = iter(iterable)
2763 self._cache = deque([], maxlen)
2770 if self._index is not None:
2772 item = self._cache[self._index]
2779 item = next(self._source)
2780 self._cache.append(item)
2786 except StopIteration:
2790 def peek(self, default=_marker):
2793 except StopIteration:
2794 if default is _marker:
2797 if self._index is None:
2798 self._index = len(self._cache)
2803 return SequenceView(self._cache)
2805 def seek(self, index):
2807 remainder = index - len(self._cache)
2809 consume(self, remainder)
2814 :func:`run_length.encode` compresses an iterable with run-length encoding.
2815 It yields groups of repeated items with the count of how many times they
2818 >>> uncompressed = 'abbcccdddd'
2819 >>> list(run_length.encode(uncompressed))
2820 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
2822 :func:`run_length.decode` decompresses an iterable that was previously
2823 compressed with run-length encoding. It yields the items of the
2824 decompressed iterable:
2826 >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
2827 >>> list(run_length.decode(compressed))
2828 ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']
2833 def encode(iterable):
2834 return ((k, ilen(g)) for k, g in groupby(iterable))
2837 def decode(iterable):
2838 return chain.from_iterable(repeat(k, n) for k, n in iterable)
2841 def exactly_n(iterable, n, predicate=bool):
2842 """Return ``True`` if exactly ``n`` items in the iterable are ``True``
2843 according to the *predicate* function.
2845 >>> exactly_n([True, True, False], 2)
2847 >>> exactly_n([True, True, False], 1)
2849 >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
2852 The iterable will be advanced until ``n + 1`` truthy items are encountered,
2853 so avoid calling it on infinite iterables.
2856 return len(take(n + 1, filter(predicate, iterable))) == n
2859 def circular_shifts(iterable):
2860 """Return a list of circular shifts of *iterable*.
2862 >>> circular_shifts(range(4))
2863 [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
2865 lst = list(iterable)
2866 return take(len(lst), windowed(cycle(lst), len(lst)))
2869 def make_decorator(wrapping_func, result_index=0):
2870 """Return a decorator version of *wrapping_func*, which is a function that
2871 modifies an iterable. *result_index* is the position in that function's
2872 signature where the iterable goes.
2874 This lets you use itertools on the "production end," i.e. at function
2875 definition. This can augment what the function returns without changing the
2878 For example, to produce a decorator version of :func:`chunked`:
2880 >>> from more_itertools import chunked
2881 >>> chunker = make_decorator(chunked, result_index=0)
2883 ... def iter_range(n):
2884 ... return iter(range(n))
2886 >>> list(iter_range(9))
2887 [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
2889 To only allow truthy items to be returned:
2891 >>> truth_serum = make_decorator(filter, result_index=1)
2892 >>> @truth_serum(bool)
2893 ... def boolean_test():
2894 ... return [0, 1, '', ' ', False, True]
2896 >>> list(boolean_test())
2899 The :func:`peekable` and :func:`seekable` wrappers make for practical
2902 >>> from more_itertools import peekable
2903 >>> peekable_function = make_decorator(peekable)
2904 >>> @peekable_function()
2905 ... def str_range(*args):
2906 ... return (str(x) for x in range(*args))
2908 >>> it = str_range(1, 20, 2)
2909 >>> next(it), next(it), next(it)
2917 # See https://sites.google.com/site/bbayles/index/decorator_factory for
2918 # notes on how this works.
2919 def decorator(*wrapping_args, **wrapping_kwargs):
2920 def outer_wrapper(f):
2921 def inner_wrapper(*args, **kwargs):
2922 result = f(*args, **kwargs)
2923 wrapping_args_ = list(wrapping_args)
2924 wrapping_args_.insert(result_index, result)
2925 return wrapping_func(*wrapping_args_, **wrapping_kwargs)
2927 return inner_wrapper
2929 return outer_wrapper
2934 def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
2935 """Return a dictionary that maps the items in *iterable* to categories
2936 defined by *keyfunc*, transforms them with *valuefunc*, and
2937 then summarizes them by category with *reducefunc*.
2939 *valuefunc* defaults to the identity function if it is unspecified.
2940 If *reducefunc* is unspecified, no summarization takes place:
2942 >>> keyfunc = lambda x: x.upper()
2943 >>> result = map_reduce('abbccc', keyfunc)
2944 >>> sorted(result.items())
2945 [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]
2947 Specifying *valuefunc* transforms the categorized items:
2949 >>> keyfunc = lambda x: x.upper()
2950 >>> valuefunc = lambda x: 1
2951 >>> result = map_reduce('abbccc', keyfunc, valuefunc)
2952 >>> sorted(result.items())
2953 [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]
2955 Specifying *reducefunc* summarizes the categorized items:
2957 >>> keyfunc = lambda x: x.upper()
2958 >>> valuefunc = lambda x: 1
2959 >>> reducefunc = sum
2960 >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
2961 >>> sorted(result.items())
2962 [('A', 1), ('B', 2), ('C', 3)]
2964 You may want to filter the input iterable before applying the map/reduce
2967 >>> all_items = range(30)
2968 >>> items = [x for x in all_items if 10 <= x <= 20] # Filter
2969 >>> keyfunc = lambda x: x % 2 # Evens map to 0; odds to 1
2970 >>> categories = map_reduce(items, keyfunc=keyfunc)
2971 >>> sorted(categories.items())
2972 [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
2973 >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
2974 >>> sorted(summaries.items())
2977 Note that all items in the iterable are gathered into a list before the
2978 summarization step, which may require significant storage.
2980 The returned object is a :obj:`collections.defaultdict` with the
2981 ``default_factory`` set to ``None``, such that it behaves like a normal
2985 valuefunc = (lambda x: x) if (valuefunc is None) else valuefunc
2987 ret = defaultdict(list)
2988 for item in iterable:
2990 value = valuefunc(item)
2991 ret[key].append(value)
2993 if reducefunc is not None:
2994 for key, value_list in ret.items():
2995 ret[key] = reducefunc(value_list)
2997 ret.default_factory = None
3001 def rlocate(iterable, pred=bool, window_size=None):
3002 """Yield the index of each item in *iterable* for which *pred* returns
3003 ``True``, starting from the right and moving left.
3005 *pred* defaults to :func:`bool`, which will select truthy items:
3007 >>> list(rlocate([0, 1, 1, 0, 1, 0, 0])) # Truthy at 1, 2, and 4
3010 Set *pred* to a custom function to, e.g., find the indexes for a particular
3013 >>> iterable = iter('abcb')
3014 >>> pred = lambda x: x == 'b'
3015 >>> list(rlocate(iterable, pred))
3018 If *window_size* is given, then the *pred* function will be called with
3019 that many items. This enables searching for sub-sequences:
3021 >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
3022 >>> pred = lambda *args: args == (1, 2, 3)
3023 >>> list(rlocate(iterable, pred=pred, window_size=3))
3026 Beware, this function won't return anything for infinite iterables.
3027 If *iterable* is reversible, ``rlocate`` will reverse it and search from
3028 the right. Otherwise, it will search from the left and return the results
3031 See :func:`locate` to for other example applications.
3034 if window_size is None:
3036 len_iter = len(iterable)
3037 return (len_iter - i - 1 for i in locate(reversed(iterable), pred))
3041 return reversed(list(locate(iterable, pred, window_size)))
3044 def replace(iterable, pred, substitutes, count=None, window_size=1):
3045 """Yield the items from *iterable*, replacing the items for which *pred*
3046 returns ``True`` with the items from the iterable *substitutes*.
3048 >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
3049 >>> pred = lambda x: x == 0
3050 >>> substitutes = (2, 3)
3051 >>> list(replace(iterable, pred, substitutes))
3052 [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]
3054 If *count* is given, the number of replacements will be limited:
3056 >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
3057 >>> pred = lambda x: x == 0
3058 >>> substitutes = [None]
3059 >>> list(replace(iterable, pred, substitutes, count=2))
3060 [1, 1, None, 1, 1, None, 1, 1, 0]
3062 Use *window_size* to control the number of items passed as arguments to
3063 *pred*. This allows for locating and replacing subsequences.
3065 >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
3067 >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred
3068 >>> substitutes = [3, 4] # Splice in these items
3069 >>> list(replace(iterable, pred, substitutes, window_size=window_size))
3074 raise ValueError('window_size must be at least 1')
3076 # Save the substitutes iterable, since it's used more than once
3077 substitutes = tuple(substitutes)
3079 # Add padding such that the number of windows matches the length of the
3081 it = chain(iterable, [_marker] * (window_size - 1))
3082 windows = windowed(it, window_size)
3086 # If the current window matches our predicate (and we haven't hit
3087 # our maximum number of replacements), splice in the substitutes
3088 # and then consume the following windows that overlap with this one.
3089 # For example, if the iterable is (0, 1, 2, 3, 4...)
3090 # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
3091 # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2)
3093 if (count is None) or (n < count):
3095 yield from substitutes
3096 consume(windows, window_size - 1)
3099 # If there was no match (or we've reached the replacement limit),
3100 # yield the first item from the window.
3101 if w and (w[0] is not _marker):
3105 def partitions(iterable):
3106 """Yield all possible order-preserving partitions of *iterable*.
3108 >>> iterable = 'abc'
3109 >>> for part in partitions(iterable):
3110 ... print([''.join(p) for p in part])
3116 This is unrelated to :func:`partition`.
3119 sequence = list(iterable)
3121 for i in powerset(range(1, n)):
3122 yield [sequence[i:j] for i, j in zip((0,) + i, i + (n,))]
3125 def set_partitions(iterable, k=None):
3127 Yield the set partitions of *iterable* into *k* parts. Set partitions are
3128 not order-preserving.
3130 >>> iterable = 'abc'
3131 >>> for part in set_partitions(iterable, 2):
3132 ... print([''.join(p) for p in part])
3138 If *k* is not given, every set partition is generated.
3140 >>> iterable = 'abc'
3141 >>> for part in set_partitions(iterable):
3142 ... print([''.join(p) for p in part])
3155 "Can't partition in a negative or zero number of groups"
3160 def set_partitions_helper(L, k):
3165 yield [[s] for s in L]
3168 for p in set_partitions_helper(M, k - 1):
3170 for p in set_partitions_helper(M, k):
3171 for i in range(len(p)):
3172 yield p[:i] + [[e] + p[i]] + p[i + 1 :]
3175 for k in range(1, n + 1):
3176 yield from set_partitions_helper(L, k)
3178 yield from set_partitions_helper(L, k)
3183 Yield items from *iterable* until *limit_seconds* have passed.
3184 If the time limit expires before all items have been yielded, the
3185 ``timed_out`` parameter will be set to ``True``.
3187 >>> from time import sleep
3188 >>> def generator():
3193 >>> iterable = time_limited(0.1, generator())
3196 >>> iterable.timed_out
3199 Note that the time is checked before each item is yielded, and iteration
3200 stops if the time elapsed is greater than *limit_seconds*. If your time
3201 limit is 1 second, but it takes 2 seconds to generate the first item from
3202 the iterable, the function will run for 2 seconds and not yield anything.
3206 def __init__(self, limit_seconds, iterable):
3207 if limit_seconds < 0:
3208 raise ValueError('limit_seconds must be positive')
3209 self.limit_seconds = limit_seconds
3210 self._iterable = iter(iterable)
3211 self._start_time = monotonic()
3212 self.timed_out = False
3218 item = next(self._iterable)
3219 if monotonic() - self._start_time > self.limit_seconds:
3220 self.timed_out = True
3226 def only(iterable, default=None, too_long=None):
3227 """If *iterable* has only one item, return it.
3228 If it has zero items, return *default*.
3229 If it has more than one item, raise the exception given by *too_long*,
3230 which is ``ValueError`` by default.
3232 >>> only([], default='missing')
3236 >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL
3237 Traceback (most recent call last):
3239 ValueError: Expected exactly one item in iterable, but got 1, 2,
3241 >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL
3242 Traceback (most recent call last):
3246 Note that :func:`only` attempts to advance *iterable* twice to ensure there
3247 is only one item. See :func:`spy` or :func:`peekable` to check
3248 iterable contents less destructively.
3251 first_value = next(it, default)
3254 second_value = next(it)
3255 except StopIteration:
3259 'Expected exactly one item in iterable, but got {!r}, {!r}, '
3260 'and perhaps more.'.format(first_value, second_value)
3262 raise too_long or ValueError(msg)
3268 def __init__(self, iterable, n):
3269 self._it = islice(iterable, n)
3270 self._cache = deque()
3272 def fill_cache(self):
3273 self._cache.extend(self._it)
3280 return next(self._it)
3281 except StopIteration:
3283 return self._cache.popleft()
3288 def ichunked(iterable, n):
3289 """Break *iterable* into sub-iterables with *n* elements each.
3290 :func:`ichunked` is like :func:`chunked`, but it yields iterables
3293 If the sub-iterables are read in order, the elements of *iterable*
3294 won't be stored in memory.
3295 If they are read out of order, :func:`itertools.tee` is used to cache
3296 elements as necessary.
3298 >>> from itertools import count
3299 >>> all_chunks = ichunked(count(), 4)
3300 >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
3301 >>> list(c_2) # c_1's elements have been cached; c_3's haven't been
3309 source = peekable(iter(iterable))
3310 ichunk_marker = object()
3312 # Check to see whether we're at the end of the source iterable
3313 item = source.peek(ichunk_marker)
3314 if item is ichunk_marker:
3317 chunk = _IChunk(source, n)
3320 # Advance the source iterable and fill previous chunk's cache
3324 def iequals(*iterables):
3325 """Return ``True`` if all given *iterables* are equal to each other,
3326 which means that they contain the same elements in the same order.
3328 The function is useful for comparing iterables of different data types
3329 or iterables that do not support equality checks.
3331 >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
3334 >>> iequals("abc", "acb")
3337 Not to be confused with :func:`all_equals`, which checks whether all
3338 elements of iterable are equal to each other.
3341 return all(map(all_equal, zip_longest(*iterables, fillvalue=object())))
3344 def distinct_combinations(iterable, r):
3345 """Yield the distinct combinations of *r* items taken from *iterable*.
3347 >>> list(distinct_combinations([0, 0, 1], 2))
3350 Equivalent to ``set(combinations(iterable))``, except duplicates are not
3351 generated and thrown away. For larger input sequences this is much more
3356 raise ValueError('r must be non-negative')
3360 pool = tuple(iterable)
3361 generators = [unique_everseen(enumerate(pool), key=itemgetter(1))]
3362 current_combo = [None] * r
3366 cur_idx, p = next(generators[-1])
3367 except StopIteration:
3371 current_combo[level] = p
3373 yield tuple(current_combo)
3377 enumerate(pool[cur_idx + 1 :], cur_idx + 1),
3384 def filter_except(validator, iterable, *exceptions):
3385 """Yield the items from *iterable* for which the *validator* function does
3386 not raise one of the specified *exceptions*.
3388 *validator* is called for each item in *iterable*.
3389 It should be a function that accepts one argument and raises an exception
3390 if that item is not valid.
3392 >>> iterable = ['1', '2', 'three', '4', None]
3393 >>> list(filter_except(int, iterable, ValueError, TypeError))
3396 If an exception other than one given by *exceptions* is raised by
3397 *validator*, it is raised like normal.
3399 for item in iterable:
3408 def map_except(function, iterable, *exceptions):
3409 """Transform each item from *iterable* with *function* and yield the
3410 result, unless *function* raises one of the specified *exceptions*.
3412 *function* is called to transform each item in *iterable*.
3413 It should accept one argument.
3415 >>> iterable = ['1', '2', 'three', '4', None]
3416 >>> list(map_except(int, iterable, ValueError, TypeError))
3419 If an exception other than one given by *exceptions* is raised by
3420 *function*, it is raised like normal.
3422 for item in iterable:
3424 yield function(item)
3429 def map_if(iterable, pred, func, func_else=lambda x: x):
3430 """Evaluate each item from *iterable* using *pred*. If the result is
3431 equivalent to ``True``, transform the item with *func* and yield it.
3432 Otherwise, transform the item with *func_else* and yield it.
3434 *pred*, *func*, and *func_else* should each be functions that accept
3435 one argument. By default, *func_else* is the identity function.
3437 >>> from math import sqrt
3438 >>> iterable = list(range(-5, 5))
3440 [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
3441 >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
3442 [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
3443 >>> list(map_if(iterable, lambda x: x >= 0,
3444 ... lambda x: f'{sqrt(x):.2f}', lambda x: None))
3445 [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
3447 for item in iterable:
3448 yield func(item) if pred(item) else func_else(item)
3451 def _sample_unweighted(iterable, k):
3452 # Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li:
3453 # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".
3455 # Fill up the reservoir (collection of samples) with the first `k` samples
3456 reservoir = take(k, iterable)
3458 # Generate random number that's the largest in a sample of k U(0,1) numbers
3459 # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic
3460 W = exp(log(random()) / k)
3462 # The number of elements to skip before changing the reservoir is a random
3463 # number with a geometric distribution. Sample it using random() and logs.
3464 next_index = k + floor(log(random()) / log(1 - W))
3466 for index, element in enumerate(iterable, k):
3468 if index == next_index:
3469 reservoir[randrange(k)] = element
3470 # The new W is the largest in a sample of k U(0, `old_W`) numbers
3471 W *= exp(log(random()) / k)
3472 next_index += floor(log(random()) / log(1 - W)) + 1
3477 def _sample_weighted(iterable, k, weights):
3478 # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
3479 # "Weighted random sampling with a reservoir".
3481 # Log-transform for numerical stability for weights that are small/large
3482 weight_keys = (log(random()) / weight for weight in weights)
3484 # Fill up the reservoir (collection of samples) with the first `k`
3485 # weight-keys and elements, then heapify the list.
3486 reservoir = take(k, zip(weight_keys, iterable))
3489 # The number of jumps before changing the reservoir is a random variable
3490 # with an exponential distribution. Sample it using random() and logs.
3491 smallest_weight_key, _ = reservoir[0]
3492 weights_to_skip = log(random()) / smallest_weight_key
3494 for weight, element in zip(weights, iterable):
3495 if weight >= weights_to_skip:
3496 # The notation here is consistent with the paper, but we store
3497 # the weight-keys in log-space for better numerical stability.
3498 smallest_weight_key, _ = reservoir[0]
3499 t_w = exp(weight * smallest_weight_key)
3500 r_2 = uniform(t_w, 1) # generate U(t_w, 1)
3501 weight_key = log(r_2) / weight
3502 heapreplace(reservoir, (weight_key, element))
3503 smallest_weight_key, _ = reservoir[0]
3504 weights_to_skip = log(random()) / smallest_weight_key
3506 weights_to_skip -= weight
3508 # Equivalent to [element for weight_key, element in sorted(reservoir)]
3509 return [heappop(reservoir)[1] for _ in range(k)]
3512 def sample(iterable, k, weights=None):
3513 """Return a *k*-length list of elements chosen (without replacement)
3514 from the *iterable*. Like :func:`random.sample`, but works on iterables
3517 >>> iterable = range(100)
3518 >>> sample(iterable, 5) # doctest: +SKIP
3521 An iterable with *weights* may also be given:
3523 >>> iterable = range(100)
3524 >>> weights = (i * i + 1 for i in range(100))
3525 >>> sampled = sample(iterable, 5, weights=weights) # doctest: +SKIP
3526 [79, 67, 74, 66, 78]
3528 The algorithm can also be used to generate weighted random permutations.
3529 The relative weight of each item determines the probability that it
3530 appears late in the permutation.
3532 >>> data = "abcdefgh"
3533 >>> weights = range(1, len(data) + 1)
3534 >>> sample(data, k=len(data), weights=weights) # doctest: +SKIP
3535 ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f']
3540 iterable = iter(iterable)
3542 return _sample_unweighted(iterable, k)
3544 weights = iter(weights)
3545 return _sample_weighted(iterable, k, weights)
3548 def is_sorted(iterable, key=None, reverse=False, strict=False):
3549 """Returns ``True`` if the items of iterable are in sorted order, and
3550 ``False`` otherwise. *key* and *reverse* have the same meaning that they do
3551 in the built-in :func:`sorted` function.
3553 >>> is_sorted(['1', '2', '3', '4', '5'], key=int)
3555 >>> is_sorted([5, 4, 3, 1, 2], reverse=True)
3558 If *strict*, tests for strict sorting, that is, returns ``False`` if equal
3561 >>> is_sorted([1, 2, 2])
3563 >>> is_sorted([1, 2, 2], strict=True)
3566 The function returns ``False`` after encountering the first out-of-order
3567 item. If there are no out-of-order items, the iterable is exhausted.
3570 compare = (le if reverse else ge) if strict else (lt if reverse else gt)
3571 it = iterable if key is None else map(key, iterable)
3572 return not any(starmap(compare, pairwise(it)))
3575 class AbortThread(BaseException):
3579 class callback_iter:
3580 """Convert a function that uses callbacks to an iterator.
3582 Let *func* be a function that takes a `callback` keyword argument.
3585 >>> def func(callback=None):
3586 ... for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
3592 Use ``with callback_iter(func)`` to get an iterator over the parameters
3593 that are delivered to the callback.
3595 >>> with callback_iter(func) as it:
3596 ... for args, kwargs in it:
3602 The function will be called in a background thread. The ``done`` property
3603 indicates whether it has completed execution.
3608 If it completes successfully, its return value will be available
3609 in the ``result`` property.
3616 * If the function uses some keyword argument besides ``callback``, supply
3618 * If it finished executing, but raised an exception, accessing the
3619 ``result`` property will raise the same exception.
3620 * If it hasn't finished executing, accessing the ``result``
3621 property from within the ``with`` block will raise ``RuntimeError``.
3622 * If it hasn't finished executing, accessing the ``result`` property from
3623 outside the ``with`` block will raise a
3624 ``more_itertools.AbortThread`` exception.
3625 * Provide *wait_seconds* to adjust how frequently the it is polled for
3630 def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
3632 self._callback_kwd = callback_kwd
3633 self._aborted = False
3635 self._wait_seconds = wait_seconds
3636 # Lazily import concurrent.future
3637 self._executor = __import__(
3638 ).futures.__import__("concurrent.futures").futures.ThreadPoolExecutor(max_workers=1)
3639 self._iterator = self._reader()
3641 def __enter__(self):
3644 def __exit__(self, exc_type, exc_value, traceback):
3645 self._aborted = True
3646 self._executor.shutdown()
3652 return next(self._iterator)
3656 if self._future is None:
3658 return self._future.done()
3663 raise RuntimeError('Function has not yet completed')
3665 return self._future.result()
3670 def callback(*args, **kwargs):
3672 raise AbortThread('canceled by user')
3674 q.put((args, kwargs))
3676 self._future = self._executor.submit(
3677 self._func, **{self._callback_kwd: callback}
3682 item = q.get(timeout=self._wait_seconds)
3689 if self._future.done():
3695 item = q.get_nowait()
3700 remaining.append(item)
3702 yield from remaining
3705 def windowed_complete(iterable, n):
3707 Yield ``(beginning, middle, end)`` tuples, where:
3709 * Each ``middle`` has *n* items from *iterable*
3710 * Each ``beginning`` has the items before the ones in ``middle``
3711 * Each ``end`` has the items after the ones in ``middle``
3713 >>> iterable = range(7)
3715 >>> for beginning, middle, end in windowed_complete(iterable, n):
3716 ... print(beginning, middle, end)
3717 () (0, 1, 2) (3, 4, 5, 6)
3718 (0,) (1, 2, 3) (4, 5, 6)
3719 (0, 1) (2, 3, 4) (5, 6)
3720 (0, 1, 2) (3, 4, 5) (6,)
3721 (0, 1, 2, 3) (4, 5, 6) ()
3723 Note that *n* must be at least 0 and most equal to the length of
3726 This function will exhaust the iterable and may require significant
3730 raise ValueError('n must be >= 0')
3732 seq = tuple(iterable)
3736 raise ValueError('n must be <= len(seq)')
3738 for i in range(size - n + 1):
3740 middle = seq[i : i + n]
3742 yield beginning, middle, end
3745 def all_unique(iterable, key=None):
3747 Returns ``True`` if all the elements of *iterable* are unique (no two
3748 elements are equal).
3750 >>> all_unique('ABCB')
3753 If a *key* function is specified, it will be used to make comparisons.
3755 >>> all_unique('ABCb')
3757 >>> all_unique('ABCb', str.lower)
3760 The function returns as soon as the first non-unique element is
3761 encountered. Iterables with a mix of hashable and unhashable items can
3762 be used, but the function will be slower for unhashable items.
3765 seenset_add = seenset.add
3767 seenlist_add = seenlist.append
3768 for element in map(key, iterable) if key else iterable:
3770 if element in seenset:
3772 seenset_add(element)
3774 if element in seenlist:
3776 seenlist_add(element)
3780 def nth_product(index, *args):
3781 """Equivalent to ``list(product(*args))[index]``.
3783 The products of *args* can be ordered lexicographically.
3784 :func:`nth_product` computes the product at sort position *index* without
3785 computing the previous products.
3787 >>> nth_product(8, range(2), range(2), range(2), range(2))
3790 ``IndexError`` will be raised if the given *index* is invalid.
3792 pools = list(map(tuple, reversed(args)))
3793 ns = list(map(len, pools))
3800 if not 0 <= index < c:
3804 for pool, n in zip(pools, ns):
3805 result.append(pool[index % n])
3808 return tuple(reversed(result))
3811 def nth_permutation(iterable, r, index):
3812 """Equivalent to ``list(permutations(iterable, r))[index]```
3814 The subsequences of *iterable* that are of length *r* where order is
3815 important can be ordered lexicographically. :func:`nth_permutation`
3816 computes the subsequence at sort position *index* directly, without
3817 computing the previous subsequences.
3819 >>> nth_permutation('ghijk', 2, 5)
3822 ``ValueError`` will be raised If *r* is negative or greater than the length
3824 ``IndexError`` will be raised if the given *index* is invalid.
3826 pool = list(iterable)
3829 if r is None or r == n:
3830 r, c = n, factorial(n)
3831 elif not 0 <= r < n:
3834 c = factorial(n) // factorial(n - r)
3839 if not 0 <= index < c:
3846 q = index * factorial(n) // c if r < n else index
3847 for d in range(1, n + 1):
3854 return tuple(map(pool.pop, result))
3857 def value_chain(*args):
3858 """Yield all arguments passed to the function in the same order in which
3859 they were passed. If an argument itself is iterable then iterate over its
3862 >>> list(value_chain(1, 2, 3, [4, 5, 6]))
3865 Binary and text strings are not considered iterable and are emitted
3868 >>> list(value_chain('12', '34', ['56', '78']))
3869 ['12', '34', '56', '78']
3872 Multiple levels of nesting are not flattened.
3876 if isinstance(value, (str, bytes)):
3885 def product_index(element, *args):
3886 """Equivalent to ``list(product(*args)).index(element)``
3888 The products of *args* can be ordered lexicographically.
3889 :func:`product_index` computes the first index of *element* without
3890 computing the previous products.
3892 >>> product_index([8, 2], range(10), range(5))
3895 ``ValueError`` will be raised if the given *element* isn't in the product
3900 for x, pool in zip_longest(element, args, fillvalue=_marker):
3901 if x is _marker or pool is _marker:
3902 raise ValueError('element is not a product of args')
3905 index = index * len(pool) + pool.index(x)
3910 def combination_index(element, iterable):
3911 """Equivalent to ``list(combinations(iterable, r)).index(element)``
3913 The subsequences of *iterable* that are of length *r* can be ordered
3914 lexicographically. :func:`combination_index` computes the index of the
3915 first *element*, without computing the previous combinations.
3917 >>> combination_index('adf', 'abcdefg')
3920 ``ValueError`` will be raised if the given *element* isn't one of the
3921 combinations of *iterable*.
3923 element = enumerate(element)
3924 k, y = next(element, (None, None))
3929 pool = enumerate(iterable)
3933 tmp, y = next(element, (None, None))
3939 raise ValueError('element is not a combination of iterable')
3941 n, _ = last(pool, default=(n, None))
3943 # Python versions below 3.8 don't have math.comb
3945 for i, j in enumerate(reversed(indexes), start=1):
3948 index += factorial(j) // (factorial(i) * factorial(j - i))
3950 return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index
3953 def permutation_index(element, iterable):
3954 """Equivalent to ``list(permutations(iterable, r)).index(element)```
3956 The subsequences of *iterable* that are of length *r* where order is
3957 important can be ordered lexicographically. :func:`permutation_index`
3958 computes the index of the first *element* directly, without computing
3959 the previous permutations.
3961 >>> permutation_index([1, 3, 2], range(5))
3964 ``ValueError`` will be raised if the given *element* isn't one of the
3965 permutations of *iterable*.
3968 pool = list(iterable)
3969 for i, x in zip(range(len(pool), -1, -1), element):
3971 index = index * i + r
3978 """Wrap *iterable* and keep a count of how many items have been consumed.
3980 The ``items_seen`` attribute starts at ``0`` and increments as the iterable
3983 >>> iterable = map(str, range(10))
3984 >>> it = countable(iterable)
3987 >>> next(it), next(it)
3990 ['2', '3', '4', '5', '6', '7', '8', '9']
3995 def __init__(self, iterable):
3996 self._it = iter(iterable)
4003 item = next(self._it)
4004 self.items_seen += 1
4009 def chunked_even(iterable, n):
4010 """Break *iterable* into lists of approximately length *n*.
4011 Items are distributed such the lengths of the lists differ by at most
4014 >>> iterable = [1, 2, 3, 4, 5, 6, 7]
4016 >>> list(chunked_even(iterable, n)) # List lengths: 3, 2, 2
4017 [[1, 2, 3], [4, 5], [6, 7]]
4018 >>> list(chunked(iterable, n)) # List lengths: 3, 3, 1
4019 [[1, 2, 3], [4, 5, 6], [7]]
4023 len_method = getattr(iterable, '__len__', None)
4025 if len_method is None:
4026 return _chunked_even_online(iterable, n)
4028 return _chunked_even_finite(iterable, len_method(), n)
4031 def _chunked_even_online(iterable, n):
4033 maxbuf = n + (n - 2) * (n - 1)
4036 if len(buffer) == maxbuf:
4039 yield from _chunked_even_finite(buffer, len(buffer), n)
4042 def _chunked_even_finite(iterable, N, n):
4046 # Lists are either size `full_size <= n` or `partial_size = full_size - 1`
4048 num_lists = q + (1 if r > 0 else 0)
4049 q, r = divmod(N, num_lists)
4050 full_size = q + (1 if r > 0 else 0)
4051 partial_size = full_size - 1
4052 num_full = N - partial_size * num_lists
4053 num_partial = num_lists - num_full
4056 iterator = iter(iterable)
4058 # Yield num_full lists of full_size
4061 if len(buffer) == full_size:
4068 # Yield num_partial lists of partial_size
4071 if len(buffer) == partial_size:
4077 def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False):
4078 """A version of :func:`zip` that "broadcasts" any scalar
4079 (i.e., non-iterable) items into output tuples.
4081 >>> iterable_1 = [1, 2, 3]
4082 >>> iterable_2 = ['a', 'b', 'c']
4084 >>> list(zip_broadcast(iterable_1, iterable_2, scalar))
4085 [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]
4087 The *scalar_types* keyword argument determines what types are considered
4088 scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to
4089 treat strings and byte strings as iterable:
4091 >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None))
4092 [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')]
4094 If the *strict* keyword argument is ``True``, then
4095 ``UnequalIterablesError`` will be raised if any of the iterables have
4100 if scalar_types and isinstance(obj, scalar_types):
4113 iterables, iterable_positions = [], []
4114 scalars, scalar_positions = [], []
4115 for i, obj in enumerate(objects):
4118 scalar_positions.append(i)
4120 iterables.append(iter(obj))
4121 iterable_positions.append(i)
4123 if len(scalars) == size:
4124 yield tuple(objects)
4127 zipper = _zip_equal if strict else zip
4128 for item in zipper(*iterables):
4129 new_item = [None] * size
4131 for i, elem in zip(iterable_positions, item):
4134 for i, elem in zip(scalar_positions, scalars):
4137 yield tuple(new_item)
4140 def unique_in_window(iterable, n, key=None):
4141 """Yield the items from *iterable* that haven't been seen recently.
4142 *n* is the size of the lookback window.
4144 >>> iterable = [0, 1, 0, 2, 3, 0]
4146 >>> list(unique_in_window(iterable, n))
4149 The *key* function, if provided, will be used to determine uniqueness:
4151 >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
4152 ['a', 'b', 'c', 'd', 'a']
4154 The items in *iterable* must be hashable.
4158 raise ValueError('n must be greater than 0')
4160 window = deque(maxlen=n)
4162 use_key = key is not None
4164 for item in iterable:
4165 k = key(item) if use_key else item
4169 if len(uniques) == n:
4170 uniques.discard(window[0])
4178 def duplicates_everseen(iterable, key=None):
4179 """Yield duplicate elements after their first appearance.
4181 >>> list(duplicates_everseen('mississippi'))
4182 ['s', 'i', 's', 's', 'i', 'p', 'i']
4183 >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
4184 ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']
4186 This function is analagous to :func:`unique_everseen` and is subject to
4187 the same performance considerations.
4192 use_key = key is not None
4194 for element in iterable:
4195 k = key(element) if use_key else element
4197 if k not in seen_set:
4202 if k not in seen_list:
4208 def duplicates_justseen(iterable, key=None):
4209 """Yields serially-duplicate elements after their first appearance.
4211 >>> list(duplicates_justseen('mississippi'))
4213 >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
4214 ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']
4216 This function is analagous to :func:`unique_justseen`.
4221 lambda group_tuple: islice_extended(group_tuple[1])[1:],
4222 groupby(iterable, key),
4227 def minmax(iterable_or_value, *others, key=None, default=_marker):
4228 """Returns both the smallest and largest items in an iterable
4229 or the largest of two or more arguments.
4231 >>> minmax([3, 1, 5])
4237 If a *key* function is provided, it will be used to transform the input
4238 items for comparison.
4240 >>> minmax([5, 30], key=str) # '30' sorts before '5'
4243 If a *default* value is provided, it will be returned if there are no
4246 >>> minmax([], default=(0, 0))
4249 Otherwise ``ValueError`` is raised.
4251 This function is based on the
4252 `recipe <http://code.activestate.com/recipes/577916/>`__ by
4253 Raymond Hettinger and takes care to minimize the number of comparisons
4256 iterable = (iterable_or_value, *others) if others else iterable_or_value
4262 except StopIteration as e:
4263 if default is _marker:
4265 '`minmax()` argument is an empty iterable. '
4266 'Provide a `default` value to suppress this error.'
4270 # Different branches depending on the presence of key. This saves a lot
4271 # of unimportant copies which would slow the "key=None" branch
4272 # significantly down.
4274 for x, y in zip_longest(it, it, fillvalue=lo):
4283 lo_key = hi_key = key(lo)
4285 for x, y in zip_longest(it, it, fillvalue=lo):
4287 x_key, y_key = key(x), key(y)
4290 x, y, x_key, y_key = y, x, y_key, x_key
4292 lo, lo_key = x, x_key
4294 hi, hi_key = y, y_key
4299 def constrained_batches(
4300 iterable, max_size, max_count=None, get_len=len, strict=True
4302 """Yield batches of items from *iterable* with a combined size limited by
4305 >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
4306 >>> list(constrained_batches(iterable, 10))
4307 [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')]
4309 If a *max_count* is supplied, the number of items per batch is also
4312 >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
4313 >>> list(constrained_batches(iterable, 10, max_count = 2))
4314 [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)]
4316 If a *get_len* function is supplied, use that instead of :func:`len` to
4317 determine item size.
4319 If *strict* is ``True``, raise ``ValueError`` if any single item is bigger
4320 than *max_size*. Otherwise, allow single items to exceed *max_size*.
4323 raise ValueError('maximum size must be greater than zero')
4328 for item in iterable:
4329 item_len = get_len(item)
4330 if strict and item_len > max_size:
4331 raise ValueError('item size exceeds maximum size')
4333 reached_count = batch_count == max_count
4334 reached_size = item_len + batch_size > max_size
4335 if batch_count and (reached_size or reached_count):
4342 batch_size += item_len