GH-98363: Have batched() return tuples (GH-100118)

Raymond Hettinger 2022-12-08 15:08:16 -06:00 committed by GitHub
parent 41d4ac9da3
commit 35cc0ea736
4 changed files with 40 additions and 38 deletions

Doc/library/itertools.rst

@@ -52,7 +52,7 @@ Iterator Arguments Results
 Iterator Arguments Results Example
 ============================ ============================ ================================================= =============================================================
 :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
-:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
+:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
 :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F``
 :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F``
 :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
@@ -166,11 +166,11 @@ loops that truncate the stream.
 .. function:: batched(iterable, n)
-Batch data from the *iterable* into lists of length *n*. The last
+Batch data from the *iterable* into tuples of length *n*. The last
 batch may be shorter than *n*.
-Loops over the input iterable and accumulates data into lists up to
-size *n*. The input is consumed lazily, just enough to fill a list.
+Loops over the input iterable and accumulates data into tuples up to
+size *n*. The input is consumed lazily, just enough to fill a batch.
 The result is yielded as soon as the batch is full or when the input
 iterable is exhausted:
@@ -179,14 +179,14 @@ loops that truncate the stream.
 >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet']
 >>> unflattened = list(batched(flattened_data, 2))
 >>> unflattened
-[['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']]
+[('roses', 'red'), ('violets', 'blue'), ('sugar', 'sweet')]
 >>> for batch in batched('ABCDEFG', 3):
 ... print(batch)
 ...
-['A', 'B', 'C']
-['D', 'E', 'F']
-['G']
+('A', 'B', 'C')
+('D', 'E', 'F')
+('G',)
 Roughly equivalent to::
@@ -195,7 +195,7 @@ loops that truncate the stream.
 if n < 1:
 raise ValueError('n must be at least one')
 it = iter(iterable)
-while (batch := list(islice(it, n))):
+while (batch := tuple(islice(it, n))):
 yield batch
 .. versionadded:: 3.12
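For illustration, a minimal self-contained sketch built from the recipe above, showing the tuple-valued batches (requires Python 3.8+ for the walrus operator; the accelerated itertools.batched itself is new in 3.12):

    from itertools import islice

    def batched(iterable, n):
        # Pure-Python recipe from the documentation above:
        # yields tuples of length n, with a possibly shorter final tuple.
        if n < 1:
            raise ValueError('n must be at least one')
        it = iter(iterable)
        while (batch := tuple(islice(it, n))):
            yield batch

    print(list(batched('ABCDEFG', 3)))
    # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]

    # Tuples unpack cleanly into fixed-size records:
    for noun, adjective in batched(['roses', 'red', 'violets', 'blue'], 2):
        print(noun, adjective)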

Lib/test/test_itertools.py

@@ -161,11 +161,11 @@ class TestBasicOps(unittest.TestCase):
 def test_batched(self):
 self.assertEqual(list(batched('ABCDEFG', 3)),
-[['A', 'B', 'C'], ['D', 'E', 'F'], ['G']])
+[('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)])
 self.assertEqual(list(batched('ABCDEFG', 2)),
-[['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']])
+[('A', 'B'), ('C', 'D'), ('E', 'F'), ('G',)])
 self.assertEqual(list(batched('ABCDEFG', 1)),
-[['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']])
+[('A',), ('B',), ('C',), ('D',), ('E',), ('F',), ('G',)])
 with self.assertRaises(TypeError): # Too few arguments
 list(batched('ABCDEFG'))
@@ -188,8 +188,8 @@ class TestBasicOps(unittest.TestCase):
 with self.subTest(s=s, n=n, batches=batches):
 # Order is preserved and no data is lost
 self.assertEqual(''.join(chain(*batches)), s)
-# Each batch is an exact list
-self.assertTrue(all(type(batch) is list for batch in batches))
+# Each batch is an exact tuple
+self.assertTrue(all(type(batch) is tuple for batch in batches))
 # All but the last batch is of size n
 if batches:
 last_batch = batches.pop()
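Outside of unittest, the invariants this test asserts can be restated roughly as follows (a sketch assuming Python 3.12, where itertools.batched is available):

    from itertools import batched, chain

    s, n = 'ABCDEFG', 3
    batches = list(batched(s, n))

    # Order is preserved and no data is lost.
    assert ''.join(chain(*batches)) == s

    # Every batch is an exact tuple (not a list, not a subclass).
    assert all(type(batch) is tuple for batch in batches)

    # All batches except possibly the last have length n.
    *full, last = batches
    assert all(len(batch) == n for batch in full)
    assert 1 <= len(last) <= n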
@@ -1809,12 +1809,12 @@ class TestPurePythonRoughEquivalents(unittest.TestCase):
 def test_batched_recipe(self):
 def batched_recipe(iterable, n):
-"Batch data into lists of length n. The last batch may be shorter."
+"Batch data into tuples of length n. The last batch may be shorter."
 # batched('ABCDEFG', 3) --> ABC DEF G
 if n < 1:
 raise ValueError('n must be at least one')
 it = iter(iterable)
-while (batch := list(islice(it, n))):
+while (batch := tuple(islice(it, n))):
 yield batch
 for iterable, n in product(
@@ -2087,7 +2087,7 @@ class TestVariousIteratorArgs(unittest.TestCase):
 def test_batched(self):
 s = 'abcde'
-r = [['a', 'b'], ['c', 'd'], ['e']]
+r = [('a', 'b'), ('c', 'd'), ('e',)]
 n = 2
 for g in (G, I, Ig, L, R):
 with self.subTest(g=g):

Modules/clinic/itertoolsmodule.c.h

@@ -12,19 +12,19 @@ PyDoc_STRVAR(batched_new__doc__,
 "batched(iterable, n)\n"
 "--\n"
 "\n"
-"Batch data into lists of length n. The last batch may be shorter than n.\n"
+"Batch data into tuples of length n. The last batch may be shorter than n.\n"
 "\n"
-"Loops over the input iterable and accumulates data into lists\n"
+"Loops over the input iterable and accumulates data into tuples\n"
 "up to size n. The input is consumed lazily, just enough to\n"
-"fill a list. The result is yielded as soon as a batch is full\n"
+"fill a batch. The result is yielded as soon as a batch is full\n"
 "or when the input iterable is exhausted.\n"
 "\n"
 " >>> for batch in batched(\'ABCDEFG\', 3):\n"
 " ... print(batch)\n"
 " ...\n"
-" [\'A\', \'B\', \'C\']\n"
-" [\'D\', \'E\', \'F\']\n"
-" [\'G\']");
+" (\'A\', \'B\', \'C\')\n"
+" (\'D\', \'E\', \'F\')\n"
+" (\'G\',)");
 static PyObject *
 batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n);
@@ -913,4 +913,4 @@ skip_optional_pos:
 exit:
 return return_value;
 }
-/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=0229ebd72962f130 input=a9049054013a1b77]*/

Modules/itertoolsmodule.c

@@ -56,11 +56,13 @@ static PyTypeObject pairwise_type;
 /* batched object ************************************************************/
 /* Note: The built-in zip() function includes a "strict" argument
-that is needed because that function can silently truncate data
-and there is no easy way for a user to detect that condition.
-The same reasoning does not apply to batched() which never drops
-data. Instead, it produces a shorter list which can be handled
-as the user sees fit.
+that was needed because that function would silently truncate data,
+and there was no easy way for a user to detect the data loss.
+The same reasoning does not apply to batched() which never drops data.
+Instead, batched() produces a shorter tuple which can be handled
+as the user sees fit. If requested, it would be reasonable to add
+"fillvalue" support which had demonstrated value in zip_longest().
+For now, the API is kept simple and clean.
 */
 typedef struct {
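The rewritten comment above leaves fill behavior to the caller. If padding is wanted, it can be layered on in user code; here is a hypothetical padded_batched helper (not part of this commit or the stdlib) sketching that, assuming Python 3.12 for itertools.batched:

    from itertools import batched

    def padded_batched(iterable, n, fillvalue=None):
        # Pad a short final batch out to length n, in the spirit of
        # zip_longest's fillvalue; batched() itself never does this.
        for batch in batched(iterable, n):
            if len(batch) < n:
                batch += (fillvalue,) * (n - len(batch))
            yield batch

    print(list(padded_batched('ABCDEFG', 3, fillvalue='-')))
    # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', '-', '-')]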
@@ -74,25 +76,25 @@ typedef struct {
 itertools.batched.__new__ as batched_new
 iterable: object
 n: Py_ssize_t
-Batch data into lists of length n. The last batch may be shorter than n.
+Batch data into tuples of length n. The last batch may be shorter than n.
-Loops over the input iterable and accumulates data into lists
+Loops over the input iterable and accumulates data into tuples
 up to size n. The input is consumed lazily, just enough to
-fill a list. The result is yielded as soon as a batch is full
+fill a batch. The result is yielded as soon as a batch is full
 or when the input iterable is exhausted.
 >>> for batch in batched('ABCDEFG', 3):
 ... print(batch)
 ...
-['A', 'B', 'C']
-['D', 'E', 'F']
-['G']
+('A', 'B', 'C')
+('D', 'E', 'F')
+('G',)
 [clinic start generated code]*/
 static PyObject *
 batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n)
-/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/
+/*[clinic end generated code: output=7ebc954d655371b6 input=ffd70726927c5129]*/
 {
 PyObject *it;
 batchedobject *bo;
@@ -150,12 +152,12 @@ batched_next(batchedobject *bo)
 if (it == NULL) {
 return NULL;
 }
-result = PyList_New(n);
+result = PyTuple_New(n);
 if (result == NULL) {
 return NULL;
 }
 iternextfunc iternext = *Py_TYPE(it)->tp_iternext;
-PyObject **items = _PyList_ITEMS(result);
+PyObject **items = _PyTuple_ITEMS(result);
 for (i=0 ; i < n ; i++) {
 item = iternext(it);
 if (item == NULL) {