GH-98363: Have batched() return tuples (GH-100118)
This commit is contained in:
parent
41d4ac9da3
commit
35cc0ea736
@ -52,7 +52,7 @@ Iterator Arguments Results
|
||||
Iterator Arguments Results Example
|
||||
============================ ============================ ================================================= =============================================================
|
||||
:func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
|
||||
:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
|
||||
:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) --> ABC DEF G``
|
||||
:func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F``
|
||||
:func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F``
|
||||
:func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
|
||||
@ -166,11 +166,11 @@ loops that truncate the stream.
|
||||
|
||||
.. function:: batched(iterable, n)
|
||||
|
||||
Batch data from the *iterable* into lists of length *n*. The last
|
||||
Batch data from the *iterable* into tuples of length *n*. The last
|
||||
batch may be shorter than *n*.
|
||||
|
||||
Loops over the input iterable and accumulates data into lists up to
|
||||
size *n*. The input is consumed lazily, just enough to fill a list.
|
||||
Loops over the input iterable and accumulates data into tuples up to
|
||||
size *n*. The input is consumed lazily, just enough to fill a batch.
|
||||
The result is yielded as soon as the batch is full or when the input
|
||||
iterable is exhausted:
|
||||
|
||||
@ -179,14 +179,14 @@ loops that truncate the stream.
|
||||
>>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet']
|
||||
>>> unflattened = list(batched(flattened_data, 2))
|
||||
>>> unflattened
|
||||
[['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']]
|
||||
[('roses', 'red'), ('violets', 'blue'), ('sugar', 'sweet')]
|
||||
|
||||
>>> for batch in batched('ABCDEFG', 3):
|
||||
... print(batch)
|
||||
...
|
||||
['A', 'B', 'C']
|
||||
['D', 'E', 'F']
|
||||
['G']
|
||||
('A', 'B', 'C')
|
||||
('D', 'E', 'F')
|
||||
('G',)
|
||||
|
||||
Roughly equivalent to::
|
||||
|
||||
@ -195,7 +195,7 @@ loops that truncate the stream.
|
||||
if n < 1:
|
||||
raise ValueError('n must be at least one')
|
||||
it = iter(iterable)
|
||||
while (batch := list(islice(it, n))):
|
||||
while (batch := tuple(islice(it, n))):
|
||||
yield batch
|
||||
|
||||
.. versionadded:: 3.12
|
||||
|
@ -161,11 +161,11 @@ class TestBasicOps(unittest.TestCase):
|
||||
|
||||
def test_batched(self):
|
||||
self.assertEqual(list(batched('ABCDEFG', 3)),
|
||||
[['A', 'B', 'C'], ['D', 'E', 'F'], ['G']])
|
||||
[('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)])
|
||||
self.assertEqual(list(batched('ABCDEFG', 2)),
|
||||
[['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']])
|
||||
[('A', 'B'), ('C', 'D'), ('E', 'F'), ('G',)])
|
||||
self.assertEqual(list(batched('ABCDEFG', 1)),
|
||||
[['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']])
|
||||
[('A',), ('B',), ('C',), ('D',), ('E',), ('F',), ('G',)])
|
||||
|
||||
with self.assertRaises(TypeError): # Too few arguments
|
||||
list(batched('ABCDEFG'))
|
||||
@ -188,8 +188,8 @@ class TestBasicOps(unittest.TestCase):
|
||||
with self.subTest(s=s, n=n, batches=batches):
|
||||
# Order is preserved and no data is lost
|
||||
self.assertEqual(''.join(chain(*batches)), s)
|
||||
# Each batch is an exact list
|
||||
self.assertTrue(all(type(batch) is list for batch in batches))
|
||||
# Each batch is an exact tuple
|
||||
self.assertTrue(all(type(batch) is tuple for batch in batches))
|
||||
# Every batch except the last is of size n
|
||||
if batches:
|
||||
last_batch = batches.pop()
|
||||
@ -1809,12 +1809,12 @@ class TestPurePythonRoughEquivalents(unittest.TestCase):
|
||||
|
||||
def test_batched_recipe(self):
|
||||
def batched_recipe(iterable, n):
|
||||
"Batch data into lists of length n. The last batch may be shorter."
|
||||
"Batch data into tuples of length n. The last batch may be shorter."
|
||||
# batched('ABCDEFG', 3) --> ABC DEF G
|
||||
if n < 1:
|
||||
raise ValueError('n must be at least one')
|
||||
it = iter(iterable)
|
||||
while (batch := list(islice(it, n))):
|
||||
while (batch := tuple(islice(it, n))):
|
||||
yield batch
|
||||
|
||||
for iterable, n in product(
|
||||
@ -2087,7 +2087,7 @@ class TestVariousIteratorArgs(unittest.TestCase):
|
||||
|
||||
def test_batched(self):
|
||||
s = 'abcde'
|
||||
r = [['a', 'b'], ['c', 'd'], ['e']]
|
||||
r = [('a', 'b'), ('c', 'd'), ('e',)]
|
||||
n = 2
|
||||
for g in (G, I, Ig, L, R):
|
||||
with self.subTest(g=g):
|
||||
|
14
Modules/clinic/itertoolsmodule.c.h
generated
14
Modules/clinic/itertoolsmodule.c.h
generated
@ -12,19 +12,19 @@ PyDoc_STRVAR(batched_new__doc__,
|
||||
"batched(iterable, n)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Batch data into lists of length n. The last batch may be shorter than n.\n"
|
||||
"Batch data into tuples of length n. The last batch may be shorter than n.\n"
|
||||
"\n"
|
||||
"Loops over the input iterable and accumulates data into lists\n"
|
||||
"Loops over the input iterable and accumulates data into tuples\n"
|
||||
"up to size n. The input is consumed lazily, just enough to\n"
|
||||
"fill a list. The result is yielded as soon as a batch is full\n"
|
||||
"fill a batch. The result is yielded as soon as a batch is full\n"
|
||||
"or when the input iterable is exhausted.\n"
|
||||
"\n"
|
||||
" >>> for batch in batched(\'ABCDEFG\', 3):\n"
|
||||
" ... print(batch)\n"
|
||||
" ...\n"
|
||||
" [\'A\', \'B\', \'C\']\n"
|
||||
" [\'D\', \'E\', \'F\']\n"
|
||||
" [\'G\']");
|
||||
" (\'A\', \'B\', \'C\')\n"
|
||||
" (\'D\', \'E\', \'F\')\n"
|
||||
" (\'G\',)");
|
||||
|
||||
static PyObject *
|
||||
batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n);
|
||||
@ -913,4 +913,4 @@ skip_optional_pos:
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=0229ebd72962f130 input=a9049054013a1b77]*/
|
||||
|
@ -56,11 +56,13 @@ static PyTypeObject pairwise_type;
|
||||
/* batched object ************************************************************/
|
||||
|
||||
/* Note: The built-in zip() function includes a "strict" argument
|
||||
that is needed because that function can silently truncate data
|
||||
and there is no easy way for a user to detect that condition.
|
||||
The same reasoning does not apply to batched() which never drops
|
||||
data. Instead, it produces a shorter list which can be handled
|
||||
as the user sees fit.
|
||||
that was needed because that function would silently truncate data,
|
||||
and there was no easy way for a user to detect the data loss.
|
||||
The same reasoning does not apply to batched() which never drops data.
|
||||
Instead, batched() produces a shorter tuple which can be handled
|
||||
as the user sees fit. If requested, it would be reasonable to add
|
||||
"fillvalue" support, which has demonstrated its value in zip_longest().
|
||||
For now, the API is kept simple and clean.
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
@ -74,25 +76,25 @@ typedef struct {
|
||||
itertools.batched.__new__ as batched_new
|
||||
iterable: object
|
||||
n: Py_ssize_t
|
||||
Batch data into lists of length n. The last batch may be shorter than n.
|
||||
Batch data into tuples of length n. The last batch may be shorter than n.
|
||||
|
||||
Loops over the input iterable and accumulates data into lists
|
||||
Loops over the input iterable and accumulates data into tuples
|
||||
up to size n. The input is consumed lazily, just enough to
|
||||
fill a list. The result is yielded as soon as a batch is full
|
||||
fill a batch. The result is yielded as soon as a batch is full
|
||||
or when the input iterable is exhausted.
|
||||
|
||||
>>> for batch in batched('ABCDEFG', 3):
|
||||
... print(batch)
|
||||
...
|
||||
['A', 'B', 'C']
|
||||
['D', 'E', 'F']
|
||||
['G']
|
||||
('A', 'B', 'C')
|
||||
('D', 'E', 'F')
|
||||
('G',)
|
||||
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n)
|
||||
/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/
|
||||
/*[clinic end generated code: output=7ebc954d655371b6 input=ffd70726927c5129]*/
|
||||
{
|
||||
PyObject *it;
|
||||
batchedobject *bo;
|
||||
@ -150,12 +152,12 @@ batched_next(batchedobject *bo)
|
||||
if (it == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result = PyList_New(n);
|
||||
result = PyTuple_New(n);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
iternextfunc iternext = *Py_TYPE(it)->tp_iternext;
|
||||
PyObject **items = _PyList_ITEMS(result);
|
||||
PyObject **items = _PyTuple_ITEMS(result);
|
||||
for (i=0 ; i < n ; i++) {
|
||||
item = iternext(it);
|
||||
if (item == NULL) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user