- Thanks to Scott David Daniels, a subtle bug in how the zlib

extension implemented flush() was fixed.  Scott also rewrote the
  zlib test suite using the unittest module.  (SF bug #640230 and
  patch #678531.)

Backport candidate I think.
This commit is contained in:
Guido van Rossum 2003-02-03 20:45:52 +00:00
parent 94c30c0124
commit 7d9ea5013f
5 changed files with 487 additions and 181 deletions

View File

@ -1,14 +0,0 @@
test_zlib
0xe5c1a120 0x43b6aa94
0xbd602f7 0xbd602f7
expecting Bad compression level
expecting Invalid initialization option
expecting Invalid initialization option
normal compression/decompression succeeded
compress/decompression obj succeeded
decompress with init options succeeded
decompressobj with init options succeeded
should be '': ''
max_length decompressobj succeeded
unconsumed_tail should be '': ''
Testing on 17K of random data

View File

@ -1,8 +1,14 @@
import unittest
from test import test_support
import zlib import zlib
import sys import random
import imp
from test.test_support import TestFailed
# print test_support.TESTFN
def getbuf():
# This was in the original. Avoid non-repeatable sources.
# Left here (unused) in case something wants to be done with it.
import imp
try: try:
t = imp.find_module('test_zlib') t = imp.find_module('test_zlib')
file = t[0] file = t[0]
@ -10,160 +16,404 @@ except ImportError:
file = open(__file__) file = open(__file__)
buf = file.read() * 8 buf = file.read() * 8
file.close() file.close()
return buf
# test the checksums (hex so the test doesn't break on 64-bit machines)
def fix(x):
return "0x%x" % (x & 0xffffffffL)
print fix(zlib.crc32('penguin')), fix(zlib.crc32('penguin', 1))
print fix(zlib.adler32('penguin')), fix(zlib.adler32('penguin', 1))
class ChecksumTestCase(unittest.TestCase):
# checksum test cases
def test_crc32start(self):
self.assertEqual(zlib.crc32(""), zlib.crc32("", 0))
def test_crc32empty(self):
self.assertEqual(zlib.crc32("", 0), 0)
self.assertEqual(zlib.crc32("", 1), 1)
self.assertEqual(zlib.crc32("", 432), 432)
def test_adler32start(self):
self.assertEqual(zlib.adler32(""), zlib.adler32("", 1))
def test_adler32empty(self):
self.assertEqual(zlib.adler32("", 0), 0)
self.assertEqual(zlib.adler32("", 1), 1)
self.assertEqual(zlib.adler32("", 432), 432)
def assertEqual32(self, seen, expected):
# 32-bit values masked -- checksums on 32- vs 64- bit machines
# This is important if bit 31 (0x08000000L) is set.
self.assertEqual(seen & 0x0FFFFFFFFL, expected & 0x0FFFFFFFFL)
def test_penguins(self):
self.assertEqual32(zlib.crc32("penguin", 0), 0x0e5c1a120L)
self.assertEqual32(zlib.crc32("penguin", 1), 0x43b6aa94)
self.assertEqual32(zlib.adler32("penguin", 0), 0x0bcf02f6)
self.assertEqual32(zlib.adler32("penguin", 1), 0x0bd602f7)
self.assertEqual(zlib.crc32("penguin"), zlib.crc32("penguin", 0))
self.assertEqual(zlib.adler32("penguin"),zlib.adler32("penguin",1))
class ExceptionTestCase(unittest.TestCase):
# make sure we generate some expected errors # make sure we generate some expected errors
try: def test_bigbits(self):
zlib.compress('ERROR', zlib.MAX_WBITS + 1) # specifying total bits too large causes an error
except zlib.error, msg: self.assertRaises(zlib.error,
print "expecting", msg zlib.compress, 'ERROR', zlib.MAX_WBITS + 1)
try:
zlib.compressobj(1, 8, 0)
except ValueError, msg:
print "expecting", msg
try:
zlib.decompressobj(0)
except ValueError, msg:
print "expecting", msg
x = zlib.compress(buf) def test_badcompressobj(self):
y = zlib.decompress(x) # verify failure on building compress object with bad params
if buf != y: self.assertRaises(ValueError, zlib.compressobj, 1, 8, 0)
print "normal compression/decompression failed"
else:
print "normal compression/decompression succeeded"
buf = buf * 16 def test_baddecompressobj(self):
# verify failure on building decompress object with bad params
self.assertRaises(ValueError, zlib.decompressobj, 0)
class CompressTestCase(unittest.TestCase):
# Test compression in one go (whole message compression)
def test_speech(self):
# decompress(compress(data)) better be data
x = zlib.compress(hamlet_scene)
self.assertEqual(zlib.decompress(x), hamlet_scene)
def test_speech8(self):
# decompress(compress(data)) better be data -- more compression chances
data = hamlet_scene * 8
x = zlib.compress(data)
self.assertEqual(zlib.decompress(x), data)
def test_speech16(self):
# decompress(compress(data)) better be data -- more compression chances
data = hamlet_scene * 16
x = zlib.compress(data)
self.assertEqual(zlib.decompress(x), data)
def test_speech128(self):
# decompress(compress(data)) better be data -- more compression chances
data = hamlet_scene * 8 * 16
x = zlib.compress(data)
self.assertEqual(zlib.decompress(x), data)
def test_monotonic(self):
# higher compression levels should not expand compressed size
data = hamlet_scene * 8 * 16
last = length = len(zlib.compress(data, 0))
self.failUnless(last > len(data), "compress level 0 always expands")
for level in range(10):
length = len(zlib.compress(data, level))
self.failUnless(length <= last,
'compress level %d more effective than %d!' % (
level-1, level))
last = length
class CompressObjectTestCase(unittest.TestCase):
# Test compression object
def test_pairsmall(self):
# use compress object in straightforward manner, decompress w/ object
data = hamlet_scene
co = zlib.compressobj(8, 8, -15) co = zlib.compressobj(8, 8, -15)
x1 = co.compress(buf) x1 = co.compress(data)
x2 = co.flush() x2 = co.flush()
try: self.assertRaises(zlib.error, co.flush) # second flush should not work
co.flush() dco = zlib.decompressobj(-15)
print "Oops - second flush worked when it should not have!" y1 = dco.decompress(x1 + x2)
except zlib.error: y2 = dco.flush()
pass self.assertEqual(data, y1 + y2)
x = x1 + x2 def test_pair(self):
# straightforward compress/decompress objects, more compression
dc = zlib.decompressobj(-15) data = hamlet_scene * 8 * 16
y1 = dc.decompress(x) co = zlib.compressobj(8, 8, -15)
y2 = dc.flush() x1 = co.compress(data)
y = y1 + y2 x2 = co.flush()
if buf != y: self.assertRaises(zlib.error, co.flush) # second flush should not work
print "compress/decompression obj failed" dco = zlib.decompressobj(-15)
else: y1 = dco.decompress(x1 + x2)
print "compress/decompression obj succeeded" y2 = dco.flush()
self.assertEqual(data, y1 + y2)
def test_compressincremental(self):
# compress object in steps, decompress object as one-shot
data = hamlet_scene * 8 * 16
co = zlib.compressobj(2, 8, -12, 9, 1) co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = [] bufs = []
for i in range(0, len(buf), 256): for i in range(0, len(data), 256):
bufs.append(co.compress(buf[i:i+256])) bufs.append(co.compress(data[i:i+256]))
bufs.append(co.flush()) bufs.append(co.flush())
combuf = ''.join(bufs) combuf = ''.join(bufs)
decomp1 = zlib.decompress(combuf, -12, -5) dco = zlib.decompressobj(-15)
if decomp1 != buf: y1 = dco.decompress(''.join(bufs))
print "decompress with init options failed" y2 = dco.flush()
else: self.assertEqual(data, y1 + y2)
print "decompress with init options succeeded"
deco = zlib.decompressobj(-12) def test_decompressincremental(self):
# compress object in steps, decompress object in steps
data = hamlet_scene * 8 * 16
co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = []
for i in range(0, len(data), 256):
bufs.append(co.compress(data[i:i+256]))
bufs.append(co.flush())
combuf = ''.join(bufs)
self.assertEqual(data, zlib.decompress(combuf, -12, -5))
dco = zlib.decompressobj(-12)
bufs = [] bufs = []
for i in range(0, len(combuf), 128): for i in range(0, len(combuf), 128):
bufs.append(deco.decompress(combuf[i:i+128])) bufs.append(dco.decompress(combuf[i:i+128]))
bufs.append(deco.flush()) self.assertEqual('', dco.unconsumed_tail, ########
decomp2 = ''.join(bufs) "(A) uct should be '': not %d long" %
if decomp2 != buf: len(dco.unconsumed_tail))
print "decompressobj with init options failed" bufs.append(dco.flush())
else: self.assertEqual('', dco.unconsumed_tail, ########
print "decompressobj with init options succeeded" "(B) uct should be '': not %d long" %
len(dco.unconsumed_tail))
self.assertEqual(data, ''.join(bufs))
# Failure means: "decompressobj with init options failed"
print "should be '':", `deco.unconsumed_tail` def test_decompinc(self,sizes=[128],flush=True,source=None,cx=256,dcx=64):
# compress object in steps, decompress object in steps, loop sizes
# Check a decompression object with max_length specified source = source or hamlet_scene
deco = zlib.decompressobj(-12) for reps in sizes:
cb = combuf data = source * reps
co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = [] bufs = []
for i in range(0, len(data), cx):
bufs.append(co.compress(data[i:i+cx]))
bufs.append(co.flush())
combuf = ''.join(bufs)
self.assertEqual(data, zlib.decompress(combuf, -12, -5))
dco = zlib.decompressobj(-12)
bufs = []
for i in range(0, len(combuf), dcx):
bufs.append(dco.decompress(combuf[i:i+dcx]))
self.assertEqual('', dco.unconsumed_tail, ########
"(A) uct should be '': not %d long" %
len(dco.unconsumed_tail))
if flush:
bufs.append(dco.flush())
else:
while True:
chunk = dco.decompress('')
if chunk:
bufs.append(chunk)
else:
break
self.assertEqual('', dco.unconsumed_tail, ########
"(B) uct should be '': not %d long" %
len(dco.unconsumed_tail))
self.assertEqual(data, ''.join(bufs))
# Failure means: "decompressobj with init options failed"
def test_decompimax(self,sizes=[128],flush=True,source=None,cx=256,dcx=64):
# compress in steps, decompress in length-restricted steps, loop sizes
source = source or hamlet_scene
for reps in sizes:
# Check a decompression object with max_length specified
data = source * reps
co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = []
for i in range(0, len(data), cx):
bufs.append(co.compress(data[i:i+cx]))
bufs.append(co.flush())
combuf = ''.join(bufs)
self.assertEqual(data, zlib.decompress(combuf, -12, -5),
'compressed data failure')
dco = zlib.decompressobj(-12)
bufs = []
cb = combuf
while cb:
#max_length = 1 + len(cb)/10
chunk = dco.decompress(cb, dcx)
self.failIf(len(chunk) > dcx,
'chunk too big (%d>%d)' % (len(chunk), dcx))
bufs.append(chunk)
cb = dco.unconsumed_tail
if flush:
bufs.append(dco.flush())
else:
while True:
chunk = dco.decompress('', dcx)
self.failIf(len(chunk) > dcx,
'chunk too big in tail (%d>%d)' % (len(chunk), dcx))
if chunk:
bufs.append(chunk)
else:
break
self.assertEqual(len(data), len(''.join(bufs)))
self.assertEqual(data, ''.join(bufs), 'Wrong data retrieved')
def test_decompressmaxlen(self):
# Check a decompression object with max_length specified
data = hamlet_scene * 8 * 16
co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = []
for i in range(0, len(data), 256):
bufs.append(co.compress(data[i:i+256]))
bufs.append(co.flush())
combuf = ''.join(bufs)
self.assertEqual(data, zlib.decompress(combuf, -12, -5),
'compressed data failure')
dco = zlib.decompressobj(-12)
bufs = []
cb = combuf
while cb: while cb:
max_length = 1 + len(cb)/10 max_length = 1 + len(cb)/10
chunk = deco.decompress(cb, max_length) chunk = dco.decompress(cb, max_length)
if len(chunk) > max_length: self.failIf(len(chunk) > max_length,
print 'chunk too big (%d>%d)' % (len(chunk),max_length) 'chunk too big (%d>%d)' % (len(chunk),max_length))
bufs.append(chunk) bufs.append(chunk)
cb = deco.unconsumed_tail cb = dco.unconsumed_tail
bufs.append(deco.flush()) bufs.append(dco.flush())
decomp2 = ''.join(buf) self.assertEqual(len(data), len(''.join(bufs)))
if decomp2 != buf: self.assertEqual(data, ''.join(bufs), 'Wrong data retrieved')
print "max_length decompressobj failed"
else:
print "max_length decompressobj succeeded"
def test_decompressmaxlenflushless(self):
# identical to test_decompressmaxlen except flush is replaced
# with an equivalent. This works and other fails on (eg) 2.2.2
data = hamlet_scene * 8 * 16
co = zlib.compressobj(2, 8, -12, 9, 1)
bufs = []
for i in range(0, len(data), 256):
bufs.append(co.compress(data[i:i+256]))
bufs.append(co.flush())
combuf = ''.join(bufs)
self.assertEqual(data, zlib.decompress(combuf, -12, -5),
'compressed data mismatch')
dco = zlib.decompressobj(-12)
bufs = []
cb = combuf
while cb:
max_length = 1 + len(cb)/10
chunk = dco.decompress(cb, max_length)
self.failIf(len(chunk) > max_length,
'chunk too big (%d>%d)' % (len(chunk),max_length))
bufs.append(chunk)
cb = dco.unconsumed_tail
#bufs.append(dco.flush())
while len(chunk):
chunk = dco.decompress('', max_length)
self.failIf(len(chunk) > max_length,
'chunk too big (%d>%d)' % (len(chunk),max_length))
bufs.append(chunk)
self.assertEqual(data, ''.join(bufs), 'Wrong data retrieved')
def test_maxlenmisc(self):
# Misc tests of max_length # Misc tests of max_length
deco = zlib.decompressobj(-12) dco = zlib.decompressobj(-12)
try: self.assertRaises(ValueError, dco.decompress, "", -1)
deco.decompress("", -1) self.assertEqual('', dco.unconsumed_tail)
except ValueError:
pass
else:
print "failed to raise value error on bad max_length"
print "unconsumed_tail should be '':", `deco.unconsumed_tail`
# Test flush() with the various options, using all the different levels def test_flushes(self):
# in order to provide more variations. # Test flush() with the various options, using all the
# different levels in order to provide more variations.
sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH'] sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']
sync_opt = [getattr(zlib, opt) for opt in sync_opt if hasattr(zlib, opt)] sync_opt = [getattr(zlib, opt) for opt in sync_opt
if hasattr(zlib, opt)]
data = hamlet_scene * 8
for sync in sync_opt: for sync in sync_opt:
for level in range(10): for level in range(10):
obj = zlib.compressobj( level ) obj = zlib.compressobj( level )
d = obj.compress( buf[:3000] ) a = obj.compress( data[:3000] )
d = d + obj.flush( sync ) b = obj.flush( sync )
d = d + obj.compress( buf[3000:] ) c = obj.compress( data[3000:] )
d = d + obj.flush() d = obj.flush()
if zlib.decompress(d) != buf: self.assertEqual(zlib.decompress(''.join([a,b,c,d])),
print "Decompress failed: flush mode=%i, level=%i" % (sync,level) data, ("Decompress failed: flush "
"mode=%i, level=%i") % (sync, level))
del obj del obj
# Test for the odd flushing bugs noted in 2.0, and hopefully fixed in 2.1 def test_odd_flush(self):
# Test for odd flushing bugs noted in 2.0, and hopefully fixed in 2.1
import random import random
random.seed(1)
print 'Testing on 17K of random data'
if hasattr(zlib, 'Z_SYNC_FLUSH'): if hasattr(zlib, 'Z_SYNC_FLUSH'):
# Testing on 17K of "random" data
# Create compressor and decompressor objects # Create compressor and decompressor objects
c=zlib.compressobj(9) co = zlib.compressobj(9)
d=zlib.decompressobj() dco = zlib.decompressobj()
# Try 17K of data # Try 17K of data
# generate random data stream # generate random data stream
a="" try:
for i in range(17*1024): # In 2.3 and later, WichmannHill is the RNG of the bug report
a=a+chr(random.randint(0,255)) gen = random.WichmannHill()
except AttributeError:
try:
# 2.2 called it Random
gen = random.Random()
except AttributeError:
# others might simply have a single RNG
gen = random
gen.seed(1)
data = genblock(1, 17 * 1024, generator=gen)
# compress, sync-flush, and decompress # compress, sync-flush, and decompress
t = d.decompress( c.compress(a)+c.flush(zlib.Z_SYNC_FLUSH) ) first = co.compress(data)
second = co.flush(zlib.Z_SYNC_FLUSH)
expanded = dco.decompress(first + second)
# if decompressed data is different from the input data, choke. # if decompressed data is different from the input data, choke.
if len(t) != len(a): self.assertEqual(expanded, data, "17K random source doesn't match")
print len(a),len(t),len(d.unused_data)
raise TestFailed, "output of 17K doesn't match"
def ignore(): def test_manydecompinc(self):
"""An empty function with a big string. # Run incremental decompress test for a large range of sizes
self.test_decompinc(sizes=[1<<n for n in range(8)],
flush=True, cx=32, dcx=4)
Make the compression algorithm work a little harder. def test_manydecompimax(self):
""" # Run incremental decompress maxlen test for a large range of sizes
# avoid the flush bug
self.test_decompimax(sizes=[1<<n for n in range(8)],
flush=False, cx=32, dcx=4)
""" def test_manydecompimaxflush(self):
# Run incremental decompress maxlen test for a large range of sizes
# avoid the flush bug
self.test_decompimax(sizes=[1<<n for n in range(8)],
flush=True, cx=32, dcx=4)
def genblock(seed, length, step=1024, generator=random):
"""length-byte stream of random data from a seed (in step-byte blocks)."""
if seed is not None:
generator.seed(seed)
randint = generator.randint
if length < step or step < 2:
step = length
blocks = []
for i in range(0, length, step):
blocks.append(''.join([chr(randint(0,255))
for x in range(step)]))
return ''.join(blocks)[:length]
def choose_lines(source, number, seed=None, generator=random):
"""Return a list of number lines randomly chosen from the source"""
if seed is not None:
generator.seed(seed)
sources = source.split('\n')
return [generator.choice(sources) for n in range(number)]
hamlet_scene = """
LAERTES LAERTES
O, fear me not. O, fear me not.
@ -226,3 +476,34 @@ LAERTES
Farewell. Farewell.
""" """
def test_main():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(ChecksumTestCase))
suite.addTest(unittest.makeSuite(ExceptionTestCase))
suite.addTest(unittest.makeSuite(CompressTestCase))
suite.addTest(unittest.makeSuite(CompressObjectTestCase))
test_support.run_suite(suite)
if __name__ == "__main__":
test_main()
def test(tests=''):
if not tests: tests = 'o'
suite = unittest.TestSuite()
if 'k' in tests: suite.addTest(unittest.makeSuite(ChecksumTestCase))
if 'x' in tests: suite.addTest(unittest.makeSuite(ExceptionTestCase))
if 'c' in tests: suite.addTest(unittest.makeSuite(CompressTestCase))
if 'o' in tests: suite.addTest(unittest.makeSuite(CompressObjectTestCase))
test_support.run_suite(suite)
if False:
import sys
sys.path.insert(1, '/Py23Src/python/dist/src/Lib/test')
import test_zlib as tz
ts, ut = tz.test_support, tz.unittest
su = ut.TestSuite()
su.addTest(ut.makeSuite(tz.CompressTestCase))
ts.run_suite(su)

View File

@ -123,6 +123,7 @@ John Cugini
Andrew Dalke Andrew Dalke
Lars Damerow Lars Damerow
Eric Daniel Eric Daniel
Scott David Daniels
Ben Darnell Ben Darnell
Jonathan Dasteel Jonathan Dasteel
John DeGood John DeGood

View File

@ -33,6 +33,11 @@ Core and builtins
Extension modules Extension modules
----------------- -----------------
- Thanks to Scott David Daniels, a subtle bug in how the zlib
extension implemented flush() was fixed. Scott also rewrite the
zlib test suite using the unittest module. (SF bug #640230 and
patch #678531.)
- Added an itertools module containing high speed, memory efficient - Added an itertools module containing high speed, memory efficient
looping constructs inspired by tools from Haskell and SML. looping constructs inspired by tools from Haskell and SML.

View File

@ -656,27 +656,58 @@ PyDoc_STRVAR(decomp_flush__doc__,
static PyObject * static PyObject *
PyZlib_unflush(compobject *self, PyObject *args) PyZlib_unflush(compobject *self, PyObject *args)
/*decompressor flush is a no-op because all pending data would have been
flushed by the decompress method. However, this routine previously called
inflateEnd, causing any further decompress or flush calls to raise
exceptions. This behaviour has been preserved.*/
{ {
int err; int err, length = DEFAULTALLOC;
PyObject * retval = NULL; PyObject * retval = NULL;
unsigned long start_total_out;
if (!PyArg_ParseTuple(args, "")) if (!PyArg_ParseTuple(args, "|i:flush", &length))
return NULL; return NULL;
if (!(retval = PyString_FromStringAndSize(NULL, length)))
return NULL;
ENTER_ZLIB ENTER_ZLIB
err = inflateEnd(&(self->zst)); start_total_out = self->zst.total_out;
if (err != Z_OK) self->zst.avail_out = length;
zlib_error(self->zst, err, "from inflateEnd()"); self->zst.next_out = (Byte *)PyString_AS_STRING(retval);
else {
self->is_initialised = 0; Py_BEGIN_ALLOW_THREADS
retval = PyString_FromStringAndSize(NULL, 0); err = inflate(&(self->zst), Z_FINISH);
Py_END_ALLOW_THREADS
/* while Z_OK and the output buffer is full, there might be more output,
so extend the output buffer and try again */
while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) {
if (_PyString_Resize(&retval, length << 1) < 0)
goto error;
self->zst.next_out = (Byte *)PyString_AS_STRING(retval) + length;
self->zst.avail_out = length;
length = length << 1;
Py_BEGIN_ALLOW_THREADS
err = inflate(&(self->zst), Z_FINISH);
Py_END_ALLOW_THREADS
} }
/* If flushmode is Z_FINISH, we also have to call deflateEnd() to free
various data structures. Note we should only get Z_STREAM_END when
flushmode is Z_FINISH */
if (err == Z_STREAM_END) {
err = inflateEnd(&(self->zst));
self->is_initialised = 0;
if (err != Z_OK) {
zlib_error(self->zst, err, "from inflateEnd()");
Py_DECREF(retval);
retval = NULL;
goto error;
}
}
_PyString_Resize(&retval, self->zst.total_out - start_total_out);
error:
LEAVE_ZLIB LEAVE_ZLIB
return retval; return retval;
@ -868,6 +899,8 @@ PyInit_zlib(void)
if (ver != NULL) if (ver != NULL)
PyModule_AddObject(m, "ZLIB_VERSION", ver); PyModule_AddObject(m, "ZLIB_VERSION", ver);
PyModule_AddStringConstant(m, "__version__", "1.0");
#ifdef WITH_THREAD #ifdef WITH_THREAD
zlib_lock = PyThread_allocate_lock(); zlib_lock = PyThread_allocate_lock();
#endif /* WITH_THREAD */ #endif /* WITH_THREAD */