Use the "MS" getline hack (fgets()) by default on non-getc_unlocked
platforms. See NEWS for details.
This commit is contained in:
parent
e119006e7d
commit
f29b64d243
22
Misc/NEWS
22
Misc/NEWS
@ -25,11 +25,23 @@ Core language, builtins, and interpreter
|
|||||||
|
|
||||||
- Even if you don't use file.xreadlines(), you may expect a speedup on
|
- Even if you don't use file.xreadlines(), you may expect a speedup on
|
||||||
line-by-line input. The file.readline() method has been optimized
|
line-by-line input. The file.readline() method has been optimized
|
||||||
quite a bit in platform-specific ways, both on Windows (using an
|
quite a bit in platform-specific ways: on systems (like Linux) that
|
||||||
incredibly complex, but nevertheless thread-safe), and on systems
|
support flockfile(), getc_unlocked(), and funlockfile(), those are
|
||||||
(like Linux) that support flockfile(), getc_unlocked(), and
|
used by default. On systems (like Windows) without getc_unlocked(),
|
||||||
funlockfile(). In addition, the fileinput module, while still slow,
|
a complicated (but still thread-safe) method using fgets() is used by
|
||||||
has been sped up too, by using file.readlines(sizehint).
|
default.
|
||||||
|
|
||||||
|
You can force use of the fgets() method by #define'ing
|
||||||
|
USE_FGETS_IN_GETLINE at build time (it may be faster than
|
||||||
|
getc_unlocked()).
|
||||||
|
|
||||||
|
You can force fgets() not to be used by #define'ing
|
||||||
|
DONT_USE_FGETS_IN_GETLINE (this is the first thing to try if std test
|
||||||
|
test_bufio.py fails -- and let us know if it does!).
|
||||||
|
|
||||||
|
- In addition, the fileinput module, while still slower than the other
|
||||||
|
methods on most platforms, has been sped up too, by using
|
||||||
|
file.readlines(sizehint).
|
||||||
|
|
||||||
- Support for run-time warnings has been added, including a new
|
- Support for run-time warnings has been added, including a new
|
||||||
command line option (-W) to specify the disposition of warnings.
|
command line option (-W) to specify the disposition of warnings.
|
||||||
|
@ -635,7 +635,7 @@ file_readinto(PyFileObject *f, PyObject *args)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
Win32 MS routine to get next line.
|
Routine to get next line using platform fgets().
|
||||||
|
|
||||||
Under MSVC 6:
|
Under MSVC 6:
|
||||||
|
|
||||||
@ -651,23 +651,41 @@ So we use fgets for speed(!), despite that it's painful.
|
|||||||
|
|
||||||
MS realloc is also slow.
|
MS realloc is also slow.
|
||||||
|
|
||||||
In the usual case, we have one pleasantly small line already sitting in a
|
Reports from other platforms on this method vs getc_unlocked (which MS doesn't
|
||||||
stdio buffer, and we optimize heavily for that case.
|
have):
|
||||||
|
Linux a wash
|
||||||
|
Solaris a wash
|
||||||
|
Tru64 Unix getline_via_fgets significantly faster
|
||||||
|
|
||||||
CAUTION: This routine cheats, relying on that MSVC 6 fgets doesn't overwrite
|
CAUTION: The C std isn't clear about this: in those cases where fgets
|
||||||
any buffer positions to the right of the terminating null byte. Seems
|
writes something into the buffer, can it write into any position beyond the
|
||||||
unlikely that will change in the future, but ... std test test_bufio should
|
required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
|
||||||
catch it if that changes.
|
known on which it does; and it would be a strange way to code fgets. Still,
|
||||||
|
getline_via_fgets may not work correctly if it does. The std test
|
||||||
|
test_bufio.py should fail if platform fgets() routinely writes beyond the
|
||||||
|
trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
|
||||||
**************************************************************************/
|
**************************************************************************/
|
||||||
|
|
||||||
/* if Win32 and MS's compiler */
|
/* Use this routine if told to, or by default on non-getc_unlocked()
|
||||||
#if defined(MS_WIN32) && defined(_MSC_VER)
|
* platforms unless told not to. Yikes! Let's spell that out:
|
||||||
#define USE_MS_GETLINE_HACK
|
* On a platform with getc_unlocked():
|
||||||
|
* By default, use getc_unlocked().
|
||||||
|
* If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
|
||||||
|
* On a platform without getc_unlocked():
|
||||||
|
* By default, use fgets().
|
||||||
|
* If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
|
||||||
|
*/
|
||||||
|
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
|
||||||
|
#define USE_FGETS_IN_GETLINE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef USE_MS_GETLINE_HACK
|
#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
|
||||||
|
#undef USE_FGETS_IN_GETLINE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_FGETS_IN_GETLINE
|
||||||
static PyObject*
|
static PyObject*
|
||||||
ms_getline_hack(FILE *fp)
|
getline_via_fgets(FILE *fp)
|
||||||
{
|
{
|
||||||
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
|
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
|
||||||
* no-realloc path. get_line uses 100 for its initial size, but isn't trying
|
* no-realloc path. get_line uses 100 for its initial size, but isn't trying
|
||||||
@ -686,14 +704,14 @@ ms_getline_hack(FILE *fp)
|
|||||||
char* pvfree; /* address of next free slot */
|
char* pvfree; /* address of next free slot */
|
||||||
char* pvend; /* address one beyond last free slot */
|
char* pvend; /* address one beyond last free slot */
|
||||||
char* p; /* temp */
|
char* p; /* temp */
|
||||||
char msbuf[INITBUFSIZE];
|
char buf[INITBUFSIZE];
|
||||||
|
|
||||||
/* Optimize for normal case: avoid _PyString_Resize if at all
|
/* Optimize for normal case: avoid _PyString_Resize if at all
|
||||||
* possible via first reading into auto msbuf.
|
* possible via first reading into auto buf.
|
||||||
*/
|
*/
|
||||||
Py_BEGIN_ALLOW_THREADS
|
Py_BEGIN_ALLOW_THREADS
|
||||||
memset(msbuf, '\n', INITBUFSIZE);
|
memset(buf, '\n', INITBUFSIZE);
|
||||||
p = fgets(msbuf, INITBUFSIZE, fp);
|
p = fgets(buf, INITBUFSIZE, fp);
|
||||||
Py_END_ALLOW_THREADS
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
if (p == NULL) {
|
if (p == NULL) {
|
||||||
@ -704,7 +722,7 @@ ms_getline_hack(FILE *fp)
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
/* fgets read *something* */
|
/* fgets read *something* */
|
||||||
p = memchr(msbuf, '\n', INITBUFSIZE);
|
p = memchr(buf, '\n', INITBUFSIZE);
|
||||||
if (p != NULL) {
|
if (p != NULL) {
|
||||||
/* Did the \n come from fgets or from us?
|
/* Did the \n come from fgets or from us?
|
||||||
* Since fgets stops at the first \n, and then writes \0, if
|
* Since fgets stops at the first \n, and then writes \0, if
|
||||||
@ -712,34 +730,34 @@ ms_getline_hack(FILE *fp)
|
|||||||
* could not have come from us, since the \n's we filled the
|
* could not have come from us, since the \n's we filled the
|
||||||
* buffer with have only more \n's to the right.
|
* buffer with have only more \n's to the right.
|
||||||
*/
|
*/
|
||||||
pvend = msbuf + INITBUFSIZE;
|
pvend = buf + INITBUFSIZE;
|
||||||
if (p+1 < pvend && *(p+1) == '\0') {
|
if (p+1 < pvend && *(p+1) == '\0') {
|
||||||
/* It's from fgets: we win! In particular, we
|
/* It's from fgets: we win! In particular, we
|
||||||
* haven't done any mallocs yet, and can build the
|
* haven't done any mallocs yet, and can build the
|
||||||
* final result on the first try.
|
* final result on the first try.
|
||||||
*/
|
*/
|
||||||
v = PyString_FromStringAndSize(msbuf, p - msbuf + 1);
|
v = PyString_FromStringAndSize(buf, p - buf + 1);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
/* Must be from us: fgets didn't fill the buffer and didn't
|
/* Must be from us: fgets didn't fill the buffer and didn't
|
||||||
* find a newline, so it must be the last and newline-free
|
* find a newline, so it must be the last and newline-free
|
||||||
* line of the file.
|
* line of the file.
|
||||||
*/
|
*/
|
||||||
assert(p > msbuf && *(p-1) == '\0');
|
assert(p > buf && *(p-1) == '\0');
|
||||||
v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
|
v = PyString_FromStringAndSize(buf, p - buf - 1);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
/* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
|
/* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
|
||||||
* So this line isn't over yet, or maybe it is but we're exactly at
|
* So this line isn't over yet, or maybe it is but we're exactly at
|
||||||
* EOF; in either case, we're tired <wink>.
|
* EOF; in either case, we're tired <wink>.
|
||||||
*/
|
*/
|
||||||
assert(msbuf[INITBUFSIZE-1] == '\0');
|
assert(buf[INITBUFSIZE-1] == '\0');
|
||||||
total_v_size = INITBUFSIZE + INCBUFSIZE;
|
total_v_size = INITBUFSIZE + INCBUFSIZE;
|
||||||
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
|
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return v;
|
return v;
|
||||||
/* copy over everything except the last null byte */
|
/* copy over everything except the last null byte */
|
||||||
memcpy(BUF(v), msbuf, INITBUFSIZE-1);
|
memcpy(BUF(v), buf, INITBUFSIZE-1);
|
||||||
pvfree = BUF(v) + INITBUFSIZE - 1;
|
pvfree = BUF(v) + INITBUFSIZE - 1;
|
||||||
|
|
||||||
/* Keep reading stuff into v; if it ever ends successfully, break
|
/* Keep reading stuff into v; if it ever ends successfully, break
|
||||||
@ -798,7 +816,7 @@ ms_getline_hack(FILE *fp)
|
|||||||
#undef INITBUFSIZE
|
#undef INITBUFSIZE
|
||||||
#undef INCBUFSIZE
|
#undef INCBUFSIZE
|
||||||
}
|
}
|
||||||
#endif /* ifdef USE_MS_GETLINE_HACK */
|
#endif /* ifdef USE_FGETS_IN_GETLINE */
|
||||||
|
|
||||||
/* Internal routine to get a line.
|
/* Internal routine to get a line.
|
||||||
Size argument interpretation:
|
Size argument interpretation:
|
||||||
@ -825,10 +843,9 @@ get_line(PyFileObject *f, int n)
|
|||||||
size_t n1, n2;
|
size_t n1, n2;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
#ifdef USE_MS_GETLINE_HACK
|
#ifdef USE_FGETS_IN_GETLINE
|
||||||
|
|
||||||
if (n <= 0)
|
if (n <= 0)
|
||||||
return ms_getline_hack(fp);
|
return getline_via_fgets(fp);
|
||||||
#endif
|
#endif
|
||||||
n2 = n > 0 ? n : 100;
|
n2 = n > 0 ? n : 100;
|
||||||
v = PyString_FromStringAndSize((char *)NULL, n2);
|
v = PyString_FromStringAndSize((char *)NULL, n2);
|
||||||
@ -967,10 +984,10 @@ static PyObject *
|
|||||||
file_xreadlines(PyFileObject *f, PyObject *args)
|
file_xreadlines(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
static PyObject* xreadlines_function = NULL;
|
static PyObject* xreadlines_function = NULL;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, ":xreadlines"))
|
if (!PyArg_ParseTuple(args, ":xreadlines"))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (!xreadlines_function) {
|
if (!xreadlines_function) {
|
||||||
PyObject *xreadlines_module =
|
PyObject *xreadlines_module =
|
||||||
PyImport_ImportModule("xreadlines");
|
PyImport_ImportModule("xreadlines");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user