Reformulate make_compiled_pathname in terms of unicode objects.

This commit is contained in:
Martin v. Löwis 2011-10-23 17:29:08 +02:00
parent 9715d26305
commit 2db72863fb

View File

@ -904,6 +904,25 @@ rightmost_sep(Py_UCS4 *s)
return found; return found;
} }
/* Unicode-object counterpart of rightmost_sep().

   Return the index of the last path separator in the unicode object `o`
   (SEP, and also ALTSEP when that macro is defined), or -1 when `o`
   contains no separator. */
static Py_ssize_t
rightmost_sep_obj(PyObject* o)
{
    Py_ssize_t idx = PyUnicode_GET_LENGTH(o);

    /* Walk backwards: the first separator met is the rightmost one. */
    while (idx-- > 0) {
        Py_UCS4 ch = PyUnicode_READ_CHAR(o, idx);
#ifdef ALTSEP
        if (ch == SEP || ch == ALTSEP) {
            return idx;
        }
#else
        if (ch == SEP) {
            return idx;
        }
#endif
    }
    return -1;
}
/* Given a pathname for a Python source file, fill a buffer with the /* Given a pathname for a Python source file, fill a buffer with the
pathname for the corresponding compiled file. Return the pathname pathname for the corresponding compiled file. Return the pathname
@ -915,123 +934,49 @@ rightmost_sep(Py_UCS4 *s)
static PyObject*
make_compiled_pathname(PyObject *pathstr, int debug)
{
    /* Build the pathname of the compiled file that belongs to the source
       file `pathstr`.  The result is laid out as

           pathstr[:fname] + "__pycache__" + sep +
           pathstr[fname:ext] + pyc_tag + ".py[co]"

       fname - index just past the rightmost path separator (0 if none)
       ext   - index just past the last '.' of the file name, or the end
               of the string when the file name contains no dot (the tag
               is then appended directly to the name)
       sep   - the separator character that ends the directory part of
               pathstr, or SEP when pathstr has no directory part

       The last character is 'c' when `debug` is true, 'o' otherwise.
       Returns the new unicode object, or NULL if PyUnicode_New() fails.

       NOTE(review): pathstr is read with PyUnicode_GET_LENGTH and
       PyUnicode_READ_CHAR without any check in this function; confirm
       that callers always pass a suitable unicode object. */
    static const char cachedir[] = "__pycache__";
    const Py_ssize_t cachedir_len = sizeof(cachedir) - 1;
    PyObject *result;
    Py_ssize_t fname, ext, len, i, pos, taglen;
    Py_UCS4 sep;
    int kind;
    void *data;

    /* Compute the output string size. */
    len = PyUnicode_GET_LENGTH(pathstr);
    /* If there is no separator, rightmost_sep_obj() returns -1, so
       fname will be 0. */
    fname = rightmost_sep_obj(pathstr) + 1;
    /* Reuse the separator the caller used (it may be ALTSEP) so that the
       result does not mix separator styles. */
    if (fname > 0) {
        sep = PyUnicode_READ_CHAR(pathstr, fname - 1);
    }
    else {
        sep = SEP;
    }
    /* Start below fname so that "no dot found" can be detected. */
    ext = fname - 1;
    for (i = fname; i < len; i++) {
        if (PyUnicode_READ_CHAR(pathstr, i) == '.') {
            ext = i + 1;
        }
    }
    if (ext < fname) {
        /* No dot in filename; use entire filename */
        ext = len;
    }

    /* ext characters of pathstr + "__pycache__" + separator + tag +
       the four characters of ".py[co]". */
    taglen = (Py_ssize_t)strlen(pyc_tag);
    result = PyUnicode_New(ext + cachedir_len + 1 + taglen + 4,
                           PyUnicode_MAX_CHAR_VALUE(pathstr));
    if (!result) {
        return NULL;
    }
    kind = PyUnicode_KIND(result);
    data = PyUnicode_DATA(result);

    /* Directory part, including its trailing separator. */
    PyUnicode_CopyCharacters(result, 0, pathstr, 0, fname);
    pos = fname;
    for (i = 0; i < cachedir_len; i++) {
        PyUnicode_WRITE(kind, data, pos++, cachedir[i]);
    }
    PyUnicode_WRITE(kind, data, pos++, sep);

    /* File name up to and including its last dot (if it has one). */
    PyUnicode_CopyCharacters(result, pos, pathstr,
                             fname, ext - fname);
    pos += ext - fname;
    for (i = 0; i < taglen; i++) {
        PyUnicode_WRITE(kind, data, pos++, pyc_tag[i]);
    }
    PyUnicode_WRITE(kind, data, pos++, '.');
    PyUnicode_WRITE(kind, data, pos++, 'p');
    PyUnicode_WRITE(kind, data, pos++, 'y');
    PyUnicode_WRITE(kind, data, pos++, debug ? 'c' : 'o');
    return result;
}