Patch #494845: Support string concatenation, detect non-string data,
add globbing support, find modules by name instead of by file.
This commit is contained in:
parent
4581cfa326
commit
0d1fdea8ef
@ -220,6 +220,7 @@ Jonathan Hendry
|
|||||||
James Henstridge
|
James Henstridge
|
||||||
Chris Herborth
|
Chris Herborth
|
||||||
Ivan Herman
|
Ivan Herman
|
||||||
|
Jürgen Hermann
|
||||||
Gary Herron
|
Gary Herron
|
||||||
Bernhard Herzog
|
Bernhard Herzog
|
||||||
Magnus L. Hetland
|
Magnus L. Hetland
|
||||||
|
@ -621,6 +621,9 @@ Library
|
|||||||
Tools/Demos
|
Tools/Demos
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
- pygettext.py now supports globbing on Windows, and accepts module
|
||||||
|
names in addition to accepting file names.
|
||||||
|
|
||||||
- The SGI demos (Demo/sgi) have been removed. Nobody thought they
|
- The SGI demos (Demo/sgi) have been removed. Nobody thought they
|
||||||
were interesting any more. (The SGI library modules and extensions
|
were interesting any more. (The SGI library modules and extensions
|
||||||
are still there; it is believed that at least some of these are
|
are still there; it is believed that at least some of these are
|
||||||
|
@ -1,31 +1,49 @@
|
|||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
|
# -*- coding: iso-8859-1 -*-
|
||||||
# Originally written by Barry Warsaw <barry@zope.com>
|
# Originally written by Barry Warsaw <barry@zope.com>
|
||||||
#
|
#
|
||||||
# Minimally patched to make it even more xgettext compatible
|
# Minimally patched to make it even more xgettext compatible
|
||||||
# by Peter Funk <pf@artcom-gmbh.de>
|
# by Peter Funk <pf@artcom-gmbh.de>
|
||||||
|
#
|
||||||
|
# 2002-11-22 Jürgen Hermann <jh@web.de>
|
||||||
|
# Added checks that _() only contains string literals, and
|
||||||
|
# command line args are resolved to module lists, i.e. you
|
||||||
|
# can now pass a filename, a module or package name, or a
|
||||||
|
# directory (including globbing chars, important for Win32).
|
||||||
|
# Made docstring fit in 80 chars wide displays using pydoc.
|
||||||
|
#
|
||||||
|
|
||||||
"""pygettext -- Python equivalent of xgettext(1)
|
# for selftesting
|
||||||
|
try:
|
||||||
|
import fintl
|
||||||
|
_ = fintl.gettext
|
||||||
|
except ImportError:
|
||||||
|
_ = lambda s: s
|
||||||
|
|
||||||
|
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
||||||
|
|
||||||
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
||||||
internationalization of C programs. Most of these tools are independent of
|
internationalization of C programs. Most of these tools are independent of
|
||||||
the programming language and can be used from within Python programs. Martin
|
the programming language and can be used from within Python programs.
|
||||||
von Loewis' work[1] helps considerably in this regard.
|
Martin von Loewis' work[1] helps considerably in this regard.
|
||||||
|
|
||||||
There's one problem though; xgettext is the program that scans source code
|
There's one problem though; xgettext is the program that scans source code
|
||||||
looking for message strings, but it groks only C (or C++). Python introduces
|
looking for message strings, but it groks only C (or C++). Python
|
||||||
a few wrinkles, such as dual quoting characters, triple quoted strings, and
|
introduces a few wrinkles, such as dual quoting characters, triple quoted
|
||||||
raw strings. xgettext understands none of this.
|
strings, and raw strings. xgettext understands none of this.
|
||||||
|
|
||||||
Enter pygettext, which uses Python's standard tokenize module to scan Python
|
Enter pygettext, which uses Python's standard tokenize module to scan
|
||||||
source code, generating .pot files identical to what GNU xgettext[2] generates
|
Python source code, generating .pot files identical to what GNU xgettext[2]
|
||||||
for C and C++ code. From there, the standard GNU tools can be used.
|
generates for C and C++ code. From there, the standard GNU tools can be
|
||||||
|
used.
|
||||||
|
|
||||||
A word about marking Python strings as candidates for translation. GNU
|
A word about marking Python strings as candidates for translation. GNU
|
||||||
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
|
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
|
||||||
gettext_noop. But those can be a lot of text to include all over your code.
|
and gettext_noop. But those can be a lot of text to include all over your
|
||||||
C and C++ have a trick: they use the C preprocessor. Most internationalized C
|
code. C and C++ have a trick: they use the C preprocessor. Most
|
||||||
source includes a #define for gettext() to _() so that what has to be written
|
internationalized C source includes a #define for gettext() to _() so that
|
||||||
in the source is much less. Thus these are both translatable strings:
|
what has to be written in the source is much less. Thus these are both
|
||||||
|
translatable strings:
|
||||||
|
|
||||||
gettext("Translatable String")
|
gettext("Translatable String")
|
||||||
_("Translatable String")
|
_("Translatable String")
|
||||||
@ -37,11 +55,11 @@ below for how to augment this.
|
|||||||
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
||||||
[2] http://www.gnu.org/software/gettext/gettext.html
|
[2] http://www.gnu.org/software/gettext/gettext.html
|
||||||
|
|
||||||
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
NOTE: pygettext attempts to be option and feature compatible with GNU
|
||||||
where ever possible. However some options are still missing or are not fully
|
xgettext where ever possible. However some options are still missing or are
|
||||||
implemented. Also, xgettext's use of command line switches with option
|
not fully implemented. Also, xgettext's use of command line switches with
|
||||||
arguments is broken, and in these cases, pygettext just defines additional
|
option arguments is broken, and in these cases, pygettext just defines
|
||||||
switches.
|
additional switches.
|
||||||
|
|
||||||
Usage: pygettext [options] inputfile ...
|
Usage: pygettext [options] inputfile ...
|
||||||
|
|
||||||
@ -61,9 +79,9 @@ Options:
|
|||||||
|
|
||||||
-D
|
-D
|
||||||
--docstrings
|
--docstrings
|
||||||
Extract module, class, method, and function docstrings. These do not
|
Extract module, class, method, and function docstrings. These do
|
||||||
need to be wrapped in _() markers, and in fact cannot be for Python to
|
not need to be wrapped in _() markers, and in fact cannot be for
|
||||||
consider them docstrings. (See also the -X option).
|
Python to consider them docstrings. (See also the -X option).
|
||||||
|
|
||||||
-h
|
-h
|
||||||
--help
|
--help
|
||||||
@ -135,23 +153,17 @@ Options:
|
|||||||
conjunction with the -D option above.
|
conjunction with the -D option above.
|
||||||
|
|
||||||
If `inputfile' is -, standard input is read.
|
If `inputfile' is -, standard input is read.
|
||||||
"""
|
""")
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import getopt
|
import getopt
|
||||||
|
import token
|
||||||
import tokenize
|
import tokenize
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
# for selftesting
|
__version__ = '1.5'
|
||||||
try:
|
|
||||||
import fintl
|
|
||||||
_ = fintl.gettext
|
|
||||||
except ImportError:
|
|
||||||
def _(s): return s
|
|
||||||
|
|
||||||
__version__ = '1.4'
|
|
||||||
|
|
||||||
default_keywords = ['_']
|
default_keywords = ['_']
|
||||||
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
||||||
@ -183,7 +195,7 @@ msgstr ""
|
|||||||
|
|
||||||
|
|
||||||
def usage(code, msg=''):
|
def usage(code, msg=''):
|
||||||
print >> sys.stderr, _(__doc__) % globals()
|
print >> sys.stderr, __doc__ % globals()
|
||||||
if msg:
|
if msg:
|
||||||
print >> sys.stderr, msg
|
print >> sys.stderr, msg
|
||||||
sys.exit(code)
|
sys.exit(code)
|
||||||
@ -242,6 +254,103 @@ def normalize(s):
|
|||||||
s = '""\n"' + lineterm.join(lines) + '"'
|
s = '""\n"' + lineterm.join(lines) + '"'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def containsAny(str, set):
|
||||||
|
""" Check whether 'str' contains ANY of the chars in 'set'
|
||||||
|
"""
|
||||||
|
return 1 in [c in str for c in set]
|
||||||
|
|
||||||
|
|
||||||
|
def _visit_pyfiles(list, dirname, names):
|
||||||
|
""" Helper for getFilesForName().
|
||||||
|
"""
|
||||||
|
# get extension for python source files
|
||||||
|
if not globals().has_key('_py_ext'):
|
||||||
|
import imp
|
||||||
|
global _py_ext
|
||||||
|
_py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
|
||||||
|
|
||||||
|
# don't recurse into CVS directories
|
||||||
|
if 'CVS' in names:
|
||||||
|
names.remove('CVS')
|
||||||
|
|
||||||
|
# add all *.py files to list
|
||||||
|
list.extend(
|
||||||
|
[os.path.join(dirname, file)
|
||||||
|
for file in names
|
||||||
|
if os.path.splitext(file)[1] == _py_ext])
|
||||||
|
|
||||||
|
|
||||||
|
def _get_modpkg_path(dotted_name, pathlist=None):
|
||||||
|
""" Get the filesystem path for a module or a package.
|
||||||
|
|
||||||
|
Return the file system path to a file for a module,
|
||||||
|
and to a directory for a package. Return None if
|
||||||
|
the name is not found, or is a builtin or extension module.
|
||||||
|
"""
|
||||||
|
import imp
|
||||||
|
|
||||||
|
# split off top-most name
|
||||||
|
parts = dotted_name.split('.', 1)
|
||||||
|
|
||||||
|
if len(parts) > 1:
|
||||||
|
# we have a dotted path, import top-level package
|
||||||
|
try:
|
||||||
|
file, pathname, description = imp.find_module(parts[0], pathlist)
|
||||||
|
if file: file.close()
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# check if it's indeed a package
|
||||||
|
if description[2] == imp.PKG_DIRECTORY:
|
||||||
|
# recursively handle the remaining name parts
|
||||||
|
pathname = _get_modpkg_path(parts[1], [pathname])
|
||||||
|
else:
|
||||||
|
pathname = None
|
||||||
|
else:
|
||||||
|
# plain name
|
||||||
|
try:
|
||||||
|
file, pathname, description = imp.find_module(dotted_name, pathlist)
|
||||||
|
if file: file.close()
|
||||||
|
if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
|
||||||
|
pathname = None
|
||||||
|
except ImportError:
|
||||||
|
pathname = None
|
||||||
|
|
||||||
|
return pathname
|
||||||
|
|
||||||
|
|
||||||
|
def getFilesForName(name):
|
||||||
|
""" Get a list of module files for a filename, a module or package name,
|
||||||
|
or a directory.
|
||||||
|
"""
|
||||||
|
import imp
|
||||||
|
|
||||||
|
if not os.path.exists(name):
|
||||||
|
# check for glob chars
|
||||||
|
if containsAny(name, "*?[]"):
|
||||||
|
import glob
|
||||||
|
files = glob.glob(name)
|
||||||
|
list = []
|
||||||
|
for file in files:
|
||||||
|
list.extend(getFilesForName(file))
|
||||||
|
return list
|
||||||
|
|
||||||
|
# try to find module or package
|
||||||
|
name = _get_modpkg_path(name)
|
||||||
|
if not name:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if os.path.isdir(name):
|
||||||
|
# find all python files in directory
|
||||||
|
list = []
|
||||||
|
os.path.walk(name, _visit_pyfiles, list)
|
||||||
|
return list
|
||||||
|
elif os.path.exists(name):
|
||||||
|
# a single file
|
||||||
|
return [name]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
class TokenEater:
|
class TokenEater:
|
||||||
@ -314,7 +423,12 @@ class TokenEater:
|
|||||||
self.__state = self.__waiting
|
self.__state = self.__waiting
|
||||||
elif ttype == tokenize.STRING:
|
elif ttype == tokenize.STRING:
|
||||||
self.__data.append(safe_eval(tstring))
|
self.__data.append(safe_eval(tstring))
|
||||||
# TBD: should we warn if we seen anything else?
|
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
|
||||||
|
token.NEWLINE, tokenize.NL]:
|
||||||
|
# warn if we see anything else than STRING or whitespace
|
||||||
|
print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
|
||||||
|
'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
|
||||||
|
self.__state = self.__waiting
|
||||||
|
|
||||||
def __addentry(self, msg, lineno=None, isdocstring=0):
|
def __addentry(self, msg, lineno=None, isdocstring=0):
|
||||||
if lineno is None:
|
if lineno is None:
|
||||||
@ -495,6 +609,15 @@ def main():
|
|||||||
else:
|
else:
|
||||||
options.toexclude = []
|
options.toexclude = []
|
||||||
|
|
||||||
|
# resolve args to module lists
|
||||||
|
expanded = []
|
||||||
|
for arg in args:
|
||||||
|
if arg == '-':
|
||||||
|
expanded.append(arg)
|
||||||
|
else:
|
||||||
|
expanded.extend(getFilesForName(arg))
|
||||||
|
args = expanded
|
||||||
|
|
||||||
# slurp through all the files
|
# slurp through all the files
|
||||||
eater = TokenEater(options)
|
eater = TokenEater(options)
|
||||||
for filename in args:
|
for filename in args:
|
||||||
@ -539,3 +662,6 @@ if __name__ == '__main__':
|
|||||||
main()
|
main()
|
||||||
# some more test strings
|
# some more test strings
|
||||||
_(u'a unicode string')
|
_(u'a unicode string')
|
||||||
|
_('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
|
||||||
|
_('more' 'than' 'one' 'string')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user