Patch #494845: Support string concatenation, detect non-string data,

add globbing support, find modules by name instead of by file.
2002-11-22 08:36:54 +00:00 · 2002-11-22 08:36:54 +00:00 · 0d1fdea8ef
commit 0d1fdea8ef
parent 4581cfa326
3 changed files with 165 additions and 35 deletions
--- a/Misc/ACKS
+++ b/Misc/ACKS
@ -220,6 +220,7 @@ Jonathan Hendry
 James Henstridge
 Chris Herborth
 Ivan Herman
 Jürgen Hermann
 Gary Herron
 Bernhard Herzog
 Magnus L. Hetland
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -621,6 +621,9 @@ Library
 Tools/Demos
 -----------
 - pygettext.py now supports globbing on Windows, and accepts module
  names in addition to accepting file names.
 - The SGI demos (Demo/sgi) have been removed.  Nobody thought they
  were interesting any more.  (The SGI library modules and extensions
  are still there; it is believed that at least some of these are
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@ -1,31 +1,49 @@
 #! /usr/bin/env python
 # -*- coding: iso-8859-1 -*-
 # Originally written by Barry Warsaw <barry@zope.com>
 #
 # Minimally patched to make it even more xgettext compatible 
 # by Peter Funk <pf@artcom-gmbh.de>
 #
 # 2002-11-22 Jürgen Hermann <jh@web.de>
 # Added checks that _() only contains string literals, and
 # command line args are resolved to module lists, i.e. you
 # can now pass a filename, a module or package name, or a
 # directory (including globbing chars, important for Win32).
 # Made docstring fit in 80 chars wide displays using pydoc.
 #
-"""pygettext -- Python equivalent of xgettext(1)
+# for selftesting
 try:
    import fintl
    _ = fintl.gettext
 except ImportError:
    _ = lambda s: s
 __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
 Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
 internationalization of C programs. Most of these tools are independent of
-the programming language and can be used from within Python programs.  Martin
+the programming language and can be used from within Python programs.
-von Loewis' work[1] helps considerably in this regard.
+Martin von Loewis' work[1] helps considerably in this regard. 
 There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++).  Python introduces
+looking for message strings, but it groks only C (or C++). Python
-a few wrinkles, such as dual quoting characters, triple quoted strings, and
+introduces a few wrinkles, such as dual quoting characters, triple quoted
-raw strings.  xgettext understands none of this.
+strings, and raw strings. xgettext understands none of this. 
-Enter pygettext, which uses Python's standard tokenize module to scan Python
+Enter pygettext, which uses Python's standard tokenize module to scan
-source code, generating .pot files identical to what GNU xgettext[2] generates
+Python source code, generating .pot files identical to what GNU xgettext[2]
-for C and C++ code.  From there, the standard GNU tools can be used.
+generates for C and C++ code. From there, the standard GNU tools can be
 used. 
 A word about marking Python strings as candidates for translation. GNU
-xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
+xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
-gettext_noop.  But those can be a lot of text to include all over your code.
+and gettext_noop. But those can be a lot of text to include all over your
-C and C++ have a trick: they use the C preprocessor.  Most internationalized C
+code. C and C++ have a trick: they use the C preprocessor. Most
-source includes a #define for gettext() to _() so that what has to be written
+internationalized C source includes a #define for gettext() to _() so that
-in the source is much less.  Thus these are both translatable strings:
+what has to be written in the source is much less. Thus these are both
 translatable strings: 
    gettext("Translatable String")
    _("Translatable String")
@ -37,11 +55,11 @@ below for how to augment this.
 [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
 [2] http://www.gnu.org/software/gettext/gettext.html
-NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
+NOTE: pygettext attempts to be option and feature compatible with GNU
-where ever possible.  However some options are still missing or are not fully
+xgettext where ever possible. However some options are still missing or are
-implemented.  Also, xgettext's use of command line switches with option
+not fully implemented. Also, xgettext's use of command line switches with
-arguments is broken, and in these cases, pygettext just defines additional
+option arguments is broken, and in these cases, pygettext just defines
-switches.
+additional switches. 
 Usage: pygettext [options] inputfile ...
@ -61,9 +79,9 @@ Options:
    -D
    --docstrings
-        Extract module, class, method, and function docstrings.  These do not
+        Extract module, class, method, and function docstrings.  These do
-        need to be wrapped in _() markers, and in fact cannot be for Python to
+        not need to be wrapped in _() markers, and in fact cannot be for
-        consider them docstrings. (See also the -X option).
+        Python to consider them docstrings. (See also the -X option).
    -h
    --help
@ -135,23 +153,17 @@ Options:
        conjunction with the -D option above.
 If `inputfile' is -, standard input is read.
-"""
+""")
 import os
 import sys
 import time
 import getopt
 import token
 import tokenize
 import operator
-# for selftesting
+__version__ = '1.5'
 try:
    import fintl
    _ = fintl.gettext
 except ImportError:
    def _(s): return s
 __version__ = '1.4'
 default_keywords = ['_']
 DEFAULTKEYWORDS = ', '.join(default_keywords)
@ -183,7 +195,7 @@ msgstr ""
 def usage(code, msg=''):
-    print >> sys.stderr, _(__doc__) % globals()
+    print >> sys.stderr, __doc__ % globals()
    if msg:
        print >> sys.stderr, msg
    sys.exit(code)
@ -242,6 +254,103 @@ def normalize(s):
        s = '""\n"' + lineterm.join(lines) + '"'
    return s
 def containsAny(str, set):
    """ Check whether 'str' contains ANY of the chars in 'set'
    """
    return 1 in [c in str for c in set]
 def _visit_pyfiles(list, dirname, names):
    """ Helper for getFilesForName().
    """
    # get extension for python source files
    if not globals().has_key('_py_ext'):
        import imp
        global _py_ext
        _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
    # don't recurse into CVS directories
    if 'CVS' in names:
        names.remove('CVS')
    # add all *.py files to list
    list.extend(
        [os.path.join(dirname, file)
            for file in names
                if os.path.splitext(file)[1] == _py_ext])
 def _get_modpkg_path(dotted_name, pathlist=None):
    """ Get the filesystem path for a module or a package.
        Return the file system path to a file for a module,
        and to a directory for a package. Return None if
        the name is not found, or is a builtin or extension module.
    """
    import imp
    # split off top-most name
    parts = dotted_name.split('.', 1)
    if len(parts) > 1:
        # we have a dotted path, import top-level package
        try:
            file, pathname, description = imp.find_module(parts[0], pathlist)
            if file: file.close()
        except ImportError:
            return None
        # check if it's indeed a package
        if description[2] == imp.PKG_DIRECTORY:
            # recursively handle the remaining name parts
            pathname = _get_modpkg_path(parts[1], [pathname])
        else:
            pathname = None
    else:
        # plain name
        try:
            file, pathname, description = imp.find_module(dotted_name, pathlist)
            if file: file.close()
            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
                pathname = None
        except ImportError:
            pathname = None
    return pathname
 def getFilesForName(name):
    """ Get a list of module files for a filename, a module or package name,
        or a directory.
    """
    import imp
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            import glob
            files = glob.glob(name)
            list = []
            for file in files:
                list.extend(getFilesForName(file))
            return list
        # try to find module or package
        name = _get_modpkg_path(name)
        if not name:
            return []
    if os.path.isdir(name):
        # find all python files in directory
        list = []
        os.path.walk(name, _visit_pyfiles, list)
        return list
    elif os.path.exists(name):
        # a single file
        return [name]
    return []
 class TokenEater:
@ -314,7 +423,12 @@ class TokenEater:
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
-        # TBD: should we warn if we seen anything else?
+        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
                'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
            self.__state = self.__waiting
    def __addentry(self, msg, lineno=None, isdocstring=0):
        if lineno is None:
@ -495,6 +609,15 @@ def main():
    else:
        options.toexclude = []
    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded
    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
@ -539,3 +662,6 @@ if __name__ == '__main__':
    main()
    # some more test strings
    _(u'a unicode string')
    _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
    _('more' 'than' 'one' 'string')