use mimetools; add error handling and authentication
This commit is contained in:
parent
7ff5d7f7c7
commit
bbb0a05972
141
Lib/urllib.py
141
Lib/urllib.py
@ -10,16 +10,16 @@
|
|||||||
# protocol. All you know is that is has methods read(), readline(),
|
# protocol. All you know is that is has methods read(), readline(),
|
||||||
# readlines(), fileno(), close() and info(). The read*(), fileno()
|
# readlines(), fileno(), close() and info(). The read*(), fileno()
|
||||||
# and close() methods work like those of open files.
|
# and close() methods work like those of open files.
|
||||||
# The info() method returns an rfc822.Message object which can be
|
# The info() method returns an mimetools.Message object which can be
|
||||||
# used to query various info about the object, if available.
|
# used to query various info about the object, if available.
|
||||||
# (rfc822.Message objects are queried with the getheader() method.)
|
# (mimetools.Message objects are queried with the getheader() method.)
|
||||||
|
|
||||||
import string
|
import string
|
||||||
import socket
|
import socket
|
||||||
import regex
|
import regex
|
||||||
|
|
||||||
|
|
||||||
__version__ = '1.0'
|
__version__ = '1.3b1'
|
||||||
|
|
||||||
|
|
||||||
# This really consists of two pieces:
|
# This really consists of two pieces:
|
||||||
@ -34,12 +34,12 @@ _urlopener = None
|
|||||||
def urlopen(url):
|
def urlopen(url):
|
||||||
global _urlopener
|
global _urlopener
|
||||||
if not _urlopener:
|
if not _urlopener:
|
||||||
_urlopener = URLopener()
|
_urlopener = FancyURLopener()
|
||||||
return _urlopener.open(url)
|
return _urlopener.open(url)
|
||||||
def urlretrieve(url):
|
def urlretrieve(url):
|
||||||
global _urlopener
|
global _urlopener
|
||||||
if not _urlopener:
|
if not _urlopener:
|
||||||
_urlopener = URLopener()
|
_urlopener = FancyURLopener()
|
||||||
return _urlopener.retrieve(url)
|
return _urlopener.retrieve(url)
|
||||||
def urlcleanup():
|
def urlcleanup():
|
||||||
if _urlopener:
|
if _urlopener:
|
||||||
@ -49,6 +49,9 @@ def urlcleanup():
|
|||||||
# Class to open URLs.
|
# Class to open URLs.
|
||||||
# This is a class rather than just a subroutine because we may need
|
# This is a class rather than just a subroutine because we may need
|
||||||
# more than one set of global protocol-specific options.
|
# more than one set of global protocol-specific options.
|
||||||
|
# Note -- this is a base class for those who don't want the
|
||||||
|
# automatic handling of errors type 302 (relocated) and 401
|
||||||
|
# (authorization needed).
|
||||||
ftpcache = {}
|
ftpcache = {}
|
||||||
class URLopener:
|
class URLopener:
|
||||||
|
|
||||||
@ -147,8 +150,19 @@ class URLopener:
|
|||||||
import httplib
|
import httplib
|
||||||
host, selector = splithost(url)
|
host, selector = splithost(url)
|
||||||
if not host: raise IOError, ('http error', 'no host given')
|
if not host: raise IOError, ('http error', 'no host given')
|
||||||
|
i = string.find(host, '@')
|
||||||
|
if i >= 0:
|
||||||
|
user_passwd, host = host[:i], host[i+1:]
|
||||||
|
else:
|
||||||
|
user_passwd = None
|
||||||
|
if user_passwd:
|
||||||
|
import base64
|
||||||
|
auth = string.strip(base64.encodestring(user_passwd))
|
||||||
|
else:
|
||||||
|
auth = None
|
||||||
h = httplib.HTTP(host)
|
h = httplib.HTTP(host)
|
||||||
h.putrequest('GET', selector)
|
h.putrequest('GET', selector)
|
||||||
|
if auth: h.putheader('Authorization: Basic %s' % auth)
|
||||||
for args in self.addheaders: apply(h.putheader, args)
|
for args in self.addheaders: apply(h.putheader, args)
|
||||||
h.endheaders()
|
h.endheaders()
|
||||||
errcode, errmsg, headers = h.getreply()
|
errcode, errmsg, headers = h.getreply()
|
||||||
@ -156,9 +170,27 @@ class URLopener:
|
|||||||
if errcode == 200:
|
if errcode == 200:
|
||||||
return addinfo(fp, headers)
|
return addinfo(fp, headers)
|
||||||
else:
|
else:
|
||||||
n = len(fp.read())
|
return self.http_error(url,
|
||||||
fp.close()
|
fp, errcode, errmsg, headers)
|
||||||
raise IOError, ('http error', errcode, errmsg, headers)
|
|
||||||
|
# Handle http errors.
|
||||||
|
# Derived class can override this, or provide specific handlers
|
||||||
|
# named http_error_DDD where DDD is the 3-digit error code
|
||||||
|
def http_error(self, url, fp, errcode, errmsg, headers):
|
||||||
|
# First check if there's a specific handler for this error
|
||||||
|
name = 'http_error_%d' % errcode
|
||||||
|
if hasattr(self, name):
|
||||||
|
method = getattr(self, name)
|
||||||
|
result = method(url, fp, errcode, errmsg, headers)
|
||||||
|
if result: return result
|
||||||
|
return self.http_error_default(
|
||||||
|
url, fp, errcode, errmsg, headers)
|
||||||
|
|
||||||
|
# Default http error handler: close the connection and raises IOError
|
||||||
|
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
||||||
|
void = fp.read()
|
||||||
|
fp.close()
|
||||||
|
raise IOError, ('http error', errcode, errmsg, headers)
|
||||||
|
|
||||||
# Use Gopher protocol
|
# Use Gopher protocol
|
||||||
def open_gopher(self, url):
|
def open_gopher(self, url):
|
||||||
@ -210,7 +242,6 @@ class URLopener:
|
|||||||
dirs, file = dirs[:-1], dirs[-1]
|
dirs, file = dirs[:-1], dirs[-1]
|
||||||
if dirs and not dirs[0]: dirs = dirs[1:]
|
if dirs and not dirs[0]: dirs = dirs[1:]
|
||||||
key = (user, host, port, string.joinfields(dirs, '/'))
|
key = (user, host, port, string.joinfields(dirs, '/'))
|
||||||
## print 'key =', key
|
|
||||||
try:
|
try:
|
||||||
if not self.ftpcache.has_key(key):
|
if not self.ftpcache.has_key(key):
|
||||||
self.ftpcache[key] = \
|
self.ftpcache[key] = \
|
||||||
@ -229,6 +260,91 @@ class URLopener:
|
|||||||
raise IOError, ('ftp error', msg)
|
raise IOError, ('ftp error', msg)
|
||||||
|
|
||||||
|
|
||||||
|
# Derived class with handlers for errors we can handle (perhaps)
|
||||||
|
class FancyURLopener(URLopener):
|
||||||
|
|
||||||
|
def __init__(self, *args):
|
||||||
|
apply(URLopener.__init__, (self,) + args)
|
||||||
|
self.auth_cache = {}
|
||||||
|
|
||||||
|
# Default error handling -- don't raise an exception
|
||||||
|
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
||||||
|
return addinfo(fp, headers)
|
||||||
|
|
||||||
|
# Error 302 -- relocated
|
||||||
|
def http_error_302(self, url, fp, errcode, errmsg, headers):
|
||||||
|
# XXX The server can force infinite recursion here!
|
||||||
|
if headers.has_key('location'):
|
||||||
|
newurl = headers['location']
|
||||||
|
elif headers.has_key('uri'):
|
||||||
|
newurl = headers['uri']
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
void = fp.read()
|
||||||
|
fp.close()
|
||||||
|
return self.open(newurl)
|
||||||
|
|
||||||
|
# Error 401 -- authentication required
|
||||||
|
# See this URL for a description of the basic authentication scheme:
|
||||||
|
# http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
|
||||||
|
def http_error_401(self, url, fp, errcode, errmsg, headers):
|
||||||
|
if headers.has_key('www-authenticate'):
|
||||||
|
stuff = headers['www-authenticate']
|
||||||
|
p = regex.compile(
|
||||||
|
'[ \t]*\([^ \t]+\)[ \t]+realm="\([^"]*\)"')
|
||||||
|
if p.match(stuff) >= 0:
|
||||||
|
scheme, realm = p.group(1, 2)
|
||||||
|
if string.lower(scheme) == 'basic':
|
||||||
|
return self.retry_http_basic_auth(
|
||||||
|
url, realm)
|
||||||
|
|
||||||
|
def retry_http_basic_auth(self, url, realm):
|
||||||
|
host, selector = splithost(url)
|
||||||
|
i = string.find(host, '@') + 1
|
||||||
|
host = host[i:]
|
||||||
|
user, passwd = self.get_user_passwd(host, realm, i)
|
||||||
|
if not (user or passwd): return None
|
||||||
|
host = user + ':' + passwd + '@' + host
|
||||||
|
newurl = '//' + host + selector
|
||||||
|
return self.open_http(newurl)
|
||||||
|
|
||||||
|
def get_user_passwd(self, host, realm, clear_cache = 0):
|
||||||
|
key = realm + '@' + string.lower(host)
|
||||||
|
if self.auth_cache.has_key(key):
|
||||||
|
if clear_cache:
|
||||||
|
del self.auth_cache[key]
|
||||||
|
else:
|
||||||
|
return self.auth_cache[key]
|
||||||
|
user, passwd = self.prompt_user_passwd(host, realm)
|
||||||
|
if user or passwd: self.auth_cache[key] = (user, passwd)
|
||||||
|
return user, passwd
|
||||||
|
|
||||||
|
def prompt_user_passwd(self, host, realm):
|
||||||
|
# Override this in a GUI environment!
|
||||||
|
try:
|
||||||
|
user = raw_input("Enter username for %s at %s: " %
|
||||||
|
(realm, host))
|
||||||
|
self.echo_off()
|
||||||
|
try:
|
||||||
|
passwd = raw_input(
|
||||||
|
"Enter password for %s in %s at %s: " %
|
||||||
|
(user, realm, host))
|
||||||
|
finally:
|
||||||
|
self.echo_on()
|
||||||
|
return user, passwd
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def echo_off(self):
|
||||||
|
import os
|
||||||
|
os.system("stty -echo")
|
||||||
|
|
||||||
|
def echo_on(self):
|
||||||
|
import os
|
||||||
|
print
|
||||||
|
os.system("stty echo")
|
||||||
|
|
||||||
|
|
||||||
# Utility functions
|
# Utility functions
|
||||||
|
|
||||||
# Return the IP address of the magic hostname 'localhost'
|
# Return the IP address of the magic hostname 'localhost'
|
||||||
@ -259,13 +375,14 @@ def ftperrors():
|
|||||||
ftplib.error_proto)
|
ftplib.error_proto)
|
||||||
return _ftperrors
|
return _ftperrors
|
||||||
|
|
||||||
# Return an empty rfc822.Message object
|
# Return an empty mimetools.Message object
|
||||||
_noheaders = None
|
_noheaders = None
|
||||||
def noheaders():
|
def noheaders():
|
||||||
global _noheaders
|
global _noheaders
|
||||||
if not _noheaders:
|
if not _noheaders:
|
||||||
import rfc822
|
import mimetools
|
||||||
_noheaders = rfc822.Message(open('/dev/null', 'r'))
|
import StringIO
|
||||||
|
_noheaders = mimetools.Message(StringIO.StringIO(), 0)
|
||||||
_noheaders.fp.close() # Recycle file descriptor
|
_noheaders.fp.close() # Recycle file descriptor
|
||||||
return _noheaders
|
return _noheaders
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user