#17403: urllib.robotparser normalizes the urls before adding to ruleline.
This helps in handling certain types of invalid URLs in a conservative manner.
This commit is contained in:
parent
eb4c9c77b8
commit
c70a6ae49b
@ -234,6 +234,18 @@ bad = ['/some/path']
|
|||||||
|
|
||||||
RobotTest(15, doc, good, bad)
|
RobotTest(15, doc, good, bad)
|
||||||
|
|
||||||
|
# 16. Empty query (issue #17403). Normalizing the url first.
|
||||||
|
doc = """
|
||||||
|
User-agent: *
|
||||||
|
Allow: /some/path?
|
||||||
|
Disallow: /another/path?
|
||||||
|
"""
|
||||||
|
|
||||||
|
good = ['/some/path?']
|
||||||
|
bad = ['/another/path?']
|
||||||
|
|
||||||
|
RobotTest(16, doc, good, bad)
|
||||||
|
|
||||||
|
|
||||||
class NetworkTestCase(unittest.TestCase):
|
class NetworkTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -157,6 +157,7 @@ class RuleLine:
|
|||||||
if path == '' and not allowance:
|
if path == '' and not allowance:
|
||||||
# an empty value means allow all
|
# an empty value means allow all
|
||||||
allowance = True
|
allowance = True
|
||||||
|
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
|
||||||
self.path = urllib.parse.quote(path)
|
self.path = urllib.parse.quote(path)
|
||||||
self.allowance = allowance
|
self.allowance = allowance
|
||||||
|
|
||||||
|
@ -24,6 +24,10 @@ Core and Builtins
|
|||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #17403: urllib.robotparser normalizes the urls before adding to
|
||||||
|
ruleline. This helps in handling certain types of invalid URLs in a conservative
|
||||||
|
manner.
|
||||||
|
|
||||||
- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
|
- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
|
||||||
stream's read() returns more bytes than requested.
|
stream's read() returns more bytes than requested.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user