#!/usr/bin/env python3
#
# net.py - part of the FDroid server tools
# Copyright (C) 2015 Hans-Christoph Steiner <hans@eds.org>
# Copyright (C) 2022 FC Stegerman <flx@obfusk.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import copy
import logging
import os
import random
import tempfile
import time
import urllib

import requests
import urllib3
from requests.adapters import HTTPAdapter, Retry

from . import _, common

logger = logging.getLogger(__name__)

HEADERS = {'User-Agent': 'F-Droid'}


def download_file(url, local_filename=None, dldir='tmp', retries=3, backoff_factor=0.1):
    """Try hard to download the file, including retrying on failures.

    This has two retry cycles: one inside of the requests session, the
    other provided by this function.  The requests retry logic applies
    to failed DNS lookups, socket connections and connection timeouts,
    never to requests where data has already made it to the server.
    This function handles ChunkedEncodingError during transfer in its
    own retry loop, which can result in more retries than are specified
    in the retries parameter.

    """
    filename = urllib.parse.urlparse(url).path.split('/')[-1]
    if local_filename is None:
        local_filename = os.path.join(dldir, filename)
    for i in range(retries + 1):
        if retries:
            max_retries = Retry(total=retries - i, backoff_factor=backoff_factor)
            adapter = HTTPAdapter(max_retries=max_retries)
            session = requests.Session()
            session.mount('http://', adapter)
            session.mount('https://', adapter)
        else:
            session = requests
        # the stream=True parameter keeps memory usage low
        r = session.get(
            url, stream=True, allow_redirects=True, headers=HEADERS, timeout=300
        )
        r.raise_for_status()
        try:
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        f.flush()
            return local_filename
        except requests.exceptions.ChunkedEncodingError as err:
            if i == retries:
                raise err
            logger.warning('Download interrupted, retrying...')
            time.sleep(backoff_factor * 2**i)
    raise ValueError("retries must be >= 0")
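
# A minimal usage sketch (the URL and file name below are placeholders, not a
# real F-Droid mirror): on success the downloaded path is returned, otherwise
# an HTTPError (via raise_for_status) or ChunkedEncodingError propagates once
# the retries are exhausted.
#
#     path = download_file('https://example.org/fdroid/repo/entry.jar',
#                          dldir='tmp', retries=3, backoff_factor=0.1)
#     # path == 'tmp/entry.jar'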


def download_using_mirrors(mirrors, local_filename=None):
    """Try to download the file from any working mirror.

    Download the file that all URLs in the mirrors list point to,
    trying all the tricks, starting with the most private methods
    first.  The list of mirrors is converted into a list of mirror
    configurations, in the order in which they should be attempted.

    This builds mirror_configs_to_try using all possible combos to
    try.  If a mirror is marked with worksWithoutSNI: True, then this
    logic will try it twice: first without SNI, then again with SNI.

    """
    mirrors = common.parse_list_of_dicts(mirrors)
    mirror_configs_to_try = []
    for mirror in mirrors:
        mirror_configs_to_try.append(mirror)
        if mirror.get('worksWithoutSNI'):
            m = copy.deepcopy(mirror)
            del m['worksWithoutSNI']
            mirror_configs_to_try.append(m)

    if not local_filename:
        for mirror in mirrors:
            filename = urllib.parse.urlparse(mirror['url']).path.split('/')[-1]
            if filename:
                break
        if filename:
            local_filename = os.path.join(common.get_cachedir(), filename)
        else:
            # tempfile.mkstemp() returns (fd, path); keep only the path
            fd, local_filename = tempfile.mkstemp(prefix='fdroid-')
            os.close(fd)

    timeouts = (2, 10, 100)
    last_exception = None
    for timeout in timeouts:
        for mirror in mirror_configs_to_try:
            last_exception = None
            urllib3.util.ssl_.HAS_SNI = not mirror.get('worksWithoutSNI')
            try:
                # the stream=True parameter keeps memory usage low
                r = requests.get(
                    mirror['url'],
                    stream=True,
                    allow_redirects=False,
                    headers=HEADERS,
                    # add jitter to the timeout to be less predictable
                    timeout=timeout + random.randint(0, timeout),  # nosec B311
                )
                if r.status_code != 200:
                    raise requests.exceptions.HTTPError(r.status_code, response=r)
                with open(local_filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                            f.flush()
                return local_filename
            except (
                ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ConnectionError,
                requests.exceptions.ContentDecodingError,
                requests.exceptions.HTTPError,
                requests.exceptions.SSLError,
                requests.exceptions.StreamConsumedError,
                requests.exceptions.Timeout,
                requests.exceptions.UnrewindableBodyError,
            ) as e:
                last_exception = e
                logger.debug(_('Retrying failed download: %s') % str(e))
    # if it hasn't succeeded by now, then give up and raise the last exception
    if last_exception:
        raise last_exception
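
# A minimal usage sketch with hypothetical mirror entries: a mirror marked
# worksWithoutSNI: True is attempted first without SNI and then again with it,
# and the base timeouts (2, 10, 100) seconds are tried in increasing order,
# each with random jitter added.
#
#     mirrors = [
#         {'url': 'https://mirror-a.example.org/fdroid/repo/entry.jar'},
#         {'url': 'https://mirror-b.example.org/fdroid/repo/entry.jar',
#          'worksWithoutSNI': True},
#     ]
#     path = download_using_mirrors(mirrors)
#     # on success, path is under common.get_cachedir(), named after the URL,
#     # unless local_filename was given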


def http_get(url, etag=None, timeout=600):
    """Download the content from the given URL by making a GET request.

    If an ETag is given, it will do a HEAD request first, to see if the
    content changed.

    Parameters
    ----------
    url
      The URL to download from.
    etag
      The last ETag to be used for the request (optional).
    timeout
      Timeout for each request, in seconds.

    Returns
    -------
    A tuple consisting of:
      - The raw content that was downloaded, or None if it did not change
      - The new ETag as returned by the HTTP request

    """
    # TODO disable TLS Session IDs and TLS Session Tickets
    # (plain text cookie visible to anyone who can see the network traffic)
    if etag:
        r = requests.head(url, headers=HEADERS, timeout=timeout)
        r.raise_for_status()
        if 'ETag' in r.headers and etag == r.headers['ETag']:
            return None, etag

    r = requests.get(url, headers=HEADERS, timeout=timeout)
    r.raise_for_status()

    new_etag = None
    if 'ETag' in r.headers:
        new_etag = r.headers['ETag']

    return r.content, new_etag
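
# A minimal usage sketch of the ETag-based caching, with a placeholder URL:
# passing the previously returned ETag back in makes http_get issue a HEAD
# request first and return (None, etag) when the ETag has not changed, so the
# body is only transferred when the content is new.
#
#     content, etag = http_get('https://example.org/fdroid/repo/index-v1.json')
#     # ... later ...
#     content, etag = http_get('https://example.org/fdroid/repo/index-v1.json',
#                              etag=etag)
#     # content is None if the index did not change since the last request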