## HandBrake/make/df-fetch.py
## snapshot metadata: 2022-12-17 15:19:24 -05:00, 241 lines, 8.4 KiB, Python

###############################################################################
##
## This script is coded for Python 2.7 through Python 3.x
##
## Authors: konablend
##
###############################################################################
import glob
import hashlib
import random
import re
import os
import signal
import sys
import time
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
sys.dont_write_bytecode = True
sys.path.insert(0, os.path.join(sys.path[0], 'lib'))
import hb_distfile
###############################################################################
def signal_handler(signum, frame):
    """Exit with status 1 on SIGINT/SIGTERM, printing a brief '^C' marker.

    The parameter was renamed from 'signal' to 'signum' so it no longer
    shadows the 'signal' module (a latent hazard if the handler body ever
    needed the module).
    """
    sys.stderr.write('^C')
    sys.exit(1)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
###############################################################################
## simple structure object
class Struct(object):
    """Minimal attribute bag: an empty object instances can hang fields on."""
## track resources and ensure cleanup
##
## - items are lambdas accepting no args
## - item order of insertion is important
## - cleanup will run in reverse order of insertion
## - item update does not affect order
##
class Ensure(object):
    """Ordered registry of no-arg cleanup callables set as attributes.

    run() invokes the callables in reverse order of first insertion.
    Reassigning an existing attribute replaces its callable in place
    (bug fix: previously the stale callable remained in the list and the
    replacement was never registered, so run() invoked the old lambda).
    Deleting an attribute unregisters its callable.
    """
    def __init__(self):
        # bypass our own __setattr__ so the registry list is not tracked
        super(Ensure, self).__setattr__('_items', [])
    def __delattr__(self, key):
        if key in self.__dict__:
            self._items.remove(self.__dict__[key])
        super(Ensure, self).__delattr__(key)
    def __setattr__(self, key, value):
        if key in self.__dict__:
            # update in place so cleanup order is preserved
            self._items[self._items.index(self.__dict__[key])] = value
        else:
            # newest first; forward iteration in run() yields reverse
            # order of insertion
            self._items.insert(0, value)
        super(Ensure, self).__setattr__(key, value)
    def run(self):
        """Invoke all registered callables, best-effort: failures are ignored
        so one broken cleanup cannot block the rest."""
        for item in self._items:
            try:
                item()
            except Exception:
                pass
###############################################################################
class Tool(hb_distfile.Tool):
    """Command-line driver: fetch one distfile from a list of candidate URLs
    and verify its sha256.

    NOTE(review): option/argument plumbing (self.parser, self.options,
    self.args, _parse, errln, debug_exception) comes from the unseen
    hb_distfile.Tool base class -- semantics inferred from usage here;
    confirm against make/lib/hb_distfile.py.
    """
    def __init__(self):
        super(Tool, self).__init__()
        self.parser.prog = self.name
        self.parser.usage = '%(prog)s [OPTIONS] URL...'
        self.parser.description = 'Fetch and verify distfile data integrity.'
        self.parser.add_argument('--disable', default=False, action='store_true', help='do nothing and exit with error')
        # NOTE(review): no type=int here, so a command-line --jobs arrives as
        # a string; the numeric comparisons in _run assume an int (as loaded
        # from config by _load_config2) -- confirm intended usage.
        self.parser.add_argument('--jobs', default=1, action='store', metavar='N', help='allow N download jobs at once')
        self.parser.add_argument('--sha256', default=None, action='store', metavar='HASH', help='verify sha256 HASH against data')
        self.parser.add_argument('--accept-url', default=[], action='append', metavar='SPEC', help='accept URL regex pattern')
        self.parser.add_argument('--deny-url', default=[], action='append', metavar='SPEC', help='deny URL regex pattern')
        self.parser.add_argument('--exhaust-url', default=None, action='store_true', help='try all active distfiles')
        self.parser.add_argument('--output', default=None, action='store', metavar='FILE', help='write to FILE')
        self._parse()
    def _load_config2(self, parser, data):
        # Overlay values from the project config dict onto parsed options.
        # NOTE(review): parser.values is optparse-style while __init__ uses
        # argparse-style add_argument; presumably the base class bridges the
        # two -- verify in hb_distfile.
        parser.values.disable = data['disable-fetch']
        parser.values.jobs = data['jobs']
        parser.values.accept_url = data['accept-url']
        parser.values.deny_url = data['deny-url']
    def _run(self, error):
        """Fetch the first working URL from self.args, raising via error() on
        failure; error.op is updated so run() can report the failing phase."""
        # throttle instances
        # clamp concurrency to the range [1, 20]
        if tool.options.jobs < 1:
            tool.options.jobs = 1
        if tool.options.jobs > 20:
            tool.options.jobs = 20
        # NOTE(review): assumes --output (or config) is always provided;
        # os.path.dirname(None) would raise TypeError -- confirm callers.
        dirname = os.path.dirname(tool.options.output)
        # stagger startup, then poll until a download slot frees up;
        # sibling *.tmp files in the output directory count as active jobs
        time.sleep(random.uniform(0.1,2))
        active = len(glob.glob(dirname + '/*.tmp'))
        while active >= tool.options.jobs:
            time.sleep(2)
            active = len(glob.glob(dirname + '/*.tmp'))
        # handle disabled
        if self.options.disable:
            raise error('administratively disabled')
        ## create URL objects and keep active
        # each positional arg (after the first) becomes a URL; accept/deny
        # filtering happens inside the URL constructor
        urls = []
        i = 0
        for arg in self.args[1:]:
            url = URL(arg, i)
            if url.active:
                urls.append(url)
            i += 1
        ## try each URL until first success
        error.op = 'download'
        if not urls:
            raise error('nothing to download')
        while urls:
            url = urls.pop(0)
            try:
                url.download(error)
                # with --exhaust-url we keep going even after a success
                if not self.options.exhaust_url:
                    break
            except Exception as x:
                ## propagate exception if no remaining urls
                if not urls:
                    raise
                self.errln('%s failure; %s' % (error.op,x))
    def run(self):
        """Top-level entry: wrap _run, reporting any failure and exiting 1."""
        error = hb_distfile.ToolError(self.name)
        try:
            self._run(error)
        except Exception as x:
            self.debug_exception()
            self.errln('%s failure; %s' % (error.op,x), exit=1)
###############################################################################
class URL(object):
    """One candidate download URL with accept/deny filtering and fetch logic.

    Construction applies the --accept-url and --deny-url regex rules and
    logs the verdict via the module-global tool singleton.
    """
    def __init__(self, url, index):
        self.index = index
        self.url = url
        self.active = True
        self.rule = 'none'
        self._accept()
        self._deny()
        tool.verbosef('URL[%d]: %s\n' % (self.index,self.url))
        tool.verbosef('  active: %s\n' % ('yes' if self.active else 'no'))
        tool.verbosef('  rule: %s\n' % (self.rule))
    def _accept(self):
        """Deactivate this URL unless it matches some --accept-url pattern
        (no patterns means accept everything)."""
        if not tool.options.accept_url:
            return
        for index, spec in enumerate(tool.options.accept_url):
            if re.search(spec, self.url):
                self.rule = 'via accept rule[%d]: %s' % (index,spec)
                return
        self.active = False
        self.rule = 'no matching accept rule'
    def _deny(self):
        """Deactivate this URL if it matches any --deny-url pattern."""
        for index, spec in enumerate(tool.options.deny_url):
            if re.search(spec, self.url):
                self.active = False
                self.rule = 'via deny rule[%d]: %s' % (index,spec)
                return
    def _download(self, error, ensure):
        """Stream the URL (optionally to --output via a temp file), hashing
        as we go; verify Content-Length and --sha256, then commit the file.

        Cleanup callables are registered on `ensure` so download() can undo
        partial work on any failure.
        """
        filename = tool.options.output
        hasher = hashlib.sha256()
        if filename:
            tool.infof('downloading %s to %s\n' % (self.url,filename))
            ftmp = tool.mktmpname(filename)
            hout = open(ftmp, 'wb')
            ensure.unlink_ftmp = lambda: os.unlink(ftmp)
            ensure.close_hout = lambda: hout.close()
        else:
            tool.infof('downloading %s\n' % (self.url))
        hin = urlopen(self.url, None, 30)
        ensure.close_hin = lambda: hin.close()
        info = hin.info()
        try:
            # Bug fix: the old info.getheader() call is Python 2-only; on
            # Python 3 it raised AttributeError and the bare except silently
            # disabled length verification and progress. .get() exists on
            # both the py2 mimetools.Message and py3 email.message.Message
            # header objects.
            content_length = int(info.get('Content-Length'))
        except (AttributeError, TypeError, ValueError):
            content_length = None
        data_total = 0
        data_total_percent = 0.0
        while True:
            data = hin.read(65536)
            if not data:
                break
            if filename:
                hout.write(data)
            hasher.update(data)
            data_total += len(data)
            if content_length and content_length > 0:
                data_total_percent = float(data_total) / content_length
                # never show 100% until the byte count actually matches
                if data_total_percent >= 1 and data_total < content_length:
                    data_total_percent = 0.999999
            else:
                # unknown length: signal indeterminate progress
                data_total_percent = -1
            tool.progressf(data_total_percent, 'downloading... %9d bytes' % data_total)
        if content_length and content_length != data_total:
            raise error('expected %d bytes, got %d bytes' % (content_length,data_total))
        s = 'download total: %9d bytes\n' % data_total
        if filename:
            s += 'sha256 (%s) = %s' % (filename,hasher.hexdigest())
        else:
            s += 'sha256 = %s' % (hasher.hexdigest())
        if tool.options.sha256:
            sha256_pass = tool.options.sha256 == hasher.hexdigest()
            s += ' (%s)' % ('pass' if sha256_pass else 'fail; expecting %s' % tool.options.sha256)
        tool.infof('%s\n' % s)
        # Commit only applies when writing to a file; guarded so ftmp is
        # never referenced when --output was not given (it is unbound then).
        if filename:
            if tool.options.sha256:
                if sha256_pass:
                    if os.access(filename, os.F_OK) and not os.access(filename, os.W_OK):
                        raise error("permission denied: '%s'" % filename)
                else:
                    raise error("expected sha256 hash '%s', got '%s'" % (tool.options.sha256, hasher.hexdigest()))
            os.rename(ftmp,filename)
            # the temp file is gone; drop its unlink so cleanup won't fail
            del ensure.unlink_ftmp
    def download(self, error):
        """Run _download with guaranteed cleanup of handles and temp files."""
        ensure = Ensure()
        try:
            self._download(error, ensure)
        finally:
            ensure.run()
###############################################################################
# Script entry point: construct the singleton Tool (URL and Tool methods
# reference the module-global 'tool') and run it. Guarded so importing this
# file for tooling/introspection does not trigger argument parsing and a
# download as a side effect; direct execution is unchanged.
if __name__ == '__main__':
    tool = Tool()
    tool.run()