2019-11-29 19:31:34 +02:00
import os
2021-08-17 23:53:49 +03:00
import yt_dlp
2019-11-29 19:31:34 +02:00
from collections import OrderedDict
2022-01-15 12:17:12 +00:00
import shelve
2022-01-21 21:23:59 +00:00
import time
2019-11-29 19:31:34 +02:00
import asyncio
import multiprocessing
import logging
2022-08-24 18:10:49 +02:00
import re
2021-10-28 11:19:17 +01:00
from dl_formats import get_format , get_opts
2019-11-29 19:31:34 +02:00
log = logging . getLogger ( ' ytdl ' )
2019-12-07 21:49:31 +02:00
class DownloadQueueNotifier:
    """Interface through which queue events are reported.

    Every hook is a coroutine that raises ``NotImplementedError`` here;
    a concrete notifier is expected to override all of them.
    """

    async def added(self, dl):
        """Hook awaited with the info record of an item put on the queue."""
        raise NotImplementedError

    async def updated(self, dl):
        """Hook awaited with an info record whose status fields changed."""
        raise NotImplementedError

    async def completed(self, dl):
        """Hook awaited with the info record of an item that left the queue."""
        raise NotImplementedError

    async def canceled(self, id):
        """Hook awaited with the queue key of a canceled item."""
        raise NotImplementedError

    async def cleared(self, id):
        """Hook awaited with the key of an item removed from the done list."""
        raise NotImplementedError
2019-11-29 19:31:34 +02:00
class DownloadInfo:
    """Plain record describing one download request and its progress.

    Instances are what the persistent queue stores on disk and what the
    notifier hooks receive.
    """

    def __init__(self, id, title, url, quality, format, folder):
        # Identity of the requested item.
        self.id = id
        self.title = title
        self.url = url
        # How and where it should be downloaded.
        self.quality = quality
        self.format = format
        self.folder = folder
        # Progress fields stay None until a status update fills them in.
        self.status = None
        self.msg = None
        self.percent = None
        self.speed = None
        self.eta = None
        self.filename = None
        # Creation time in nanoseconds; used to order persisted items.
        self.timestamp = time.time_ns()
2019-11-29 19:31:34 +02:00
class Download:
    """A single download, executed by yt-dlp in a separate process.

    The child process reports progress as dicts on ``status_queue``;
    ``update_status`` consumes them in the parent, mirrors them onto
    ``info`` and forwards each change to the notifier.
    """

    # Shared multiprocessing.Manager, created by the first start() call.
    manager = None

    def __init__(self, download_dir, output_template, output_template_chapter, quality, format, ytdl_opts, info):
        """Store the download parameters; nothing is started here.

        ``format``/``quality`` are translated by ``get_format`` and
        ``get_opts`` into the yt-dlp format selector and extra options.
        ``info`` is the record that receives status updates.
        """
        self.download_dir = download_dir
        self.output_template = output_template
        self.output_template_chapter = output_template_chapter
        self.format = get_format(format, quality)
        self.ytdl_opts = get_opts(format, quality, ytdl_opts)
        self.info = info
        self.canceled = False
        self.tmpfilename = None
        self.status_queue = None
        self.proc = None
        self.loop = None
        self.notifier = None
        self.status_task = None

    def _download(self):
        """Process target: run yt-dlp and push status dicts onto the queue."""
        try:
            def put_status(st):
                # Forward only the whitelisted keys of the progress dict.
                self.status_queue.put({k: v for k, v in st.items() if k in (
                    'tmpfilename',
                    'filename',
                    'status',
                    'msg',
                    'total_bytes',
                    'total_bytes_estimate',
                    'downloaded_bytes',
                    'speed',
                    'eta',
                )})

            def put_status_postprocessor(d):
                # Once MoveFiles has finished, report the resulting file path.
                if d['postprocessor'] == 'MoveFiles' and d['status'] == 'finished':
                    self.status_queue.put({'status': 'finished', 'filename': d['info_dict']['filepath']})

            ret = yt_dlp.YoutubeDL(params={
                'quiet': True,
                'no_color': True,
                'paths': {"home": self.download_dir},
                'outtmpl': {"default": self.output_template, "chapter": self.output_template_chapter},
                'format': self.format,
                'cachedir': False,
                'socket_timeout': 30,
                'progress_hooks': [put_status],
                'postprocessor_hooks': [put_status_postprocessor],
                # Caller-supplied options come last so they override the above.
                **self.ytdl_opts,
            }).download([self.info.url])
            self.status_queue.put({'status': 'finished' if ret == 0 else 'error'})
        except yt_dlp.utils.YoutubeDLError as exc:
            self.status_queue.put({'status': 'error', 'msg': str(exc)})

    async def start(self, notifier):
        """Spawn the download process and wait until it exits.

        Marks the item as ``'preparing'``, notifies ``notifier``, starts the
        status consumer task, then joins the process in the default executor.
        """
        if Download.manager is None:
            Download.manager = multiprocessing.Manager()
        self.status_queue = Download.manager.Queue()
        self.proc = multiprocessing.Process(target=self._download)
        self.proc.start()
        self.loop = asyncio.get_running_loop()
        self.notifier = notifier
        self.info.status = 'preparing'
        await self.notifier.updated(self.info)
        # Keep a reference so the task cannot be garbage-collected mid-run.
        self.status_task = asyncio.create_task(self.update_status())
        return await self.loop.run_in_executor(None, self.proc.join)

    def cancel(self):
        """Kill the process if it is alive and flag the download as canceled."""
        if self.running():
            self.proc.kill()
        self.canceled = True

    def close(self):
        """Release the process object and tell ``update_status`` to stop."""
        if self.started():
            self.proc.close()
            # None is the sentinel that ends the update_status loop.
            self.status_queue.put(None)

    def running(self):
        """Return True while the download process exists and is alive."""
        try:
            return self.proc is not None and self.proc.is_alive()
        except ValueError:
            # Treat the ValueError (e.g. from a closed process object)
            # as "not running".
            return False

    def started(self):
        """Return True once a process has been created for this download."""
        return self.proc is not None

    async def update_status(self):
        """Consume status dicts until the ``None`` sentinel arrives.

        Each dict is copied onto ``info`` and reported through the notifier.
        """
        while True:
            status = await self.loop.run_in_executor(None, self.status_queue.get)
            if status is None:
                return
            # Only overwrite the temp path when the message carries one.
            # The closing 'finished'/'error' message has no such key, and
            # resetting to None would stop the partial file being cleaned up.
            if 'tmpfilename' in status:
                self.tmpfilename = status['tmpfilename']
            if 'filename' in status:
                self.info.filename = os.path.relpath(status.get('filename'), self.download_dir)
                # Set correct file extension for thumbnails
                if self.info.format == 'thumbnail':
                    self.info.filename = re.sub(r'\.webm$', '.jpg', self.info.filename)
            self.info.status = status['status']
            self.info.msg = status.get('msg')
            if 'downloaded_bytes' in status:
                total = status.get('total_bytes') or status.get('total_bytes_estimate')
                if total:
                    self.info.percent = status['downloaded_bytes'] / total * 100
            self.info.speed = status.get('speed')
            self.info.eta = status.get('eta')
            await self.notifier.updated(self.info)
2019-11-29 19:31:34 +02:00
2022-01-15 12:17:12 +00:00
class PersistentQueue:
    """Insertion-ordered mapping of downloads, mirrored to a shelve file.

    The in-memory ``dict`` holds the live download objects; the shelf at
    ``path`` stores only each object's ``info`` record, keyed by its URL.
    """

    def __init__(self, path):
        """Create the shelf at ``path`` if needed, including its directory."""
        pdir = os.path.dirname(path)
        # dirname is '' for a bare file name, which needs no directory.
        # makedirs also creates missing parents, which os.mkdir would not.
        if pdir:
            os.makedirs(pdir, exist_ok=True)
        # Opening with 'c' creates the database so later 'r'/'w' opens work.
        with shelve.open(path, 'c'):
            pass
        self.path = path
        self.dict = OrderedDict()

    def load(self):
        """Fill the in-memory mapping from the shelf, oldest item first."""
        for k, v in self.saved_items():
            # Only the info record was persisted; the other fields are blank.
            self.dict[k] = Download(None, None, None, None, None, {}, v)

    def exists(self, key):
        """Return True if ``key`` is in the in-memory mapping."""
        return key in self.dict

    def get(self, key):
        """Return the item stored under ``key``; raises KeyError if absent."""
        return self.dict[key]

    def items(self):
        """Return the in-memory ``(key, item)`` pairs in insertion order."""
        return self.dict.items()

    def saved_items(self):
        """Return the persisted ``(key, info)`` pairs sorted by timestamp."""
        with shelve.open(self.path, 'r') as shelf:
            return sorted(shelf.items(), key=lambda item: item[1].timestamp)

    def put(self, value):
        """Store ``value`` under its ``info.url`` in memory and on disk."""
        key = value.info.url
        self.dict[key] = value
        with shelve.open(self.path, 'w') as shelf:
            shelf[key] = value.info

    def delete(self, key):
        """Remove ``key`` from memory and disk; raises KeyError if absent."""
        del self.dict[key]
        with shelve.open(self.path, 'w') as shelf:
            shelf.pop(key)

    def next(self):
        """Return the oldest in-memory ``(key, item)`` pair without removing it."""
        return next(iter(self.dict.items()))

    def empty(self):
        """Return True when the in-memory mapping holds no items."""
        return not self.dict
2019-11-29 19:31:34 +02:00
class DownloadQueue:
    """Sequential download queue with persisted pending and done lists.

    Items are keyed by their URL. ``config`` must provide ``STATE_DIR``,
    ``YTDL_OPTIONS``, ``DOWNLOAD_DIR``, ``AUDIO_DOWNLOAD_DIR``,
    ``CUSTOM_DIRS``, ``CREATE_CUSTOM_DIRS``, ``OUTPUT_TEMPLATE`` and
    ``OUTPUT_TEMPLATE_CHAPTER``; ``notifier`` receives all queue events.
    """

    def __init__(self, config, notifier):
        """Open both persistent stores and load the completed list."""
        self.config = config
        self.notifier = notifier
        self.queue = PersistentQueue(self.config.STATE_DIR + '/queue')
        self.done = PersistentQueue(self.config.STATE_DIR + '/completed')
        # The pending queue is not loaded here; initialize() re-adds it.
        self.done.load()

    async def __import_queue(self):
        """Re-add every pending item that was persisted by an earlier run."""
        for k, v in self.queue.saved_items():
            await self.add(v.url, v.quality, v.format, folder=v.folder)

    async def initialize(self):
        """Start the worker loop and the import of persisted items.

        Must be awaited inside a running event loop.
        """
        self.event = asyncio.Event()
        asyncio.create_task(self.__download())
        asyncio.create_task(self.__import_queue())

    def __extract_info(self, url):
        """Return yt-dlp's flat metadata for ``url`` without downloading."""
        return yt_dlp.YoutubeDL(params={
            'quiet': True,
            'no_color': True,
            'extract_flat': True,
            **self.config.YTDL_OPTIONS,
        }).extract_info(url, download=False)

    async def __add_entry(self, entry, quality, format, folder, already):
        """Queue one extracted entry, recursing into playlists and URL refs.

        Returns ``{'status': 'ok'}`` or ``{'status': 'error', 'msg': ...}``.
        """
        etype = entry.get('_type') or 'video'
        if etype == 'playlist':
            entries = entry['entries']
            log.info(f'playlist detected with {len(entries)} entries')
            playlist_index_digits = len(str(len(entries)))
            results = []
            for index, etr in enumerate(entries, start=1):
                etr["playlist"] = entry["id"]
                # Zero-pad the index to the width of the playlist length.
                etr["playlist_index"] = f'{index:0{playlist_index_digits}d}'
                for prop in ("id", "title", "uploader", "uploader_id"):
                    if prop in entry:
                        etr[f"playlist_{prop}"] = entry[prop]
                results.append(await self.__add_entry(etr, quality, format, folder, already))
            if any(res['status'] == 'error' for res in results):
                return {'status': 'error', 'msg': ', '.join(res['msg'] for res in results if res['status'] == 'error' and 'msg' in res)}
            return {'status': 'ok'}
        elif etype == 'video' or (etype.startswith('url') and 'id' in entry and 'title' in entry):
            url = entry.get('webpage_url') or entry['url']
            # The queue is keyed by URL (see PersistentQueue.put), so the
            # duplicate check must use the URL rather than the entry id.
            if not self.queue.exists(url):
                dl = DownloadInfo(entry['id'], entry['title'], url, quality, format, folder)
                # Keep consistent with frontend
                base_directory = self.config.DOWNLOAD_DIR if (quality != 'audio' and format not in ("m4a", "mp3", "opus", "wav")) else self.config.AUDIO_DOWNLOAD_DIR
                if folder:
                    if not self.config.CUSTOM_DIRS:
                        return {'status': 'error', 'msg': f'A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.'}
                    dldirectory = os.path.realpath(os.path.join(base_directory, folder))
                    real_base_directory = os.path.realpath(base_directory)
                    # Compare whole path components: a plain string prefix
                    # test would accept siblings such as "<base>-other".
                    try:
                        inside_base = os.path.commonpath([real_base_directory, dldirectory]) == real_base_directory
                    except ValueError:
                        # No common path at all, e.g. different drives.
                        inside_base = False
                    if not inside_base:
                        return {'status': 'error', 'msg': f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"'}
                    if not os.path.isdir(dldirectory):
                        if not self.config.CREATE_CUSTOM_DIRS:
                            return {'status': 'error', 'msg': f'Folder "{folder}" for download does not exist inside base directory "{real_base_directory}", and CREATE_CUSTOM_DIRS is not true in the configuration.'}
                        os.makedirs(dldirectory, exist_ok=True)
                else:
                    dldirectory = base_directory
                output = self.config.OUTPUT_TEMPLATE
                output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER
                # Substitute the playlist_* values into the output template here.
                for prop, value in entry.items():
                    if prop.startswith("playlist"):
                        output = output.replace(f"%({prop})s", str(value))
                self.queue.put(Download(dldirectory, output, output_chapter, quality, format, self.config.YTDL_OPTIONS, dl))
                self.event.set()
                await self.notifier.added(dl)
            return {'status': 'ok'}
        elif etype.startswith('url'):
            # URL reference without id/title: resolve it through add().
            return await self.add(entry['url'], quality, format, folder, already)
        return {'status': 'error', 'msg': f'Unsupported resource "{etype}"'}

    async def add(self, url, quality, format, folder, already=None):
        """Extract ``url`` and queue whatever it resolves to.

        ``already`` is the set of URLs seen during the current recursive
        resolution; a repeated URL is skipped and reported as ok.
        Returns ``{'status': 'ok'}`` or ``{'status': 'error', 'msg': ...}``.
        """
        log.info(f'adding {url}: {quality=} {format=} {already=} {folder=}')
        already = set() if already is None else already
        if url in already:
            log.info('recursion detected, skipping')
            return {'status': 'ok'}
        already.add(url)
        try:
            entry = await asyncio.get_running_loop().run_in_executor(None, self.__extract_info, url)
        except yt_dlp.utils.YoutubeDLError as exc:
            return {'status': 'error', 'msg': str(exc)}
        return await self.__add_entry(entry, quality, format, folder, already)

    async def cancel(self, ids):
        """Cancel the pending downloads with the given keys.

        A started download is killed and left for the worker loop to remove
        and report; one that has not started is removed and reported here.
        """
        for key in ids:
            if not self.queue.exists(key):
                log.warning(f'requested cancel for non-existent download {key}')
                continue
            download = self.queue.get(key)
            if download.started():
                download.cancel()
            else:
                self.queue.delete(key)
                await self.notifier.canceled(key)
        return {'status': 'ok'}

    async def clear(self, ids):
        """Remove the given keys from the completed list and notify."""
        for key in ids:
            if not self.done.exists(key):
                log.warning(f'requested delete for non-existent download {key}')
                continue
            self.done.delete(key)
            await self.notifier.cleared(key)
        return {'status': 'ok'}

    def get(self):
        """Return ``(pending, done)`` as lists of ``(key, info)`` pairs."""
        return ([(k, v.info) for k, v in self.queue.items()],
                [(k, v.info) for k, v in self.done.items()])

    async def __download(self):
        """Worker loop: run queued downloads one at a time, forever."""
        while True:
            while self.queue.empty():
                log.info('waiting for item to download')
                await self.event.wait()
                self.event.clear()
            key, entry = self.queue.next()
            log.info(f'downloading {entry.info.title}')
            await entry.start(self.notifier)
            if entry.info.status != 'finished':
                # Best-effort removal of the partial file left behind.
                if entry.tmpfilename and os.path.isfile(entry.tmpfilename):
                    try:
                        os.remove(entry.tmpfilename)
                    except OSError as exc:
                        log.warning(f'could not remove temporary file {entry.tmpfilename}: {exc}')
                entry.info.status = 'error'
            entry.close()
            # Skip the bookkeeping if the key is no longer in the queue.
            if self.queue.exists(key):
                self.queue.delete(key)
                if entry.canceled:
                    await self.notifier.canceled(key)
                else:
                    self.done.put(entry)
                    await self.notifier.completed(entry.info)