2019-11-29 19:31:34 +02:00
import os
2021-08-17 23:53:49 +03:00
import yt_dlp
2019-11-29 19:31:34 +02:00
from collections import OrderedDict
2022-01-15 12:17:12 +00:00
import shelve
2022-01-21 21:23:59 +00:00
import time
2019-11-29 19:31:34 +02:00
import asyncio
import multiprocessing
import logging
2022-08-24 18:10:49 +02:00
import re
2023-03-05 10:34:49 +01:00
from dl_formats import get_format , get_opts , AUDIO_FORMATS
2019-11-29 19:31:34 +02:00
2023-05-09 16:05:38 -04:00
# Module-level logger used by the queue code below for info/warning messages.
log = logging . getLogger ( " ytdl " )
2019-11-29 19:31:34 +02:00
2019-12-07 21:49:31 +02:00
class DownloadQueueNotifier:
    """Callback interface through which queue state changes are reported.

    Every hook is a coroutine that raises ``NotImplementedError`` in this
    base class; a concrete notifier is expected to override all of them.
    """

    async def added(self, dl):
        """Hook invoked with the info object of a newly queued download."""
        raise NotImplementedError()

    async def updated(self, dl):
        """Hook invoked with the info object whenever its status changes."""
        raise NotImplementedError()

    async def completed(self, dl):
        """Hook invoked with the info object once a download has ended."""
        raise NotImplementedError()

    async def canceled(self, id):
        """Hook invoked with the queue key of a canceled download."""
        raise NotImplementedError()

    async def cleared(self, id):
        """Hook invoked with the key of an entry removed from the done list."""
        raise NotImplementedError()
2023-05-09 16:05:38 -04:00
2019-11-29 19:31:34 +02:00
class DownloadInfo:
    """Description and progress state of a single download.

    Args:
        id: Identifier of the media item.
        title: Title of the media item.
        url: URL the item is downloaded from.
        quality: Requested quality.
        format: Requested format.
        folder: Optional sub-folder requested for the download.
        custom_name_prefix: Optional prefix. When non-empty it is prepended,
            followed by a dot, to both ``id`` and ``title``. ``None`` is
            treated like an empty string.
    """

    def __init__(self, id, title, url, quality, format, folder, custom_name_prefix):
        # Normalise None to "" so a missing prefix does not raise TypeError.
        prefix = custom_name_prefix or ""
        self.id = f"{prefix}.{id}" if prefix else id
        self.title = f"{prefix}.{title}" if prefix else title
        self.url = url
        self.quality = quality
        self.format = format
        self.folder = folder
        self.custom_name_prefix = prefix
        # Progress fields; filled in later while the download runs.
        self.status = self.msg = self.percent = self.speed = self.eta = None
        # Creation time in nanoseconds.
        self.timestamp = time.time_ns()
2019-11-29 19:31:34 +02:00
2023-05-09 16:05:38 -04:00
2019-11-29 19:31:34 +02:00
class Download:
    """A single yt-dlp download that runs in a separate process.

    The child process pushes status dictionaries onto a manager queue;
    ``update_status`` consumes them in the parent, copies the values onto
    ``self.info`` and forwards every change to the notifier.
    """

    # Shared multiprocessing manager, created lazily by the first start().
    manager = None

    # Keys of a progress dictionary that are forwarded to the parent process.
    _STATUS_KEYS = (
        "tmpfilename",
        "filename",
        "status",
        "msg",
        "total_bytes",
        "total_bytes_estimate",
        "downloaded_bytes",
        "speed",
        "eta",
    )

    def __init__(
        self, download_dir, temp_dir, output_template, output_template_chapter, quality, format, ytdl_opts, info
    ):
        """Store the download settings; nothing is started here.

        Args:
            download_dir: Directory passed to yt-dlp as the "home" path.
            temp_dir: Directory passed to yt-dlp as the "temp" path.
            output_template: Default output template.
            output_template_chapter: Output template for chapters.
            quality: Requested quality, resolved through ``get_format``
                and ``get_opts``.
            format: Requested format, resolved the same way.
            ytdl_opts: Extra yt-dlp options, passed through ``get_opts``.
            info: Info object describing the download; its ``url`` is what
                gets downloaded and its progress fields are updated.
        """
        self.download_dir = download_dir
        self.temp_dir = temp_dir
        self.output_template = output_template
        self.output_template_chapter = output_template_chapter
        self.format = get_format(format, quality)
        self.ytdl_opts = get_opts(format, quality, ytdl_opts)
        self.info = info
        self.canceled = False
        self.tmpfilename = None
        self.status_queue = None
        self.proc = None
        self.loop = None
        self.notifier = None
        # Strong reference to the update_status() task, set by start().
        self.status_task = None

    def _download(self):
        """Run yt-dlp for ``self.info.url``; this is the child process target.

        Progress and the final result are reported through
        ``self.status_queue``. A ``YoutubeDLError`` is reported as an
        ``error`` status carrying the exception text.
        """

        def put_status(st):
            # Forward only the whitelisted keys of the progress dictionary.
            self.status_queue.put({k: v for k, v in st.items() if k in self._STATUS_KEYS})

        def put_status_postprocessor(d):
            # Only the finished MoveFiles step is reported: it carries the
            # final location of the downloaded file.
            if d["postprocessor"] == "MoveFiles" and d["status"] == "finished":
                info_dict = d["info_dict"]
                if "__finaldir" in info_dict:
                    filename = os.path.join(info_dict["__finaldir"], os.path.basename(info_dict["filepath"]))
                else:
                    filename = info_dict["filepath"]
                self.status_queue.put({"status": "finished", "filename": filename})

        try:
            ret = yt_dlp.YoutubeDL(
                params={
                    "quiet": True,
                    "no_color": True,
                    "paths": {"home": self.download_dir, "temp": self.temp_dir},
                    "outtmpl": {"default": self.output_template, "chapter": self.output_template_chapter},
                    "format": self.format,
                    "socket_timeout": 30,
                    "progress_hooks": [put_status],
                    "postprocessor_hooks": [put_status_postprocessor],
                    **self.ytdl_opts,
                }
            ).download([self.info.url])
            self.status_queue.put({"status": "finished" if ret == 0 else "error"})
        except yt_dlp.utils.YoutubeDLError as exc:
            self.status_queue.put({"status": "error", "msg": str(exc)})

    async def start(self, notifier):
        """Spawn the download process and wait until it exits.

        Args:
            notifier: Object whose ``updated`` coroutine receives
                ``self.info`` on every status change.

        Returns:
            The result of joining the child process.
        """
        if Download.manager is None:
            Download.manager = multiprocessing.Manager()
        self.status_queue = Download.manager.Queue()
        self.proc = multiprocessing.Process(target=self._download)
        self.proc.start()
        self.loop = asyncio.get_running_loop()
        self.notifier = notifier
        self.info.status = "preparing"
        await self.notifier.updated(self.info)
        # Keep a reference: the event loop only holds weak references to
        # tasks, so an unreferenced task may be garbage-collected mid-run.
        self.status_task = asyncio.create_task(self.update_status())
        return await self.loop.run_in_executor(None, self.proc.join)

    def cancel(self):
        """Kill the child process if it is running and mark the download canceled."""
        if self.running():
            self.proc.kill()
        self.canceled = True

    def close(self):
        """Release the process object and stop the status consumer."""
        if self.started():
            self.proc.close()
            # None is the sentinel that makes update_status() return.
            self.status_queue.put(None)

    def running(self):
        """Return True while the child process exists and is alive."""
        try:
            return self.proc is not None and self.proc.is_alive()
        except ValueError:
            # is_alive() raised ValueError; treated as "not running".
            return False

    def started(self):
        """Return True once a process object has been created by start()."""
        return self.proc is not None

    async def update_status(self):
        """Consume status dictionaries until the ``None`` sentinel arrives.

        Each status is copied onto ``self.info`` and then forwarded through
        ``self.notifier.updated``.
        """
        while True:
            status = await self.loop.run_in_executor(None, self.status_queue.get)
            if status is None:
                return
            # Remember the last known temp file. Statuses without the key
            # (e.g. the final "error") must not erase it, otherwise the
            # leftover partial file can no longer be located for cleanup.
            if status.get("tmpfilename"):
                self.tmpfilename = status["tmpfilename"]
            if "filename" in status:
                self.info.filename = os.path.relpath(status["filename"], self.download_dir)
                # Set correct file extension for thumbnails
                if self.info.format == "thumbnail":
                    self.info.filename = re.sub(r"\.webm$", ".jpg", self.info.filename)
            self.info.status = status["status"]
            self.info.msg = status.get("msg")
            if "downloaded_bytes" in status:
                total = status.get("total_bytes") or status.get("total_bytes_estimate")
                if total:
                    self.info.percent = status["downloaded_bytes"] / total * 100
            self.info.speed = status.get("speed")
            self.info.eta = status.get("eta")
            await self.notifier.updated(self.info)
2019-11-29 19:31:34 +02:00
2023-05-09 16:05:38 -04:00
2022-01-15 12:17:12 +00:00
class PersistentQueue:
    """Insertion-ordered queue kept in memory and mirrored to a shelve file.

    The in-memory ``OrderedDict`` holds the queued objects, keyed by
    ``value.info.url``; the shelf stores only ``value.info`` under the
    same key.
    """

    def __init__(self, path):
        """Create the shelf at ``path`` (and its directory) if missing.

        Args:
            path: File path of the shelf.
        """
        pdir = os.path.dirname(path)
        # A bare file name has no directory part; makedirs also creates
        # missing parents, which a single mkdir cannot do.
        if pdir:
            os.makedirs(pdir, exist_ok=True)
        # Opening with "c" creates the shelf file when it does not exist.
        with shelve.open(path, "c"):
            pass
        self.path = path
        self.dict = OrderedDict()

    def load(self):
        """Fill the in-memory dict from the shelf.

        Each saved info object is wrapped in a ``Download`` built with
        placeholder (``None``) settings.
        """
        for key, info in self.saved_items():
            self.dict[key] = Download(None, None, None, None, None, None, {}, info)

    def exists(self, key):
        """Return True if ``key`` is present in the in-memory dict."""
        return key in self.dict

    def get(self, key):
        """Return the in-memory entry for ``key``; raises KeyError if absent."""
        return self.dict[key]

    def items(self):
        """Return the ``(key, value)`` view of the in-memory dict."""
        return self.dict.items()

    def saved_items(self):
        """Return the shelf's ``(key, info)`` pairs sorted by ``info.timestamp``."""
        with shelve.open(self.path, "r") as shelf:
            return sorted(shelf.items(), key=lambda item: item[1].timestamp)

    def put(self, value):
        """Store ``value`` in memory and its ``info`` in the shelf."""
        key = value.info.url
        self.dict[key] = value
        with shelve.open(self.path, "w") as shelf:
            shelf[key] = value.info

    def delete(self, key):
        """Remove ``key`` from memory and from the shelf.

        Raises:
            KeyError: If ``key`` is not in the in-memory dict.
        """
        del self.dict[key]
        with shelve.open(self.path, "w") as shelf:
            # The in-memory entry is already gone; a key missing on disk
            # must not leave the two stores out of sync by raising here.
            shelf.pop(key, None)

    def next(self):
        """Return the oldest ``(key, value)`` pair without removing it.

        Raises:
            StopIteration: If the queue is empty.
        """
        return next(iter(self.dict.items()))

    def empty(self):
        """Return True when the in-memory dict holds no entries."""
        return not self.dict
2019-11-29 19:31:34 +02:00
class DownloadQueue:
    """Queue of pending downloads plus a list of finished ones.

    Pending entries live in ``self.queue`` and finished entries in
    ``self.done``; both are persisted under ``config.STATE_DIR``. One
    background task downloads the pending entries one at a time.
    """

    def __init__(self, config, notifier):
        """Open the persistent stores and load the finished entries.

        Args:
            config: Configuration object; the attributes read by this class
                are STATE_DIR, DOWNLOAD_DIR, AUDIO_DOWNLOAD_DIR, TEMP_DIR,
                CUSTOM_DIRS, CREATE_CUSTOM_DIRS, OUTPUT_TEMPLATE,
                OUTPUT_TEMPLATE_CHAPTER, YTDL_OPTIONS and
                DELETE_FILE_ON_TRASHCAN.
            notifier: Object providing the added/updated/completed/
                canceled/cleared coroutines.
        """
        self.config = config
        self.notifier = notifier
        self.queue = PersistentQueue(self.config.STATE_DIR + "/queue")
        self.done = PersistentQueue(self.config.STATE_DIR + "/completed")
        self.done.load()

    async def __import_queue(self):
        """Re-add every pending entry that was saved by an earlier run."""
        for _, saved in self.queue.saved_items():
            await self.add(saved.url, saved.quality, saved.format, saved.folder, saved.custom_name_prefix)

    async def initialize(self):
        """Create the wake-up event and start the background tasks."""
        self.event = asyncio.Event()
        # Keep references: the event loop only holds weak references to
        # tasks, so unreferenced tasks may be garbage-collected mid-run.
        self._tasks = [
            asyncio.create_task(self.__download()),
            asyncio.create_task(self.__import_queue()),
        ]

    def __extract_info(self, url):
        """Return yt-dlp's flat metadata for ``url`` without downloading."""
        return yt_dlp.YoutubeDL(
            params={
                "quiet": True,
                "no_color": True,
                "extract_flat": True,
                **self.config.YTDL_OPTIONS,
            }
        ).extract_info(url, download=False)

    @staticmethod
    def __is_within(base, path):
        """Return True if ``path`` equals ``base`` or lies beneath it.

        Both arguments are expected to be resolved absolute paths. Whole
        path components are compared, so ``/data-evil`` is not considered
        to be inside ``/data`` (a plain string prefix test would accept it).
        """
        try:
            return os.path.commonpath([base, path]) == base
        except ValueError:
            # commonpath() rejects e.g. paths on different drives.
            return False

    def __calc_download_path(self, quality, format, folder, create=True):
        """Resolve the target directory of a download.

        Args:
            quality: Requested quality; "audio" selects the audio directory.
            format: Requested format; a member of AUDIO_FORMATS also selects
                the audio directory.
            folder: Optional sub-folder relative to the base directory.
            create: When True, a missing sub-folder is created if
                CREATE_CUSTOM_DIRS allows it and reported as an error
                otherwise. When False, the directory is only computed.

        Returns:
            ``(directory, None)`` on success or ``(None, error_dict)``.
        """
        # Keep consistent with frontend
        base_directory = (
            self.config.DOWNLOAD_DIR
            if (quality != "audio" and format not in AUDIO_FORMATS)
            else self.config.AUDIO_DOWNLOAD_DIR
        )
        if not folder:
            return base_directory, None
        if not self.config.CUSTOM_DIRS:
            return None, {
                "status": "error",
                "msg": "A folder for the download was specified but CUSTOM_DIRS is not true in the configuration.",
            }
        dldirectory = os.path.realpath(os.path.join(base_directory, folder))
        real_base_directory = os.path.realpath(base_directory)
        if not self.__is_within(real_base_directory, dldirectory):
            return None, {
                "status": "error",
                "msg": f'Folder "{folder}" must resolve inside the base download directory "{real_base_directory}"',
            }
        if create and not os.path.isdir(dldirectory):
            if not self.config.CREATE_CUSTOM_DIRS:
                return None, {
                    "status": "error",
                    "msg": f'Folder "{folder}" for download does not exist inside base directory "{real_base_directory}", and CREATE_CUSTOM_DIRS is not true in the configuration.',
                }
            os.makedirs(dldirectory, exist_ok=True)
        return dldirectory, None

    async def __add_entry(self, entry, quality, format, folder, custom_name_prefix, already):
        """Queue one extracted entry, recursing into playlists and URL references.

        Returns:
            ``{"status": "ok"}`` or ``{"status": "error", "msg": ...}``.
        """
        etype = entry.get("_type") or "video"
        if etype == "playlist":
            entries = entry["entries"]
            log.info(f"playlist detected with {len(entries)} entries")
            playlist_index_digits = len(str(len(entries)))
            results = []
            for index, etr in enumerate(entries, start=1):
                etr["playlist"] = entry["id"]
                # Zero-pad the index to the width of the playlist length.
                etr["playlist_index"] = f"{index:0{playlist_index_digits}d}"
                for prop in ("id", "title", "uploader", "uploader_id"):
                    if prop in entry:
                        etr[f"playlist_{prop}"] = entry[prop]
                results.append(await self.__add_entry(etr, quality, format, folder, custom_name_prefix, already))
            errors = [res for res in results if res["status"] == "error"]
            if errors:
                return {
                    "status": "error",
                    "msg": ", ".join(res["msg"] for res in errors if "msg" in res),
                }
            return {"status": "ok"}
        if etype == "video" or (etype.startswith("url") and "id" in entry and "title" in entry):
            url = entry.get("webpage_url") or entry["url"]
            # The queue is keyed by URL, so duplicates must be looked up by
            # URL; checking the id never matched and replaced live entries.
            if not self.queue.exists(url):
                dl = DownloadInfo(
                    entry["id"],
                    entry["title"],
                    url,
                    quality,
                    format,
                    folder,
                    custom_name_prefix,
                )
                dldirectory, error = self.__calc_download_path(quality, format, folder)
                if error is not None:
                    return error
                output = (
                    self.config.OUTPUT_TEMPLATE
                    if not custom_name_prefix
                    else f"{custom_name_prefix}.{self.config.OUTPUT_TEMPLATE}"
                )
                output_chapter = self.config.OUTPUT_TEMPLATE_CHAPTER
                # Substitute the playlist fields into the template here,
                # using the values recorded on the entry.
                for prop, value in entry.items():
                    if prop.startswith("playlist"):
                        output = output.replace(f"%({prop})s", str(value))
                self.queue.put(
                    Download(
                        dldirectory,
                        self.config.TEMP_DIR,
                        output,
                        output_chapter,
                        quality,
                        format,
                        self.config.YTDL_OPTIONS,
                        dl,
                    )
                )
                self.event.set()
                await self.notifier.added(dl)
            return {"status": "ok"}
        if etype.startswith("url"):
            return await self.add(entry["url"], quality, format, folder, custom_name_prefix, already)
        return {"status": "error", "msg": f'Unsupported resource "{etype}"'}

    async def add(self, url, quality, format, folder, custom_name_prefix, already=None):
        """Extract ``url`` and queue whatever it resolves to.

        Args:
            url: URL to add.
            quality: Requested quality.
            format: Requested format.
            folder: Optional sub-folder for the download.
            custom_name_prefix: Optional prefix for id, title and template.
            already: Set of URLs visited during this call chain; used to
                stop URL references that point back at themselves.

        Returns:
            ``{"status": "ok"}`` or ``{"status": "error", "msg": ...}``.
        """
        log.info(f"adding {url}: {quality=} {format=} {already=} {folder=} {custom_name_prefix=}")
        already = set() if already is None else already
        if url in already:
            log.info("recursion detected, skipping")
            return {"status": "ok"}
        already.add(url)
        try:
            entry = await asyncio.get_running_loop().run_in_executor(None, self.__extract_info, url)
        except yt_dlp.utils.YoutubeDLError as exc:
            return {"status": "error", "msg": str(exc)}
        return await self.__add_entry(entry, quality, format, folder, custom_name_prefix, already)

    async def cancel(self, ids):
        """Cancel the pending downloads with the given queue keys.

        A started download is only flagged and killed here; it is removed
        from the queue by the download loop. A download that has not
        started is removed and reported immediately.
        """
        for key in ids:
            if not self.queue.exists(key):
                log.warning(f"requested cancel for non-existent download {key}")
                continue
            if self.queue.get(key).started():
                self.queue.get(key).cancel()
            else:
                self.queue.delete(key)
                await self.notifier.canceled(key)
        return {"status": "ok"}

    async def clear(self, ids):
        """Remove finished entries; optionally delete their files.

        The file is deleted only when DELETE_FILE_ON_TRASHCAN is set. A
        failed deletion is logged and the entry is still removed.
        """
        for key in ids:
            if not self.done.exists(key):
                log.warning(f"requested delete for non-existent download {key}")
                continue
            if self.config.DELETE_FILE_ON_TRASHCAN:
                dl = self.done.get(key)
                try:
                    directory = dl.download_dir
                    if directory is None:
                        # Entries restored from disk carry no directory;
                        # recompute it from the saved request parameters.
                        directory, _ = self.__calc_download_path(
                            dl.info.quality, dl.info.format, dl.info.folder, create=False
                        )
                    os.remove(os.path.join(directory, dl.info.filename))
                except (OSError, TypeError, AttributeError) as exc:
                    log.warning(f"deleting file for download {key} failed with error message {exc!r}")
            self.done.delete(key)
            await self.notifier.cleared(key)
        return {"status": "ok"}

    def get(self):
        """Return ``(pending, done)`` as lists of ``(key, info)`` pairs."""
        return (
            [(k, v.info) for k, v in self.queue.items()],
            [(k, v.info) for k, v in self.done.items()],
        )

    async def __download(self):
        """Run forever: download the oldest pending entry, then file the result."""
        while True:
            while self.queue.empty():
                log.info("waiting for item to download")
                await self.event.wait()
                self.event.clear()
            key, entry = self.queue.next()
            log.info(f"downloading {entry.info.title}")
            await entry.start(self.notifier)
            if entry.info.status != "finished":
                # Best-effort removal of the leftover temporary file.
                if entry.tmpfilename and os.path.isfile(entry.tmpfilename):
                    try:
                        os.remove(entry.tmpfilename)
                    except OSError as exc:
                        log.warning(f"could not remove temporary file {entry.tmpfilename}: {exc!r}")
                entry.info.status = "error"
            entry.close()
            if self.queue.exists(key):
                self.queue.delete(key)
            if entry.canceled:
                await self.notifier.canceled(key)
            else:
                self.done.put(entry)
                await self.notifier.completed(entry.info)