index date published as iso timestamp, #902

This commit is contained in:
Simon 2025-06-03 21:41:57 +07:00
parent 62ea518e1b
commit d196d2e4f5
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
4 changed files with 33 additions and 13 deletions

View File

@ -8,7 +8,7 @@ import os
import random import random
import string import string
import subprocess import subprocess
from datetime import datetime from datetime import datetime, timezone
from time import sleep from time import sleep
from typing import Any from typing import Any
from urllib.parse import urlparse from urllib.parse import urlparse
@ -106,13 +106,14 @@ def requests_headers() -> dict[str, str]:
def date_parser(timestamp: int | str) -> str: def date_parser(timestamp: int | str) -> str:
"""return formatted date string""" """return formatted date string"""
if isinstance(timestamp, int): if isinstance(timestamp, int):
date_obj = datetime.fromtimestamp(timestamp) date_obj = datetime.fromtimestamp(timestamp, tz=timezone.utc)
elif isinstance(timestamp, str): elif isinstance(timestamp, str):
date_obj = datetime.strptime(timestamp, "%Y-%m-%d") date_obj = datetime.strptime(timestamp, "%Y-%m-%d")
date_obj = date_obj.replace(tzinfo=timezone.utc)
else: else:
raise TypeError(f"invalid timestamp: {timestamp}") raise TypeError(f"invalid timestamp: {timestamp}")
return date_obj.date().isoformat() return date_obj.isoformat()
def time_parser(timestamp: str) -> float: def time_parser(timestamp: str) -> float:

View File

@ -22,14 +22,14 @@ def test_randomizor_with_positive_length():
def test_date_parser_with_int():
    """unix timestamp is rendered as iso timestamp with UTC offset"""
    timestamp = 1621539600
    expected_date = "2021-05-20T19:40:00+00:00"
    assert date_parser(timestamp) == expected_date
def test_date_parser_with_str():
    """date string is expanded to an iso timestamp at midnight UTC"""
    date_str = "2021-05-21"
    expected_date = "2021-05-21T00:00:00+00:00"
    assert date_parser(date_str) == expected_date

View File

@ -336,9 +336,6 @@ class PendingList(PendingIndex):
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS): def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS):
"""parse response""" """parse response"""
vid_id = vid.get("id") vid_id = vid.get("id")
published = datetime.strptime(vid["upload_date"], "%Y%m%d").strftime(
"%Y-%m-%d"
)
# build dict # build dict
youtube_details = { youtube_details = {
@ -348,10 +345,23 @@ class PendingList(PendingIndex):
"title": vid["title"], "title": vid["title"],
"channel_id": vid["channel_id"], "channel_id": vid["channel_id"],
"duration": get_duration_str(vid["duration"]), "duration": get_duration_str(vid["duration"]),
"published": published, "published": self._build_published(vid),
"timestamp": int(datetime.now().timestamp()), "timestamp": int(datetime.now().timestamp()),
"vid_type": vid_type.value, "vid_type": vid_type.value,
"channel_indexed": vid["channel_id"] in self.all_channels, "channel_indexed": vid["channel_id"] in self.all_channels,
} }
return youtube_details return youtube_details
@staticmethod
def _build_published(vid):
    """build published date or timestamp

    vid: dict of video metadata, reads "timestamp" and "upload_date"
    returns: the "timestamp" value unchanged if it is truthy,
        else "upload_date" (format %Y%m%d) reformatted as %Y-%m-%d
    """
    # .get instead of subscript: a dict without the "timestamp" key
    # must reach the upload_date fallback instead of raising KeyError
    timestamp = vid.get("timestamp")
    if timestamp:
        return timestamp

    upload_date = vid["upload_date"]
    upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
    published = upload_date_time.strftime("%Y-%m-%d")

    return published

View File

@ -173,9 +173,6 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self._validate_id() self._validate_id()
# extract # extract
self.channel_id = self.youtube_meta["channel_id"] self.channel_id = self.youtube_meta["channel_id"]
upload_date = self.youtube_meta["upload_date"]
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
published = upload_date_time.strftime("%Y-%m-%d")
last_refresh = int(datetime.now().timestamp()) last_refresh = int(datetime.now().timestamp())
# base64_blur = ThumbManager().get_base64_blur(self.youtube_id) # base64_blur = ThumbManager().get_base64_blur(self.youtube_id)
base64_blur = False base64_blur = False
@ -187,7 +184,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"vid_thumb_url": self.youtube_meta["thumbnail"], "vid_thumb_url": self.youtube_meta["thumbnail"],
"vid_thumb_base64": base64_blur, "vid_thumb_base64": base64_blur,
"tags": self.youtube_meta.get("tags", []), "tags": self.youtube_meta.get("tags", []),
"published": published, "published": self._build_published(),
"vid_last_refresh": last_refresh, "vid_last_refresh": last_refresh,
"date_downloaded": last_refresh, "date_downloaded": last_refresh,
"youtube_id": self.youtube_id, "youtube_id": self.youtube_id,
@ -196,6 +193,18 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"active": True, "active": True,
} }
def _build_published(self):
    """build published date or timestamp

    reads "timestamp" and "upload_date" from self.youtube_meta
    returns: the "timestamp" value unchanged if it is truthy,
        else "upload_date" (format %Y%m%d) reformatted as %Y-%m-%d
    """
    # .get instead of subscript: metadata without the "timestamp" key
    # must reach the upload_date fallback instead of raising KeyError
    timestamp = self.youtube_meta.get("timestamp")
    if timestamp:
        return timestamp

    upload_date = self.youtube_meta["upload_date"]
    upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
    published = upload_date_time.strftime("%Y-%m-%d")

    return published
def _validate_id(self): def _validate_id(self):
"""validate expected video ID, raise value error on mismatch""" """validate expected video ID, raise value error on mismatch"""
remote_id = self.youtube_meta["id"] remote_id = self.youtube_meta["id"]