index date published as iso timestamp, #902

This commit is contained in:
Simon 2025-06-03 21:41:57 +07:00
parent 62ea518e1b
commit d196d2e4f5
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
4 changed files with 33 additions and 13 deletions

View File

@ -8,7 +8,7 @@ import os
import random
import string
import subprocess
from datetime import datetime
from datetime import datetime, timezone
from time import sleep
from typing import Any
from urllib.parse import urlparse
@ -106,13 +106,14 @@ def requests_headers() -> dict[str, str]:
def date_parser(timestamp: int | str) -> str:
"""return formatted date string"""
if isinstance(timestamp, int):
date_obj = datetime.fromtimestamp(timestamp)
date_obj = datetime.fromtimestamp(timestamp, tz=timezone.utc)
elif isinstance(timestamp, str):
date_obj = datetime.strptime(timestamp, "%Y-%m-%d")
date_obj = date_obj.replace(tzinfo=timezone.utc)
else:
raise TypeError(f"invalid timestamp: {timestamp}")
return date_obj.date().isoformat()
return date_obj.isoformat()
def time_parser(timestamp: str) -> float:

View File

@ -22,14 +22,14 @@ def test_randomizor_with_positive_length():
def test_date_parser_with_int():
    """unix timestamp"""
    timestamp = 1621539600
    # 1621539600 is 2021-05-20 19:40:00 in UTC
    expected_date = "2021-05-20T19:40:00+00:00"
    assert date_parser(timestamp) == expected_date
def test_date_parser_with_str():
    """iso timestamp"""
    date_str = "2021-05-21"
    # date only input is expected to come back as midnight UTC
    expected_date = "2021-05-21T00:00:00+00:00"
    assert date_parser(date_str) == expected_date

View File

@ -336,9 +336,6 @@ class PendingList(PendingIndex):
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS):
"""parse response"""
vid_id = vid.get("id")
published = datetime.strptime(vid["upload_date"], "%Y%m%d").strftime(
"%Y-%m-%d"
)
# build dict
youtube_details = {
@ -348,10 +345,23 @@ class PendingList(PendingIndex):
"title": vid["title"],
"channel_id": vid["channel_id"],
"duration": get_duration_str(vid["duration"]),
"published": published,
"published": self._build_published(vid),
"timestamp": int(datetime.now().timestamp()),
"vid_type": vid_type.value,
"channel_indexed": vid["channel_id"] in self.all_channels,
}
return youtube_details
@staticmethod
def _build_published(vid):
    """build published date or timestamp

    Returns the value of vid["timestamp"] unchanged when it is present
    and truthy. Otherwise falls back to vid["upload_date"], expected in
    the form "%Y%m%d", and returns it reformatted as "%Y-%m-%d".

    Raises KeyError if the fallback is needed and "upload_date" is
    missing, ValueError if it does not match "%Y%m%d".
    """
    # use .get so a response without a timestamp key still reaches
    # the upload_date fallback instead of raising KeyError
    timestamp = vid.get("timestamp")
    if timestamp:
        return timestamp

    upload_date = vid["upload_date"]
    upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
    published = upload_date_time.strftime("%Y-%m-%d")

    return published

View File

@ -173,9 +173,6 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self._validate_id()
# extract
self.channel_id = self.youtube_meta["channel_id"]
upload_date = self.youtube_meta["upload_date"]
upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
published = upload_date_time.strftime("%Y-%m-%d")
last_refresh = int(datetime.now().timestamp())
# base64_blur = ThumbManager().get_base64_blur(self.youtube_id)
base64_blur = False
@ -187,7 +184,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"vid_thumb_url": self.youtube_meta["thumbnail"],
"vid_thumb_base64": base64_blur,
"tags": self.youtube_meta.get("tags", []),
"published": published,
"published": self._build_published(),
"vid_last_refresh": last_refresh,
"date_downloaded": last_refresh,
"youtube_id": self.youtube_id,
@ -196,6 +193,18 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
"active": True,
}
def _build_published(self):
    """build published date or timestamp

    Returns self.youtube_meta["timestamp"] unchanged when it is present
    and truthy. Otherwise falls back to self.youtube_meta["upload_date"],
    expected in the form "%Y%m%d", and returns it as "%Y-%m-%d".

    Raises KeyError if the fallback is needed and "upload_date" is
    missing, ValueError if it does not match "%Y%m%d".
    """
    # use .get so metadata without a timestamp key still reaches
    # the upload_date fallback instead of raising KeyError
    timestamp = self.youtube_meta.get("timestamp")
    if timestamp:
        return timestamp

    upload_date = self.youtube_meta["upload_date"]
    upload_date_time = datetime.strptime(upload_date, "%Y%m%d")
    published = upload_date_time.strftime("%Y-%m-%d")

    return published
def _validate_id(self):
"""validate expected video ID, raise value error on mismatch"""
remote_id = self.youtube_meta["id"]