add download service

This commit is contained in:
2026-01-10 11:06:45 +03:00
parent c33c5fd674
commit 266f3768ef
44 changed files with 2652 additions and 4 deletions

View File

@@ -0,0 +1,12 @@
# Service layer of the Openings Downloader package.
# Each service class is re-exported here so callers can import it from the
# package itself instead of from the individual submodule.
from .shikimori import ShikimoriService
from .animethemes import AnimeThemesService
from .downloader import DownloadService
from .storage_tracker import StorageTrackerService
# Public API of the package: the names exported by a star-import.
__all__ = [
"ShikimoriService",
"AnimeThemesService",
"DownloadService",
"StorageTrackerService",
]

View File

@@ -0,0 +1,187 @@
import httpx
import logging
import re
from typing import List, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import Anime, AnimeTheme, ThemeType
logger = logging.getLogger(__name__)

# Module-wide HTTP client for the AnimeThemes API; built lazily on first use.
_animethemes_client: Optional[httpx.AsyncClient] = None


def _get_animethemes_client() -> httpx.AsyncClient:
    """Return the shared AnimeThemes HTTP client.

    A client is created when none exists yet or when the previous one has
    been closed; otherwise the existing instance is handed back.
    """
    global _animethemes_client
    reusable = (
        _animethemes_client is not None
        and not _animethemes_client.is_closed
    )
    if not reusable:
        _animethemes_client = httpx.AsyncClient(
            base_url="https://api.animethemes.moe",
            timeout=30.0,
        )
    return _animethemes_client
class AnimeThemesService:
    """Service for AnimeThemes API (api.animethemes.moe)."""

    def __init__(self):
        self.client = _get_animethemes_client()

    @staticmethod
    def _parse_theme_slug(slug: str) -> Optional[tuple]:
        """Split a theme slug such as "OP1" or "ED" into (kind, sequence).

        Returns None when the slug does not start with OP or ED. A slug
        without a number is treated as sequence 1.
        """
        match = re.match(r"(OP|ED)(\d*)", slug or "")
        if not match:
            return None
        kind, digits = match.group(1), match.group(2)
        return kind, int(digits) if digits else 1

    @staticmethod
    def _pick_media_url(theme_data: dict) -> Optional[str]:
        """Return the audio link of the first video, falling back to its video link."""
        # ``or []`` also covers keys that are present but null in the JSON.
        entries = theme_data.get("animethemeentries") or []
        if not entries:
            return None
        videos = entries[0].get("videos") or []
        if not videos:
            return None
        video = videos[0]
        audio = video.get("audio")
        url = audio.get("link") if audio else None
        return url or video.get("link")

    @staticmethod
    def _extract_song(theme_data: dict) -> tuple:
        """Return (song title, first artist name); either may be None."""
        # A theme without a song arrives as ``"song": null``; the default of
        # dict.get() is not used in that case, hence the explicit ``or {}``.
        song = theme_data.get("song") or {}
        artists = song.get("artists") or []
        artist = artists[0].get("name") if artists else None
        return song.get("title"), artist

    @staticmethod
    async def _load_anime_with_themes(db: AsyncSession, anime_id) -> Anime:
        """Load the anime row with its ``themes`` collection eagerly populated."""
        result = await db.execute(
            select(Anime)
            .where(Anime.id == anime_id)
            .options(selectinload(Anime.themes))
        )
        return result.scalar_one()

    async def _find_anime_slug(self, anime: Anime) -> Optional[str]:
        """Find the AnimeThemes slug by searching for the anime's titles.

        The English, Russian and Japanese titles are tried in that order.
        The slug of the first search hit is used; when that hit carries no
        slug, or the request fails, the next title is tried.

        Returns:
            The slug, or None when no title produced a usable result.
        """
        search_terms = [
            anime.title_english,
            anime.title_russian,
            anime.title_japanese,
        ]
        for term in search_terms:
            if not term:
                continue
            try:
                response = await self.client.get(
                    "/anime",
                    params={
                        "q": term,
                        "include": "animethemes.animethemeentries.videos.audio",
                    },
                )
                if response.status_code != 200:
                    continue
                animes = response.json().get("anime") or []
                slug = animes[0].get("slug") if animes else None
            except Exception as e:
                # Best effort: a failure for one title must not stop the search.
                logger.warning(f"Failed to search AnimeThemes for '{term}': {e}")
                continue
            if slug:
                logger.info(f"Found AnimeThemes slug '{slug}' for '{term}'")
                return slug
        return None

    async def fetch_themes(self, db: AsyncSession, anime: Anime) -> List[AnimeTheme]:
        """Fetch themes from the AnimeThemes API and sync them to the DB.

        Themes are matched to existing rows by (theme type, sequence):
        matching rows are updated, unknown ones are inserted. When the slug
        cannot be resolved or the API call fails, the themes already stored
        for the anime are returned unchanged.

        Args:
            db: Session used for all reads and writes; it is committed here.
            anime: The anime to sync. Only its id is read from this object;
                the row is reloaded with themes attached.

        Returns:
            The anime's themes as stored after the sync.
        """
        # Plain values are captured up front so nothing below has to read an
        # ORM attribute after a commit; on an AsyncSession that expires
        # objects on commit such a read would attempt implicit IO and fail.
        anime_id = anime.id
        anime = await self._load_anime_with_themes(db, anime_id)
        current_themes = anime.themes or []
        display_title = anime.title_english or anime.title_russian

        anime_slug = anime.animethemes_slug
        if not anime_slug:
            logger.info(f"Searching AnimeThemes slug for: {display_title}")
            anime_slug = await self._find_anime_slug(anime)
            logger.info(f"Found slug: {anime_slug}")
            if not anime_slug:
                logger.warning(f"No AnimeThemes slug found for anime {anime_id}: {display_title}")
                return current_themes
            # Cache the slug, then reload so the objects used below are fresh.
            anime.animethemes_slug = anime_slug
            await db.commit()
            anime = await self._load_anime_with_themes(db, anime_id)
            current_themes = anime.themes or []

        try:
            response = await self.client.get(
                f"/anime/{anime_slug}",
                params={
                    "include": "animethemes.animethemeentries.videos.audio,animethemes.song.artists",
                },
            )
            if response.status_code != 200:
                logger.warning(f"AnimeThemes API returned {response.status_code} for {anime_slug}")
                return current_themes
            data = response.json()
        except Exception as e:
            logger.error(f"Failed to fetch themes from AnimeThemes for {anime_slug}: {e}")
            return current_themes

        anime_data = data.get("anime") or {}
        themes_data = anime_data.get("animethemes") or []
        logger.info(f"AnimeThemes API returned {len(themes_data)} themes for {anime_slug}")

        existing_themes = {
            (t.theme_type, t.sequence): t
            for t in current_themes
        }
        for theme_data in themes_data:
            parsed = self._parse_theme_slug(theme_data.get("slug") or "")
            if parsed is None:
                continue
            kind, sequence = parsed
            theme_type = ThemeType.OP if kind == "OP" else ThemeType.ED
            video_url = self._pick_media_url(theme_data)
            song_title, artist = self._extract_song(theme_data)

            key = (theme_type, sequence)
            theme = existing_themes.get(key)
            if theme is not None:
                theme.song_title = song_title
                theme.artist = artist
                # Keep the stored URL when the API offers none this time.
                if video_url:
                    theme.animethemes_video_url = video_url
                continue

            theme = AnimeTheme(
                anime_id=anime_id,
                theme_type=theme_type,
                sequence=sequence,
                song_title=song_title,
                artist=artist,
                animethemes_video_url=video_url,
            )
            db.add(theme)
            if anime.themes is None:
                anime.themes = []
            anime.themes.append(theme)
            # Register the new row so a later API entry that parses to the
            # same key updates it instead of inserting a duplicate.
            existing_themes[key] = theme

        await db.commit()
        refreshed_anime = await self._load_anime_with_themes(db, anime_id)
        return refreshed_anime.themes

View File

@@ -0,0 +1,326 @@
import asyncio
import tempfile
import re
from pathlib import Path
from typing import List
from datetime import datetime, timezone
import httpx
from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import AnimeTheme, DownloadTask, DownloadStatus, Anime
from ..schemas import QueueStatusResponse, QueueTaskResponse
from ..config import downloader_settings
from ...storage import storage
from ...db_models import Opening
class DownloadService:
    """Service for downloading and converting anime themes."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def add_to_queue(self, theme_ids: List[int]) -> int:
        """Add themes to the download queue (idempotent).

        A theme is skipped when it already has a task, does not exist, is
        already downloaded (has an S3 key) or has no source URL.

        Returns:
            Number of tasks added.
        """
        # De-duplicate while keeping the caller's order.
        unique_ids = list(dict.fromkeys(theme_ids))
        added = 0
        if unique_ids:
            # Two batched lookups instead of two queries per theme id.
            queued_rows = await self.db.execute(
                select(DownloadTask.theme_id)
                .where(DownloadTask.theme_id.in_(unique_ids))
            )
            already_queued = set(queued_rows.scalars().all())
            theme_rows = await self.db.execute(
                select(AnimeTheme).where(AnimeTheme.id.in_(unique_ids))
            )
            themes_by_id = {theme.id: theme for theme in theme_rows.scalars().all()}

            for theme_id in unique_ids:
                if theme_id in already_queued:
                    continue
                theme = themes_by_id.get(theme_id)
                if theme is None:
                    continue
                if theme.audio_s3_key or not theme.animethemes_video_url:
                    continue
                self.db.add(DownloadTask(
                    theme_id=theme_id,
                    status=DownloadStatus.QUEUED,
                    estimated_size_bytes=downloader_settings.default_estimated_size_bytes,
                ))
                added += 1
        await self.db.commit()
        return added

    async def add_all_anime_themes(self, anime_id: int) -> int:
        """Queue every not-yet-downloaded theme of an anime that has a source URL.

        Returns:
            Number of tasks added.
        """
        result = await self.db.execute(
            select(AnimeTheme)
            .where(AnimeTheme.anime_id == anime_id)
            .where(AnimeTheme.audio_s3_key.is_(None))
            .where(AnimeTheme.animethemes_video_url.isnot(None))
        )
        themes = result.scalars().all()
        return await self.add_to_queue([t.id for t in themes])

    async def get_queue_status(self, worker_running: bool = False) -> QueueStatusResponse:
        """Return every task (newest first) together with per-status totals.

        Args:
            worker_running: Passed through unchanged into the response.
        """
        result = await self.db.execute(
            select(DownloadTask)
            .options(selectinload(DownloadTask.theme).selectinload(AnimeTheme.anime))
            .order_by(DownloadTask.created_at.desc())
        )
        tasks = result.scalars().all()

        in_progress = (
            DownloadStatus.DOWNLOADING,
            DownloadStatus.CONVERTING,
            DownloadStatus.UPLOADING,
        )
        task_responses = []
        total_queued = 0
        total_downloading = 0
        total_done = 0
        total_failed = 0
        estimated_queue_size = 0
        for task in tasks:
            theme = task.theme
            anime = theme.anime
            anime_title = anime.title_russian or anime.title_english or "Unknown"
            task_responses.append(QueueTaskResponse(
                id=task.id,
                theme_id=theme.id,
                anime_title=anime_title,
                theme_name=theme.full_name,
                song_title=theme.song_title,
                status=task.status,
                progress_percent=task.progress_percent,
                error_message=task.error_message,
                estimated_size_bytes=task.estimated_size_bytes,
                created_at=task.created_at,
                started_at=task.started_at,
                completed_at=task.completed_at,
            ))
            # A missing estimate counts as zero instead of breaking the sum.
            size = task.estimated_size_bytes or 0
            if task.status == DownloadStatus.QUEUED:
                total_queued += 1
                estimated_queue_size += size
            elif task.status in in_progress:
                total_downloading += 1
                estimated_queue_size += size
            elif task.status == DownloadStatus.DONE:
                total_done += 1
            elif task.status == DownloadStatus.FAILED:
                total_failed += 1

        return QueueStatusResponse(
            tasks=task_responses,
            total_queued=total_queued,
            total_downloading=total_downloading,
            total_done=total_done,
            total_failed=total_failed,
            estimated_queue_size_bytes=estimated_queue_size,
            worker_running=worker_running,
        )

    async def process_queue(self) -> None:
        """Process queued tasks one at a time, oldest first, until none are left."""
        while True:
            result = await self.db.execute(
                select(DownloadTask)
                .where(DownloadTask.status == DownloadStatus.QUEUED)
                .order_by(DownloadTask.created_at)
                .limit(1)
            )
            task = result.scalar_one_or_none()
            if not task:
                break
            await self._process_task(task)

    @staticmethod
    async def _download_file(url: str, destination: Path) -> None:
        """Stream ``url`` into ``destination``; raises on a non-success HTTP status."""
        async with httpx.AsyncClient() as client:
            async with client.stream(
                "GET",
                url,
                timeout=downloader_settings.download_timeout_seconds,
                follow_redirects=True,
            ) as response:
                response.raise_for_status()
                with open(destination, "wb") as f:
                    async for chunk in response.aiter_bytes(chunk_size=8192):
                        f.write(chunk)

    @staticmethod
    async def _convert_to_mp3(source: Path, target: Path) -> None:
        """Extract the audio track of ``source`` into an MP3 at ``target`` using ffmpeg.

        Raises:
            RuntimeError: ffmpeg exited non-zero or produced no output file.
        """
        process = await asyncio.create_subprocess_exec(
            "ffmpeg", "-i", str(source),
            "-vn", "-acodec", "libmp3lame", "-q:a", "2",
            str(target),
            "-y",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await process.communicate()
        if process.returncode != 0:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            raise RuntimeError(f"FFmpeg error: {stderr.decode(errors='replace')[:500]}")
        if not target.exists():
            raise RuntimeError("FFmpeg did not create output file")

    def _build_file_name(self, anime: Anime, theme_name: str, song_title) -> str:
        """Build the MP3 file name from the anime title, theme name and song title."""
        anime_name = self._sanitize_filename(
            anime.title_english or anime.title_russian or f"anime_{anime.shikimori_id}"
        )
        song_part = f"_{self._sanitize_filename(song_title)}" if song_title else ""
        return f"{anime_name}_{theme_name}{song_part}.mp3"

    async def _process_task(self, task: DownloadTask) -> None:
        """Download, convert and upload one task, recording progress on the task row.

        Any exception marks the task FAILED and stores the (truncated)
        message instead of propagating to the caller.
        """
        try:
            # Read before the first commit so no task attribute is needed later.
            theme_id = task.theme_id
            task.status = DownloadStatus.DOWNLOADING
            task.started_at = datetime.now(timezone.utc)
            task.progress_percent = 10
            await self.db.commit()

            result = await self.db.execute(
                select(AnimeTheme)
                .options(selectinload(AnimeTheme.anime))
                .where(AnimeTheme.id == theme_id)
            )
            theme = result.scalar_one()
            anime = theme.anime
            source_url = theme.animethemes_video_url
            if not source_url:
                raise ValueError("No video URL available")

            # Everything derived from the theme/anime rows is computed now,
            # before the progress commits below, so the rest of the method
            # does not depend on those objects staying loaded after a commit.
            theme_name = theme.full_name
            song_title = theme.song_title
            file_name = self._build_file_name(anime, theme_name, song_title)
            s3_key = f"audio/{file_name}"
            opening_anime_name = (
                anime.title_russian or anime.title_english or f"Anime {anime.shikimori_id}"
            )

            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = Path(tmp_dir)
                webm_file = tmp_path / "video.webm"
                mp3_file = tmp_path / "audio.mp3"

                await self._download_file(source_url, webm_file)
                task.progress_percent = 40
                task.status = DownloadStatus.CONVERTING
                await self.db.commit()

                await self._convert_to_mp3(webm_file, mp3_file)
                task.progress_percent = 70
                task.status = DownloadStatus.UPLOADING
                await self.db.commit()

                file_data = mp3_file.read_bytes()
                file_size = len(file_data)
                success = storage.upload_file(s3_key, file_data, "audio/mpeg")
                if not success:
                    raise RuntimeError("Failed to upload to S3")

            theme.audio_s3_key = s3_key
            theme.file_size_bytes = file_size

            # Mirror the download into the main Opening table; the flush
            # assigns the primary key that the theme then points at.
            opening = Opening(
                anime_name=opening_anime_name,
                op_number=theme_name,
                song_name=song_title,
                audio_file=file_name,
            )
            self.db.add(opening)
            await self.db.flush()
            theme.opening_id = opening.id

            task.status = DownloadStatus.DONE
            task.progress_percent = 100
            task.completed_at = datetime.now(timezone.utc)
            task.estimated_size_bytes = file_size
            await self.db.commit()
        except Exception as e:
            # Drop any half-applied changes first. After a failed flush or
            # commit the session rejects further work until it is rolled
            # back, so without this the FAILED status could not be saved.
            await self.db.rollback()
            task.status = DownloadStatus.FAILED
            # Some exceptions carry no message; fall back to their repr.
            task.error_message = (str(e) or repr(e))[:1000]
            task.progress_percent = 0
            await self.db.commit()

    def _sanitize_filename(self, name: str) -> str:
        """Sanitize a string for use in a file name.

        Removes ``< > : " / \\ | ? *``, turns spaces into underscores and
        limits the result to 100 characters. Returns "unknown" for empty
        input and for input that is empty after sanitizing.
        """
        if not name:
            return "unknown"
        sanitized = re.sub(r'[<>:"/\\|?*]', '', name)
        sanitized = sanitized.replace(' ', '_')
        return sanitized[:100] or "unknown"

    async def cancel_task(self, task_id: int) -> bool:
        """Delete a task that is still queued. Returns True if it was cancelled."""
        result = await self.db.execute(
            select(DownloadTask).where(DownloadTask.id == task_id)
        )
        task = result.scalar_one_or_none()
        if not task or task.status != DownloadStatus.QUEUED:
            return False
        await self.db.delete(task)
        await self.db.commit()
        return True

    async def retry_task(self, task_id: int) -> bool:
        """Reset a failed task back to QUEUED. Returns True if it was requeued."""
        result = await self.db.execute(
            update(DownloadTask)
            .where(DownloadTask.id == task_id)
            .where(DownloadTask.status == DownloadStatus.FAILED)
            .values(
                status=DownloadStatus.QUEUED,
                error_message=None,
                progress_percent=0,
                started_at=None,
                completed_at=None,
            )
            .returning(DownloadTask.id)
        )
        updated = result.scalar_one_or_none()
        await self.db.commit()
        return updated is not None

    async def clear_completed_tasks(self, include_failed: bool = False) -> int:
        """Delete DONE tasks, and FAILED ones too when ``include_failed`` is set.

        Returns:
            Number of deleted tasks.
        """
        from sqlalchemy import delete

        statuses = [DownloadStatus.DONE]
        if include_failed:
            statuses.append(DownloadStatus.FAILED)
        result = await self.db.execute(
            delete(DownloadTask)
            .where(DownloadTask.status.in_(statuses))
            .returning(DownloadTask.id)
        )
        deleted_ids = result.scalars().all()
        await self.db.commit()
        return len(deleted_ids)

View File

@@ -0,0 +1,145 @@
import httpx
from typing import List, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import Anime
from ..schemas import AnimeSearchResult
from ..config import downloader_settings
# Module-wide HTTP client for the Shikimori API; built lazily on first use.
_shikimori_client: Optional[httpx.AsyncClient] = None


def _get_shikimori_client() -> httpx.AsyncClient:
    """Return the shared Shikimori HTTP client.

    A client is created when none exists yet or the previous one was closed.
    It always sends the configured User-Agent, plus a Bearer Authorization
    header when a token is configured.
    """
    global _shikimori_client
    if _shikimori_client is not None and not _shikimori_client.is_closed:
        return _shikimori_client

    request_headers = {"User-Agent": downloader_settings.shikimori_user_agent}
    if downloader_settings.shikimori_token:
        request_headers["Authorization"] = f"Bearer {downloader_settings.shikimori_token}"

    _shikimori_client = httpx.AsyncClient(headers=request_headers, timeout=30.0)
    return _shikimori_client
class ShikimoriService:
    """Service for Shikimori GraphQL API."""

    GRAPHQL_URL = "https://shikimori.one/api/graphql"

    def __init__(self):
        self.client = _get_shikimori_client()

    @staticmethod
    def _animes_from_response(payload: dict) -> list:
        """Return the ``animes`` list of a GraphQL response, or [] when absent.

        A response that reports errors may carry ``"data": null``; that case
        yields an empty list rather than an AttributeError.
        """
        data = payload.get("data") or {}
        return data.get("animes") or []

    @staticmethod
    def _common_fields(anime_data: dict) -> dict:
        """Map a Shikimori anime object to the title/year/poster keyword arguments.

        The result is shared by ``AnimeSearchResult`` and ``Anime`` construction.
        """
        aired_on = anime_data.get("airedOn") or {}
        poster = anime_data.get("poster") or {}
        return {
            "title_russian": anime_data.get("russian"),
            "title_english": anime_data.get("english"),
            "title_japanese": anime_data.get("japanese"),
            "year": aired_on.get("year"),
            "poster_url": poster.get("originalUrl"),
        }

    async def search(
        self,
        query: str,
        year: Optional[int] = None,
        status: Optional[str] = None,
        limit: int = 20,
    ) -> List[AnimeSearchResult]:
        """Search anime by text using the Shikimori GraphQL API.

        Args:
            query: Search text.
            year: When given, sent as the GraphQL ``season`` filter.
            status: When given, sent as the GraphQL ``status`` filter.
            limit: Maximum number of results requested.

        Returns:
            One result per anime in the response; empty when the response
            carries no data.

        Raises:
            httpx.HTTPStatusError: The API answered with an error status.
        """
        graphql_query = """
        query($search: String, $limit: Int, $season: SeasonString, $status: AnimeStatusString) {
            animes(search: $search, limit: $limit, season: $season, status: $status) {
                id
                russian
                english
                japanese
                airedOn { year }
                poster { originalUrl }
            }
        }
        """
        variables = {
            "search": query,
            "limit": limit,
        }
        if year:
            variables["season"] = str(year)
        if status:
            variables["status"] = status

        response = await self.client.post(
            self.GRAPHQL_URL,
            json={"query": graphql_query, "variables": variables},
        )
        response.raise_for_status()

        return [
            AnimeSearchResult(
                shikimori_id=int(anime["id"]),
                **self._common_fields(anime),
            )
            for anime in self._animes_from_response(response.json())
        ]

    async def get_or_create_anime(self, db: AsyncSession, shikimori_id: int) -> Anime:
        """Return the anime from the DB, creating it from Shikimori data if missing.

        The returned object has its ``themes`` collection eagerly loaded on
        both the "found" and the "created" path.

        Raises:
            ValueError: Shikimori returned no anime for ``shikimori_id``.
            httpx.HTTPStatusError: The API answered with an error status.
        """
        lookup = (
            select(Anime)
            .where(Anime.shikimori_id == shikimori_id)
            .options(selectinload(Anime.themes))
        )
        result = await db.execute(lookup)
        anime = result.scalar_one_or_none()
        if anime:
            return anime

        graphql_query = """
        query($ids: String!) {
            animes(ids: $ids, limit: 1) {
                id
                russian
                english
                japanese
                airedOn { year }
                poster { originalUrl }
            }
        }
        """
        response = await self.client.post(
            self.GRAPHQL_URL,
            json={"query": graphql_query, "variables": {"ids": str(shikimori_id)}},
        )
        response.raise_for_status()

        animes = self._animes_from_response(response.json())
        if not animes:
            raise ValueError(f"Anime with ID {shikimori_id} not found on Shikimori")

        anime = Anime(
            shikimori_id=shikimori_id,
            **self._common_fields(animes[0]),
        )
        db.add(anime)
        await db.commit()
        await db.refresh(anime)

        # Re-run the eager-loading lookup so a new row is returned in the same
        # shape as an existing one; refresh() alone leaves ``themes`` unloaded.
        result = await db.execute(lookup)
        return result.scalar_one()

View File

@@ -0,0 +1,58 @@
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from ..db_models import AnimeTheme
from ..schemas import StorageStatsResponse
from ..config import downloader_settings
from ...db_models import Opening
class StorageTrackerService:
    """Reports S3 storage usage from database bookkeeping, without scanning S3."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def get_stats(self) -> StorageStatsResponse:
        """Build storage statistics from the recorded theme sizes and opening count."""
        # Total of all recorded file sizes; rows without a size are ignored.
        size_query = (
            select(func.coalesce(func.sum(AnimeTheme.file_size_bytes), 0))
            .where(AnimeTheme.file_size_bytes.isnot(None))
        )
        size_result = await self.db.execute(size_query)
        used_bytes = size_result.scalar() or 0

        # Number of rows in the main Opening table.
        count_result = await self.db.execute(select(func.count(Opening.id)))
        openings_count = count_result.scalar() or 0

        limit_bytes = downloader_settings.s3_storage_limit_bytes
        # A non-positive limit would make the ratio meaningless; report 0 then.
        if limit_bytes > 0:
            used_percent = used_bytes / limit_bytes * 100
        else:
            used_percent = 0

        return StorageStatsResponse(
            used_bytes=used_bytes,
            limit_bytes=limit_bytes,
            used_percent=round(used_percent, 2),
            available_bytes=max(0, limit_bytes - used_bytes),
            can_download=used_bytes < limit_bytes,
            openings_count=openings_count,
        )

    async def get_estimated_queue_size(self) -> int:
        """Return the summed size estimate of tasks that are queued or in progress."""
        from ..db_models import DownloadTask, DownloadStatus

        pending_statuses = [
            DownloadStatus.QUEUED,
            DownloadStatus.DOWNLOADING,
            DownloadStatus.CONVERTING,
            DownloadStatus.UPLOADING,
        ]
        total_query = (
            select(func.coalesce(func.sum(DownloadTask.estimated_size_bytes), 0))
            .where(DownloadTask.status.in_(pending_statuses))
        )
        total_result = await self.db.execute(total_query)
        return total_result.scalar() or 0