add download service

This commit is contained in:
2026-01-10 11:06:45 +03:00
parent c33c5fd674
commit 266f3768ef
44 changed files with 2652 additions and 4 deletions

View File

@@ -0,0 +1,2 @@
# Openings Downloader Module
# Search and download anime openings via Shikimori + AnimeThemes APIs

View File

@@ -0,0 +1,24 @@
from pydantic_settings import BaseSettings
class DownloaderSettings(BaseSettings):
    """Settings for the Openings Downloader module.

    Per the inner ``Config``: values come from environment variables with
    the ``DOWNLOADER_`` prefix and from the ``.env`` file, and keys that do
    not match a field are ignored.
    """

    # Shikimori API
    shikimori_user_agent: str = "AnimeQuiz/1.0"
    shikimori_token: str = ""  # Optional OAuth token for higher rate limits

    # S3 Storage limit (100 GB default)
    s3_storage_limit_bytes: int = 107_374_182_400  # 100 GB

    # Download settings
    download_timeout_seconds: int = 300
    default_estimated_size_bytes: int = 6_291_456  # 6 MB default for unknown files

    class Config:
        env_prefix = "DOWNLOADER_"
        env_file = ".env"
        extra = "ignore"


# Module-level settings instance, created once at import time.
downloader_settings = DownloaderSettings()

View File

@@ -0,0 +1,157 @@
from datetime import datetime
from typing import List, Optional, TYPE_CHECKING
from sqlalchemy import String, Integer, ForeignKey, DateTime, BigInteger, Enum as SQLEnum, func, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
import enum
from ..database import Base
if TYPE_CHECKING:
from ..db_models import Opening
class ThemeType(str, enum.Enum):
    """Type of anime theme (opening or ending)."""

    OP = "OP"  # opening
    ED = "ED"  # ending
class DownloadStatus(str, enum.Enum):
    """Status of a download task.

    The string values are what gets stored for ``DownloadTask.status``
    (the column uses a non-native enum).
    """

    QUEUED = "queued"            # waiting to be picked up
    DOWNLOADING = "downloading"  # source file is being fetched
    CONVERTING = "converting"    # audio is being extracted/converted
    UPLOADING = "uploading"      # result is being uploaded to storage
    DONE = "done"                # finished successfully
    FAILED = "failed"            # finished with an error
class Anime(Base):
    """Anime entity from Shikimori."""

    __tablename__ = "anime"

    # Internal surrogate key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Shikimori's identifier; unique, so one row per Shikimori anime.
    shikimori_id: Mapped[int] = mapped_column(Integer, unique=True, index=True, nullable=False)
    # AnimeThemes slug; nullable until one has been stored.
    animethemes_slug: Mapped[Optional[str]] = mapped_column(String(255), nullable=True, index=True)
    title_russian: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
    title_english: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
    title_japanese: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
    year: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    poster_url: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)
    # Set by the database at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now()
    )

    # Relationships
    # Themes are owned by the anime: deleting the anime (or removing a theme
    # from this collection) deletes the theme rows.
    themes: Mapped[List["AnimeTheme"]] = relationship(
        "AnimeTheme",
        back_populates="anime",
        cascade="all, delete-orphan"
    )

    def __repr__(self):
        # Prefer the Russian title, falling back to the English one.
        return f"<Anime {self.shikimori_id}: {self.title_russian or self.title_english}>"
class AnimeTheme(Base):
    """Anime opening/ending theme."""

    __tablename__ = "anime_themes"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Owning anime; theme rows are removed when the anime row is deleted.
    anime_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("anime.id", ondelete="CASCADE"),
        nullable=False
    )
    # Stored as a plain string column rather than a native DB enum.
    theme_type: Mapped[ThemeType] = mapped_column(
        SQLEnum(ThemeType, native_enum=False),
        nullable=False
    )
    sequence: Mapped[int] = mapped_column(Integer, nullable=False, default=1)  # 1, 2, 3...
    song_title: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
    artist: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)

    # AnimeThemes video URL (WebM source)
    animethemes_video_url: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)

    # Downloaded file info
    # audio_s3_key is NULL until the theme has been downloaded (see is_downloaded).
    audio_s3_key: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
    file_size_bytes: Mapped[Optional[int]] = mapped_column(BigInteger, nullable=True)

    # Link to existing Opening entity (after download)
    # The link is cleared (SET NULL) if the Opening row is deleted.
    opening_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("openings.id", ondelete="SET NULL"),
        nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now()
    )

    # Unique constraint: one anime can have only one OP1, OP2, ED1, etc.
    __table_args__ = (
        UniqueConstraint('anime_id', 'theme_type', 'sequence', name='uq_anime_theme_sequence'),
    )

    # Relationships
    anime: Mapped["Anime"] = relationship("Anime", back_populates="themes")
    opening: Mapped[Optional["Opening"]] = relationship("Opening")

    @property
    def full_name(self) -> str:
        """Return full theme name like 'OP1' or 'ED2'."""
        return f"{self.theme_type.value}{self.sequence}"

    @property
    def is_downloaded(self) -> bool:
        """Check if theme has been downloaded (i.e. an S3 key is recorded)."""
        return self.audio_s3_key is not None

    def __repr__(self):
        return f"<AnimeTheme {self.full_name}: {self.song_title}>"
class DownloadTask(Base):
    """Download queue task."""

    __tablename__ = "download_tasks"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Theme to download; the task row is removed when the theme row is deleted.
    theme_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("anime_themes.id", ondelete="CASCADE"),
        nullable=False,
        unique=True  # One task per theme
    )
    # Stored as a plain string column rather than a native DB enum.
    status: Mapped[DownloadStatus] = mapped_column(
        SQLEnum(DownloadStatus, native_enum=False),
        nullable=False,
        default=DownloadStatus.QUEUED
    )

    # Progress tracking
    progress_percent: Mapped[int] = mapped_column(Integer, default=0)
    error_message: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)

    # Estimated size (6 MB default if unknown)
    estimated_size_bytes: Mapped[int] = mapped_column(BigInteger, default=6_291_456)  # 6 MB

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now()
    )
    # Both timestamps stay NULL until set by the code that runs the task.
    started_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)
    completed_at: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True), nullable=True)

    # Relationships
    theme: Mapped["AnimeTheme"] = relationship("AnimeTheme")

    def __repr__(self):
        return f"<DownloadTask {self.id}: {self.status.value}>"

View File

@@ -0,0 +1,231 @@
import asyncio
from fastapi import APIRouter, Depends, HTTPException, Query, BackgroundTasks
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from typing import Optional
# Lock to prevent multiple workers running simultaneously
_worker_lock = asyncio.Lock()
# True while _run_download_worker is processing the queue; the queue
# endpoints report this value to clients as `worker_running`.
_worker_running = False
from ..database import get_db
from .schemas import (
SearchResponse,
AnimeDetailResponse,
ThemeInfo,
AddToQueueRequest,
AddAllThemesRequest,
QueueStatusResponse,
StorageStatsResponse,
)
from .db_models import Anime, AnimeTheme, DownloadTask, DownloadStatus
from .services.shikimori import ShikimoriService
from .services.animethemes import AnimeThemesService
from .services.downloader import DownloadService
from .services.storage_tracker import StorageTrackerService
# All endpoints in this module are mounted under the /downloader prefix.
router = APIRouter(prefix="/downloader", tags=["openings-downloader"])
# ============== Search ==============
@router.get("/search", response_model=SearchResponse)
async def search_anime(
    query: str = Query(..., min_length=1, description="Search query"),
    year: Optional[int] = Query(None, description="Filter by year"),
    status: Optional[str] = Query(None, description="Filter by status (ongoing, released, announced)"),
    limit: int = Query(20, ge=1, le=50, description="Maximum results"),
    db: AsyncSession = Depends(get_db),
):
    """Look up anime on Shikimori and return the matches.

    ``total`` in the response is the number of results returned for this
    request, not a global match count.
    """
    shikimori = ShikimoriService()
    found = await shikimori.search(query, year=year, status=status, limit=limit)
    total_found = len(found)
    return SearchResponse(results=found, total=total_found)
# ============== Anime Detail ==============
@router.get("/anime/{shikimori_id}", response_model=AnimeDetailResponse)
async def get_anime_detail(
    shikimori_id: int,
    db: AsyncSession = Depends(get_db),
):
    """Get anime details with available themes from AnimeThemes.

    The anime row is fetched from the DB (or created from Shikimori data),
    its themes are synced from AnimeThemes, and each theme is annotated with
    the status of its download task, if one exists.

    Raises:
        HTTPException: 404 when Shikimori has no anime with ``shikimori_id``.
    """
    shikimori_service = ShikimoriService()
    animethemes_service = AnimeThemesService()

    # Get or create anime record. The service signals "unknown id" with a
    # ValueError; surface that as 404 instead of an unhandled 500.
    try:
        anime = await shikimori_service.get_or_create_anime(db, shikimori_id)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    # Fetch themes from AnimeThemes API
    themes = await animethemes_service.fetch_themes(db, anime)

    # Get download status for each theme (single query, keyed by theme id)
    tasks = {}
    theme_ids = [t.id for t in themes]
    if theme_ids:
        result = await db.execute(
            select(DownloadTask)
            .where(DownloadTask.theme_id.in_(theme_ids))
        )
        tasks = {t.theme_id: t for t in result.scalars().all()}

    # Build response
    theme_infos = []
    for theme in themes:
        task = tasks.get(theme.id)
        theme_infos.append(ThemeInfo(
            id=theme.id,
            theme_type=theme.theme_type,
            sequence=theme.sequence,
            full_name=theme.full_name,
            song_title=theme.song_title,
            artist=theme.artist,
            video_url=theme.animethemes_video_url,
            is_downloaded=theme.is_downloaded,
            download_status=task.status if task else None,
            file_size_bytes=theme.file_size_bytes,
        ))

    return AnimeDetailResponse(
        id=anime.id,
        shikimori_id=anime.shikimori_id,
        title_russian=anime.title_russian,
        title_english=anime.title_english,
        title_japanese=anime.title_japanese,
        year=anime.year,
        poster_url=anime.poster_url,
        themes=theme_infos,
    )
# ============== Download Queue ==============
@router.post("/queue/add", response_model=QueueStatusResponse)
async def add_to_queue(
    request: AddToQueueRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
):
    """Queue the requested themes for download and return the queue status.

    Responds with 400 when the tracked storage usage no longer allows
    downloads. The background worker is scheduled only if at least one new
    task was actually created.
    """
    # Storage guard comes first: nothing is queued once the limit is hit.
    stats = await StorageTrackerService(db).get_stats()
    if not stats.can_download:
        raise HTTPException(
            status_code=400,
            detail=f"Storage limit exceeded ({stats.used_bytes}/{stats.limit_bytes} bytes)"
        )

    downloader = DownloadService(db)
    newly_queued = await downloader.add_to_queue(request.theme_ids)
    if newly_queued > 0:
        # Runs after the response has been sent.
        background_tasks.add_task(_run_download_worker)

    queue_status = await downloader.get_queue_status(worker_running=_worker_running)
    return queue_status
@router.post("/queue/add-all", response_model=QueueStatusResponse)
async def add_all_anime_themes(
    request: AddAllThemesRequest,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
):
    """Queue every theme of one anime for download and return the queue status.

    Responds with 400 when the tracked storage usage no longer allows
    downloads. The background worker is scheduled only if new tasks were
    created.
    """
    usage = await StorageTrackerService(db).get_stats()
    if not usage.can_download:
        raise HTTPException(status_code=400, detail="Storage limit exceeded")

    downloader = DownloadService(db)
    newly_queued = await downloader.add_all_anime_themes(request.anime_id)
    if newly_queued > 0:
        background_tasks.add_task(_run_download_worker)

    queue_status = await downloader.get_queue_status(worker_running=_worker_running)
    return queue_status
@router.get("/queue", response_model=QueueStatusResponse)
async def get_queue_status(db: AsyncSession = Depends(get_db)):
    """Return all download tasks with counters and the worker-running flag."""
    queue_status = await DownloadService(db).get_queue_status(worker_running=_worker_running)
    return queue_status
# NOTE: route order matters. FastAPI matches path operations in declaration
# order, so the static "/queue/clear" route must be registered BEFORE the
# parameterised "/queue/{task_id}" route; otherwise "clear" is captured as
# task_id and rejected with a 422 validation error.
@router.delete("/queue/clear")
async def clear_completed_tasks(
    include_failed: bool = Query(False, description="Also clear failed tasks"),
    db: AsyncSession = Depends(get_db),
):
    """Clear completed (and optionally failed) tasks from the queue.

    Returns a message and the number of deleted tasks.
    """
    download_service = DownloadService(db)
    deleted_count = await download_service.clear_completed_tasks(include_failed=include_failed)
    return {"message": f"Cleared {deleted_count} tasks", "deleted_count": deleted_count}


@router.delete("/queue/{task_id}")
async def cancel_task(task_id: int, db: AsyncSession = Depends(get_db)):
    """Cancel a queued task (not downloading).

    Responds with 400 when the task does not exist or is not in the
    queued state.
    """
    download_service = DownloadService(db)
    success = await download_service.cancel_task(task_id)
    if not success:
        raise HTTPException(status_code=400, detail="Cannot cancel task (not queued or not found)")
    return {"message": "Task cancelled"}


@router.post("/queue/{task_id}/retry")
async def retry_task(
    task_id: int,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
):
    """Retry a failed task.

    Responds with 400 when the task does not exist or is not in the failed
    state; otherwise re-queues it and schedules the background worker.
    """
    download_service = DownloadService(db)
    success = await download_service.retry_task(task_id)
    if not success:
        raise HTTPException(status_code=400, detail="Cannot retry task (not failed or not found)")
    background_tasks.add_task(_run_download_worker)
    return {"message": "Task queued for retry"}
# ============== Storage ==============
@router.get("/storage", response_model=StorageStatsResponse)
async def get_storage_stats(db: AsyncSession = Depends(get_db)):
    """Report S3 storage usage as computed from the DB (S3 itself is not scanned)."""
    tracker = StorageTrackerService(db)
    usage = await tracker.get_stats()
    return usage
# ============== Background Worker ==============
async def _run_download_worker():
    """Background task to process download queue.

    Only one worker runs at a time within this process: the module-level
    ``_worker_running`` flag is checked, then re-checked and set under
    ``_worker_lock``. The flag is always reset when processing ends, whether
    it finished normally or raised.
    """
    global _worker_running

    # Skip if another worker is already running
    if _worker_running:
        return

    async with _worker_lock:
        # Re-check under the lock before claiming the worker slot.
        if _worker_running:
            return
        _worker_running = True

    # NOTE(review): the original indentation was lost in this view; the
    # try/finally is assumed to sit after the lock block — confirm.
    try:
        # Imported here rather than at module top (reason not visible in this file).
        from ..database import async_session_maker

        # The worker uses its own session, independent of any request session.
        async with async_session_maker() as db:
            download_service = DownloadService(db)
            await download_service.process_queue()
    finally:
        _worker_running = False

View File

@@ -0,0 +1,114 @@
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel, Field
from .db_models import ThemeType, DownloadStatus
# ============== Shikimori Search ==============
class AnimeSearchResult(BaseModel):
    """Single anime search result from Shikimori."""

    shikimori_id: int
    # All descriptive fields are optional.
    title_russian: Optional[str] = None
    title_english: Optional[str] = None
    title_japanese: Optional[str] = None
    year: Optional[int] = None
    poster_url: Optional[str] = None

    class Config:
        # Allow building the model from objects by reading their attributes.
        from_attributes = True
class SearchResponse(BaseModel):
    """Response for anime search."""

    results: List[AnimeSearchResult]
    # Result count reported alongside the list.
    total: int
# ============== Anime Themes ==============
class ThemeInfo(BaseModel):
    """Information about a single anime theme (OP/ED)."""

    id: int
    theme_type: ThemeType
    sequence: int
    full_name: str  # "OP1", "ED2"
    song_title: Optional[str] = None
    artist: Optional[str] = None
    video_url: Optional[str] = None
    is_downloaded: bool
    # None when the theme has no download task.
    download_status: Optional[DownloadStatus] = None
    file_size_bytes: Optional[int] = None

    class Config:
        # Allow building the model from objects by reading their attributes.
        from_attributes = True
class AnimeDetailResponse(BaseModel):
    """Detailed anime info with themes."""

    id: int  # internal database id
    shikimori_id: int
    title_russian: Optional[str] = None
    title_english: Optional[str] = None
    title_japanese: Optional[str] = None
    year: Optional[int] = None
    poster_url: Optional[str] = None
    themes: List[ThemeInfo]

    class Config:
        # Allow building the model from objects by reading their attributes.
        from_attributes = True
# ============== Download Queue ==============
class AddToQueueRequest(BaseModel):
    """Request to add specific themes to download queue."""

    # At least one theme id is required.
    theme_ids: List[int] = Field(..., min_length=1)
class AddAllThemesRequest(BaseModel):
    """Request to add all themes from an anime."""

    # Internal anime id (Anime.id), not the Shikimori id.
    anime_id: int
class QueueTaskResponse(BaseModel):
    """Single task in the download queue."""

    id: int
    theme_id: int
    anime_title: str
    theme_name: str  # "OP1", "ED2"
    song_title: Optional[str] = None
    status: DownloadStatus
    progress_percent: int
    error_message: Optional[str] = None
    estimated_size_bytes: int
    created_at: datetime
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    class Config:
        # Allow building the model from objects by reading their attributes.
        from_attributes = True
class QueueStatusResponse(BaseModel):
    """Current status of the download queue."""

    tasks: List[QueueTaskResponse]
    # Per-status counters over the tasks above.
    total_queued: int
    total_downloading: int
    total_done: int
    total_failed: int
    estimated_queue_size_bytes: int
    worker_running: bool = False  # Indicates if download worker is currently active
# ============== Storage Stats ==============
class StorageStatsResponse(BaseModel):
    """S3 storage usage statistics."""

    used_bytes: int
    limit_bytes: int
    used_percent: float
    available_bytes: int
    can_download: bool  # False if limit exceeded
    openings_count: int

View File

@@ -0,0 +1,12 @@
# Services for Openings Downloader
from .shikimori import ShikimoriService
from .animethemes import AnimeThemesService
from .downloader import DownloadService
from .storage_tracker import StorageTrackerService
# Public API of the services package: the four service classes imported above.
__all__ = [
    "ShikimoriService",
    "AnimeThemesService",
    "DownloadService",
    "StorageTrackerService",
]

View File

@@ -0,0 +1,187 @@
import httpx
import logging
import re
from typing import List, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import Anime, AnimeTheme, ThemeType
logger = logging.getLogger(__name__)
# Shared HTTP client for AnimeThemes API
_animethemes_client: Optional[httpx.AsyncClient] = None


def _get_animethemes_client() -> httpx.AsyncClient:
    """Return the module-wide AnimeThemes HTTP client, creating it on demand.

    A new client is built when none exists yet or the previous one has
    been closed.
    """
    global _animethemes_client

    cached = _animethemes_client
    if cached is not None and not cached.is_closed:
        return cached

    _animethemes_client = httpx.AsyncClient(
        base_url="https://api.animethemes.moe",
        timeout=30.0,
    )
    return _animethemes_client
class AnimeThemesService:
    """Service for AnimeThemes API (api.animethemes.moe)."""

    def __init__(self):
        self.client = _get_animethemes_client()

    @staticmethod
    def _parse_theme_slug(slug):
        """Split a theme slug such as ``"OP1"`` into ``("OP", 1)``.

        A missing number means sequence 1. Returns ``None`` when the slug is
        missing/null or does not start with ``OP`` or ``ED``.
        """
        match = re.match(r"(OP|ED)(\d*)", slug or "")
        if not match:
            return None
        number = match.group(2)
        return match.group(1), int(number) if number else 1

    @staticmethod
    def _pick_media_url(theme_data):
        """Return the media URL of the first video of the first entry.

        The audio link is preferred, the video link is the fallback.
        Returns ``None`` when there is no entry/video/link. JSON ``null``
        values are treated the same as missing keys.
        """
        entries = theme_data.get("animethemeentries") or []
        if not entries:
            return None
        videos = (entries[0] or {}).get("videos") or []
        if not videos:
            return None
        first_video = videos[0] or {}
        audio = first_video.get("audio") or {}
        return audio.get("link") or first_video.get("link")

    @staticmethod
    def _extract_song(theme_data):
        """Return ``(song_title, first_artist_name)``; either may be ``None``.

        A ``null`` song or artist list is treated as absent.
        """
        song = theme_data.get("song") or {}
        artists = song.get("artists") or []
        artist = (artists[0] or {}).get("name") if artists else None
        return song.get("title"), artist

    async def _find_anime_slug(self, anime: Anime) -> Optional[str]:
        """Find AnimeThemes slug by searching anime title.

        Tries the English, Russian and Japanese titles in that order and
        returns the slug of the first search hit, or ``None`` if no search
        produced a result. Failures of individual searches are logged and
        skipped (best effort).
        """
        # Try different title variations
        search_terms = [
            anime.title_english,
            anime.title_russian,
            anime.title_japanese,
        ]
        for term in search_terms:
            if not term:
                continue
            try:
                response = await self.client.get(
                    "/anime",
                    params={
                        "q": term,
                        "include": "animethemes.animethemeentries.videos.audio",
                    },
                )
                if response.status_code == 200:
                    animes = response.json().get("anime") or []
                    if animes:
                        slug = animes[0].get("slug")
                        logger.info(f"Found AnimeThemes slug '{slug}' for '{term}'")
                        return slug
            except Exception as e:
                # Best effort: move on to the next title variation.
                logger.warning(f"Failed to search AnimeThemes for '{term}': {e}")
        return None

    async def fetch_themes(self, db: AsyncSession, anime: Anime) -> List[AnimeTheme]:
        """Fetch themes from AnimeThemes API and sync to DB.

        Existing themes (matched by type + sequence) are updated, unknown
        ones are inserted. If no slug can be found, or the API call fails,
        the themes already stored for the anime are returned unchanged.

        Args:
            db: Session used for all reads and writes; committed by this method.
            anime: Anime whose themes should be synced (reloaded internally).

        Returns:
            The anime's themes as stored after the sync.
        """
        # Always reload anime with themes to avoid lazy loading issues
        result = await db.execute(
            select(Anime)
            .where(Anime.id == anime.id)
            .options(selectinload(Anime.themes))
        )
        anime = result.scalar_one()
        current_themes = anime.themes or []

        # Find slug if not cached
        if not anime.animethemes_slug:
            logger.info(f"Searching AnimeThemes slug for: {anime.title_english or anime.title_russian}")
            slug = await self._find_anime_slug(anime)
            logger.info(f"Found slug: {slug}")
            if slug:
                anime.animethemes_slug = slug
                await db.commit()

        if not anime.animethemes_slug:
            logger.warning(f"No AnimeThemes slug found for anime {anime.id}: {anime.title_english or anime.title_russian}")
            return current_themes

        # Fetch themes from AnimeThemes API
        try:
            response = await self.client.get(
                f"/anime/{anime.animethemes_slug}",
                params={
                    "include": "animethemes.animethemeentries.videos.audio,animethemes.song.artists",
                },
            )
            if response.status_code != 200:
                logger.warning(f"AnimeThemes API returned {response.status_code} for {anime.animethemes_slug}")
                return current_themes
            data = response.json()
        except Exception as e:
            logger.error(f"Failed to fetch themes from AnimeThemes for {anime.animethemes_slug}: {e}")
            return current_themes

        # `or` guards: a JSON null must behave like a missing key.
        anime_data = data.get("anime") or {}
        themes_data = anime_data.get("animethemes") or []
        logger.info(f"AnimeThemes API returned {len(themes_data)} themes for {anime.animethemes_slug}")

        # Build dict of existing themes
        existing_themes = {
            (t.theme_type, t.sequence): t
            for t in current_themes
        }
        # Keys already handled in THIS response. Two entries resolving to the
        # same (type, sequence) would otherwise insert two rows and violate
        # the uq_anime_theme_sequence constraint; the first entry wins.
        seen_keys = set()

        for theme_data in themes_data:
            # Parse theme type and sequence: "OP1", "ED1", etc.
            parsed = self._parse_theme_slug(theme_data.get("slug"))
            if parsed is None:
                continue
            theme_type = ThemeType(parsed[0])
            sequence = parsed[1]
            key = (theme_type, sequence)
            if key in seen_keys:
                continue
            seen_keys.add(key)

            # Get media URL (prioritize audio link, then video link)
            video_url = self._pick_media_url(theme_data)
            # Get song info
            song_title, artist = self._extract_song(theme_data)

            if key in existing_themes:
                # Update existing theme
                theme = existing_themes[key]
                theme.song_title = song_title
                theme.artist = artist
                if video_url:
                    # Never overwrite a known URL with nothing.
                    theme.animethemes_video_url = video_url
            else:
                # Create new theme
                theme = AnimeTheme(
                    anime_id=anime.id,
                    theme_type=theme_type,
                    sequence=sequence,
                    song_title=song_title,
                    artist=artist,
                    animethemes_video_url=video_url,
                )
                db.add(theme)
                if anime.themes is None:
                    anime.themes = []
                anime.themes.append(theme)
                existing_themes[key] = theme

        await db.commit()

        # Reload anime with themes to get fresh data
        result = await db.execute(
            select(Anime)
            .where(Anime.id == anime.id)
            .options(selectinload(Anime.themes))
        )
        refreshed_anime = result.scalar_one()
        return refreshed_anime.themes

View File

@@ -0,0 +1,326 @@
import asyncio
import tempfile
import re
from pathlib import Path
from typing import List
from datetime import datetime, timezone
import httpx
from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import AnimeTheme, DownloadTask, DownloadStatus, Anime
from ..schemas import QueueStatusResponse, QueueTaskResponse
from ..config import downloader_settings
from ...storage import storage
from ...db_models import Opening
class DownloadService:
    """Service for downloading and converting anime themes."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def add_to_queue(self, theme_ids: List[int]) -> int:
        """Add themes to download queue (idempotent).

        A theme is skipped when it already has a task, does not exist, is
        already downloaded, or has no source URL. Duplicate ids in
        ``theme_ids`` are collapsed so one call can never create two tasks
        for the same theme (``download_tasks.theme_id`` is unique).

        Returns:
            Number of tasks added.
        """
        # Order-preserving de-duplication.
        unique_ids = list(dict.fromkeys(theme_ids))
        if not unique_ids:
            return 0

        # One query for "already queued" instead of one per id.
        queued = await self.db.execute(
            select(DownloadTask.theme_id).where(DownloadTask.theme_id.in_(unique_ids))
        )
        already_queued = set(queued.scalars().all())

        # One query for the candidate themes.
        result = await self.db.execute(
            select(AnimeTheme).where(AnimeTheme.id.in_(unique_ids))
        )
        themes_by_id = {theme.id: theme for theme in result.scalars().all()}

        added = 0
        for theme_id in unique_ids:
            if theme_id in already_queued:
                continue
            theme = themes_by_id.get(theme_id)
            # Skip unknown themes, already downloaded ones, and those
            # without a source URL.
            if theme is None or theme.audio_s3_key or not theme.animethemes_video_url:
                continue
            self.db.add(DownloadTask(
                theme_id=theme_id,
                status=DownloadStatus.QUEUED,
                estimated_size_bytes=downloader_settings.default_estimated_size_bytes,
            ))
            added += 1

        await self.db.commit()
        return added

    async def add_all_anime_themes(self, anime_id: int) -> int:
        """Add all not-yet-downloaded themes of an anime that have a source URL.

        Returns:
            Number of tasks added.
        """
        result = await self.db.execute(
            select(AnimeTheme)
            .where(AnimeTheme.anime_id == anime_id)
            .where(AnimeTheme.audio_s3_key.is_(None))
            .where(AnimeTheme.animethemes_video_url.isnot(None))
        )
        themes = result.scalars().all()
        return await self.add_to_queue([t.id for t in themes])

    async def get_queue_status(self, worker_running: bool = False) -> QueueStatusResponse:
        """Get current queue status.

        Args:
            worker_running: Passed through unchanged into the response.

        Returns:
            All tasks (newest first) with per-status counters. The estimated
            queue size sums the estimates of queued and in-progress tasks.
        """
        result = await self.db.execute(
            select(DownloadTask)
            .options(selectinload(DownloadTask.theme).selectinload(AnimeTheme.anime))
            .order_by(DownloadTask.created_at.desc())
        )
        tasks = result.scalars().all()

        in_progress = (
            DownloadStatus.DOWNLOADING,
            DownloadStatus.CONVERTING,
            DownloadStatus.UPLOADING,
        )
        task_responses = []
        total_queued = 0
        total_downloading = 0
        total_done = 0
        total_failed = 0
        estimated_queue_size = 0

        for task in tasks:
            theme = task.theme
            anime = theme.anime
            anime_title = anime.title_russian or anime.title_english or "Unknown"
            task_responses.append(QueueTaskResponse(
                id=task.id,
                theme_id=theme.id,
                anime_title=anime_title,
                theme_name=theme.full_name,
                song_title=theme.song_title,
                status=task.status,
                progress_percent=task.progress_percent,
                error_message=task.error_message,
                estimated_size_bytes=task.estimated_size_bytes,
                created_at=task.created_at,
                started_at=task.started_at,
                completed_at=task.completed_at,
            ))
            if task.status == DownloadStatus.QUEUED:
                total_queued += 1
                estimated_queue_size += task.estimated_size_bytes
            elif task.status in in_progress:
                total_downloading += 1
                estimated_queue_size += task.estimated_size_bytes
            elif task.status == DownloadStatus.DONE:
                total_done += 1
            elif task.status == DownloadStatus.FAILED:
                total_failed += 1

        return QueueStatusResponse(
            tasks=task_responses,
            total_queued=total_queued,
            total_downloading=total_downloading,
            total_done=total_done,
            total_failed=total_failed,
            estimated_queue_size_bytes=estimated_queue_size,
            worker_running=worker_running,
        )

    async def process_queue(self) -> None:
        """Process download queue (called as background task).

        Takes queued tasks one at a time, oldest first, until none is left.
        """
        while True:
            # Get next queued task
            result = await self.db.execute(
                select(DownloadTask)
                .where(DownloadTask.status == DownloadStatus.QUEUED)
                .order_by(DownloadTask.created_at)
                .limit(1)
            )
            task = result.scalar_one_or_none()
            if not task:
                break
            await self._process_task(task)

    async def _process_task(self, task: DownloadTask) -> None:
        """Process a single download task.

        Steps: download the source file, convert it to MP3 with FFmpeg,
        upload the MP3 to storage, then record the result on the theme, a new
        ``Opening`` row and the task. Any failure marks the task as FAILED
        with a truncated error message instead of propagating.
        """
        try:
            # Update status to downloading
            task.status = DownloadStatus.DOWNLOADING
            task.started_at = datetime.now(timezone.utc)
            task.progress_percent = 10
            await self.db.commit()

            # Get theme info with anime
            result = await self.db.execute(
                select(AnimeTheme)
                .options(selectinload(AnimeTheme.anime))
                .where(AnimeTheme.id == task.theme_id)
            )
            theme = result.scalar_one()
            anime = theme.anime
            if not theme.animethemes_video_url:
                raise ValueError("No video URL available")

            # Download and convert in temp directory
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = Path(tmp_dir)
                webm_file = tmp_path / "video.webm"
                mp3_file = tmp_path / "audio.mp3"

                # Stream download WebM file
                async with httpx.AsyncClient() as client:
                    async with client.stream(
                        "GET",
                        theme.animethemes_video_url,
                        timeout=downloader_settings.download_timeout_seconds,
                        follow_redirects=True,
                    ) as response:
                        response.raise_for_status()
                        with open(webm_file, "wb") as f:
                            async for chunk in response.aiter_bytes(chunk_size=8192):
                                f.write(chunk)

                task.progress_percent = 40
                task.status = DownloadStatus.CONVERTING
                await self.db.commit()

                # Convert to MP3 with FFmpeg
                process = await asyncio.create_subprocess_exec(
                    "ffmpeg", "-i", str(webm_file),
                    "-vn", "-acodec", "libmp3lame", "-q:a", "2",
                    str(mp3_file),
                    "-y",
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                _stdout, stderr = await process.communicate()
                if process.returncode != 0:
                    # errors="replace": FFmpeg output is not guaranteed to be
                    # valid UTF-8 and decoding must not mask the real error.
                    raise RuntimeError(f"FFmpeg error: {stderr.decode(errors='replace')[:500]}")
                if not mp3_file.exists():
                    raise RuntimeError("FFmpeg did not create output file")

                task.progress_percent = 70
                task.status = DownloadStatus.UPLOADING
                await self.db.commit()

                # Read the result before the temp directory is removed.
                file_data = mp3_file.read_bytes()

            # Generate safe S3 key
            anime_name = self._sanitize_filename(
                anime.title_english or anime.title_russian or f"anime_{anime.shikimori_id}"
            )
            theme_name = theme.full_name
            song_part = f"_{self._sanitize_filename(theme.song_title)}" if theme.song_title else ""
            s3_key = f"audio/{anime_name}_{theme_name}{song_part}.mp3"

            # Upload to S3
            file_size = len(file_data)
            success = storage.upload_file(s3_key, file_data, "audio/mpeg")
            if not success:
                raise RuntimeError("Failed to upload to S3")

            # Update theme with file info
            theme.audio_s3_key = s3_key
            theme.file_size_bytes = file_size

            # Create Opening entity in main table
            opening = Opening(
                anime_name=anime.title_russian or anime.title_english or f"Anime {anime.shikimori_id}",
                op_number=theme_name,
                song_name=theme.song_title,
                audio_file=s3_key.replace("audio/", ""),
            )
            self.db.add(opening)
            # Flush so the new opening gets its primary key.
            await self.db.flush()
            theme.opening_id = opening.id

            # Mark task as done
            task.status = DownloadStatus.DONE
            task.progress_percent = 100
            task.completed_at = datetime.now(timezone.utc)
            task.estimated_size_bytes = file_size
            await self.db.commit()
        except Exception as e:
            # If the failure came from a flush/commit, the session must be
            # rolled back before it can be used again; rolling back also
            # discards half-applied theme/opening changes so that only the
            # FAILED marker is persisted.
            await self.db.rollback()
            task.status = DownloadStatus.FAILED
            task.error_message = str(e)[:1000]
            task.progress_percent = 0
            await self.db.commit()

    def _sanitize_filename(self, name: str) -> str:
        """Sanitize string for use in filename.

        Removes ``< > : " / \\ | ? *``, replaces spaces with underscores and
        truncates to 100 characters. Returns ``"unknown"`` when the input is
        empty or nothing is left after removing the forbidden characters.
        """
        if not name:
            return "unknown"
        # Remove or replace problematic characters
        sanitized = re.sub(r'[<>:"/\\|?*]', '', name)
        sanitized = sanitized.replace(' ', '_')
        # Limit length; never return an empty name component.
        return sanitized[:100] or "unknown"

    async def cancel_task(self, task_id: int) -> bool:
        """Cancel a queued task by deleting it.

        Returns:
            True if cancelled; False if the task does not exist or is not queued.
        """
        result = await self.db.execute(
            select(DownloadTask).where(DownloadTask.id == task_id)
        )
        task = result.scalar_one_or_none()
        if not task or task.status != DownloadStatus.QUEUED:
            return False
        await self.db.delete(task)
        await self.db.commit()
        return True

    async def retry_task(self, task_id: int) -> bool:
        """Retry a failed task by resetting it to QUEUED.

        The status check is part of the UPDATE itself, so only a task that
        is FAILED at that moment is re-queued.

        Returns:
            True if requeued; False if the task does not exist or is not failed.
        """
        result = await self.db.execute(
            update(DownloadTask)
            .where(DownloadTask.id == task_id)
            .where(DownloadTask.status == DownloadStatus.FAILED)
            .values(
                status=DownloadStatus.QUEUED,
                error_message=None,
                progress_percent=0,
                started_at=None,
                completed_at=None,
            )
            .returning(DownloadTask.id)
        )
        updated = result.scalar_one_or_none()
        await self.db.commit()
        return updated is not None

    async def clear_completed_tasks(self, include_failed: bool = False) -> int:
        """Clear completed (and optionally failed) tasks.

        Returns:
            Number of deleted tasks.
        """
        from sqlalchemy import delete

        statuses = [DownloadStatus.DONE]
        if include_failed:
            statuses.append(DownloadStatus.FAILED)
        result = await self.db.execute(
            delete(DownloadTask)
            .where(DownloadTask.status.in_(statuses))
            .returning(DownloadTask.id)
        )
        deleted_ids = result.scalars().all()
        await self.db.commit()
        return len(deleted_ids)

View File

@@ -0,0 +1,145 @@
import httpx
from typing import List, Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from ..db_models import Anime
from ..schemas import AnimeSearchResult
from ..config import downloader_settings
# Shared HTTP client for Shikimori API
_shikimori_client: Optional[httpx.AsyncClient] = None


def _get_shikimori_client() -> httpx.AsyncClient:
    """Return the module-wide Shikimori HTTP client, creating it on demand.

    A new client is built when none exists yet or the previous one has been
    closed. It always sends the configured User-Agent and, if a token is
    configured, a Bearer ``Authorization`` header.
    """
    global _shikimori_client

    cached = _shikimori_client
    if cached is not None and not cached.is_closed:
        return cached

    request_headers = {"User-Agent": downloader_settings.shikimori_user_agent}
    if downloader_settings.shikimori_token:
        request_headers["Authorization"] = f"Bearer {downloader_settings.shikimori_token}"

    _shikimori_client = httpx.AsyncClient(headers=request_headers, timeout=30.0)
    return _shikimori_client
class ShikimoriService:
    """Service for Shikimori GraphQL API."""

    GRAPHQL_URL = "https://shikimori.one/api/graphql"

    def __init__(self):
        self.client = _get_shikimori_client()

    @staticmethod
    def _extract_animes(payload) -> list:
        """Return the ``data.animes`` list of a GraphQL response body.

        GraphQL error responses may carry ``"data": null``; that (and any
        other missing/null level) yields an empty list instead of raising.
        """
        if not isinstance(payload, dict):
            return []
        return (payload.get("data") or {}).get("animes") or []

    @staticmethod
    def _anime_fields(item: dict) -> dict:
        """Map one GraphQL anime object to title/year/poster keyword arguments.

        ``airedOn`` and ``poster`` may be missing or null; the corresponding
        values are then ``None``.
        """
        aired_on = item.get("airedOn") or {}
        poster = item.get("poster") or {}
        return {
            "title_russian": item.get("russian"),
            "title_english": item.get("english"),
            "title_japanese": item.get("japanese"),
            "year": aired_on.get("year"),
            "poster_url": poster.get("originalUrl"),
        }

    async def search(
        self,
        query: str,
        year: Optional[int] = None,
        status: Optional[str] = None,
        limit: int = 20,
    ) -> List[AnimeSearchResult]:
        """Search anime by query using Shikimori GraphQL API.

        Args:
            query: Search text.
            year: Optional year filter (sent as the ``season`` variable).
            status: Optional status filter, passed through as-is.
            limit: Maximum number of results requested.

        Returns:
            Parsed search results; empty when the response has no data.

        Raises:
            httpx.HTTPStatusError: On a non-success HTTP status.
        """
        graphql_query = """
        query($search: String, $limit: Int, $season: SeasonString, $status: AnimeStatusString) {
            animes(search: $search, limit: $limit, season: $season, status: $status) {
                id
                russian
                english
                japanese
                airedOn { year }
                poster { originalUrl }
            }
        }
        """
        variables = {
            "search": query,
            "limit": limit,
        }
        if year:
            variables["season"] = str(year)
        if status:
            variables["status"] = status

        response = await self.client.post(
            self.GRAPHQL_URL,
            json={"query": graphql_query, "variables": variables},
        )
        response.raise_for_status()

        return [
            AnimeSearchResult(shikimori_id=int(item["id"]), **self._anime_fields(item))
            for item in self._extract_animes(response.json())
        ]

    async def get_or_create_anime(self, db: AsyncSession, shikimori_id: int) -> Anime:
        """Get anime from DB or fetch from Shikimori and create.

        Args:
            db: Session used for the lookup and, if needed, the insert
                (committed by this method).
            shikimori_id: Shikimori identifier of the anime.

        Returns:
            The existing or newly created ``Anime`` row.

        Raises:
            ValueError: If Shikimori returns no anime for ``shikimori_id``.
            httpx.HTTPStatusError: On a non-success HTTP status.
        """
        # Check if exists (with themes eagerly loaded)
        query = (
            select(Anime)
            .where(Anime.shikimori_id == shikimori_id)
            .options(selectinload(Anime.themes))
        )
        result = await db.execute(query)
        anime = result.scalar_one_or_none()
        if anime:
            return anime

        # Fetch from Shikimori
        graphql_query = """
        query($ids: String!) {
            animes(ids: $ids, limit: 1) {
                id
                russian
                english
                japanese
                airedOn { year }
                poster { originalUrl }
            }
        }
        """
        response = await self.client.post(
            self.GRAPHQL_URL,
            json={"query": graphql_query, "variables": {"ids": str(shikimori_id)}},
        )
        response.raise_for_status()

        animes = self._extract_animes(response.json())
        if not animes:
            raise ValueError(f"Anime with ID {shikimori_id} not found on Shikimori")

        anime = Anime(shikimori_id=shikimori_id, **self._anime_fields(animes[0]))
        db.add(anime)
        await db.commit()
        await db.refresh(anime)
        return anime

View File

@@ -0,0 +1,58 @@
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from ..db_models import AnimeTheme
from ..schemas import StorageStatsResponse
from ..config import downloader_settings
from ...db_models import Opening
class StorageTrackerService:
    """Tracks S3 storage usage using only database records (S3 is never scanned)."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def get_stats(self) -> StorageStatsResponse:
        """Compute usage statistics from the database.

        Used bytes is the sum of recorded theme file sizes; the limit comes
        from the downloader settings. Downloads are allowed while usage is
        strictly below the limit.
        """
        # Total size of all themes that have a recorded file size.
        size_query = (
            select(func.coalesce(func.sum(AnimeTheme.file_size_bytes), 0))
            .where(AnimeTheme.file_size_bytes.isnot(None))
        )
        size_result = await self.db.execute(size_query)
        used_bytes = size_result.scalar() or 0

        # Number of rows in the main Opening table.
        count_result = await self.db.execute(select(func.count(Opening.id)))
        openings_count = count_result.scalar() or 0

        limit_bytes = downloader_settings.s3_storage_limit_bytes
        if limit_bytes > 0:
            used_percent = used_bytes / limit_bytes * 100
        else:
            # A non-positive limit would make the ratio meaningless.
            used_percent = 0

        return StorageStatsResponse(
            used_bytes=used_bytes,
            limit_bytes=limit_bytes,
            used_percent=round(used_percent, 2),
            available_bytes=max(0, limit_bytes - used_bytes),
            can_download=used_bytes < limit_bytes,
            openings_count=openings_count,
        )

    async def get_estimated_queue_size(self) -> int:
        """Sum the estimated sizes of all tasks that are queued or in progress."""
        from ..db_models import DownloadTask, DownloadStatus

        pending_statuses = [
            DownloadStatus.QUEUED,
            DownloadStatus.DOWNLOADING,
            DownloadStatus.CONVERTING,
            DownloadStatus.UPLOADING,
        ]
        pending_query = (
            select(func.coalesce(func.sum(DownloadTask.estimated_size_bytes), 0))
            .where(DownloadTask.status.in_(pending_statuses))
        )
        pending_result = await self.db.execute(pending_query)
        return pending_result.scalar() or 0