Compare commits

...

2 Commits

Author SHA1 Message Date
a91b20adc6 Make vision analysis detailed for downstream AI consumption
- Expanded system prompt with structured 6-point analysis
- Increased max_output_tokens from 300 to 1000
- Description now covers game, actions, UI, camera, overlays, text

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 10:58:20 +01:00
811608911a Show streamlink/ffmpeg stderr for debugging, handle startup errors
- stderr from both processes now displayed in console
- streamlink checked for early exit before starting ffmpeg
- RuntimeError shown to user if stream unavailable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 10:52:29 +01:00
3 changed files with 78 additions and 25 deletions

View File

@@ -4,19 +4,29 @@ from google import genai
from google.genai import types
SYSTEM_PROMPT_RU = (
"Ты анализируешь кадры с Twitch-стрима. "
"Кратко опиши что происходит на экране: игра, действия стримера, "
"интерфейс, чат, оверлеи. Будь лаконичен (2-3 предложения). "
"Если ничего не изменилось по сравнению с предыдущим описанием, "
"скажи 'Без изменений' и уточни только новые детали."
"Ты анализируешь кадры с Twitch-стрима. Дай подробное описание всего, что видишь на экране:\n"
"1. Какая игра/приложение на экране, жанр, сеттинг\n"
"2. Что конкретно происходит: действия персонажа, ситуация в игре, этап (меню, геймплей, катсцена, лобби)\n"
"3. Элементы интерфейса: HUD, здоровье, инвентарь, мини-карта, счёт, таймеры\n"
"4. Камера стримера: что видно, эмоции, жесты (если есть)\n"
"5. Оверлеи: донаты, алерты, виджеты, чат\n"
"6. Текст на экране: любой читаемый текст, названия, никнеймы\n"
"Пиши развёрнуто (5-10 предложений). Описание должно быть достаточно детальным, "
"чтобы другая AI-модель могла полностью понять контекст происходящего без просмотра изображения.\n"
"Если сцена похожа на предыдущую, опиши только изменения, но подробно."
)
SYSTEM_PROMPT_EN = (
"You are analyzing frames from a Twitch stream. "
"Briefly describe what's happening on screen: game, streamer actions, "
"UI, chat, overlays. Be concise (2-3 sentences). "
"If nothing changed compared to the previous description, "
"say 'No changes' and only note new details."
"You are analyzing frames from a Twitch stream. Give a detailed description of everything on screen:\n"
"1. What game/application is shown, genre, setting\n"
"2. What exactly is happening: character actions, game situation, stage (menu, gameplay, cutscene, lobby)\n"
"3. UI elements: HUD, health, inventory, minimap, score, timers\n"
"4. Streamer camera: what's visible, emotions, gestures (if present)\n"
"5. Overlays: donations, alerts, widgets, chat\n"
"6. On-screen text: any readable text, names, nicknames\n"
"Write in detail (5-10 sentences). The description must be detailed enough "
"for another AI model to fully understand the context without seeing the image.\n"
"If the scene is similar to the previous one, describe only the changes, but in detail."
)
@@ -67,7 +77,7 @@ class VisionAnalyzer:
contents=contents,
config=types.GenerateContentConfig(
system_instruction=self.system_prompt,
max_output_tokens=300,
max_output_tokens=1000,
temperature=0.3,
),
)

View File

@@ -1,6 +1,10 @@
import asyncio
from collections.abc import AsyncIterator
from rich.console import Console
console = Console()
async def _pipe_stream(source: asyncio.StreamReader, dest: asyncio.StreamWriter):
"""Forward data from streamlink stdout to ffmpeg stdin."""
@@ -17,6 +21,17 @@ async def _pipe_stream(source: asyncio.StreamReader, dest: asyncio.StreamWriter)
dest.close()
async def _log_stderr(proc_name: str, stderr: asyncio.StreamReader) -> None:
    """Read a subprocess's stderr line by line and echo it to the console.

    Each non-empty line is decoded as UTF-8 (undecodable bytes are replaced),
    stripped of trailing whitespace, and printed in dim red with a
    ``[proc_name]`` prefix. The loop ends when ``readline`` returns an empty
    bytes object, i.e. the stream has reached EOF.

    Args:
        proc_name: Label shown in square brackets before every line.
        stderr: Stream to read lines from.
    """
    while True:
        line = await stderr.readline()
        if not line:
            break
        text = line.decode("utf-8", errors="replace").rstrip()
        if text:
            # Markup is disabled on purpose: with markup on, the "[name]"
            # prefix and any bracketed text in the subprocess output would be
            # parsed as Rich style tags and either vanish from the output or
            # raise MarkupError on an unmatched closing tag. The colour is
            # applied through ``style`` instead of inline tags.
            console.print(f"[{proc_name}] {text}", style="dim red", markup=False)
async def capture_frames(
channel: str, quality: str, interval: int
) -> AsyncIterator[bytes]:
@@ -33,6 +48,7 @@ async def capture_frames(
ffmpeg_cmd = [
"ffmpeg",
"-loglevel", "warning",
"-i", "pipe:0",
"-vf", f"fps=1/{interval}",
"-f", "image2pipe",
@@ -41,24 +57,43 @@ async def capture_frames(
"pipe:1",
]
console.print("[dim]Starting streamlink...[/dim]")
streamlink_proc = await asyncio.create_subprocess_exec(
*streamlink_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.PIPE,
)
# Wait a moment and check if streamlink started OK
await asyncio.sleep(2)
if streamlink_proc.returncode is not None:
stderr_out = await streamlink_proc.stderr.read()
raise RuntimeError(
f"streamlink exited with code {streamlink_proc.returncode}: "
f"{stderr_out.decode('utf-8', errors='replace')}"
)
console.print("[dim]Starting ffmpeg...[/dim]")
ffmpeg_proc = await asyncio.create_subprocess_exec(
*ffmpeg_cmd,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.DEVNULL,
stderr=asyncio.subprocess.PIPE,
)
# Log stderr from both processes
stderr_tasks = [
asyncio.create_task(_log_stderr("streamlink", streamlink_proc.stderr)),
asyncio.create_task(_log_stderr("ffmpeg", ffmpeg_proc.stderr)),
]
# Forward streamlink → ffmpeg in background
pipe_task = asyncio.create_task(
_pipe_stream(streamlink_proc.stdout, ffmpeg_proc.stdin)
)
console.print("[dim]Pipeline running, waiting for first frame...[/dim]")
try:
buf = b""
while True:
@@ -82,6 +117,8 @@ async def capture_frames(
yield frame
finally:
pipe_task.cancel()
for t in stderr_tasks:
t.cancel()
for proc in (ffmpeg_proc, streamlink_proc):
try:
proc.terminate()

View File

@@ -28,6 +28,7 @@ async def run(config) -> None:
frame_number = 0
try:
async for frame_data in capture_frames(
config.channel, config.quality, config.interval
):
@@ -42,6 +43,11 @@ async def run(config) -> None:
print_description(description, frame_number)
await log_description(config.log_file, description, frame_number)
except RuntimeError as e:
console.print(f"[bold red]Error:[/bold red] {e}")
finally:
if frame_number == 0:
console.print("[bold yellow]No frames were captured.[/bold yellow]")
def main() -> None: