Make vision analysis detailed for downstream AI consumption

- Expanded system prompt with structured 6-point analysis
- Increased max_output_tokens from 300 to 1000
- Description now covers game, actions, UI, camera, overlays, text

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Show streamlink/ffmpeg stderr for debugging, handle startup errors
2026-03-05 10:58:20 +01:00 · 2026-03-05 10:52:29 +01:00
3 changed files with 78 additions and 25 deletions
--- a/analyzer.py
+++ b/analyzer.py
@@ -4,19 +4,29 @@ from google import genai
 from google.genai import types

 SYSTEM_PROMPT_RU = (
-    "Ты анализируешь кадры с Twitch-стрима. "
-    "Кратко опиши что происходит на экране: игра, действия стримера, "
-    "интерфейс, чат, оверлеи. Будь лаконичен (2-3 предложения). "
-    "Если ничего не изменилось по сравнению с предыдущим описанием, "
-    "скажи 'Без изменений' и уточни только новые детали."
+    "Ты анализируешь кадры с Twitch-стрима. Дай подробное описание всего, что видишь на экране:\n"
+    "1. Какая игра/приложение на экране, жанр, сеттинг\n"
+    "2. Что конкретно происходит: действия персонажа, ситуация в игре, этап (меню, геймплей, катсцена, лобби)\n"
+    "3. Элементы интерфейса: HUD, здоровье, инвентарь, мини-карта, счёт, таймеры\n"
+    "4. Камера стримера: что видно, эмоции, жесты (если есть)\n"
+    "5. Оверлеи: донаты, алерты, виджеты, чат\n"
+    "6. Текст на экране: любой читаемый текст, названия, никнеймы\n"
+    "Пиши развёрнуто (5-10 предложений). Описание должно быть достаточно детальным, "
+    "чтобы другая AI-модель могла полностью понять контекст происходящего без просмотра изображения.\n"
+    "Если сцена похожа на предыдущую, опиши только изменения, но подробно."
 )

 SYSTEM_PROMPT_EN = (
-    "You are analyzing frames from a Twitch stream. "
-    "Briefly describe what's happening on screen: game, streamer actions, "
-    "UI, chat, overlays. Be concise (2-3 sentences). "
-    "If nothing changed compared to the previous description, "
-    "say 'No changes' and only note new details."
+    "You are analyzing frames from a Twitch stream. Give a detailed description of everything on screen:\n"
+    "1. What game/application is shown, genre, setting\n"
+    "2. What exactly is happening: character actions, game situation, stage (menu, gameplay, cutscene, lobby)\n"
+    "3. UI elements: HUD, health, inventory, minimap, score, timers\n"
+    "4. Streamer camera: what's visible, emotions, gestures (if present)\n"
+    "5. Overlays: donations, alerts, widgets, chat\n"
+    "6. On-screen text: any readable text, names, nicknames\n"
+    "Write in detail (5-10 sentences). The description must be detailed enough "
+    "for another AI model to fully understand the context without seeing the image.\n"
+    "If the scene is similar to the previous one, describe only the changes, but in detail."
 )


@@ -67,7 +77,7 @@ class VisionAnalyzer:
            contents=contents,
            config=types.GenerateContentConfig(
                system_instruction=self.system_prompt,
-                max_output_tokens=300,
+                max_output_tokens=1000,
                temperature=0.3,
            ),
        )
--- a/capture.py
+++ b/capture.py
@@ -1,6 +1,10 @@
 import asyncio
 from collections.abc import AsyncIterator

+from rich.console import Console
+
+console = Console()
+

 async def _pipe_stream(source: asyncio.StreamReader, dest: asyncio.StreamWriter):
    """Forward data from streamlink stdout to ffmpeg stdin."""
@@ -17,6 +21,17 @@ async def _pipe_stream(source: asyncio.StreamReader, dest: asyncio.StreamWriter)
        dest.close()


+async def _log_stderr(proc_name: str, stderr: asyncio.StreamReader):
+    """Print each non-blank stderr line of the named subprocess to the console until the stream ends."""
+    while True:
+        line = await stderr.readline()
+        if not line:  # empty read: nothing more will arrive, so stop
+            break
+        text = line.decode("utf-8", errors="replace").rstrip()  # undecodable bytes are replaced rather than raising
+        if text:  # skip lines that were only whitespace
+            console.print(f"[dim red][{proc_name}] {text}[/dim red]")  # NOTE(review): proc_name/text are not markup-escaped; bracketed output may be parsed as Rich tags — confirm
+
+
 async def capture_frames(
    channel: str, quality: str, interval: int
 ) -> AsyncIterator[bytes]:
@@ -33,6 +48,7 @@ async def capture_frames(

    ffmpeg_cmd = [
        "ffmpeg",
+        "-loglevel", "warning",
        "-i", "pipe:0",
        "-vf", f"fps=1/{interval}",
        "-f", "image2pipe",
@@ -41,24 +57,43 @@ async def capture_frames(
        "pipe:1",
    ]

+    console.print("[dim]Starting streamlink...[/dim]")
    streamlink_proc = await asyncio.create_subprocess_exec(
        *streamlink_cmd,
        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.DEVNULL,
+        stderr=asyncio.subprocess.PIPE,
    )

+    # Wait a moment and check if streamlink started OK
+    await asyncio.sleep(2)
+    if streamlink_proc.returncode is not None:
+        stderr_out = await streamlink_proc.stderr.read()
+        raise RuntimeError(
+            f"streamlink exited with code {streamlink_proc.returncode}: "
+            f"{stderr_out.decode('utf-8', errors='replace')}"
+        )
+
+    console.print("[dim]Starting ffmpeg...[/dim]")
    ffmpeg_proc = await asyncio.create_subprocess_exec(
        *ffmpeg_cmd,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.DEVNULL,
+        stderr=asyncio.subprocess.PIPE,
    )

+    # Log stderr from both processes
+    stderr_tasks = [
+        asyncio.create_task(_log_stderr("streamlink", streamlink_proc.stderr)),
+        asyncio.create_task(_log_stderr("ffmpeg", ffmpeg_proc.stderr)),
+    ]
+
    # Forward streamlink → ffmpeg in background
    pipe_task = asyncio.create_task(
        _pipe_stream(streamlink_proc.stdout, ffmpeg_proc.stdin)
    )

+    console.print("[dim]Pipeline running, waiting for first frame...[/dim]")
+
    try:
        buf = b""
        while True:
@@ -82,6 +117,8 @@ async def capture_frames(
                yield frame
    finally:
        pipe_task.cancel()
+        for t in stderr_tasks:
+            t.cancel()
        for proc in (ffmpeg_proc, streamlink_proc):
            try:
                proc.terminate()
--- a/main.py
+++ b/main.py
@@ -28,6 +28,7 @@ async def run(config) -> None:

    frame_number = 0

+    try:
        async for frame_data in capture_frames(
            config.channel, config.quality, config.interval
        ):
@@ -42,6 +43,11 @@ async def run(config) -> None:

            print_description(description, frame_number)
            await log_description(config.log_file, description, frame_number)
+    except RuntimeError as e:
+        console.print(f"[bold red]Error:[/bold red] {e}")
+    finally:
+        if frame_number == 0:
+            console.print("[bold yellow]No frames were captured.[/bold yellow]")


 def main() -> None: