78 lines
2.9 KiB
Python
78 lines
2.9 KiB
Python
|
|
import base64
|
|||
|
|
|
|||
|
|
from google import genai
|
|||
|
|
from google.genai import types
|
|||
|
|
|
|||
|
|
# Russian system instruction: asks for a short (2-3 sentence) description of
# the frame and tells the model to answer 'Без изменений' when the scene
# matches the previous description.
SYSTEM_PROMPT_RU = (
    "Ты анализируешь кадры с Twitch-стрима. "
    "Кратко опиши что происходит на экране: "
    "игра, действия стримера, интерфейс, чат, оверлеи. "
    "Будь лаконичен (2-3 предложения). "
    "Если ничего не изменилось по сравнению с предыдущим описанием, "
    "скажи 'Без изменений' и уточни только новые детали."
)
|
|||
|
|
|
|||
|
|
# English system instruction: asks for a short (2-3 sentence) description of
# the frame and tells the model to answer 'No changes' when the scene matches
# the previous description.
SYSTEM_PROMPT_EN = (
    "You are analyzing frames from a Twitch stream. "
    "Briefly describe what's happening on screen: "
    "game, streamer actions, UI, chat, overlays. "
    "Be concise (2-3 sentences). "
    "If nothing changed compared to the previous description, "
    "say 'No changes' and only note new details."
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VisionAnalyzer:
    """Describe stream frames with a Gemini model, one frame per call.

    The analyzer remembers the most recent description it received and
    replays it as conversation context on the next call, so the model can
    report what changed instead of re-describing the whole scene.
    """

    # Conversation snippets sent with every request, keyed by prompt language.
    # They are kept in the same language as the system prompt so an English
    # system instruction is not paired with Russian user turns.
    _TEXTS = {
        "ru": {
            "previous": "Предыдущее описание: ",
            "ack": "Понял, учту контекст.",
            "request": "Опиши что сейчас происходит на стриме.",
            "empty": "(нет описания)",
        },
        "en": {
            "previous": "Previous description: ",
            "ack": "Understood, I'll keep that context in mind.",
            "request": "Describe what is happening on the stream right now.",
            "empty": "(no description)",
        },
    }

    def __init__(
        self,
        api_key: str,
        base_url: str | None = None,
        lang: str = "ru",
        model: str = "gemini-2.0-flash",
    ):
        """Create the API client and select the prompt language.

        Args:
            api_key: API key passed to ``genai.Client``.
            base_url: Optional alternative API endpoint; when given it is
                passed to the client through ``types.HttpOptions``.
            lang: ``"ru"`` selects the Russian prompts; any other value
                selects the English ones.
            model: Name of the model used for every request.
        """
        client_kwargs = {"api_key": api_key}
        if base_url:
            client_kwargs["http_options"] = types.HttpOptions(base_url=base_url)
        self.client = genai.Client(**client_kwargs)
        self.model = model

        # Anything other than "ru" falls back to English.
        lang_key = "ru" if lang == "ru" else "en"
        self.system_prompt = SYSTEM_PROMPT_RU if lang_key == "ru" else SYSTEM_PROMPT_EN
        self._texts = self._TEXTS[lang_key]

        # Last real description returned by the model; None until the first
        # successful call.
        self.previous_description: str | None = None

    def _build_contents(self, frame_data: bytes, mime_type: str) -> list:
        """Build the conversation turns for one frame request."""
        texts = self._texts
        contents = []

        if self.previous_description:
            # Replay the previous description as a finished user/model
            # exchange so the actual request below can refer back to it.
            contents.append(
                types.Content(
                    role="user",
                    parts=[
                        types.Part.from_text(
                            text=f"{texts['previous']}{self.previous_description}"
                        )
                    ],
                )
            )
            contents.append(
                types.Content(
                    role="model",
                    parts=[types.Part.from_text(text=texts["ack"])],
                )
            )

        # The image is passed as raw bytes; no base64 step is needed here.
        contents.append(
            types.Content(
                role="user",
                parts=[
                    types.Part.from_bytes(data=frame_data, mime_type=mime_type),
                    types.Part.from_text(text=texts["request"]),
                ],
            )
        )
        return contents

    async def analyze_frame(
        self, frame_data: bytes, mime_type: str = "image/jpeg"
    ) -> str:
        """Send one frame to the model and return its description.

        Args:
            frame_data: Encoded image bytes of the frame.
            mime_type: MIME type of ``frame_data``; defaults to JPEG.

        Returns:
            The model's text, or a localized placeholder when the response
            carries no text.
        """
        response = await self.client.aio.models.generate_content(
            model=self.model,
            contents=self._build_contents(frame_data, mime_type),
            config=types.GenerateContentConfig(
                system_instruction=self.system_prompt,
                max_output_tokens=300,
                temperature=0.3,
            ),
        )

        description = response.text
        if not description:
            # Keep the last real description as context: storing the
            # placeholder would feed it back to the model on the next call
            # as if it were an actual description of the stream.
            return self._texts["empty"]

        # NOTE(review): when the model answers with the "no changes" phrase,
        # that short reply becomes the context for the next frame — confirm
        # this is the intended behavior.
        self.previous_description = description
        return description
|