Files
twitchStreamVision/analyzer.py
NANDI d0e0e53087 Initial commit: Twitch Stream Vision Analyzer
Async pipeline: streamlink + ffmpeg frame capture → Gemini Vision API analysis → rich console output + log file.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 10:35:37 +01:00

78 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import base64
from google import genai
from google.genai import types
# Russian system instruction; VisionAnalyzer selects it when lang == "ru".
# It asks the model for a concise (2-3 sentence) description of the frame
# (game, streamer actions, UI, chat, overlays) and, when nothing differs from
# the previous description, for a 'Без изменений' ("No changes") reply that
# only notes new details.
SYSTEM_PROMPT_RU = (
"Ты анализируешь кадры с Twitch-стрима. "
"Кратко опиши что происходит на экране: игра, действия стримера, "
"интерфейс, чат, оверлеи. Будь лаконичен (2-3 предложения). "
"Если ничего не изменилось по сравнению с предыдущим описанием, "
"скажи 'Без изменений' и уточни только новые детали."
)
# English system instruction; VisionAnalyzer uses it for every lang value
# other than "ru". It asks the model for a concise (2-3 sentence) description
# of the frame and, when nothing differs from the previous description, for a
# 'No changes' reply that only notes new details.
SYSTEM_PROMPT_EN = (
"You are analyzing frames from a Twitch stream. "
"Briefly describe what's happening on screen: game, streamer actions, "
"UI, chat, overlays. Be concise (2-3 sentences). "
"If nothing changed compared to the previous description, "
"say 'No changes' and only note new details."
)
class VisionAnalyzer:
def __init__(self, api_key: str, base_url: str | None = None, lang: str = "ru"):
client_kwargs = {"api_key": api_key}
if base_url:
client_kwargs["http_options"] = types.HttpOptions(base_url=base_url)
self.client = genai.Client(**client_kwargs)
self.model = "gemini-2.0-flash"
self.system_prompt = SYSTEM_PROMPT_RU if lang == "ru" else SYSTEM_PROMPT_EN
self.previous_description: str | None = None
async def analyze_frame(self, frame_data: bytes) -> str:
b64_image = base64.b64encode(frame_data).decode("utf-8")
contents = []
if self.previous_description:
contents.append(
types.Content(
role="user",
parts=[
types.Part.from_text(
text=f"Предыдущее описание: {self.previous_description}"
)
],
)
)
contents.append(
types.Content(
role="model",
parts=[types.Part.from_text(text="Понял, учту контекст.")],
)
)
contents.append(
types.Content(
role="user",
parts=[
types.Part.from_bytes(data=frame_data, mime_type="image/jpeg"),
types.Part.from_text(text="Опиши что сейчас происходит на стриме."),
],
)
)
response = await self.client.aio.models.generate_content(
model=self.model,
contents=contents,
config=types.GenerateContentConfig(
system_instruction=self.system_prompt,
max_output_tokens=300,
temperature=0.3,
),
)
description = response.text or "(нет описания)"
self.previous_description = description
return description