This commit is contained in:
2025-12-17 22:10:01 +07:00
parent f371178518
commit 967176fab8
7 changed files with 794 additions and 1 deletions

View File

@@ -5,6 +5,7 @@ import sys
from aiogram import Bot, Dispatcher
from aiogram.client.default import DefaultBotProperties
from aiogram.enums import ParseMode
from aiohttp import web
from config import settings
from handlers import start, marathons, link
@@ -23,14 +24,41 @@ logger = logging.getLogger(__name__)
# Set aiogram logging level
logging.getLogger("aiogram").setLevel(logging.INFO)
# Health check state: False while the bot is starting, flipped to True by
# main() once routers are registered and polling is about to begin, and reset
# to False when polling stops. health_handler reads it to pick 200 vs 503.
bot_running = False
async def health_handler(request):
    """Answer GET /health with the bot's readiness.

    Responds 503 and ``{"status": "starting"}`` while the module-level
    ``bot_running`` flag is false; otherwise responds 200 with the service
    identity. The incoming request is not inspected.
    """
    if not bot_running:
        return web.json_response({"status": "starting"}, status=503)

    payload = {"status": "ok", "service": "telegram-bot"}
    return web.json_response(payload)
async def start_health_server(host: str = "0.0.0.0", port: int = 8080):
    """Start the HTTP server that exposes the ``/health`` endpoint.

    Args:
        host: Interface to bind. Defaults to all interfaces.
        port: TCP port to listen on. Defaults to 8080.

    Returns:
        The ``web.AppRunner`` that owns the server. The caller is responsible
        for awaiting ``runner.cleanup()`` on shutdown.

    Raises:
        Whatever ``TCPSite.start()`` raises (for example when the port cannot
        be bound); the runner is cleaned up before the error propagates.
    """
    app = web.Application()
    app.router.add_get("/health", health_handler)

    runner = web.AppRunner(app)
    await runner.setup()
    try:
        site = web.TCPSite(runner, host, port)
        await site.start()
    except BaseException:
        # On failure the caller never receives the runner, so nobody else
        # could release it; clean it up here before re-raising.
        await runner.cleanup()
        raise

    logger.info("Health check server started on port %s", port)
    return runner
async def main():
global bot_running
logger.info("="*50)
logger.info("Starting Game Marathon Bot...")
logger.info(f"API_URL: {settings.API_URL}")
logger.info(f"BOT_TOKEN: {settings.TELEGRAM_BOT_TOKEN[:20]}...")
logger.info("="*50)
# Start health check server
health_runner = await start_health_server()
bot = Bot(
token=settings.TELEGRAM_BOT_TOKEN,
default=DefaultBotProperties(parse_mode=ParseMode.HTML)
@@ -54,11 +82,18 @@ async def main():
dp.include_router(marathons.router)
logger.info("Routers registered: start, link, marathons")
# Mark bot as running
bot_running = True
# Start polling
logger.info("Deleting webhook and starting polling...")
await bot.delete_webhook(drop_pending_updates=True)
logger.info("Polling started! Waiting for messages...")
try:
await dp.start_polling(bot)
finally:
bot_running = False
await health_runner.cleanup()
if __name__ == "__main__":

View File

@@ -85,5 +85,23 @@ services:
- backend
restart: unless-stopped
# Status page service: polls the backend, frontend and bot and serves the results
status:
build:
context: ./status-service
dockerfile: Dockerfile
container_name: marathon-status
environment:
# Base URLs probed by the monitor; the hostnames are compose service names
BACKEND_URL: http://backend:8000
FRONTEND_URL: http://frontend:80
# 8080 is the port of the bot's built-in health check server
BOT_URL: http://bot:8080
# Seconds between background polling rounds (parsed as an integer by the service)
CHECK_INTERVAL: "30"
ports:
# uvicorn inside the status-service image listens on 8001
- "8001:8001"
depends_on:
# Start the monitored services before the monitor itself
- backend
- frontend
- bot
restart: unless-stopped
volumes:
postgres_data:

13
status-service/Dockerfile Normal file
View File

@@ -0,0 +1,13 @@
# Image for the status monitor service (FastAPI app served by uvicorn)
FROM python:3.11-slim
# All following relative paths resolve inside /app
WORKDIR /app
# Install dependencies
# requirements.txt is copied on its own first so the pip layer can be reused
# from the build cache when only application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Run the application: serve `app` from main.py on all interfaces, port 8001
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001"]

109
status-service/main.py Normal file
View File

@@ -0,0 +1,109 @@
import os
import asyncio
from datetime import datetime, timedelta
from typing import Optional
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from monitors import ServiceMonitor, ServiceStatus
# Configuration (every value can be overridden through the environment)
# Base URLs of the services to probe
BACKEND_URL = os.getenv("BACKEND_URL", "http://backend:8000")
FRONTEND_URL = os.getenv("FRONTEND_URL", "http://frontend:80")
BOT_URL = os.getenv("BOT_URL", "http://bot:8080")
# Seconds between polling rounds. Clamped to at least 1: a zero or negative
# value would make the background poller run back-to-back and make the status
# page reload itself without pause. A non-numeric value still fails at import.
CHECK_INTERVAL = max(1, int(os.getenv("CHECK_INTERVAL", "30")))
# Initialize monitor: one shared ServiceMonitor, read by every endpoint and
# updated by the background poller and by POST /api/refresh
monitor = ServiceMonitor()
# Background task reference: assigned by lifespan() at startup so the polling
# task can be cancelled at shutdown
background_task: Optional[asyncio.Task] = None
async def periodic_health_check():
    """Poll every monitored service forever, once per ``CHECK_INTERVAL`` seconds.

    Intended to run as a background task; it only ends when cancelled. A
    failure inside one polling round is logged and the loop carries on, so a
    single unexpected error cannot leave the status page on stale data for
    the rest of the process lifetime.
    """
    # Local import: this module's import block does not bring in logging.
    import logging

    log = logging.getLogger(__name__)
    while True:
        try:
            await monitor.check_all_services(
                backend_url=BACKEND_URL,
                frontend_url=FRONTEND_URL,
                bot_url=BOT_URL,
            )
        except Exception:
            # asyncio.CancelledError derives from BaseException, so task
            # cancellation at shutdown is not swallowed here.
            log.exception("Periodic health check round failed")
        await asyncio.sleep(CHECK_INTERVAL)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the background poller for as long as the application is alive.

    On startup the polling coroutine is scheduled as a task and stored in the
    module-level ``background_task``. After the application hands control
    back (shutdown), that task is cancelled and awaited so it finishes before
    the process exits.
    """
    global background_task

    # Startup: begin periodic health checks
    background_task = asyncio.create_task(periodic_health_check())

    yield

    # Shutdown: stop the poller, if one was started
    if not background_task:
        return
    background_task.cancel()
    try:
        await background_task
    except asyncio.CancelledError:
        # Expected outcome of cancelling the task
        pass
# ASGI application; `lifespan` starts the background polling task on startup
# and cancels it on shutdown
app = FastAPI(
    title="Status Monitor",
    description="Service health monitoring",
    lifespan=lifespan
)
# Template loader for the relative "templates" directory; status_page renders
# index.html from it
templates = Jinja2Templates(directory="templates")
@app.get("/", response_class=HTMLResponse)
async def status_page(request: Request):
    """Render the HTML dashboard from the monitor's most recent results.

    No checks are triggered here; the page shows whatever the monitor
    currently holds, plus the polling interval used for auto-refresh.
    """
    context = {
        "request": request,
        "services": monitor.get_all_statuses(),
        "overall_status": monitor.get_overall_status(),
        "last_check": monitor.last_check,
        "check_interval": CHECK_INTERVAL,
    }
    return templates.TemplateResponse("index.html", context)
@app.get("/api/status")
async def api_status():
    """Return the monitor's current state as JSON.

    The payload holds the aggregated status, one serialized entry per
    service, the ISO timestamp of the last polling round (``None`` before the
    first one) and the polling interval in seconds.
    """
    checked_at = monitor.last_check

    per_service = {}
    for key, svc in monitor.get_all_statuses().items():
        per_service[key] = svc.to_dict()

    return {
        "overall_status": monitor.get_overall_status(),
        "services": per_service,
        "last_check": checked_at.isoformat() if checked_at else None,
        "check_interval_seconds": CHECK_INTERVAL,
    }
@app.get("/api/health")
async def health():
    """Liveness probe for the status service itself; always reports ok."""
    payload = dict(status="ok", service="status-monitor")
    return payload
@app.post("/api/refresh")
async def refresh_status():
    """Run one polling round right now instead of waiting for the next one.

    Returns only after every service has been checked, so a client that
    reloads afterwards sees the fresh results.
    """
    targets = {
        "backend_url": BACKEND_URL,
        "frontend_url": FRONTEND_URL,
        "bot_url": BOT_URL,
    }
    await monitor.check_all_services(**targets)
    return {"status": "refreshed"}

227
status-service/monitors.py Normal file
View File

@@ -0,0 +1,227 @@
import asyncio
from datetime import datetime, timedelta
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum
import httpx
class Status(str, Enum):
    """Health state of a monitored service.

    Also subclasses ``str``, so members compare equal to their string values.
    """

    OPERATIONAL = "operational"
    DEGRADED = "degraded"
    DOWN = "down"
    UNKNOWN = "unknown"


@dataclass
class ServiceStatus:
    """Latest check result and running uptime counters for one service."""

    # Internal key and human-readable label
    name: str
    display_name: str
    # Outcome of the most recent check; UNKNOWN until the first check ran
    status: Status = Status.UNKNOWN
    # Round-trip time of the most recent check in milliseconds, if measured
    latency_ms: Optional[float] = None
    last_check: Optional[datetime] = None
    last_incident: Optional[datetime] = None
    # Share of successful checks, 0-100; starts at 100 before any check
    uptime_percent: float = 100.0
    # Short problem description from the most recent check, if any
    message: Optional[str] = None
    version: Optional[str] = None

    # For uptime calculation
    total_checks: int = 0
    successful_checks: int = 0

    def to_dict(self) -> dict:
        """Return a JSON-serializable snapshot of the public fields.

        Timestamps become ISO-8601 strings, the status becomes its string
        value, and numeric fields are rounded to two decimals. The uptime
        counters are not included.
        """
        # Compare against None explicitly: a measured latency of 0.0 ms is a
        # real value and must not be reported as "no measurement".
        latency = round(self.latency_ms, 2) if self.latency_ms is not None else None
        last_check = self.last_check.isoformat() if self.last_check is not None else None
        last_incident = (
            self.last_incident.isoformat() if self.last_incident is not None else None
        )
        return {
            "name": self.name,
            "display_name": self.display_name,
            "status": self.status.value,
            "latency_ms": latency,
            "last_check": last_check,
            "last_incident": last_incident,
            "uptime_percent": round(self.uptime_percent, 2),
            "message": self.message,
            "version": self.version,
        }

    def update_uptime(self, is_success: bool):
        """Count one more check and recompute ``uptime_percent``.

        Args:
            is_success: Whether the check counts towards uptime.
        """
        self.total_checks += 1
        if is_success:
            self.successful_checks += 1
        # total_checks was just incremented from a non-negative count, so the
        # division is safe.
        self.uptime_percent = (self.successful_checks / self.total_checks) * 100
class ServiceMonitor:
    """Probes the backend, database, frontend and bot and keeps their latest state.

    ``services`` maps a service key to its ``ServiceStatus``; ``last_check``
    is the timestamp of the most recent polling round (``None`` before the
    first one). The database has no probe of its own: its state is derived
    from the backend's ``/health`` endpoint.
    """

    # Seconds before a single HTTP probe is abandoned.
    REQUEST_TIMEOUT = 10.0

    def __init__(self):
        self.services: dict[str, ServiceStatus] = {
            "backend": ServiceStatus(name="backend", display_name="Backend API"),
            "database": ServiceStatus(name="database", display_name="Database"),
            "frontend": ServiceStatus(name="frontend", display_name="Frontend"),
            "bot": ServiceStatus(name="bot", display_name="Telegram Bot"),
        }
        self.last_check: Optional[datetime] = None

    async def _timed_get(self, url: str):
        """GET ``url`` and return ``(response, latency_ms)``; errors propagate."""
        # The event loop clock is monotonic, so the measured latency cannot go
        # negative or jump when the wall clock is adjusted.
        clock = asyncio.get_running_loop().time
        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            started = clock()
            response = await client.get(url)
            latency_ms = (clock() - started) * 1000
        return response, latency_ms

    async def _probe(self, url: str) -> tuple[Status, Optional[float], Optional[str]]:
        """Availability probe: 200 is OPERATIONAL, other codes DEGRADED, failures DOWN."""
        try:
            response, latency = await self._timed_get(url)
        except httpx.TimeoutException:
            return Status.DOWN, None, "Timeout"
        except Exception as exc:
            return Status.DOWN, None, str(exc)[:100]

        if response.status_code == 200:
            return Status.OPERATIONAL, latency, None
        return Status.DEGRADED, latency, f"HTTP {response.status_code}"

    @staticmethod
    def _extract_version(response) -> Optional[str]:
        """Best-effort read of the ``version`` field from a JSON health response."""
        try:
            payload = response.json()
        except ValueError:
            # A body that is not valid JSON only means the version is unknown;
            # the service itself still answered successfully.
            return None
        return payload.get("version") if isinstance(payload, dict) else None

    async def check_backend(self, url: str) -> tuple[Status, Optional[float], Optional[str], Optional[str]]:
        """Check backend API health.

        Returns:
            ``(status, latency_ms, message, version)``. ``version`` comes from
            the health response body when it provides one.
        """
        try:
            response, latency = await self._timed_get(f"{url}/health")
        except httpx.TimeoutException:
            return Status.DOWN, None, "Timeout", None
        except Exception as exc:
            return Status.DOWN, None, str(exc)[:100], None

        if response.status_code != 200:
            return Status.DEGRADED, latency, f"HTTP {response.status_code}", None
        return Status.OPERATIONAL, latency, None, self._extract_version(response)

    async def check_database(self, backend_url: str) -> tuple[Status, Optional[float], Optional[str]]:
        """Check database through backend.

        The database is checked indirectly: if the backend answers its health
        endpoint with 200 the database is assumed to be up; anything else is
        reported as DOWN. A dedicated /health/db endpoint on the backend
        could replace this later.
        """
        try:
            response, latency = await self._timed_get(f"{backend_url}/health")
        except Exception:
            return Status.DOWN, None, "Cannot reach backend"

        if response.status_code == 200:
            return Status.OPERATIONAL, latency, None
        return Status.DOWN, latency, "Backend reports unhealthy"

    async def check_frontend(self, url: str) -> tuple[Status, Optional[float], Optional[str]]:
        """Check frontend availability by requesting its base URL."""
        return await self._probe(url)

    async def check_bot(self, url: str) -> tuple[Status, Optional[float], Optional[str]]:
        """Check Telegram bot health through its ``/health`` endpoint."""
        return await self._probe(f"{url}/health")

    def _record(self, name: str, result, now: datetime) -> None:
        """Store one check outcome on the named service and update its counters."""
        svc = self.services[name]

        if isinstance(result, BaseException):
            # A check that raised instead of returning is recorded as DOWN
            # rather than silently leaving the previous state in place.
            detail = str(result)[:100] or type(result).__name__
            result = (Status.DOWN, None, detail, None)

        status, latency, message, *extra = result
        # Only the backend check reports a version (fourth tuple element).
        if extra:
            svc.version = extra[0]

        # An incident starts when a service leaves the healthy state; while it
        # stays degraded or down the original start time is kept.
        incident_ongoing = svc.status in (Status.DEGRADED, Status.DOWN)

        svc.status = status
        svc.latency_ms = latency
        svc.message = message
        svc.last_check = now
        svc.update_uptime(status == Status.OPERATIONAL)
        if status != Status.OPERATIONAL and not incident_ongoing:
            svc.last_incident = now

    async def check_all_services(self, backend_url: str, frontend_url: str, bot_url: str):
        """Check all services concurrently and store the results.

        Args:
            backend_url: Base URL of the backend API (also used for the
                indirect database check).
            frontend_url: URL of the frontend.
            bot_url: Base URL of the bot's health server.
        """
        now = datetime.now()
        checks = {
            "backend": self.check_backend(backend_url),
            "database": self.check_database(backend_url),
            "frontend": self.check_frontend(frontend_url),
            "bot": self.check_bot(bot_url),
        }
        # return_exceptions=True keeps one failing check from discarding the
        # results of the others.
        results = await asyncio.gather(*checks.values(), return_exceptions=True)
        for name, result in zip(checks, results):
            self._record(name, result, now)
        self.last_check = now

    def get_all_statuses(self) -> dict[str, ServiceStatus]:
        """Return the live mapping of service key to ``ServiceStatus``."""
        return self.services

    def get_overall_status(self) -> Status:
        """Get overall system status based on all services.

        OPERATIONAL only if every service is operational; otherwise DOWN if
        any service is down, else DEGRADED if any is degraded, else UNKNOWN.
        """
        statuses = [svc.status for svc in self.services.values()]
        if all(s == Status.OPERATIONAL for s in statuses):
            return Status.OPERATIONAL
        for candidate in (Status.DOWN, Status.DEGRADED):
            if candidate in statuses:
                return candidate
        return Status.UNKNOWN

View File

@@ -0,0 +1,5 @@
fastapi==0.109.0
uvicorn==0.27.0
httpx==0.26.0
jinja2==3.1.3
python-dotenv==1.0.0

View File

@@ -0,0 +1,386 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>System Status</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
background: linear-gradient(135deg, #0f0f23 0%, #1a1a2e 50%, #16213e 100%);
min-height: 100vh;
color: #e0e0e0;
}
.container {
max-width: 900px;
margin: 0 auto;
padding: 40px 20px;
}
header {
text-align: center;
margin-bottom: 40px;
}
h1 {
font-size: 2.5rem;
font-weight: 700;
margin-bottom: 10px;
background: linear-gradient(135deg, #00d4ff, #a855f7);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
}
.overall-status {
display: inline-flex;
align-items: center;
gap: 10px;
padding: 12px 24px;
border-radius: 50px;
font-size: 1.1rem;
font-weight: 600;
margin-bottom: 10px;
}
.overall-status.operational {
background: rgba(34, 197, 94, 0.15);
border: 1px solid rgba(34, 197, 94, 0.3);
color: #22c55e;
box-shadow: 0 0 20px rgba(34, 197, 94, 0.2);
}
.overall-status.degraded {
background: rgba(250, 204, 21, 0.15);
border: 1px solid rgba(250, 204, 21, 0.3);
color: #facc15;
box-shadow: 0 0 20px rgba(250, 204, 21, 0.2);
}
.overall-status.down {
background: rgba(239, 68, 68, 0.15);
border: 1px solid rgba(239, 68, 68, 0.3);
color: #ef4444;
box-shadow: 0 0 20px rgba(239, 68, 68, 0.2);
}
.overall-status.unknown {
background: rgba(148, 163, 184, 0.15);
border: 1px solid rgba(148, 163, 184, 0.3);
color: #94a3b8;
}
.status-dot {
width: 12px;
height: 12px;
border-radius: 50%;
animation: pulse 2s infinite;
}
.status-dot.operational { background: #22c55e; }
.status-dot.degraded { background: #facc15; }
.status-dot.down { background: #ef4444; }
.status-dot.unknown { background: #94a3b8; }
@keyframes pulse {
0%, 100% { opacity: 1; transform: scale(1); }
50% { opacity: 0.7; transform: scale(1.1); }
}
.last-update {
color: #64748b;
font-size: 0.9rem;
}
.services-grid {
display: grid;
gap: 16px;
}
.service-card {
background: rgba(30, 41, 59, 0.5);
border: 1px solid rgba(100, 116, 139, 0.2);
border-radius: 16px;
padding: 24px;
backdrop-filter: blur(10px);
transition: all 0.3s ease;
}
.service-card:hover {
border-color: rgba(0, 212, 255, 0.3);
box-shadow: 0 0 30px rgba(0, 212, 255, 0.1);
}
.service-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 16px;
}
.service-name {
font-size: 1.25rem;
font-weight: 600;
color: #f1f5f9;
}
.service-status {
display: flex;
align-items: center;
gap: 8px;
padding: 6px 14px;
border-radius: 20px;
font-size: 0.85rem;
font-weight: 500;
}
.service-status.operational {
background: rgba(34, 197, 94, 0.15);
color: #22c55e;
}
.service-status.degraded {
background: rgba(250, 204, 21, 0.15);
color: #facc15;
}
.service-status.down {
background: rgba(239, 68, 68, 0.15);
color: #ef4444;
}
.service-status.unknown {
background: rgba(148, 163, 184, 0.15);
color: #94a3b8;
}
.service-status .dot {
width: 8px;
height: 8px;
border-radius: 50%;
}
.service-status.operational .dot { background: #22c55e; }
.service-status.degraded .dot { background: #facc15; }
.service-status.down .dot { background: #ef4444; }
.service-status.unknown .dot { background: #94a3b8; }
.service-metrics {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
gap: 12px;
}
.metric {
background: rgba(15, 23, 42, 0.5);
padding: 12px;
border-radius: 10px;
}
.metric-label {
font-size: 0.75rem;
color: #64748b;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 4px;
}
.metric-value {
font-size: 1.1rem;
font-weight: 600;
color: #e2e8f0;
}
.metric-value.good { color: #22c55e; }
.metric-value.warning { color: #facc15; }
.metric-value.bad { color: #ef4444; }
.service-message {
margin-top: 12px;
padding: 10px 14px;
background: rgba(239, 68, 68, 0.1);
border-left: 3px solid #ef4444;
border-radius: 0 8px 8px 0;
font-size: 0.9rem;
color: #fca5a5;
}
.refresh-btn {
display: inline-flex;
align-items: center;
gap: 8px;
padding: 12px 24px;
background: linear-gradient(135deg, rgba(0, 212, 255, 0.2), rgba(168, 85, 247, 0.2));
border: 1px solid rgba(0, 212, 255, 0.3);
border-radius: 10px;
color: #00d4ff;
font-size: 1rem;
font-weight: 500;
cursor: pointer;
transition: all 0.3s ease;
margin-top: 30px;
}
.refresh-btn:hover {
background: linear-gradient(135deg, rgba(0, 212, 255, 0.3), rgba(168, 85, 247, 0.3));
box-shadow: 0 0 20px rgba(0, 212, 255, 0.3);
transform: translateY(-2px);
}
.refresh-btn:active {
transform: translateY(0);
}
.refresh-btn.loading svg {
animation: spin 1s linear infinite;
}
@keyframes spin {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
footer {
text-align: center;
margin-top: 50px;
padding-top: 30px;
border-top: 1px solid rgba(100, 116, 139, 0.2);
color: #64748b;
font-size: 0.85rem;
}
footer a {
color: #00d4ff;
text-decoration: none;
}
footer a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>System Status</h1>
<div class="overall-status {{ overall_status.value }}">
<span class="status-dot {{ overall_status.value }}"></span>
{% if overall_status.value == 'operational' %}
All Systems Operational
{% elif overall_status.value == 'degraded' %}
Partial System Outage
{% elif overall_status.value == 'down' %}
Major System Outage
{% else %}
Status Unknown
{% endif %}
</div>
<p class="last-update">
{% if last_check %}
Last updated: {{ last_check.strftime('%d.%m.%Y %H:%M:%S') }}
{% else %}
Checking services...
{% endif %}
&bull; Auto-refresh every {{ check_interval }}s
</p>
</header>
<div class="services-grid">
{% for name, service in services.items() %}
<div class="service-card">
<div class="service-header">
<span class="service-name">{{ service.display_name }}</span>
<span class="service-status {{ service.status.value }}">
<span class="dot"></span>
{% if service.status.value == 'operational' %}
Operational
{% elif service.status.value == 'degraded' %}
Degraded
{% elif service.status.value == 'down' %}
Down
{% else %}
Unknown
{% endif %}
</span>
</div>
<div class="service-metrics">
<div class="metric">
    <div class="metric-label">Latency</div>
    {# Test against none instead of truthiness so a 0 ms reading is still shown;
       below 200 ms is "good", below 500 ms "warning", anything slower "bad". #}
    <div class="metric-value {% if service.latency_ms is none %}{% elif service.latency_ms < 200 %}good{% elif service.latency_ms < 500 %}warning{% else %}bad{% endif %}">
        {% if service.latency_ms is not none %}
        {{ "%.0f"|format(service.latency_ms) }} ms
        {% else %}
        {# No measurement available: show a placeholder instead of an empty cell #}
        &mdash;
        {% endif %}
    </div>
</div>
<div class="metric">
<div class="metric-label">Uptime</div>
<div class="metric-value {% if service.uptime_percent >= 99 %}good{% elif service.uptime_percent >= 95 %}warning{% else %}bad{% endif %}">
{{ "%.1f"|format(service.uptime_percent) }}%
</div>
</div>
{% if service.version %}
<div class="metric">
<div class="metric-label">Version</div>
<div class="metric-value">{{ service.version }}</div>
</div>
{% endif %}
{% if service.last_incident %}
<div class="metric">
<div class="metric-label">Last Incident</div>
<div class="metric-value warning">{{ service.last_incident.strftime('%d.%m %H:%M') }}</div>
</div>
{% endif %}
</div>
{% if service.message %}
<div class="service-message">{{ service.message }}</div>
{% endif %}
</div>
{% endfor %}
</div>
<center>
<button class="refresh-btn" onclick="refreshStatus(this)">
<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M23 4v6h-6M1 20v-6h6"/>
<path d="M3.51 9a9 9 0 0114.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0020.49 15"/>
</svg>
Refresh
</button>
</center>
<footer>
<p>Game Marathon Status Monitor</p>
</footer>
</div>
<script>
/**
 * Ask the server to re-check all services, then reload the page.
 *
 * The button is disabled and shows its spinner while the request runs. If
 * the request fails (network error or a non-2xx response), the error is
 * logged and the button is re-enabled so the user can retry.
 *
 * @param {HTMLButtonElement} btn - The refresh button that was clicked.
 */
async function refreshStatus(btn) {
    btn.classList.add('loading');
    btn.disabled = true;
    try {
        const response = await fetch('/api/refresh', { method: 'POST' });
        // fetch() only rejects on network failure; an HTTP error status still
        // resolves, so it has to be checked explicitly.
        if (!response.ok) {
            throw new Error('Refresh request failed with HTTP ' + response.status);
        }
        window.location.reload();
    } catch (e) {
        console.error('Refresh failed:', e);
        btn.classList.remove('loading');
        btn.disabled = false;
    }
}
// Auto-refresh page: reload once after one check interval has elapsed.
// check_interval is rendered by the server in seconds, hence the * 1000
// to get the milliseconds setTimeout expects.
setTimeout(() => {
window.location.reload();
}, {{ check_interval }} * 1000);
</script>
</body>
</html>