Files
game-marathon/status-service/database.py

262 lines
7.2 KiB
Python
Raw Normal View History

"""SQLite database for storing metrics history."""
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional
import json
DB_PATH = Path("/app/data/metrics.db")
def get_connection() -> sqlite3.Connection:
"""Get database connection."""
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
return conn
def init_db():
"""Initialize database tables."""
conn = get_connection()
cursor = conn.cursor()
# Metrics history table
cursor.execute("""
CREATE TABLE IF NOT EXISTS metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
service_name TEXT NOT NULL,
status TEXT NOT NULL,
latency_ms REAL,
message TEXT,
checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Incidents table
cursor.execute("""
CREATE TABLE IF NOT EXISTS incidents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
service_name TEXT NOT NULL,
status TEXT NOT NULL,
message TEXT,
started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
resolved_at TIMESTAMP,
notified BOOLEAN DEFAULT FALSE
)
""")
# SSL certificates table
cursor.execute("""
CREATE TABLE IF NOT EXISTS ssl_certificates (
id INTEGER PRIMARY KEY AUTOINCREMENT,
domain TEXT NOT NULL UNIQUE,
issuer TEXT,
expires_at TIMESTAMP,
days_until_expiry INTEGER,
checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Create indexes
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_metrics_service_time
ON metrics(service_name, checked_at DESC)
""")
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_incidents_service
ON incidents(service_name, started_at DESC)
""")
conn.commit()
conn.close()
def save_metric(service_name: str, status: str, latency_ms: Optional[float], message: Optional[str]):
"""Save a metric record."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO metrics (service_name, status, latency_ms, message) VALUES (?, ?, ?, ?)",
(service_name, status, latency_ms, message)
)
conn.commit()
conn.close()
def get_latency_history(service_name: str, hours: int = 24) -> list[dict]:
"""Get latency history for a service."""
conn = get_connection()
cursor = conn.cursor()
since = datetime.now() - timedelta(hours=hours)
cursor.execute("""
SELECT latency_ms, status, checked_at
FROM metrics
WHERE service_name = ? AND checked_at > ? AND latency_ms IS NOT NULL
ORDER BY checked_at ASC
""", (service_name, since.isoformat()))
rows = cursor.fetchall()
conn.close()
return [
{
"latency_ms": row["latency_ms"],
"status": row["status"],
"checked_at": row["checked_at"]
}
for row in rows
]
def get_uptime_stats(service_name: str, hours: int = 24) -> dict:
"""Calculate uptime statistics for a service."""
conn = get_connection()
cursor = conn.cursor()
since = datetime.now() - timedelta(hours=hours)
cursor.execute("""
SELECT COUNT(*) as total,
SUM(CASE WHEN status = 'operational' THEN 1 ELSE 0 END) as successful
FROM metrics
WHERE service_name = ? AND checked_at > ?
""", (service_name, since.isoformat()))
row = cursor.fetchone()
conn.close()
total = row["total"] or 0
successful = row["successful"] or 0
return {
"total_checks": total,
"successful_checks": successful,
"uptime_percent": (successful / total * 100) if total > 0 else 100.0
}
def get_avg_latency(service_name: str, hours: int = 24) -> Optional[float]:
"""Get average latency for a service."""
conn = get_connection()
cursor = conn.cursor()
since = datetime.now() - timedelta(hours=hours)
cursor.execute("""
SELECT AVG(latency_ms) as avg_latency
FROM metrics
WHERE service_name = ? AND checked_at > ? AND latency_ms IS NOT NULL
""", (service_name, since.isoformat()))
row = cursor.fetchone()
conn.close()
return row["avg_latency"]
def create_incident(service_name: str, status: str, message: Optional[str]) -> int:
"""Create a new incident."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO incidents (service_name, status, message) VALUES (?, ?, ?)",
(service_name, status, message)
)
incident_id = cursor.lastrowid
conn.commit()
conn.close()
return incident_id
def resolve_incident(service_name: str):
"""Resolve open incidents for a service."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("""
UPDATE incidents
SET resolved_at = CURRENT_TIMESTAMP
WHERE service_name = ? AND resolved_at IS NULL
""", (service_name,))
conn.commit()
conn.close()
def get_open_incident(service_name: str) -> Optional[dict]:
"""Get open incident for a service."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM incidents
WHERE service_name = ? AND resolved_at IS NULL
ORDER BY started_at DESC LIMIT 1
""", (service_name,))
row = cursor.fetchone()
conn.close()
if row:
return dict(row)
return None
def mark_incident_notified(incident_id: int):
"""Mark incident as notified."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("UPDATE incidents SET notified = TRUE WHERE id = ?", (incident_id,))
conn.commit()
conn.close()
def get_recent_incidents(limit: int = 10) -> list[dict]:
"""Get recent incidents."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM incidents
ORDER BY started_at DESC
LIMIT ?
""", (limit,))
rows = cursor.fetchall()
conn.close()
return [dict(row) for row in rows]
def save_ssl_info(domain: str, issuer: str, expires_at: datetime, days_until_expiry: int):
"""Save SSL certificate info."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("""
INSERT OR REPLACE INTO ssl_certificates
(domain, issuer, expires_at, days_until_expiry, checked_at)
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
""", (domain, issuer, expires_at.isoformat(), days_until_expiry))
conn.commit()
conn.close()
def get_ssl_info(domain: str) -> Optional[dict]:
"""Get SSL certificate info."""
conn = get_connection()
cursor = conn.cursor()
cursor.execute("SELECT * FROM ssl_certificates WHERE domain = ?", (domain,))
row = cursor.fetchone()
conn.close()
if row:
return dict(row)
return None
def cleanup_old_metrics(days: int = 7):
"""Delete metrics older than specified days."""
conn = get_connection()
cursor = conn.cursor()
cutoff = datetime.now() - timedelta(days=days)
cursor.execute("DELETE FROM metrics WHERE checked_at < ?", (cutoff.isoformat(),))
deleted = cursor.rowcount
conn.commit()
conn.close()
return deleted