Redesign health service + create backup service
This commit is contained in:
261
status-service/database.py
Normal file
261
status-service/database.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""SQLite database for storing metrics history."""
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
# Location of the SQLite database file opened by get_connection(), which
# creates the parent directory on demand.
DB_PATH = Path("/app/data/metrics.db")
|
||||
|
||||
|
||||
def get_connection() -> sqlite3.Connection:
    """Open a connection to the metrics database.

    The directory that holds ``DB_PATH`` is created first if it is missing.
    The returned connection uses ``sqlite3.Row`` as its row factory, so
    callers can read columns by name.
    """
    storage_dir = DB_PATH.parent
    storage_dir.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the database tables and indexes if they do not exist yet.

    Creates the ``metrics``, ``incidents`` and ``ssl_certificates`` tables
    plus two lookup indexes.  Every statement uses ``IF NOT EXISTS``, so
    running this against an already initialized database changes nothing.

    The connection is closed even when a statement fails.
    """
    schema_statements = (
        # Metrics history table
        """
        CREATE TABLE IF NOT EXISTS metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            service_name TEXT NOT NULL,
            status TEXT NOT NULL,
            latency_ms REAL,
            message TEXT,
            checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # Incidents table
        """
        CREATE TABLE IF NOT EXISTS incidents (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            service_name TEXT NOT NULL,
            status TEXT NOT NULL,
            message TEXT,
            started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            resolved_at TIMESTAMP,
            notified BOOLEAN DEFAULT FALSE
        )
        """,
        # SSL certificates table
        """
        CREATE TABLE IF NOT EXISTS ssl_certificates (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            domain TEXT NOT NULL UNIQUE,
            issuer TEXT,
            expires_at TIMESTAMP,
            days_until_expiry INTEGER,
            checked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # Indexes
        """
        CREATE INDEX IF NOT EXISTS idx_metrics_service_time
        ON metrics(service_name, checked_at DESC)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_incidents_service
        ON incidents(service_name, started_at DESC)
        """,
    )

    conn = get_connection()
    try:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
        conn.commit()
    finally:
        # Release the connection even if a statement raised.
        conn.close()
|
||||
|
||||
|
||||
def save_metric(service_name: str, status: str, latency_ms: Optional[float], message: Optional[str]) -> None:
    """Insert one health-check result into the ``metrics`` table.

    Args:
        service_name: Name of the checked service.
        status: Status string recorded for the check.
        latency_ms: Measured latency in milliseconds, or ``None``.
        message: Optional free-form detail, or ``None``.

    ``checked_at`` is not supplied here, so the column's
    ``CURRENT_TIMESTAMP`` default applies.  The connection is closed even
    when the insert fails.
    """
    conn = get_connection()
    try:
        conn.cursor().execute(
            "INSERT INTO metrics (service_name, status, latency_ms, message) VALUES (?, ?, ?, ?)",
            (service_name, status, latency_ms, message),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_latency_history(service_name: str, hours: int = 24) -> list[dict]:
    """Return the latency samples recorded for a service in the last ``hours``.

    Args:
        service_name: Service whose samples are wanted.
        hours: Size of the look-back window in hours.

    Returns:
        A list of dicts with keys ``latency_ms``, ``status`` and
        ``checked_at``, oldest first.  Rows without a latency are skipped.

    The cutoff is computed by SQLite (``datetime('now', '-N hours')``) so it
    has the same UTC ``YYYY-MM-DD HH:MM:SS`` text form as the
    ``CURRENT_TIMESTAMP`` default that fills ``checked_at``.  A Python
    ``isoformat()`` string (local time, ``T`` separator) does not compare
    correctly against those values as text.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT latency_ms, status, checked_at
            FROM metrics
            WHERE service_name = ?
              AND checked_at > datetime('now', ?)
              AND latency_ms IS NOT NULL
            ORDER BY checked_at ASC
            """,
            (service_name, f"-{hours} hours"),
        )
        rows = cursor.fetchall()
    finally:
        # Release the connection even if the query raised.
        conn.close()

    return [
        {
            "latency_ms": row["latency_ms"],
            "status": row["status"],
            "checked_at": row["checked_at"],
        }
        for row in rows
    ]
|
||||
|
||||
|
||||
def get_uptime_stats(service_name: str, hours: int = 24) -> dict:
    """Compute uptime statistics for a service over the last ``hours``.

    Args:
        service_name: Service to summarize.
        hours: Size of the look-back window in hours.

    Returns:
        A dict with ``total_checks``, ``successful_checks`` (rows whose
        status is ``'operational'``) and ``uptime_percent``.  When there are
        no checks in the window, ``uptime_percent`` is ``100.0``.

    The cutoff is computed by SQLite so it has the same UTC
    ``YYYY-MM-DD HH:MM:SS`` text form as the ``CURRENT_TIMESTAMP`` default
    that fills ``checked_at``.  A Python ``isoformat()`` string does not
    compare correctly against those values as text.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT COUNT(*) as total,
                   SUM(CASE WHEN status = 'operational' THEN 1 ELSE 0 END) as successful
            FROM metrics
            WHERE service_name = ? AND checked_at > datetime('now', ?)
            """,
            (service_name, f"-{hours} hours"),
        )
        row = cursor.fetchone()
    finally:
        # Release the connection even if the query raised.
        conn.close()

    # SUM() over zero rows yields NULL, hence the fallback to 0.
    total = row["total"] or 0
    successful = row["successful"] or 0

    return {
        "total_checks": total,
        "successful_checks": successful,
        "uptime_percent": (successful / total * 100) if total > 0 else 100.0,
    }
|
||||
|
||||
|
||||
def get_avg_latency(service_name: str, hours: int = 24) -> Optional[float]:
    """Return the mean latency of a service over the last ``hours``.

    Args:
        service_name: Service to average.
        hours: Size of the look-back window in hours.

    Returns:
        The average of the non-NULL ``latency_ms`` values in the window, or
        ``None`` when there are none (``AVG`` over zero rows is NULL).

    The cutoff is computed by SQLite so it has the same UTC
    ``YYYY-MM-DD HH:MM:SS`` text form as the ``CURRENT_TIMESTAMP`` default
    that fills ``checked_at``.  A Python ``isoformat()`` string does not
    compare correctly against those values as text.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT AVG(latency_ms) as avg_latency
            FROM metrics
            WHERE service_name = ?
              AND checked_at > datetime('now', ?)
              AND latency_ms IS NOT NULL
            """,
            (service_name, f"-{hours} hours"),
        )
        row = cursor.fetchone()
    finally:
        # Release the connection even if the query raised.
        conn.close()

    return row["avg_latency"]
|
||||
|
||||
|
||||
def create_incident(service_name: str, status: str, message: Optional[str]) -> int:
    """Insert a new incident row and return its id.

    Args:
        service_name: Service the incident belongs to.
        status: Status string recorded for the incident.
        message: Optional description, or ``None``.

    Returns:
        The ``id`` of the inserted row (the cursor's ``lastrowid``).

    ``started_at`` and ``notified`` are left to their column defaults.  The
    connection is closed even when the insert fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            "INSERT INTO incidents (service_name, status, message) VALUES (?, ?, ?)",
            (service_name, status, message),
        )
        incident_id = cursor.lastrowid
        conn.commit()
    finally:
        conn.close()
    return incident_id
|
||||
|
||||
|
||||
def resolve_incident(service_name: str) -> None:
    """Mark every open incident of a service as resolved.

    Sets ``resolved_at`` to ``CURRENT_TIMESTAMP`` on all rows for
    ``service_name`` whose ``resolved_at`` is still NULL.  The connection is
    closed even when the update fails.
    """
    conn = get_connection()
    try:
        conn.cursor().execute(
            """
            UPDATE incidents
            SET resolved_at = CURRENT_TIMESTAMP
            WHERE service_name = ? AND resolved_at IS NULL
            """,
            (service_name,),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_open_incident(service_name: str) -> Optional[dict]:
    """Return the most recently started unresolved incident of a service.

    Returns:
        The incident row as a dict keyed by column name, or ``None`` when
        the service has no incident with a NULL ``resolved_at``.

    The connection is closed even when the query fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT * FROM incidents
            WHERE service_name = ? AND resolved_at IS NULL
            ORDER BY started_at DESC LIMIT 1
            """,
            (service_name,),
        )
        row = cursor.fetchone()
    finally:
        conn.close()

    return dict(row) if row else None
|
||||
|
||||
|
||||
def mark_incident_notified(incident_id: int) -> None:
    """Set the ``notified`` flag on the incident with the given id.

    The connection is closed even when the update fails.
    """
    conn = get_connection()
    try:
        conn.cursor().execute(
            "UPDATE incidents SET notified = TRUE WHERE id = ?", (incident_id,)
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_recent_incidents(limit: int = 10) -> list[dict]:
    """Return up to ``limit`` incidents, newest ``started_at`` first.

    Each incident is returned as a dict keyed by column name.  The
    connection is closed even when the query fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
            SELECT * FROM incidents
            ORDER BY started_at DESC
            LIMIT ?
            """,
            (limit,),
        )
        rows = cursor.fetchall()
    finally:
        conn.close()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def save_ssl_info(domain: str, issuer: str, expires_at: datetime, days_until_expiry: int) -> None:
    """Store the latest certificate details for a domain.

    Args:
        domain: Domain the certificate belongs to.
        issuer: Certificate issuer.
        expires_at: Expiry time; stored as its ``isoformat()`` string.
        days_until_expiry: Days remaining until expiry.

    Uses ``INSERT OR REPLACE``, so an existing row for the same (unique)
    domain is replaced and ``checked_at`` is reset to ``CURRENT_TIMESTAMP``.
    The connection is closed even when the write fails.
    """
    conn = get_connection()
    try:
        conn.cursor().execute(
            """
            INSERT OR REPLACE INTO ssl_certificates
            (domain, issuer, expires_at, days_until_expiry, checked_at)
            VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
            """,
            (domain, issuer, expires_at.isoformat(), days_until_expiry),
        )
        conn.commit()
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_ssl_info(domain: str) -> Optional[dict]:
    """Return the stored certificate row for a domain.

    Returns:
        The ``ssl_certificates`` row as a dict keyed by column name, or
        ``None`` when nothing is stored for ``domain``.

    The connection is closed even when the query fails.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM ssl_certificates WHERE domain = ?", (domain,))
        row = cursor.fetchone()
    finally:
        conn.close()

    return dict(row) if row else None
|
||||
|
||||
|
||||
def cleanup_old_metrics(days: int = 7) -> int:
    """Delete metrics rows older than ``days`` days.

    Args:
        days: Age threshold in days; rows checked before that are removed.

    Returns:
        The number of rows deleted (the cursor's ``rowcount``).

    The cutoff is computed by SQLite (``datetime('now', '-N days')``) so it
    has the same UTC ``YYYY-MM-DD HH:MM:SS`` text form as the
    ``CURRENT_TIMESTAMP`` default that fills ``checked_at``.  Compared as
    text against a Python ``isoformat()`` string, every row dated on the
    cutoff's calendar day sorts below the cutoff and would be deleted, even
    if it is newer than the cutoff.
    """
    conn = get_connection()
    try:
        cursor = conn.cursor()
        cursor.execute(
            "DELETE FROM metrics WHERE checked_at < datetime('now', ?)",
            (f"-{days} days",),
        )
        deleted = cursor.rowcount
        conn.commit()
    finally:
        # Release the connection even if the delete raised.
        conn.close()
    return deleted
|
||||
Reference in New Issue
Block a user