Redesign health service + create backup service

This commit is contained in:
2025-12-18 03:35:13 +07:00
parent e43e579329
commit 57bad3b4a8
16 changed files with 1486 additions and 98 deletions

30
backup-service/Dockerfile Normal file
View File

@@ -0,0 +1,30 @@
FROM python:3.11-slim
WORKDIR /app
# Install PostgreSQL client (for pg_dump and psql) and cron
RUN apt-get update && apt-get install -y \
postgresql-client \
cron \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
# Make scripts executable
RUN chmod +x backup.py restore.py
# Setup cron
COPY crontab /etc/cron.d/backup-cron
RUN chmod 0644 /etc/cron.d/backup-cron
RUN crontab /etc/cron.d/backup-cron
# Create log file
RUN touch /var/log/cron.log
# Start cron in foreground and tail logs
CMD ["sh", "-c", "printenv > /etc/environment && cron && tail -f /var/log/cron.log"]

217
backup-service/backup.py Normal file
View File

@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
PostgreSQL Backup Service for WebApp.
- Creates pg_dump backup
- Compresses with gzip
- Uploads to S3 FirstVDS
- Rotates old backups (configurable retention)
- Sends Telegram notifications
"""
import gzip
import os
import subprocess
import sys
from datetime import datetime, timedelta, timezone
import boto3
import httpx
from botocore.config import Config as BotoConfig
from botocore.exceptions import ClientError
from config import config
def create_s3_client():
"""Initialize S3 client (same pattern as backend storage.py)."""
return boto3.client(
"s3",
endpoint_url=config.S3_ENDPOINT_URL,
aws_access_key_id=config.S3_ACCESS_KEY_ID,
aws_secret_access_key=config.S3_SECRET_ACCESS_KEY,
region_name=config.S3_REGION or "us-east-1",
config=BotoConfig(signature_version="s3v4"),
)
def send_telegram_notification(message: str, is_error: bool = False) -> None:
"""Send notification to Telegram admin."""
if not config.TELEGRAM_BOT_TOKEN or not config.TELEGRAM_ADMIN_ID:
print("Telegram not configured, skipping notification")
return
emoji = "\u274c" if is_error else "\u2705"
text = f"{emoji} *Database Backup*\n\n{message}"
url = f"https://api.telegram.org/bot{config.TELEGRAM_BOT_TOKEN}/sendMessage"
data = {
"chat_id": config.TELEGRAM_ADMIN_ID,
"text": text,
"parse_mode": "Markdown",
}
try:
response = httpx.post(url, json=data, timeout=30)
response.raise_for_status()
print("Telegram notification sent")
except Exception as e:
print(f"Failed to send Telegram notification: {e}")
def create_backup() -> tuple[str, bytes]:
"""Create pg_dump backup and compress it."""
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
filename = f"marathon_backup_{timestamp}.sql.gz"
# Build pg_dump command
env = os.environ.copy()
env["PGPASSWORD"] = config.DB_PASSWORD
cmd = [
"pg_dump",
"-h",
config.DB_HOST,
"-p",
config.DB_PORT,
"-U",
config.DB_USER,
"-d",
config.DB_NAME,
"--no-owner",
"--no-acl",
"-F",
"p", # plain SQL format
]
print(f"Running pg_dump for database {config.DB_NAME}...")
result = subprocess.run(
cmd,
env=env,
capture_output=True,
)
if result.returncode != 0:
raise Exception(f"pg_dump failed: {result.stderr.decode()}")
# Compress the output
print("Compressing backup...")
compressed = gzip.compress(result.stdout, compresslevel=9)
return filename, compressed
def upload_to_s3(s3_client, filename: str, data: bytes) -> str:
"""Upload backup to S3."""
key = f"{config.S3_BACKUP_PREFIX}{filename}"
print(f"Uploading to S3: {key}...")
s3_client.put_object(
Bucket=config.S3_BUCKET_NAME,
Key=key,
Body=data,
ContentType="application/gzip",
)
return key
def rotate_old_backups(s3_client) -> int:
"""Delete backups older than BACKUP_RETENTION_DAYS."""
cutoff_date = datetime.now(timezone.utc) - timedelta(
days=config.BACKUP_RETENTION_DAYS
)
deleted_count = 0
print(f"Rotating backups older than {config.BACKUP_RETENTION_DAYS} days...")
# List all objects with backup prefix
try:
paginator = s3_client.get_paginator("list_objects_v2")
pages = paginator.paginate(
Bucket=config.S3_BUCKET_NAME,
Prefix=config.S3_BACKUP_PREFIX,
)
for page in pages:
for obj in page.get("Contents", []):
last_modified = obj["LastModified"]
if last_modified.tzinfo is None:
last_modified = last_modified.replace(tzinfo=timezone.utc)
if last_modified < cutoff_date:
s3_client.delete_object(
Bucket=config.S3_BUCKET_NAME,
Key=obj["Key"],
)
deleted_count += 1
print(f"Deleted old backup: {obj['Key']}")
except ClientError as e:
print(f"Error during rotation: {e}")
return deleted_count
def main() -> int:
"""Main backup routine."""
start_time = datetime.now()
print(f"{'=' * 50}")
print(f"Backup started at {start_time}")
print(f"{'=' * 50}")
try:
# Validate configuration
if not config.S3_BUCKET_NAME:
raise Exception("S3_BUCKET_NAME is not configured")
if not config.S3_ACCESS_KEY_ID:
raise Exception("S3_ACCESS_KEY_ID is not configured")
if not config.S3_SECRET_ACCESS_KEY:
raise Exception("S3_SECRET_ACCESS_KEY is not configured")
if not config.S3_ENDPOINT_URL:
raise Exception("S3_ENDPOINT_URL is not configured")
# Create S3 client
s3_client = create_s3_client()
# Create backup
filename, data = create_backup()
size_mb = len(data) / (1024 * 1024)
print(f"Backup created: {filename} ({size_mb:.2f} MB)")
# Upload to S3
s3_key = upload_to_s3(s3_client, filename, data)
print(f"Uploaded to S3: {s3_key}")
# Rotate old backups
deleted_count = rotate_old_backups(s3_client)
print(f"Deleted {deleted_count} old backups")
# Calculate duration
duration = datetime.now() - start_time
# Send success notification
message = (
f"Backup completed successfully!\n\n"
f"*File:* `{filename}`\n"
f"*Size:* {size_mb:.2f} MB\n"
f"*Duration:* {duration.seconds}s\n"
f"*Deleted old:* {deleted_count} files"
)
send_telegram_notification(message, is_error=False)
print(f"{'=' * 50}")
print("Backup completed successfully!")
print(f"{'=' * 50}")
return 0
except Exception as e:
error_msg = f"Backup failed!\n\n*Error:* `{str(e)}`"
send_telegram_notification(error_msg, is_error=True)
print(f"{'=' * 50}")
print(f"Backup failed: {e}")
print(f"{'=' * 50}")
return 1
if __name__ == "__main__":
sys.exit(main())

33
backup-service/config.py Normal file
View File

@@ -0,0 +1,33 @@
"""Configuration for backup service."""
import os
from dataclasses import dataclass
@dataclass
class Config:
"""Backup service configuration from environment variables."""
# Database
DB_HOST: str = os.getenv("DB_HOST", "db")
DB_PORT: str = os.getenv("DB_PORT", "5432")
DB_NAME: str = os.getenv("DB_NAME", "marathon")
DB_USER: str = os.getenv("DB_USER", "marathon")
DB_PASSWORD: str = os.getenv("DB_PASSWORD", "123")
# S3
S3_BUCKET_NAME: str = os.getenv("S3_BUCKET_NAME", "")
S3_REGION: str = os.getenv("S3_REGION", "ru-1")
S3_ACCESS_KEY_ID: str = os.getenv("S3_ACCESS_KEY_ID", "")
S3_SECRET_ACCESS_KEY: str = os.getenv("S3_SECRET_ACCESS_KEY", "")
S3_ENDPOINT_URL: str = os.getenv("S3_ENDPOINT_URL", "")
S3_BACKUP_PREFIX: str = os.getenv("S3_BACKUP_PREFIX", "backups/")
# Telegram
TELEGRAM_BOT_TOKEN: str = os.getenv("TELEGRAM_BOT_TOKEN", "")
TELEGRAM_ADMIN_ID: str = os.getenv("TELEGRAM_ADMIN_ID", "947392854")
# Backup settings
BACKUP_RETENTION_DAYS: int = int(os.getenv("BACKUP_RETENTION_DAYS", "14"))
config = Config()

4
backup-service/crontab Normal file
View File

@@ -0,0 +1,4 @@
# Backup cron job
# Run backup daily at 3:00 AM UTC
0 3 * * * /usr/local/bin/python /app/backup.py >> /var/log/cron.log 2>&1
# Empty line required at end of crontab

View File

@@ -0,0 +1,2 @@
boto3==1.34.0
httpx==0.26.0

158
backup-service/restore.py Normal file
View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""
Restore PostgreSQL database from S3 backup.
Usage:
python restore.py - List available backups
python restore.py <filename> - Restore from specific backup
"""
import gzip
import os
import subprocess
import sys
import boto3
from botocore.config import Config as BotoConfig
from botocore.exceptions import ClientError
from config import config
def create_s3_client():
"""Initialize S3 client."""
return boto3.client(
"s3",
endpoint_url=config.S3_ENDPOINT_URL,
aws_access_key_id=config.S3_ACCESS_KEY_ID,
aws_secret_access_key=config.S3_SECRET_ACCESS_KEY,
region_name=config.S3_REGION or "us-east-1",
config=BotoConfig(signature_version="s3v4"),
)
def list_backups(s3_client) -> list[tuple[str, float, str]]:
"""List all available backups."""
print("Available backups:\n")
try:
paginator = s3_client.get_paginator("list_objects_v2")
pages = paginator.paginate(
Bucket=config.S3_BUCKET_NAME,
Prefix=config.S3_BACKUP_PREFIX,
)
backups = []
for page in pages:
for obj in page.get("Contents", []):
filename = obj["Key"].replace(config.S3_BACKUP_PREFIX, "")
size_mb = obj["Size"] / (1024 * 1024)
modified = obj["LastModified"].strftime("%Y-%m-%d %H:%M:%S")
backups.append((filename, size_mb, modified))
# Sort by date descending (newest first)
backups.sort(key=lambda x: x[2], reverse=True)
for filename, size_mb, modified in backups:
print(f" {filename} ({size_mb:.2f} MB) - {modified}")
return backups
except ClientError as e:
print(f"Error listing backups: {e}")
return []
def restore_backup(s3_client, filename: str) -> None:
"""Download and restore backup."""
key = f"{config.S3_BACKUP_PREFIX}{filename}"
print(f"Downloading {filename} from S3...")
try:
response = s3_client.get_object(
Bucket=config.S3_BUCKET_NAME,
Key=key,
)
compressed_data = response["Body"].read()
except ClientError as e:
raise Exception(f"Failed to download backup: {e}")
print("Decompressing...")
sql_data = gzip.decompress(compressed_data)
print(f"Restoring to database {config.DB_NAME}...")
# Build psql command
env = os.environ.copy()
env["PGPASSWORD"] = config.DB_PASSWORD
cmd = [
"psql",
"-h",
config.DB_HOST,
"-p",
config.DB_PORT,
"-U",
config.DB_USER,
"-d",
config.DB_NAME,
]
result = subprocess.run(
cmd,
env=env,
input=sql_data,
capture_output=True,
)
if result.returncode != 0:
stderr = result.stderr.decode()
# psql may return warnings that aren't fatal errors
if "ERROR" in stderr:
raise Exception(f"psql restore failed: {stderr}")
else:
print(f"Warnings: {stderr}")
print("Restore completed successfully!")
def main() -> int:
"""Main restore routine."""
# Validate configuration
if not config.S3_BUCKET_NAME:
print("Error: S3_BUCKET_NAME is not configured")
return 1
s3_client = create_s3_client()
if len(sys.argv) < 2:
# List available backups
backups = list_backups(s3_client)
if backups:
print(f"\nTo restore, run: python restore.py <filename>")
else:
print("No backups found.")
return 0
filename = sys.argv[1]
# Confirm restore
print(f"WARNING: This will restore database from {filename}")
print("This may overwrite existing data!")
print()
confirm = input("Type 'yes' to continue: ")
if confirm.lower() != "yes":
print("Restore cancelled.")
return 0
try:
restore_backup(s3_client, filename)
return 0
except Exception as e:
print(f"Restore failed: {e}")
return 1
if __name__ == "__main__":
sys.exit(main())