www/app/docker_monitor.py

148 lines
5.5 KiB
Python
Raw Normal View History

2026-03-17 16:17:53 +02:00
import docker
from typing import Dict, Optional
import logging
logger = logging.getLogger(__name__)
class DockerMonitor:
    """Monitor and manage Docker containers.

    The constructor tries to connect to the Docker daemon configured in the
    environment. If that fails the instance remains usable: ``available`` is
    ``False``, ``client`` is ``None``, and every public method returns a
    neutral "unavailable" result instead of raising.
    """

    def __init__(self):
        """Create the Docker client and verify the daemon answers a ping."""
        try:
            self.client = docker.from_env()
            self.client.ping()
            self.available = True
            logger.info("Docker client initialized successfully")
        except Exception as e:
            logger.warning("Docker not available: %s", e)
            self.client = None
            self.available = False

    def get_container_status(self, container_name: str) -> Dict:
        """Get the status of a specific container.

        Args:
            container_name: Name (or id) passed to ``client.containers.get``.

        Returns:
            On success a dict with ``online``, ``state``, ``uptime``,
            ``cpu_percent`` and ``memory_mb``. On failure a dict with
            ``online`` (False), ``state`` (``"unavailable"``, ``"not_found"``
            or ``"error"``) and ``message``. Never raises.
        """
        if not self.available:
            return {"online": False, "state": "unavailable", "message": "Docker not available"}

        try:
            container = self.client.containers.get(container_name)
            stats = container.stats(stream=False)
            return {
                "online": container.status == "running",
                "state": container.status,
                "uptime": self._format_uptime(container),
                "cpu_percent": round(self._cpu_percent(stats), 2),
                "memory_mb": round(self._memory_mb(stats), 2),
            }
        except docker.errors.NotFound:
            return {
                "online": False,
                "state": "not_found",
                "message": f"Container '{container_name}' not found",
            }
        except Exception as e:
            logger.error("Error getting container status: %s", e)
            return {"online": False, "state": "error", "message": str(e)}

    @staticmethod
    def _cpu_percent(stats: Dict) -> float:
        """Compute CPU usage in percent from one stats sample.

        Every counter is read defensively: a payload that omits a field
        (presumably what the daemon sends for a container that is not
        running -- confirm against the Engine API) yields 0.0 instead of a
        ``KeyError`` that would mask the container's real state.
        """
        cpu_stats = stats.get("cpu_stats") or {}
        precpu_stats = stats.get("precpu_stats") or {}
        cpu_usage = cpu_stats.get("cpu_usage") or {}
        precpu_usage = precpu_stats.get("cpu_usage") or {}

        cpu_delta = (cpu_usage.get("total_usage") or 0) - (
            precpu_usage.get("total_usage") or 0
        )
        system_delta = (cpu_stats.get("system_cpu_usage") or 0) - (
            precpu_stats.get("system_cpu_usage") or 0
        )
        # A non-positive delta means no usable sample (missing counters or a
        # counter reset); report idle rather than a negative or bogus value.
        if system_delta <= 0 or cpu_delta <= 0:
            return 0.0

        # Prefer ``online_cpus``; ``percpu_usage`` may be absent (e.g. on
        # cgroup v2 hosts), which would otherwise collapse the count to 1.
        num_cpus = (
            cpu_stats.get("online_cpus")
            or len(cpu_usage.get("percpu_usage") or [])
            or 1
        )
        return (cpu_delta / system_delta) * num_cpus * 100.0

    @staticmethod
    def _memory_mb(stats: Dict) -> float:
        """Return the reported memory usage in MiB (0.0 when not reported)."""
        memory_usage = (stats.get("memory_stats") or {}).get("usage") or 0
        return memory_usage / (1024 * 1024)

    def restart_container(self, container_name: str, timeout: int = 10) -> Dict:
        """Restart a specific container.

        Args:
            container_name: Name (or id) passed to ``client.containers.get``.
            timeout: Value forwarded to ``container.restart(timeout=...)``.

        Returns:
            Dict with ``success`` (bool) and ``message`` (str). Never raises.
        """
        if not self.available:
            return {"success": False, "message": "Docker not available"}

        try:
            container = self.client.containers.get(container_name)
            container.restart(timeout=timeout)
            logger.info("Container '%s' restarted", container_name)
            return {"success": True, "message": f"Container '{container_name}' restarted"}
        except docker.errors.NotFound:
            return {"success": False, "message": f"Container '{container_name}' not found"}
        except Exception as e:
            logger.error("Error restarting container '%s': %s", container_name, e)
            return {"success": False, "message": str(e)}

    def stop_container(self, container_name: str, timeout: int = 10) -> Dict:
        """Stop a specific container.

        Args:
            container_name: Name (or id) passed to ``client.containers.get``.
            timeout: Value forwarded to ``container.stop(timeout=...)``.

        Returns:
            Dict with ``success`` (bool) and ``message`` (str). Never raises.
        """
        if not self.available:
            return {"success": False, "message": "Docker not available"}

        try:
            container = self.client.containers.get(container_name)
            container.stop(timeout=timeout)
            logger.info("Container '%s' stopped", container_name)
            return {"success": True, "message": f"Container '{container_name}' stopped"}
        except docker.errors.NotFound:
            return {"success": False, "message": f"Container '{container_name}' not found"}
        except Exception as e:
            logger.error("Error stopping container '%s': %s", container_name, e)
            return {"success": False, "message": str(e)}

    def get_containers_count(self) -> int:
        """Get the total number of containers (running or not); 0 on error."""
        if not self.available:
            return 0

        try:
            # sparse=True: only the count is needed, so skip the per-container
            # inspect call the SDK would otherwise make for every entry.
            return len(self.client.containers.list(all=True, sparse=True))
        except Exception as e:
            logger.error("Error counting containers: %s", e)
            return 0

    def get_running_containers_count(self) -> int:
        """Get the number of running containers; 0 on error."""
        if not self.available:
            return 0

        try:
            # sparse=True: see get_containers_count.
            return len(self.client.containers.list(sparse=True))
        except Exception as e:
            logger.error("Error counting running containers: %s", e)
            return 0

    def _format_uptime(self, container) -> str:
        """Format the time elapsed since the container's ``StartedAt``.

        Args:
            container: Object exposing ``attrs["State"]["StartedAt"]`` as an
                ISO-8601 / RFC 3339 string.

        Returns:
            ``"<d>d <h>h"``, ``"<h>h <m>m"`` or ``"<m>m"`` depending on the
            magnitude, or ``"unknown"`` if the timestamp cannot be read.
        """
        from datetime import datetime, timedelta, timezone

        try:
            started_at = container.attrs["State"]["StartedAt"]
            timestamp = started_at.replace("Z", "+00:00")

            # Normalise the fractional seconds to exactly six digits. The
            # timestamp may carry up to nanosecond precision, which
            # ``datetime.fromisoformat`` rejects before Python 3.11.
            head, dot, tail = timestamp.partition(".")
            if dot:
                digits = tail[: len(tail) - len(tail.lstrip("0123456789"))]
                offset = tail[len(digits):]
                timestamp = f"{head}.{digits[:6].ljust(6, '0')}{offset}"

            started = datetime.fromisoformat(timestamp)
            if started.tzinfo is None:
                # No offset given: treat as UTC so the subtraction below does
                # not mix naive and aware datetimes.
                started = started.replace(tzinfo=timezone.utc)

            # Clamp so clock skew cannot produce a negative timedelta, whose
            # ``days``/``seconds`` split would render a nonsensical value.
            uptime = max(datetime.now(timezone.utc) - started, timedelta(0))

            days = uptime.days
            hours = uptime.seconds // 3600
            minutes = (uptime.seconds % 3600) // 60

            if days > 0:
                return f"{days}d {hours}h"
            if hours > 0:
                return f"{hours}h {minutes}m"
            return f"{minutes}m"
        except Exception as e:
            logger.error("Error formatting uptime: %s", e)
            return "unknown"