Source code for scitex_resource._specs._metrics

"""Flat, machine-readable resource metrics for monitoring/heartbeat use.

Companion to ``get_specs()`` (which is rich + human-formatted). ``get_metrics()``
returns a flat dict of integers, floats and short strings — the shape any hub,
dashboard, or heartbeat producer can ship over the wire without reshaping.

Cross-platform via ``psutil`` (Linux / macOS / Windows / WSL). Container-aware:
inside Docker / cgroups, ``psutil.virtual_memory()`` reports the cgroup limit,
not the host kernel's view, so the numbers reflect what the process can
actually use.

Schema (treat as a public contract; bump minor on rename):

    cpu_count          int     logical CPU count (psutil.cpu_count())
    cpu_model          str     human-readable CPU model name; "" if unknown
    load_avg_1m/5m/15m float   POSIX load averages; psutil emulates on Windows
    mem_total_mb       int     RAM total in MiB
    mem_used_mb        int     total - available (reclaimable cache excluded)
    mem_free_mb        int     psutil's available — what apps can grab now
    mem_used_percent   float   psutil.virtual_memory().percent
    disk_total_mb      int     home-directory partition total in MiB
    disk_used_mb       int     home-directory partition used in MiB
    disk_used_percent  float   home-directory partition percent
    gpus               list    [{"name", "vram_total_mb", "vram_used_mb"}, ...]
                               empty list when no NVIDIA GPU / nvidia-smi missing

The ``gpu=False`` flag skips the ~200 ms ``nvidia-smi`` shellout for hot paths
that don't need GPU info (e.g. 30 s heartbeats on GPU-less hosts).
"""

from __future__ import annotations

import logging
import os
import shutil
import subprocess
from typing import Any

import psutil as _psutil

log = logging.getLogger(__name__)


def get_metrics(gpu: bool = True) -> dict[str, Any]:
    """Collect a flat snapshot of host resource metrics for heartbeat payloads.

    Parameters
    ----------
    gpu : bool
        If ``True`` (the default), NVIDIA GPUs are queried through
        ``nvidia-smi``. Pass ``False`` to skip that ~200 ms subprocess call,
        e.g. on hosts without a GPU or when GPU info is cached elsewhere.

    Returns
    -------
    dict
        Flat mapping with CPU, load-average, memory, home-partition disk and
        GPU entries. The module docstring lists every key and its contract.
    """
    bytes_per_mib = 1024 * 1024

    # Probe in a fixed order: CPU, load, memory, disk, then (optionally) GPU.
    logical_cpus = _psutil.cpu_count(logical=True)
    model_name = _cpu_model()
    one_min, five_min, fifteen_min = _load_avg()
    memory = _psutil.virtual_memory()

    snapshot: dict[str, Any] = {
        # psutil may return None for the CPU count; report 0 in that case.
        "cpu_count": logical_cpus or 0,
        "cpu_model": model_name,
        "load_avg_1m": one_min,
        "load_avg_5m": five_min,
        "load_avg_15m": fifteen_min,
        "mem_total_mb": int(memory.total // bytes_per_mib),
        # "Used" is everything that is not currently available.
        "mem_used_mb": int((memory.total - memory.available) // bytes_per_mib),
        "mem_free_mb": int(memory.available // bytes_per_mib),
        "mem_used_percent": round(float(memory.percent), 1),
    }

    # Disk figures describe the partition holding the user's home directory;
    # an unreadable partition degrades to zeros instead of failing the call.
    try:
        home_usage = _psutil.disk_usage(os.path.expanduser("~"))
        disk_fields: dict[str, Any] = {
            "disk_total_mb": int(home_usage.total // bytes_per_mib),
            "disk_used_mb": int(home_usage.used // bytes_per_mib),
            "disk_used_percent": round(float(home_usage.percent), 1),
        }
    except OSError:
        disk_fields = {
            "disk_total_mb": 0,
            "disk_used_mb": 0,
            "disk_used_percent": 0.0,
        }
    snapshot.update(disk_fields)

    snapshot["gpus"] = [] if not gpu else _nvidia_gpus()
    return snapshot
def _cpu_model() -> str:
    """Return the CPU model name for this host, or ``""`` if it cannot be found.

    Linux uses the first ``model name`` entry in ``/proc/cpuinfo``; macOS asks
    ``sysctl`` for ``machdep.cpu.brand_string``. Any other platform, or a
    failed probe, falls back to ``platform.processor()``.
    """
    import platform

    os_name = platform.system()

    if os_name == "Linux":
        try:
            with open("/proc/cpuinfo") as cpuinfo:
                model_rows = (
                    row for row in cpuinfo if row.startswith("model name")
                )
                first_match = next(model_rows, None)
                if first_match is not None:
                    # Row looks like "model name\t: <model>"; keep the value.
                    return first_match.split(":", 1)[1].strip()
        except OSError:
            # Unreadable /proc: fall through to the generic fallback.
            pass
    elif os_name == "Darwin":
        sysctl_cmd = ["sysctl", "-n", "machdep.cpu.brand_string"]
        try:
            brand = subprocess.check_output(sysctl_cmd, text=True, timeout=1)
        except (OSError, subprocess.SubprocessError):
            # Missing binary, non-zero exit or timeout: use the fallback.
            pass
        else:
            return brand.strip()

    return platform.processor() or ""


def _load_avg() -> tuple[float, float, float]:
    """Return the 1, 5 and 15 minute load averages rounded to two decimals.

    Values come from ``psutil.getloadavg()``. If that call is missing or
    raises ``OSError``, all three values are reported as ``0.0``.
    """
    try:
        one_min, five_min, fifteen_min = (
            round(float(sample), 2) for sample in _psutil.getloadavg()
        )
    except (OSError, AttributeError):
        return 0.0, 0.0, 0.0
    return one_min, five_min, fifteen_min


def _nvidia_gpus() -> list[dict[str, Any]]:
    """Return one dict per NVIDIA GPU as reported by ``nvidia-smi``.

    Each dict has ``name``, ``vram_total_mb`` and ``vram_used_mb``. The result
    is an empty list when ``nvidia-smi`` is not on ``PATH`` or the query fails.
    Rows that do not parse are skipped.
    """
    smi_path = shutil.which("nvidia-smi")
    if not smi_path:
        return []

    query_cmd = [
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.used",
        "--format=csv,noheader,nounits",
    ]
    try:
        raw = subprocess.check_output(query_cmd, text=True, timeout=3)
    except (OSError, subprocess.SubprocessError):
        return []

    devices: list[dict[str, Any]] = []
    for row in raw.strip().splitlines():
        fields = [field.strip() for field in row.split(",")]
        if len(fields) >= 3:
            name, total_text, used_text = fields[:3]
            try:
                total_mb = int(total_text)
                used_mb = int(used_text)
            except ValueError:
                # Non-numeric memory column: drop this row.
                continue
            devices.append(
                {
                    "name": name,
                    "vram_total_mb": total_mb,
                    "vram_used_mb": used_mb,
                }
            )
    return devices