# Source code for scitex_resource._log_processor_usages

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Time-stamp: "2024-11-04 16:28:53 (ywatanabe)"
# File: ./scitex_repo/src/scitex/resource/_log_processor_usages.py

"""
Functionality:
    * Monitors and logs system resource utilization over time
Input:
    * Path for saving logs
    * Monitoring duration and interval
Output:
    * CSV file containing time-series resource usage data
Prerequisites:
    * scitex package with processor usage monitoring capabilities
"""

"""Imports"""
import math
import os
import time
from multiprocessing import Process
from typing import Union

import pandas as pd

from ._compat import printc
from ._specs import get_processor_usages


# Vendored minimal load/save: we only need CSV here. Falls back to scitex.io
# at call time for any non-CSV path so downstream behavior is unchanged when
# the umbrella package is available.
def load(path):
    """Load a resource log from ``path``.

    CSV files are read directly with pandas. Any other path is delegated to
    ``scitex_io._load.load``, which is imported lazily so that CSV-only use
    does not require that package.

    Parameters
    ----------
    path : str or os.PathLike
        File to load. A path whose string form ends with ``.csv`` is read
        with ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        For CSV paths. For other paths, whatever the delegated loader
        returns.

    Raises
    ------
    ImportError
        If ``path`` is not a CSV file and ``scitex_io`` cannot be imported.
    """
    if str(path).endswith(".csv"):
        return pd.read_csv(path)

    # Guard only the import itself: an ImportError raised *inside* the
    # delegated loader (e.g. a missing optional backend) must surface as-is
    # instead of being rewritten as "scitex is not installed".
    try:  # pragma: no cover
        from scitex_io._load import load as _load
    except ImportError as err:  # pragma: no cover
        raise ImportError(
            "Loading non-CSV resource logs requires scitex (umbrella). "
            "Install with: pip install scitex"
        ) from err

    return _load(path)  # pragma: no cover


def save(obj, path, **kwargs):
    """Save ``obj`` to ``path``, creating the parent directory if needed.

    A ``pandas.DataFrame`` written to a ``.csv`` path is saved with
    ``DataFrame.to_csv``. Anything else is delegated to
    ``scitex_io._save.save``, which is imported lazily.

    Parameters
    ----------
    obj : object
        Object to save.
    path : str or os.PathLike
        Destination file. Its parent directory is created when missing; a
        bare filename is written relative to the current directory.
    **kwargs
        Forwarded to ``DataFrame.to_csv`` or to the delegated saver.

    Returns
    -------
    None

    Raises
    ------
    ImportError
        If the CSV fast path does not apply and ``scitex_io`` cannot be
        imported.
    """
    # dirname is "" for a bare filename, which os.makedirs rejects.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)

    if str(path).endswith(".csv") and isinstance(obj, pd.DataFrame):
        obj.to_csv(path, **kwargs)
        return

    # Guard only the import itself so that an ImportError raised inside the
    # delegated saver is not misreported as "scitex is not installed".
    try:  # pragma: no cover
        from scitex_io._save import save as _save
    except ImportError as err:  # pragma: no cover
        raise ImportError(
            "Saving non-CSV resource logs requires scitex (umbrella). "
            "Install with: pip install scitex"
        ) from err

    _save(obj, path, **kwargs)  # pragma: no cover


# Defer scitex.sh import (scitex_sh is the standalone preferred path).
def sh(cmd, **kwargs):
    """Run a command, preferring ``scitex_sh.sh`` when it is installed.

    Parameters
    ----------
    cmd : list or str
        Command to execute. When ``scitex_sh`` is unavailable the command
        must be given in list form.
    **kwargs
        Forwarded to ``scitex_sh.sh``. They are not used by the
        ``subprocess`` fallback.

    Returns
    -------
    object
        The return value of ``scitex_sh.sh`` when available; otherwise the
        captured standard output of the command as text.

    Raises
    ------
    ValueError
        If ``scitex_sh`` is unavailable and ``cmd`` is a string.
    """
    # Guard only the import: if the try also wrapped the call, an ImportError
    # raised while running the command would silently trigger the fallback
    # and could execute the command a second time.
    try:
        from scitex_sh import sh as _sh
    except ImportError:
        _sh = None

    if _sh is not None:
        return _sh(cmd, **kwargs)

    import subprocess

    # The fallback never goes through a shell, so a string cannot be run.
    if isinstance(cmd, str):
        raise ValueError("scitex_resource: shell commands must be list-form")
    return subprocess.run(cmd, capture_output=True, text=True, check=False).stdout


"""Functions & Classes"""



# [docs]
def log_processor_usages(
    path: str = "/tmp/scitex/processor_usages.csv",
    limit_min: float = 30,
    interval_s: float = 1,
    init: bool = True,
    verbose: bool = False,
    background: bool = False,
) -> Union[None, Process]:
    """Record system resource usage to a CSV file at regular intervals.

    Parameters
    ----------
    path : str
        Destination of the log file
    limit_min : float
        How long to keep monitoring, in minutes
    interval_s : float
        Time between two samples, in seconds
    init : bool
        Whether an existing log file is cleared first
    verbose : bool
        Whether to print the log
    background : bool
        Whether logging runs in a separate process

    Returns
    -------
    Union[None, Process]
        The started Process when background=True, None otherwise
    """
    # One set of options shared by the foreground and background paths.
    options = dict(
        path=path,
        limit_min=limit_min,
        interval_s=interval_s,
        init=init,
        verbose=verbose,
    )

    # Foreground: block until logging has finished.
    if not background:
        return _log_processor_usages(**options)

    # Background: hand the logging loop to a child process and return it.
    worker = Process(target=_log_processor_usages, kwargs=options)
    worker.start()
    return worker



def _log_processor_usages(
    path: str = "/tmp/scitex/processor_usages.csv",
    limit_min: float = 30,
    interval_s: float = 1,
    init: bool = True,
    verbose: bool = False,
) -> None:
    """Logs system resource usage over time.

    Takes ``ceil(limit_min * 60 / interval_s)`` samples, sleeping
    ``interval_s`` seconds after each one.

    Parameters
    ----------
    path : str
        Path to save the log file; must end with ``.csv``
    limit_min : float
        Monitoring duration in minutes
    interval_s : float
        Sampling interval in seconds
    init : bool
        Whether to clear existing log file
    verbose : bool
        Whether to print the log

    Raises
    ------
    AssertionError
        If ``path`` does not end with ``.csv``.
    ZeroDivisionError
        If ``interval_s`` is zero.

    Example
    -------
    >>> log_processor_usages(path="usage_log.csv", limit_min=5)
    """
    # Explicit raise instead of `assert` so the check survives `python -O`;
    # AssertionError is kept so existing callers see the same exception type.
    # str() lets path-like objects through as well as plain strings.
    if not str(path).endswith(".csv"):
        raise AssertionError("Path must end with .csv")

    # True division followed by ceil: the previous `//` floored first, which
    # made the ceil a no-op, dropped the final partial interval and
    # under-counted with float intervals (60 // 0.1 == 599.0).
    # Computed before the log file is touched so that an invalid interval
    # fails without wiping an existing log.
    limit_s = limit_min * 60
    n_max = math.ceil(limit_s / interval_s)

    # Log file initialization
    _ensure_log_file(path, init)
    printc(f"Log file can be monitored with `tail -f {path}`")

    for _ in range(n_max):
        _add(path, verbose=verbose)
        time.sleep(interval_s)


# def _ensure_log_file(path: str, init: bool) -> None:
#     def _create_path(path):
#         os.makedirs(os.path.dirname(path), exist_ok=True)
#         empty_df = pd.DataFrame()
#         save(empty_df, path, verbose=False)
#         printc(f"{path} created.")

#     if not os.path.exists(path):
#         _create_path(path)

#     else:
#         if init and os.path.exists(path):
#             try:
#                 sh(f"rm -f {path}")
#                 _create_path(path)
#             except Exception as err:
#                 raise RuntimeError(f"Failed to init log file: {err}")

# def _add(path: str, verbose: bool = True) -> None:
#     past = load(path)
#     now = get_processor_usages()

#     combined = pd.concat([past, now]).round(3)
#     save(combined, path, verbose=verbose)


def _add(path: str, verbose: bool = True) -> None:
    """Appends current resource usage to CSV file.

    Parameters
    ----------
    path : str
        CSV log file to append to
    verbose : bool
        Whether to also print the appended sample
    """
    now = get_processor_usages()

    # Append mode without loading entire file.
    # newline="" follows the pandas guidance for file objects handed to
    # to_csv (avoids doubled line endings on platforms that translate "\n").
    with open(path, "a", newline="", encoding="utf-8") as f:
        # The stream is opened for appending, so a position of 0 means the
        # file is still empty and needs a header row.
        now.to_csv(f, header=f.tell() == 0, index=False)

    # Honour the flag: it was previously accepted but ignored.
    if verbose:
        print(now)


def _ensure_log_file(path: str, init: bool) -> None:
    """Creates or reinitializes log file with headers.

    Parameters
    ----------
    path : str
        CSV log file to create
    init : bool
        Whether an already existing file is removed and recreated

    Raises
    ------
    RuntimeError
        If an existing file cannot be removed or recreated.
    """

    def _create_path(path):
        # dirname is "" for a bare filename such as "usage_log.csv", and
        # os.makedirs("") raises; fall back to the current directory, as
        # `save` does.
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        # Write only headers
        headers = ["Timestamp", "CPU [%]", "RAM [GiB]", "GPU [%]", "VRAM [GiB]"]
        pd.DataFrame(columns=headers).to_csv(path, index=False)
        printc(f"{path} created.")

    if not os.path.exists(path):
        _create_path(path)
    elif init:
        try:
            os.remove(path)
            _create_path(path)
        except Exception as err:
            # Chain explicitly so the original failure stays in the traceback.
            raise RuntimeError(f"Failed to init log file: {err}") from err


# Alias: `main` refers to the same function object as `log_processor_usages`.
main = log_processor_usages

# When executed as a script, start logging with the default arguments.
if __name__ == "__main__":
    main()

# python -c "import scitex; scitex.resource.log_processor_usages(\"/tmp/processor_usages.csv\", init=True)"

# EOF