Source code for stormlog.tui.builders

"""Markdown and table-data builders for the Textual TUI."""

from __future__ import annotations

from textwrap import dedent
from typing import Any


def build_welcome_info() -> str:
    """Build welcome navigation guide text.

    Returns:
        The static quick-start Markdown (tab overview, keyboard shortcuts
        and first steps) with common indentation and surrounding
        whitespace removed.
    """
    guide = dedent(
        """
        # Quick Start Guide

        ## Navigate the TUI

        Click on any tab above to explore different features:

        - **PyTorch** → View PyTorch GPU stats, run profiling samples, and see profile results
        - **TensorFlow** → View TensorFlow GPU stats, run profiling samples, and see profile results
        - **Monitoring** → Start live memory tracking, set alert thresholds, export CSV/JSON data
        - **Visualizations** → Generate timeline plots (PNG/HTML) from tracking sessions
        - **Diagnostics** → Compare distributed ranks, hidden gaps, and first-cause indicators
        - **CLI & Actions** → Run CLI commands interactively and execute sample workloads

        ## Keyboard Shortcuts

        - **r** - Refresh overview tab
        - **g** - Log gpumemprof command examples
        - **t** - Log tfmemprof command examples
        - **f** - Focus log area in CLI tab
        - **q** - Quit application

        ## Getting Started

        1. **Check System Info** - Scroll down to see your platform, Python version, and GPU details
        2. **View GPU Stats** - Visit **PyTorch** or **TensorFlow** tabs to see real-time GPU memory statistics
        3. **Start Tracking** - Go to **Monitoring** tab and click "Start Live Tracking" to begin monitoring
        4. **Run Samples** - Use **CLI & Actions** tab to run sample workloads and see profiling results
        5. **Export Data** - After tracking, use "Export CSV" or "Export JSON" buttons in Monitoring tab

        ---
        """
    )
    # Drop the newline that follows the opening quotes and the trailing one
    # before the closing quotes.
    return guide.strip()
def build_system_markdown(
    *,
    system_info: dict[str, Any],
    gpu_info: dict[str, Any],
    tf_system_info: dict[str, Any],
    tf_gpu_info: dict[str, Any],
) -> str:
    """Render the system overview panel as Markdown.

    Args:
        system_info: Mapping read for ``platform``, ``python_version``,
            ``cuda_available``, ``cuda_version`` and ``cuda_device_count``.
        gpu_info: Mapping read for ``device_name``, ``total_memory``,
            ``allocated_memory`` and ``reserved_memory``. When falsy, a
            "GPU metrics are unavailable" note is emitted instead.
        tf_system_info: Mapping read for ``tensorflow_version``.
        tf_gpu_info: Mapping whose ``devices`` list, when non-empty,
            contributes a snapshot of its first entry only.

    Returns:
        The Markdown lines joined with newlines.
    """
    # Memory figures are divided by this and labelled "GB" in the output.
    gib = 1024**3
    cuda_flag = system_info.get("cuda_available", False)

    out = [
        "# System Overview",
        "",
        f"- **Platform**: {system_info.get('platform', 'Unknown')}",
        f"- **Python**: {system_info.get('python_version', 'Unknown')}",
        f"- **TensorFlow (Python)**: {tf_system_info.get('tensorflow_version', 'N/A')}",
        f"- **CUDA Available**: {cuda_flag}",
    ]

    if cuda_flag:
        out += [
            f"- **CUDA Version**: {system_info.get('cuda_version', 'Unknown')}",
            f"- **GPU Count**: {system_info.get('cuda_device_count', 0)}",
        ]

    if not gpu_info:
        out += [
            "",
            (
                "> GPU metrics are unavailable on this system. You can still run the CLI "
                "and CPU guides."
            ),
            "",
        ]
    else:
        total_gb = gpu_info.get("total_memory", 0) / gib
        allocated_gb = gpu_info.get("allocated_memory", 0) / gib
        reserved_gb = gpu_info.get("reserved_memory", 0) / gib
        out += [
            "",
            "## GPU Snapshot",
            f"- **Device Name**: {gpu_info.get('device_name', 'Unknown')}",
            f"- **Total Memory**: {total_gb:.2f} GB",
            f"- **Allocated**: {allocated_gb:.2f} GB",
            f"- **Reserved**: {reserved_gb:.2f} GB",
        ]

    tf_devices = tf_gpu_info.get("devices") if tf_gpu_info else None
    if tf_devices:
        # Only the first reported TensorFlow device is summarised.
        first = tf_devices[0]
        out += [
            "",
            "## TensorFlow GPU Snapshot",
            f"- **TF Device Name**: {first.get('name', 'Unknown')}",
            f"- **Current Memory**: {first.get('current_memory_mb', 0):.2f} MB",
            f"- **Peak Memory**: {first.get('peak_memory_mb', 0):.2f} MB",
        ]

    out += [
        "",
        "## Getting Started",
        "",
        "- `python -m examples.basic.pytorch_demo`",
        "- `python -m examples.basic.tensorflow_demo`",
        "- `python -m examples.cli.quickstart`",
        "",
        "Need more? Visit the [Example Test Guides](docs/examples/test_guides/README.md).",
    ]
    return "\n".join(out)
def build_pytorch_stats_rows(info: dict[str, Any]) -> list[dict[str, Any]]:
    """Turn a PyTorch GPU info mapping into rows for the stats table.

    Args:
        info: Mapping read for ``device_name``, ``allocated_memory``,
            ``max_memory_allocated`` and ``reserved_memory``.

    Returns:
        An empty list when ``info`` is falsy; otherwise a single-row list
        with ``device``, ``current``, ``peak`` and ``reserved`` keys. The
        three memory values are divided by ``1024**2`` (presumably bytes to
        MiB; confirm against the producer of ``info``).
    """
    if not info:
        return []

    scale = 1024**2
    allocated = info.get("allocated_memory", 0)
    # Peak falls back to the current allocation when no maximum is reported.
    peak = info.get("max_memory_allocated", allocated)

    row = {
        "device": info.get("device_name", "gpu0"),
        "current": allocated / scale,
        "peak": peak / scale,
        "reserved": info.get("reserved_memory", 0) / scale,
    }
    return [row]
def build_tensorflow_stats_rows(gpu_info: dict[str, Any]) -> list[dict[str, Any]]:
    """Turn a TensorFlow GPU info mapping into rows for the stats table.

    Args:
        gpu_info: Mapping read for a ``devices`` list and a top-level
            ``total_memory`` value. Each device entry is read for ``name``,
            ``current_memory_mb`` and ``peak_memory_mb``.

    Returns:
        One row per device with ``device``, ``current``, ``peak`` and
        ``reserved`` keys; an empty list when ``gpu_info`` is falsy or has
        no ``devices`` entry.
    """
    if not gpu_info:
        return []

    return [
        {
            "device": entry.get("name", "tf-gpu"),
            "current": entry.get("current_memory_mb", 0),
            "peak": entry.get("peak_memory_mb", 0),
            # Every row carries the same top-level total_memory value.
            "reserved": gpu_info.get("total_memory", 0),
        }
        for entry in gpu_info.get("devices", [])
    ]
def build_framework_markdown(framework: str) -> str:
    """Return the playbook Markdown for a framework tab.

    Args:
        framework: ``"pytorch"`` selects the PyTorch playbook; any other
            value selects the TensorFlow playbook.

    Returns:
        The selected playbook with common indentation and surrounding
        whitespace removed.
    """
    if framework != "pytorch":
        # Everything that is not exactly "pytorch" gets the TensorFlow text.
        tensorflow_playbook = dedent(
            """
            # TensorFlow Playbook

            1. **Basic profiling**
               ```bash
               python -m examples.basic.tensorflow_demo
               ```
            2. **CLI helpers**
               ```bash
               tfmemprof info
               tfmemprof monitor --duration 30 --interval 0.5
               tfmemprof track --output tf_results.json
               ```
            3. **Telemetry + diagnostics**
               ```bash
               python -m examples.scenarios.tf_end_to_end_scenario
               tfmemprof diagnose --duration 0 --output ./artifacts/tf-diag
               ```

            The [TensorFlow Testing Guide](docs/tensorflow_testing_guide.md) includes deeper
            recipes, including mixed precision and multi-GPU notes.
            """
        )
        return tensorflow_playbook.strip()

    pytorch_playbook = dedent(
        """
        # PyTorch Playbook

        1. **Basic profiling**
           ```bash
           python -m examples.basic.pytorch_demo
           ```
        2. **Advanced tracking (alerts, watchdog)**
           ```bash
           python -m examples.advanced.tracking_demo
           ```
        3. **Telemetry + diagnostics**
           ```bash
           python -m examples.scenarios.mps_telemetry_scenario
           python -m examples.scenarios.oom_flight_recorder_scenario --mode simulated
           gpumemprof diagnose --duration 0 --output ./artifacts/diag
           ```
        4. **CLI helpers**
           ```bash
           gpumemprof info
           gpumemprof track --duration 60 --output tracking.json
           ```

        Check the [PyTorch Testing Guide](docs/pytorch_testing_guide.md) for full workflows
        and troubleshooting steps.
        """
    )
    return pytorch_playbook.strip()
def build_cli_markdown() -> str:
    """Return the Markdown cheat sheet of sample CLI commands.

    Returns:
        A static document containing one ``bash`` code fence of
        ``gpumemprof``, ``tfmemprof``, example-module and install commands,
        with common indentation and surrounding whitespace removed.
    """
    samples = dedent(
        """
        # CLI Quick Samples

        ```bash
        gpumemprof info
        gpumemprof monitor --duration 30 --interval 0.5
        gpumemprof track --duration 60 --output tracking.json
        gpumemprof diagnose --duration 0 --output artifacts/diag
        tfmemprof info
        tfmemprof monitor --duration 30 --interval 0.5
        tfmemprof track --duration 60 --output tf_tracking.json
        tfmemprof diagnose --duration 0 --output artifacts/tf_diag
        python -m examples.scenarios.oom_flight_recorder_scenario --mode simulated
        python -m examples.cli.capability_matrix --mode smoke --target both --oom-mode simulated --skip-tui

        # Optional: fuller dashboard
        stormlog

        # Ensure pip shows progress
        pip install --progress-bar on "stormlog[tui,torch]"
        ```

        Use the buttons below to log summaries or copy commands.
        """
    )
    return samples.strip()
def build_visual_markdown() -> str:
    """Return the Markdown tips shown for the visualization features.

    Returns:
        A static bullet list covering live tracking, PNG and HTML plot
        generation, and the in-terminal ASCII chart, with common
        indentation and surrounding whitespace removed.
    """
    tips = dedent(
        """
        # Visualization Tips

        - Start live tracking to collect timeline samples, then refresh the view.
        - Use `Generate PNG Plot` to save a Matplotlib graph (writes to ./visualizations).
        - Prefer `Generate HTML Plot` for an interactive Plotly view you can open in a browser.
        - A lightweight ASCII chart appears below so you can inspect trends without leaving the terminal.
        """
    )
    return tips.strip()
def build_diagnostics_markdown() -> str:
    """Return the Markdown help text for distributed diagnostics.

    Returns:
        A static bullet list describing the load actions, the rank filter,
        rank selection and the anomaly summary, with common indentation and
        surrounding whitespace removed.
    """
    help_text = dedent(
        """
        # Distributed Diagnostics

        - **Load Live** to read telemetry directly from the active tracker session.
        - **Load Artifacts** to merge JSON/CSV exports or diagnose directories.
        - Use **Rank Filter** (`all`, `0,2,4-7`) to narrow the comparison set.
        - Select a rank in the table to pin timeline focus.
        - Review **earliest** and **most severe** first-cause indicators in the anomaly summary.
        """
    )
    return help_text.strip()