diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..330f435 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,58 @@ +name: Test + +on: + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: 'pip' + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + pip install -e . + + - name: Run linting + run: | + ruff check src/ tests/ scripts/ + + - name: Run tests + run: | + pytest tests/ -v --cov=src/servermonitoring --cov-report=term-missing --cov-report=xml + + - name: Test data generation + run: | + python scripts/build_data.py + ls -la _data/ + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.2' + bundler-cache: true + + - name: Test Jekyll build + run: | + bundle exec jekyll build --verbose + ls -la _site/ + + - name: Upload coverage reports + uses: codecov/codecov-action@v5 + if: always() + with: + files: ./coverage.xml + flags: unittests + name: codecov-umbrella + continue-on-error: true diff --git a/.github/workflows/update-dashboard.yml b/.github/workflows/update-dashboard.yml index 6e9a887..394cc55 100644 --- a/.github/workflows/update-dashboard.yml +++ b/.github/workflows/update-dashboard.yml @@ -11,29 +11,67 @@ on: - "scripts/**" - "config/**" - "requirements.txt" + - "_config.yml" + - "*.md" + - "_includes/**" + - "_layouts/**" jobs: - build: + build-and-deploy: runs-on: ubuntu-latest permissions: contents: write + pages: write + id-token: write + steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + + - name: Set up Python + uses: actions/setup-python@v5 with: python-version: "3.11" - - name: Install dependencies - run: pip install -r requirements.txt - - name: Build dashboard - run: python scripts/build_dashboard.py --download-logs - - 
name: Commit docs + cache: 'pip' + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + pip install -e . + + - name: Download logs + run: python scripts/sync_logs.py + continue-on-error: true + + - name: Generate data + run: python scripts/build_data.py + + - name: Commit generated data run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" - git add docs + git add _data if git diff --staged --quiet; then - echo "No changes to commit." - exit 0 + echo "No data changes to commit." + else + git commit -m "Update data: $(date -u +%Y-%m-%d)" + git push fi - git commit -m "Update dashboard" - git push + + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.2' + bundler-cache: true + + - name: Build Jekyll site + run: bundle exec jekyll build + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: _site + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index a479242..f43e21b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,17 @@ data/logs/ __pycache__/ *.pyc .DS_Store +.venv/ +venv/ +_site/ +.sass-cache/ +.jekyll-cache/ +.jekyll-metadata +Gemfile.lock +vendor/ +.coverage +htmlcov/ +.pytest_cache/ +*.egg-info/ +dist/ +build/ diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..f809cbc --- /dev/null +++ b/Gemfile @@ -0,0 +1,22 @@ +source "https://rubygems.org" + +gem "jekyll", "~> 4.3" +gem "minima", "~> 2.5" + +group :jekyll_plugins do + gem "jekyll-feed", "~> 0.12" +end + +# Windows and JRuby does not include zoneinfo files, so bundle the tzinfo-data gem +# and associated library. 
+platforms :mingw, :x64_mingw, :mswin, :jruby do + gem "tzinfo", ">= 1", "< 3" + gem "tzinfo-data" +end + +# Performance-booster for watching directories on Windows +gem "wdm", "~> 0.1", :platforms => [:mingw, :x64_mingw, :mswin] + +# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem +# do not have a Java counterpart. +gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..47142fe --- /dev/null +++ b/Makefile @@ -0,0 +1,78 @@ +.PHONY: help setup data test lint format site serve clean + +# Default target +help: + @echo "Server Monitoring Dashboard - Makefile targets:" + @echo "" + @echo " make setup - Create virtualenv and install Python dependencies" + @echo " make data - Generate _data/*.json files from logs" + @echo " make test - Run Python unit tests with coverage" + @echo " make lint - Run linting checks (ruff)" + @echo " make format - Auto-format code with ruff" + @echo " make site - Build Jekyll site (requires Ruby/bundler)" + @echo " make serve - Serve Jekyll site locally at http://localhost:4000" + @echo " make clean - Remove build artifacts and generated files" + @echo "" + +# Python setup +setup: + @echo "Creating virtual environment..." + python3 -m venv .venv + @echo "Installing dependencies..." + .venv/bin/pip install --upgrade pip + .venv/bin/pip install -r requirements.txt + .venv/bin/pip install -e . + @echo "Setup complete! Activate with: source .venv/bin/activate" + +# Generate data +data: + @echo "Generating data files..." + .venv/bin/python scripts/build_data.py + @echo "Data generation complete!" + +# Run tests +test: + @echo "Running tests..." + .venv/bin/pytest tests/ -v --cov=src/servermonitoring --cov-report=term-missing + +# Lint code +lint: + @echo "Running linting checks..." + .venv/bin/ruff check src/ tests/ scripts/ + +# Format code +format: + @echo "Formatting code..." 
+ .venv/bin/ruff check --fix src/ tests/ scripts/ + .venv/bin/ruff format src/ tests/ scripts/ + +# Jekyll site build +site: + @echo "Building Jekyll site..." + @if ! command -v bundle >/dev/null 2>&1; then \ + echo "Error: bundler not found. Install with: gem install bundler"; \ + exit 1; \ + fi + bundle install --quiet + bundle exec jekyll build + @echo "Site built to _site/" + +# Serve Jekyll locally +serve: + @echo "Starting Jekyll server..." + @if ! command -v bundle >/dev/null 2>&1; then \ + echo "Error: bundler not found. Install with: gem install bundler"; \ + exit 1; \ + fi + bundle install --quiet + bundle exec jekyll serve --livereload + +# Clean build artifacts +clean: + @echo "Cleaning build artifacts..." + rm -rf _site .sass-cache .jekyll-cache .jekyll-metadata + rm -rf htmlcov .coverage .pytest_cache + rm -rf src/*.egg-info build dist + find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true + find . -type f -name "*.pyc" -delete + @echo "Clean complete!" diff --git a/README.md b/README.md index d065830..2c36ef0 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,140 @@ # Server Monitoring Dashboard -Static dashboard for Department of Computer Engineering server utilization -(GPU and storage). Generated daily and published via GitHub Pages. +Static dashboard for Department of Computer Engineering server utilization (GPU and storage). Generated daily and published via GitHub Pages. 
-## Live Site +## 🌐 Live Site -`https://cepdnaclk.github.io/servermonitoring/` +**[https://cepdnaclk.github.io/servermonitoring/](https://cepdnaclk.github.io/servermonitoring/)** -## Data Source +## 📋 Overview -Base index (logs are under per-server folders): -`https://tesla.ce.pdn.ac.lk/servermonitoring/` +This project provides automated monitoring and visualization of: +- **Storage utilization** across multiple department servers +- **GPU metrics** including utilization and memory usage over time +- **Historical trends** for the last 90 days -## How It Works +The system uses: +- **Python** for data collection and transformation +- **Jekyll** (minima theme) for static site generation +- **GitHub Actions** for automated daily updates -- Logs are synced from the server into `data/logs/`. -- Storage report HTML is generated at `docs/reports/server-storage-util/index.html`. -- GPU plots and index are generated at `docs/reports/server-gpu-util/`. -- GitHub Actions runs daily and pushes `docs/` to GitHub Pages. +## 🏗️ Architecture -## Project Structure +### Data Pipeline -- `scripts/` → report generation and log sync -- `config/servers.json` → server list and storage doc links -- `config/gpu-info.json` → GPU IDs and memory limits -- `config/batches.json` → student batches (for alumni vs student tagging) -- `data/logs/` → downloaded logs (ignored by git) -- `docs/` → published site output +``` +External Logs → Python Scripts → JSON Data → Jekyll → Static Site +``` -## Local Development +1. **Data Collection**: `sync_logs.py` downloads logs from the monitoring server +2. **Data Transformation**: `build_data.py` processes logs into Jekyll-friendly JSON +3. **Site Generation**: Jekyll renders templates using data from `_data/*.json` +4. 
**Publishing**: GitHub Actions deploys to GitHub Pages -### Setup +### Directory Structure ``` -python -m venv .venv -source .venv/bin/activate -pip install -r requirements.txt +├── _config.yml # Jekyll configuration +├── _data/ # Generated JSON data (consumed by Jekyll) +│ ├── storage.json # Storage usage data +│ ├── gpu.json # GPU metrics data +│ └── metadata.json # Site metadata +├── _includes/ # Jekyll template fragments +├── config/ # Configuration files +│ ├── servers.json # Server list and documentation links +│ ├── gpu-info.json # GPU specifications +│ └── batches.json # Student batch identifiers +├── src/servermonitoring/ # Python data processing modules +│ ├── config.py # Configuration loading utilities +│ ├── storage.py # Storage data processing +│ └── gpu.py # GPU data processing +├── scripts/ # Executable scripts +│ ├── build_data.py # Main data generation entrypoint +│ └── sync_logs.py # Log synchronization +├── tests/ # Python unit tests +├── index.md # Homepage +├── storage.md # Storage report page +├── gpu.md # GPU report page +└── Makefile # Build automation ``` -### Download Logs +## 🚀 Local Development -``` -python scripts/sync_logs.py -``` +### Prerequisites -Custom base URL (must contain per-server folders like `kepler/`): +- **Python 3.11+** +- **Ruby 3.x** and **Bundler** (for Jekyll) -``` -python scripts/sync_logs.py --base-url https://tesla.ce.pdn.ac.lk/servermonitoring/logging/ -``` +### Setup + +1. **Clone the repository**: + ```bash + git clone https://github.com/cepdnaclk/servermonitoring.git + cd servermonitoring + ``` -### Build the Dashboard +2. **Install Python dependencies**: + ```bash + make setup + ``` -Build using local logs: +3. 
**Install Jekyll dependencies** (requires Ruby): + ```bash + gem install bundler + bundle install + ``` +### Generate Data + +```bash +make data ``` -python scripts/build_dashboard.py + +### Download Logs + +```bash +python scripts/sync_logs.py ``` -Build and download logs in one step: +### Build and Serve Site +```bash +make site # Build +make serve # Serve locally at http://localhost:4000/servermonitoring/ ``` -python scripts/build_dashboard.py --download-logs + +## 🧪 Testing + +```bash +make test # Run tests with coverage +make lint # Run linting checks +make format # Auto-format code ``` -## Notes +## 🤖 CI/CD + +- **Testing**: Automatic PR checks (`.github/workflows/test.yml`) +- **Deployment**: Daily updates (`.github/workflows/update-dashboard.yml`) + +## 📝 Makefile Commands + +| Command | Description | +|---------|-------------| +| `make help` | Show available commands | +| `make setup` | Install dependencies | +| `make data` | Generate JSON data | +| `make test` | Run tests | +| `make lint` | Check code quality | +| `make site` | Build Jekyll site | +| `make serve` | Serve locally | +| `make clean` | Remove artifacts | + +## 👥 Contributors -- Storage: `babbage` highlights alumni (>10GB, orange) and students (>50GB, yellow) - based on `config/batches.json`. -- GPU plots show daily mean utilization and memory for the last 90 days. 
+- **E/14/Gihan**: [Profile](https://people.ce.pdn.ac.lk/students/e14/158) +- **E/15/Nuwan**: [Website](https://nuwanjaliyagoda.com/contact/) -## Contact +## 🔗 Links -- **E/14/Gihan**: https://people.ce.pdn.ac.lk/students/e14/158 -- **E/15/Nuwan**: https://nuwanjaliyagoda.com/contact/ \ No newline at end of file +- **Live Site**: [cepdnaclk.github.io/servermonitoring](https://cepdnaclk.github.io/servermonitoring/) +- **Repository**: [github.com/cepdnaclk/servermonitoring](https://github.com/cepdnaclk/servermonitoring) diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..abe936e --- /dev/null +++ b/_config.yml @@ -0,0 +1,36 @@ +title: Server Monitoring Dashboard +description: >- + Static dashboard for Department of Computer Engineering server utilization + (GPU and storage). Generated daily and published via GitHub Pages. +baseurl: "/servermonitoring" +url: "https://cepdnaclk.github.io" + +# Build settings +theme: minima +minima: + skin: classic + +# Collections +collections: + pages: + output: true + permalink: /:name + +# Exclude from processing +exclude: + - .venv/ + - venv/ + - __pycache__/ + - "*.pyc" + - .DS_Store + - data/ + - scripts/ + - config/ + - src/ + - tests/ + - requirements.txt + - Makefile + - README.md + - Gemfile + - Gemfile.lock + - vendor/ diff --git a/_data/gpu.json b/_data/gpu.json new file mode 100644 index 0000000..377418d --- /dev/null +++ b/_data/gpu.json @@ -0,0 +1,24 @@ +{ + "servers": { + "turing": { + "name": "turing", + "gpus": {} + }, + "kepler": { + "name": "kepler", + "gpus": {} + }, + "ai4covid": { + "name": "ai4covid", + "gpus": {} + }, + "ampere": { + "name": "ampere", + "gpus": {} + }, + "ada": { + "name": "ada", + "gpus": {} + } + } +} \ No newline at end of file diff --git a/_data/metadata.json b/_data/metadata.json new file mode 100644 index 0000000..4a01d81 --- /dev/null +++ b/_data/metadata.json @@ -0,0 +1,17 @@ +{ + "generated_at": "2026-02-02T11:39:35.995693", + "storage_servers": [ + "kepler", 
+ "turing", + "ampere", + "ada", + "babbage" + ], + "gpu_servers": [ + "turing", + "kepler", + "ai4covid", + "ampere", + "ada" + ] +} \ No newline at end of file diff --git a/_data/storage.json b/_data/storage.json new file mode 100644 index 0000000..6d791f5 --- /dev/null +++ b/_data/storage.json @@ -0,0 +1,29 @@ +{ + "servers": { + "kepler": { + "name": "kepler", + "doc_url": "https://docs.google.com/spreadsheets/d/1qyCokp9XNO9tOghKgX-2SNlR_Gaif1EEOa_mu7_nMOo/edit?usp=sharing", + "entries": [] + }, + "turing": { + "name": "turing", + "doc_url": "https://docs.google.com/spreadsheets/d/1f8G_oQVhBhKbaI_rw_gcaFTY4OFbP6T7RiUE66l-dvU/edit?usp=sharing", + "entries": [] + }, + "ampere": { + "name": "ampere", + "doc_url": "https://docs.google.com/spreadsheets/d/1p2YNp1HqxB9AmEj46M8egR1LJLmsHQ1kYz5Ncp-icJA/edit?usp=sharing", + "entries": [] + }, + "ada": { + "name": "ada", + "doc_url": "https://docs.google.com/spreadsheets/d/1v-OgHJj2iGqFuL6z3fmauJLV7rIlX6-EJBW_b1WpeQA/edit?usp=sharing", + "entries": [] + }, + "babbage": { + "name": "babbage", + "doc_url": null, + "entries": [] + } + } +} \ No newline at end of file diff --git a/_includes/footer.html b/_includes/footer.html new file mode 100644 index 0000000..124ea73 --- /dev/null +++ b/_includes/footer.html @@ -0,0 +1,29 @@ + diff --git a/_includes/header.html b/_includes/header.html new file mode 100644 index 0000000..8e1afb9 --- /dev/null +++ b/_includes/header.html @@ -0,0 +1,25 @@ + diff --git a/gpu.md b/gpu.md new file mode 100644 index 0000000..ae49516 --- /dev/null +++ b/gpu.md @@ -0,0 +1,142 @@ +--- +layout: page +title: GPU Usage Report +permalink: /gpu/ +--- + +
+

GPU utilization and memory usage for the last 90 days.

+ + {% if site.data.gpu.servers %} + {% for server_entry in site.data.gpu.servers %} + {% assign server_name = server_entry[0] %} + {% assign server = server_entry[1] %} + +

{{ server_name | capitalize }}

+ + {% if server.gpus and server.gpus.size > 0 %} + {% for gpu_entry in server.gpus %} + {% assign gpu_id = gpu_entry[0] %} + {% assign gpu = gpu_entry[1] %} + +

GPU {{ gpu_id }}

+ + + + {% if gpu.metrics %} + {% if gpu.metrics.utilization %} +

Utilization (%)

+
+

{{ gpu.metrics.utilization.size }} data points over the last 90 days

+
+ View Data + + + + + + + + + {% for point in gpu.metrics.utilization limit:30 %} + + + + + {% endfor %} + {% if gpu.metrics.utilization.size > 30 %} + + + + {% endif %} + +
DateAvg Utilization (%)
{{ point.date }}{{ point.value | round: 2 }}
... and {{ gpu.metrics.utilization.size | minus: 30 }} more days
+
+
+ {% endif %} + + {% if gpu.metrics.memory %} +

Memory Usage (MB)

+
+

{{ gpu.metrics.memory.size }} data points over the last 90 days

+
+ View Data + + + + + + + + + {% for point in gpu.metrics.memory limit:30 %} + + + + + {% endfor %} + {% if gpu.metrics.memory.size > 30 %} + + + + {% endif %} + +
DateAvg Memory (MB)
{{ point.date }}{{ point.value | round: 0 }}
... and {{ gpu.metrics.memory.size | minus: 30 }} more days
+
+
+ {% endif %} + {% else %} +

No metrics available

+ {% endif %} + {% endfor %} + {% else %} +

No GPU data available

+ {% endif %} + {% endfor %} + {% else %} +

No GPU data available. Please run data generation.

+ {% endif %} +
+ + + +--- + +*This webpage was **VibeCoded** with **ChatGPT 5.2** + **GitHub Copilot Agent*** diff --git a/index.md b/index.md new file mode 100644 index 0000000..ce0beaa --- /dev/null +++ b/index.md @@ -0,0 +1,36 @@ +--- +layout: home +title: Server Monitoring Dashboard +--- + +## Welcome + +Daily updated storage and GPU utilization reports for the Department of Computer Engineering servers. + +### Quick Links + +- [Storage Usage Report](/servermonitoring/storage/) +- [GPU Usage Report](/servermonitoring/gpu/) + +### About + +This dashboard provides automated monitoring of: +- **Storage utilization** across multiple servers +- **GPU metrics** including utilization and memory usage +- **Historical trends** for the last 90 days + +Data is automatically collected, processed, and published daily via GitHub Actions. + +--- + +{% if site.data.metadata %} +*Last updated: {{ site.data.metadata.generated_at | date: "%Y-%m-%d %H:%M" }}* +{% endif %} + +**Maintained by**: [E/14/Gihan](https://people.ce.pdn.ac.lk/students/e14/158) and [E/15/Nuwan](https://nuwanjaliyagoda.com/contact/) + +**Source Code**: [github.com/cepdnaclk/servermonitoring](https://github.com/cepdnaclk/servermonitoring) + +--- + +*This webpage was **VibeCoded** with **ChatGPT 5.2** + **GitHub Copilot Agent*** diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..57b85f4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,59 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "servermonitoring" +version = "1.0.0" +description = "Server monitoring dashboard data pipeline" +readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "pandas>=2.2.3", + "matplotlib>=3.9.2", + "tqdm>=4.66.5", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.3.4", + "pytest-cov>=6.0.0", + "ruff>=0.8.4", +] + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = 
["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = [ + "--verbose", + "--strict-markers", + "--cov=src/servermonitoring", + "--cov-report=term-missing", + "--cov-report=html", +] + +[tool.ruff] +line-length = 100 +target-version = "py311" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade +] +ignore = [ + "E501", # line too long (handled by formatter) +] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] diff --git a/requirements.txt b/requirements.txt index 7a06a36..947be17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,8 @@ pandas==2.2.3 matplotlib==3.9.2 -tqdm==4.66.5 \ No newline at end of file +tqdm==4.66.5 + +# Development dependencies +pytest==8.3.4 +pytest-cov==6.0.0 +ruff==0.8.4 \ No newline at end of file diff --git a/scripts/build_dashboard.py b/scripts/build_dashboard.py index 31b5ce4..6ddd5d3 100644 --- a/scripts/build_dashboard.py +++ b/scripts/build_dashboard.py @@ -3,7 +3,8 @@ from concurrent.futures import ThreadPoolExecutor from pathlib import Path -from generate_gpu_plots import build_gpu_plots, load_servers as load_gpu_servers +from generate_gpu_plots import build_gpu_plots +from generate_gpu_plots import load_servers as load_gpu_servers from generate_storage_report import ( build_storage_report, load_storage_servers, @@ -11,7 +12,6 @@ ) from sync_logs import sync_logs - ROOT_DIR = Path(__file__).resolve().parents[1] diff --git a/scripts/build_data.py b/scripts/build_data.py new file mode 100644 index 0000000..7e5e50e --- /dev/null +++ b/scripts/build_data.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""Build all data files for Jekyll site.""" + +import argparse +import json +from pathlib import Path + +from servermonitoring.config import ( + load_gpu_info, + load_gpu_servers, + load_storage_servers, + load_student_batches, +) +from 
servermonitoring.gpu import build_gpu_data +from servermonitoring.storage import build_storage_data + +ROOT_DIR = Path(__file__).resolve().parents[1] + + +def save_json(data: dict, output_path: Path) -> None: + """Save data as formatted JSON. + + Args: + data: Data to save + output_path: Output file path + """ + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + print(f"Saved: {output_path}") + + +def main() -> None: + """Main entry point.""" + parser = argparse.ArgumentParser(description="Build Jekyll data files") + parser.add_argument( + "--config-dir", + type=Path, + default=ROOT_DIR / "config", + help="Config directory (default: config/)", + ) + parser.add_argument( + "--data-dir", + type=Path, + default=ROOT_DIR / "data", + help="Data directory containing logs (default: data/)", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=ROOT_DIR / "_data", + help="Output directory for JSON files (default: _data/)", + ) + parser.add_argument( + "--days", + type=int, + default=90, + help="Number of days for GPU metrics (default: 90)", + ) + args = parser.parse_args() + + # Load configurations + print("Loading configurations...") + servers_config = args.config_dir / "servers.json" + gpu_info_config = args.config_dir / "gpu-info.json" + batches_config = args.config_dir / "batches.json" + + storage_servers = load_storage_servers(servers_config) + gpu_servers = load_gpu_servers(servers_config) + gpu_info = load_gpu_info(gpu_info_config) + student_batches = load_student_batches(batches_config) + + # Build storage data + print("Building storage data...") + storage_logs = args.data_dir / "logs" / "storage" + storage_data = build_storage_data(storage_logs, storage_servers, student_batches) + save_json(storage_data, args.output_dir / "storage.json") + + # Build GPU data + print("Building GPU data...") + gpu_logs = args.data_dir / "logs" / "gpu" + gpu_data 
= build_gpu_data(gpu_logs, gpu_servers, gpu_info, args.days) + save_json(gpu_data, args.output_dir / "gpu.json") + + # Build site metadata + print("Building site metadata...") + metadata = { + "generated_at": __import__("datetime").datetime.now().isoformat(), + "storage_servers": list(storage_servers.keys()), + "gpu_servers": gpu_servers, + } + save_json(metadata, args.output_dir / "metadata.json") + + print("Data build complete!") + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_gpu_plots.py b/scripts/generate_gpu_plots.py index 44e9587..2480b0a 100644 --- a/scripts/generate_gpu_plots.py +++ b/scripts/generate_gpu_plots.py @@ -6,7 +6,6 @@ import matplotlib.pyplot as plt import pandas as pd - matplotlib.use("Agg") ROOT_DIR = Path(__file__).resolve().parents[1] diff --git a/scripts/generate_storage_report.py b/scripts/generate_storage_report.py index 0010e26..8d9a175 100644 --- a/scripts/generate_storage_report.py +++ b/scripts/generate_storage_report.py @@ -4,7 +4,6 @@ import re from pathlib import Path - ROOT_DIR = Path(__file__).resolve().parents[1] DEFAULT_CONFIG = ROOT_DIR / "config" / "servers.json" DEFAULT_BATCHES = ROOT_DIR / "config" / "batches.json" @@ -40,9 +39,7 @@ def classify_babbage( return None, None student_id = match.group(1) batch = student_id[:3].lower() - profile_url = "https://people.ce.pdn.ac.lk/students/{}/{}/".format( - batch, student_id[3:6] - ) + profile_url = f"https://people.ce.pdn.ac.lk/students/{batch}/{student_id[3:6]}/" if batch in student_batches: if usage_gb is not None and usage_gb > 50: return "yellow", profile_url diff --git a/src/servermonitoring/__init__.py b/src/servermonitoring/__init__.py new file mode 100644 index 0000000..2021625 --- /dev/null +++ b/src/servermonitoring/__init__.py @@ -0,0 +1,3 @@ +"""Server monitoring data utilities.""" + +__version__ = "1.0.0" diff --git a/src/servermonitoring/config.py b/src/servermonitoring/config.py new file mode 100644 index 0000000..44502e3 --- /dev/null +++ 
b/src/servermonitoring/config.py @@ -0,0 +1,89 @@ +"""Configuration file loading utilities.""" + +import json +from pathlib import Path +from typing import Any + + +def load_json_config(config_path: Path) -> dict[str, Any]: + """Load a JSON configuration file. + + Args: + config_path: Path to the JSON config file + + Returns: + Parsed JSON data as dictionary + + Raises: + FileNotFoundError: If config file doesn't exist + json.JSONDecodeError: If config file is invalid JSON + """ + if not config_path.exists(): + raise FileNotFoundError(f"Config file not found: {config_path}") + + with config_path.open("r", encoding="utf-8") as f: + return json.load(f) + + +def load_servers_config(config_path: Path) -> dict[str, Any]: + """Load servers configuration. + + Args: + config_path: Path to servers.json + + Returns: + Server configuration dictionary + """ + return load_json_config(config_path) + + +def load_storage_servers(config_path: Path) -> dict[str, Any]: + """Load storage server configuration. + + Args: + config_path: Path to servers.json + + Returns: + Storage server configuration + """ + config = load_servers_config(config_path) + return config.get("storage", {}) + + +def load_gpu_servers(config_path: Path) -> list[str]: + """Load GPU server list. + + Args: + config_path: Path to servers.json + + Returns: + List of GPU server names + """ + config = load_servers_config(config_path) + return config.get("gpu", []) + + +def load_gpu_info(info_path: Path) -> dict[str, Any]: + """Load GPU information configuration. + + Args: + info_path: Path to gpu-info.json + + Returns: + GPU info configuration + """ + return load_json_config(info_path) + + +def load_student_batches(batches_path: Path) -> set[str]: + """Load student batch identifiers. 
+ + Args: + batches_path: Path to batches.json + + Returns: + Set of batch identifiers (lowercase) + """ + config = load_json_config(batches_path) + batches = config.get("students", []) + return {batch.lower() for batch in batches} diff --git a/src/servermonitoring/gpu.py b/src/servermonitoring/gpu.py new file mode 100644 index 0000000..503df56 --- /dev/null +++ b/src/servermonitoring/gpu.py @@ -0,0 +1,156 @@ +"""GPU data processing utilities.""" + +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any + +import pandas as pd + + +def clean_numeric(series: pd.Series) -> pd.Series: + """Clean and convert series to numeric values. + + Args: + series: Pandas series with potentially non-numeric values + + Returns: + Series with cleaned numeric values + """ + return pd.to_numeric( + series.astype(str).str.replace(r"[^0-9.]+", "", regex=True), + errors="coerce", + ) + + +def read_gpu_logs( + logs_dir: Path, servers: list[str], days: int = 90 +) -> dict[str, pd.DataFrame]: + """Read GPU log files for specified servers. 
+ + Args: + logs_dir: Directory containing GPU log CSV files + servers: List of server names to process + days: Number of days to include (default: 90) + + Returns: + Dictionary mapping server names to DataFrames + """ + cutoff_date = datetime.now() - timedelta(days=days) + data: dict[str, pd.DataFrame] = {} + + for log_file in sorted(logs_dir.glob("*")): + if log_file.suffix.lower() not in {".csv", ".log"}: + continue + + server = log_file.name.split("-")[0] + if server not in servers: + continue + + try: + frame = pd.read_csv(log_file) + except Exception: + continue + + if "timestamp" not in frame.columns: + continue + + frame["timestamp"] = pd.to_datetime(frame["timestamp"], errors="coerce") + frame = frame.dropna(subset=["timestamp"]) + frame = frame[frame["timestamp"] >= cutoff_date] + + if frame.empty: + continue + + if server not in data: + data[server] = frame + else: + data[server] = pd.concat([data[server], frame], ignore_index=True) + + return data + + +def aggregate_gpu_metrics( + df: pd.DataFrame, gpu_id: str +) -> dict[str, list[dict[str, Any]]]: + """Aggregate GPU metrics by date. 
+ + Args: + df: DataFrame with GPU metrics + gpu_id: GPU identifier + + Returns: + Dictionary with daily aggregated metrics + """ + gpu_col = f"gpu.{gpu_id}.gpu" + mem_col = f"gpu.{gpu_id}.memory" + + if gpu_col not in df.columns and mem_col not in df.columns: + return {} + + df = df.copy() + df["date"] = df["timestamp"].dt.date + + result = {} + + if gpu_col in df.columns: + gpu_series = clean_numeric(df[gpu_col]) + daily_gpu = gpu_series.groupby(df["date"]).mean() + result["utilization"] = [ + {"date": str(date), "value": float(value)} + for date, value in daily_gpu.items() + if pd.notna(value) + ] + + if mem_col in df.columns: + mem_series = clean_numeric(df[mem_col]) + daily_mem = mem_series.groupby(df["date"]).mean() + result["memory"] = [ + {"date": str(date), "value": float(value)} + for date, value in daily_mem.items() + if pd.notna(value) + ] + + return result + + +def build_gpu_data( + logs_dir: Path, + servers: list[str], + gpu_info: dict[str, Any], + days: int = 90, +) -> dict[str, Any]: + """Build complete GPU data for Jekyll. 
+ + Args: + logs_dir: Directory containing GPU log files + servers: List of server names + gpu_info: GPU configuration info + days: Number of days to include + + Returns: + Dictionary with GPU data for all servers + """ + logs = read_gpu_logs(logs_dir, servers, days) + gpu_data = {} + + for server in servers: + if server not in logs: + gpu_data[server] = {"name": server, "gpus": {}} + continue + + df = logs[server] + server_gpu_info = gpu_info.get(server, {}) + gpus = {} + + for gpu_id, info in server_gpu_info.items(): + metrics = aggregate_gpu_metrics(df, gpu_id) + gpus[gpu_id] = { + "id": gpu_id, + "active": info.get("active", True), + "memory_limit": info.get("memory", 0), + "metrics": metrics, + } + + gpu_data[server] = {"name": server, "gpus": gpus} + + return {"servers": gpu_data} diff --git a/src/servermonitoring/storage.py b/src/servermonitoring/storage.py new file mode 100644 index 0000000..e3566ad --- /dev/null +++ b/src/servermonitoring/storage.py @@ -0,0 +1,144 @@ +"""Storage data processing utilities.""" + +import re +from pathlib import Path +from typing import Any + + +def parse_usage_gb(usage: str) -> float | None: + """Parse usage string to extract GB value. + + Args: + usage: Usage string like "123.45G" or "1.2T" + + Returns: + Usage in GB, or None if parsing fails + """ + match = re.match(r"([0-9]+(?:\.[0-9]+)?)", usage) + if not match: + return None + return float(match.group(1)) + + +def classify_babbage_user( + folder: str, usage_gb: float | None, student_batches: set[str] +) -> tuple[str | None, str | None]: + """Classify babbage server user and determine highlighting. 
+ + Args: + folder: Folder path containing user ID (e.g., "/home/e14123/...") + usage_gb: Storage usage in GB + student_batches: Set of current student batch IDs + + Returns: + Tuple of (color, profile_url) where color is 'yellow' or 'orange' + for highlighting, or None if no highlighting needed + """ + match = re.search(r"/(e\d{5})(?:/|$)", folder) + if not match: + return None, None + + student_id = match.group(1) + batch = student_id[:3].lower() + profile_url = f"https://people.ce.pdn.ac.lk/students/{batch}/{student_id[3:6]}/" + + if batch in student_batches: + # Current student: highlight if > 50GB + if usage_gb is not None and usage_gb > 50: + return "yellow", profile_url + return None, profile_url + + # Alumni: highlight if > 10GB + if usage_gb is not None and usage_gb > 10: + return "orange", profile_url + + return None, profile_url + + +def process_storage_log( + log_path: Path, server_name: str, student_batches: set[str] +) -> list[dict[str, Any]]: + """Process a storage log file into structured data. 
+ + Args: + log_path: Path to the storage log file + server_name: Name of the server + student_batches: Set of current student batch IDs + + Returns: + List of storage entry dictionaries + """ + entries = [] + + if not log_path.exists(): + return entries + + with log_path.open("r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + + parts = line.split(maxsplit=1) + if len(parts) < 2: + continue + + usage, folder = parts + usage_gb = parse_usage_gb(usage) + + entry: dict[str, Any] = { + "folder": folder, + "usage": usage, + "usage_gb": usage_gb, + } + + # Add classification for babbage + if server_name == "babbage": + color, profile_url = classify_babbage_user( + folder, usage_gb, student_batches + ) + if color: + entry["color"] = color + if profile_url: + entry["profile_url"] = profile_url + + entries.append(entry) + + return entries + + +def build_storage_data( + logs_dir: Path, + servers: dict[str, Any], + student_batches: set[str], +) -> dict[str, Any]: + """Build complete storage data for Jekyll. 
+ + Args: + logs_dir: Directory containing storage log files + servers: Storage server configuration + student_batches: Set of current student batch IDs + + Returns: + Dictionary with storage data for all servers + """ + storage_data = {} + + for server_name, server_config in servers.items(): + log_files = list(logs_dir.glob(f"{server_name}-*.csv")) + if not log_files: + log_files = list(logs_dir.glob(f"{server_name}-*.log")) + + if log_files: + latest_log = max(log_files, key=lambda p: p.stat().st_mtime) + entries = process_storage_log(latest_log, server_name, student_batches) + else: + entries = [] + + storage_data[server_name] = { + "name": server_name, + "doc_url": server_config.get("doc_url"), + "entries": entries, + } + + return {"servers": storage_data} diff --git a/storage.md b/storage.md new file mode 100644 index 0000000..142ab2a --- /dev/null +++ b/storage.md @@ -0,0 +1,85 @@ +--- +layout: page +title: Storage Usage Report +permalink: /storage/ +--- + +
+

Storage usage across department servers. Highlighted entries indicate:

+ + + {% if site.data.storage.servers %} + {% for server_entry in site.data.storage.servers %} + {% assign server_name = server_entry[0] %} + {% assign server = server_entry[1] %} + +

{{ server_name | capitalize }}

+ + {% if server.doc_url %} +

📄 Documentation

+ {% endif %} + + {% if server.entries and server.entries.size > 0 %} + + + + + + + + + + {% for entry in server.entries %} + + + + + + {% endfor %} + +
FolderUsageLink
{{ entry.folder }}{{ entry.usage }} + {% if entry.profile_url %} + Profile + {% endif %} +
+ {% else %} +

No data available

+ {% endif %} + {% endfor %} + {% else %} +

No storage data available. Please run data generation.

+ {% endif %} +
+ + + +--- + +*This webpage was **VibeCoded** with **ChatGPT 5.2** + **GitHub Copilot Agent*** diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..8b4d750 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for servermonitoring package.""" diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..d9ac29a --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,120 @@ +"""Test configuration loading utilities.""" + +import json +from pathlib import Path + +import pytest + +from servermonitoring.config import ( + load_gpu_info, + load_gpu_servers, + load_json_config, + load_storage_servers, + load_student_batches, +) + + +def test_load_json_config_success(tmp_path): + """Test successful JSON loading.""" + config_file = tmp_path / "test.json" + config_data = {"key": "value", "number": 42} + config_file.write_text(json.dumps(config_data)) + + result = load_json_config(config_file) + assert result == config_data + + +def test_load_json_config_not_found(): + """Test loading non-existent file raises FileNotFoundError.""" + with pytest.raises(FileNotFoundError): + load_json_config(Path("/nonexistent/file.json")) + + +def test_load_json_config_invalid_json(tmp_path): + """Test loading invalid JSON raises JSONDecodeError.""" + config_file = tmp_path / "invalid.json" + config_file.write_text("not valid json {") + + with pytest.raises(json.JSONDecodeError): + load_json_config(config_file) + + +def test_load_storage_servers(tmp_path): + """Test loading storage servers.""" + config_file = tmp_path / "servers.json" + config_data = { + "storage": { + "server1": {"doc_url": "http://example.com"}, + "server2": {"doc_url": None}, + }, + "gpu": ["gpu1"], + } + config_file.write_text(json.dumps(config_data)) + + result = load_storage_servers(config_file) + assert result == config_data["storage"] + + +def test_load_storage_servers_empty(tmp_path): + """Test loading when storage key is missing.""" + config_file = 
tmp_path / "servers.json" + config_file.write_text(json.dumps({})) + + result = load_storage_servers(config_file) + assert result == {} + + +def test_load_gpu_servers(tmp_path): + """Test loading GPU servers.""" + config_file = tmp_path / "servers.json" + config_data = { + "storage": {}, + "gpu": ["server1", "server2", "server3"], + } + config_file.write_text(json.dumps(config_data)) + + result = load_gpu_servers(config_file) + assert result == ["server1", "server2", "server3"] + + +def test_load_gpu_servers_empty(tmp_path): + """Test loading when gpu key is missing.""" + config_file = tmp_path / "servers.json" + config_file.write_text(json.dumps({})) + + result = load_gpu_servers(config_file) + assert result == [] + + +def test_load_gpu_info(tmp_path): + """Test loading GPU info.""" + info_file = tmp_path / "gpu-info.json" + info_data = { + "server1": { + "0": {"active": True, "memory": 24000}, + "1": {"active": False, "memory": 12000}, + } + } + info_file.write_text(json.dumps(info_data)) + + result = load_gpu_info(info_file) + assert result == info_data + + +def test_load_student_batches(tmp_path): + """Test loading student batches.""" + batches_file = tmp_path / "batches.json" + batches_data = {"students": ["E20", "E21", "E22"]} + batches_file.write_text(json.dumps(batches_data)) + + result = load_student_batches(batches_file) + assert result == {"e20", "e21", "e22"} + + +def test_load_student_batches_empty(tmp_path): + """Test loading when students key is missing.""" + batches_file = tmp_path / "batches.json" + batches_file.write_text(json.dumps({})) + + result = load_student_batches(batches_file) + assert result == set() diff --git a/tests/test_gpu.py b/tests/test_gpu.py new file mode 100644 index 0000000..a395d5a --- /dev/null +++ b/tests/test_gpu.py @@ -0,0 +1,171 @@ +"""Test GPU data processing utilities.""" + +from datetime import datetime, timedelta + +import pandas as pd + +from servermonitoring.gpu import ( + aggregate_gpu_metrics, + build_gpu_data, + 
clean_numeric, + read_gpu_logs, +) + + +def test_clean_numeric_valid(): + """Test cleaning numeric series.""" + series = pd.Series(["10", "20.5", "30"]) + result = clean_numeric(series) + assert result.tolist() == [10.0, 20.5, 30.0] + + +def test_clean_numeric_with_text(): + """Test cleaning series with text.""" + series = pd.Series(["10%", "20.5MB", "30GB"]) + result = clean_numeric(series) + assert result.tolist() == [10.0, 20.5, 30.0] + + +def test_clean_numeric_invalid(): + """Test cleaning series with invalid data.""" + series = pd.Series(["invalid", "N/A", ""]) + result = clean_numeric(series) + assert all(pd.isna(result)) + + +def test_read_gpu_logs_no_files(tmp_path): + """Test reading GPU logs with no files.""" + servers = ["server1"] + result = read_gpu_logs(tmp_path, servers) + assert result == {} + + +def test_read_gpu_logs_valid_file(tmp_path): + """Test reading valid GPU log file.""" + log_file = tmp_path / "server1-20240101.csv" + now = datetime.now() + df = pd.DataFrame( + { + "timestamp": [ + now - timedelta(days=1), + now - timedelta(days=2), + ], + "gpu.0.gpu": [50, 60], + "gpu.0.memory": [1000, 1200], + } + ) + df.to_csv(log_file, index=False) + + servers = ["server1"] + result = read_gpu_logs(tmp_path, servers, days=90) + + assert "server1" in result + assert len(result["server1"]) == 2 + + +def test_read_gpu_logs_old_data_filtered(tmp_path): + """Test that old data is filtered out.""" + log_file = tmp_path / "server1-20240101.csv" + now = datetime.now() + df = pd.DataFrame( + { + "timestamp": [ + now - timedelta(days=100), # Too old + now - timedelta(days=10), # Recent + ], + "gpu.0.gpu": [50, 60], + } + ) + df.to_csv(log_file, index=False) + + servers = ["server1"] + result = read_gpu_logs(tmp_path, servers, days=90) + + assert "server1" in result + assert len(result["server1"]) == 1 + + +def test_aggregate_gpu_metrics_no_columns(): + """Test aggregation with missing columns.""" + df = pd.DataFrame({"timestamp": [datetime.now()], "other": 
[1]}) + result = aggregate_gpu_metrics(df, "0") + assert result == {} + + +def test_aggregate_gpu_metrics_utilization(): + """Test aggregating utilization metrics.""" + now = datetime.now() + df = pd.DataFrame( + { + "timestamp": [ + now.replace(hour=1), + now.replace(hour=2), + now.replace(hour=3), + ], + "gpu.0.gpu": [50, 60, 70], + } + ) + + result = aggregate_gpu_metrics(df, "0") + + assert "utilization" in result + assert len(result["utilization"]) == 1 + assert result["utilization"][0]["value"] == 60.0 # mean + + +def test_aggregate_gpu_metrics_memory(): + """Test aggregating memory metrics.""" + now = datetime.now() + df = pd.DataFrame( + { + "timestamp": [ + now.replace(hour=1), + now.replace(hour=2), + ], + "gpu.0.memory": [1000, 2000], + } + ) + + result = aggregate_gpu_metrics(df, "0") + + assert "memory" in result + assert len(result["memory"]) == 1 + assert result["memory"][0]["value"] == 1500.0 + + +def test_build_gpu_data_no_logs(tmp_path): + """Test building GPU data with no logs.""" + servers = ["server1"] + gpu_info = {"server1": {"0": {"active": True, "memory": 24000}}} + + result = build_gpu_data(tmp_path, servers, gpu_info) + + assert "servers" in result + assert "server1" in result["servers"] + assert result["servers"]["server1"]["gpus"] == {} + + +def test_build_gpu_data_with_logs(tmp_path): + """Test building GPU data with log files.""" + log_file = tmp_path / "server1-20240101.csv" + now = datetime.now() + df = pd.DataFrame( + { + "timestamp": [now - timedelta(days=1), now - timedelta(days=2)], + "gpu.0.gpu": [50, 60], + "gpu.0.memory": [1000, 1200], + } + ) + df.to_csv(log_file, index=False) + + servers = ["server1"] + gpu_info = {"server1": {"0": {"active": True, "memory": 24000}}} + + result = build_gpu_data(tmp_path, servers, gpu_info) + + assert "servers" in result + assert "server1" in result["servers"] + assert "0" in result["servers"]["server1"]["gpus"] + gpu_data = result["servers"]["server1"]["gpus"]["0"] + assert 
gpu_data["active"] is True + assert gpu_data["memory_limit"] == 24000 diff --git a/tests/test_storage.py b/tests/test_storage.py new file mode 100644 index 0000000..648a84c --- /dev/null +++ b/tests/test_storage.py @@ -0,0 +1,147 @@ +"""Test storage data processing utilities.""" + +from pathlib import Path + +from servermonitoring.storage import ( + build_storage_data, + classify_babbage_user, + parse_usage_gb, + process_storage_log, +) + + +def test_parse_usage_gb_valid(): + """Test parsing valid usage strings.""" + assert parse_usage_gb("123.45") == 123.45 + assert parse_usage_gb("1.2") == 1.2 + assert parse_usage_gb("50") == 50.0 + + +def test_parse_usage_gb_invalid(): + """Test parsing invalid usage strings.""" + assert parse_usage_gb("invalid") is None + assert parse_usage_gb("") is None + assert parse_usage_gb("G") is None + + +def test_classify_babbage_user_student_low_usage(): + """Test classification of current student with low usage.""" + student_batches = {"e20", "e21"} + color, url = classify_babbage_user("/home/e21123/data", 30.0, student_batches) + + assert color is None + assert url == "https://people.ce.pdn.ac.lk/students/e21/123/" + + +def test_classify_babbage_user_student_high_usage(): + """Test classification of current student with high usage.""" + student_batches = {"e20", "e21"} + color, url = classify_babbage_user("/home/e21123/data", 60.0, student_batches) + + assert color == "yellow" + assert url == "https://people.ce.pdn.ac.lk/students/e21/123/" + + +def test_classify_babbage_user_alumni_low_usage(): + """Test classification of alumni with low usage.""" + student_batches = {"e20", "e21"} + color, url = classify_babbage_user("/home/e14158/data", 5.0, student_batches) + + assert color is None + assert url == "https://people.ce.pdn.ac.lk/students/e14/158/" + + +def test_classify_babbage_user_alumni_high_usage(): + """Test classification of alumni with high usage.""" + student_batches = {"e20", "e21"} + color, url = 
classify_babbage_user("/home/e14158/data", 15.0, student_batches) + + assert color == "orange" + assert url == "https://people.ce.pdn.ac.lk/students/e14/158/" + + +def test_classify_babbage_user_no_match(): + """Test classification with folder not matching pattern.""" + student_batches = {"e20", "e21"} + color, url = classify_babbage_user("/home/other/data", 50.0, student_batches) + + assert color is None + assert url is None + + +def test_process_storage_log_empty_file(tmp_path): + """Test processing empty log file.""" + log_file = tmp_path / "test.log" + log_file.write_text("") + + entries = process_storage_log(log_file, "test", set()) + assert entries == [] + + +def test_process_storage_log_nonexistent(): + """Test processing non-existent log file.""" + entries = process_storage_log(Path("/nonexistent.log"), "test", set()) + assert entries == [] + + +def test_process_storage_log_valid_entries(tmp_path): + """Test processing log file with valid entries.""" + log_file = tmp_path / "test.log" + log_file.write_text( + "50.5G\t/home/user1\n" + "100.0G\t/home/user2\n" + "# comment line\n" + "\n" + "25.3G\t/home/user3\n" + ) + + entries = process_storage_log(log_file, "test", set()) + assert len(entries) == 3 + assert entries[0]["folder"] == "/home/user1" + assert entries[0]["usage"] == "50.5G" + assert entries[0]["usage_gb"] == 50.5 + + +def test_process_storage_log_babbage_classification(tmp_path): + """Test babbage server classification.""" + log_file = tmp_path / "babbage.log" + log_file.write_text( + "60.0G\t/home/e21123/data\n" + "15.0G\t/home/e14158/files\n" + ) + + student_batches = {"e20", "e21"} + entries = process_storage_log(log_file, "babbage", student_batches) + + assert len(entries) == 2 + assert entries[0]["color"] == "yellow" # Student > 50GB + assert entries[1]["color"] == "orange" # Alumni > 10GB + + +def test_build_storage_data_no_logs(tmp_path): + """Test building storage data with no logs.""" + servers = {"server1": {"doc_url": 
"http://example.com"}} + student_batches = set() + + data = build_storage_data(tmp_path, servers, student_batches) + + assert "servers" in data + assert "server1" in data["servers"] + assert data["servers"]["server1"]["entries"] == [] + + +def test_build_storage_data_with_logs(tmp_path): + """Test building storage data with log files.""" + # Create log file + log_file = tmp_path / "server1-20240101.csv" + log_file.write_text("10.0G\t/home/user1\n20.0G\t/home/user2\n") + + servers = {"server1": {"doc_url": "http://example.com"}} + student_batches = set() + + data = build_storage_data(tmp_path, servers, student_batches) + + assert "servers" in data + assert "server1" in data["servers"] + assert len(data["servers"]["server1"]["entries"]) == 2 + assert data["servers"]["server1"]["doc_url"] == "http://example.com"