From 8ecfe47f30e1915f415b90986f9df493563f8138 Mon Sep 17 00:00:00 2001 From: Helen Oakley <69336525+e2hln@users.noreply.github.com> Date: Tue, 23 Jun 2026 22:30:23 -0400 Subject: [PATCH 1/3] Add regression guardrails and runner tooling --- .codex/skills/regression-suite/SKILL.md | 26 ++ scripts/dev/check_clean_state.py | 318 ++++++++++++++++++++++++ scripts/dev/run_regression_suite.py | 299 ++++++++++++++++++++++ tests/test_clean_state.py | 110 ++++++++ tests/test_regression_runner.py | 62 +++++ 5 files changed, 815 insertions(+) create mode 100644 .codex/skills/regression-suite/SKILL.md create mode 100644 scripts/dev/check_clean_state.py create mode 100644 scripts/dev/run_regression_suite.py create mode 100644 tests/test_clean_state.py create mode 100644 tests/test_regression_runner.py diff --git a/.codex/skills/regression-suite/SKILL.md b/.codex/skills/regression-suite/SKILL.md new file mode 100644 index 0000000..653d1e4 --- /dev/null +++ b/.codex/skills/regression-suite/SKILL.md @@ -0,0 +1,26 @@ +# Regression Suite + +Use this skill after implementation changes in this repository. + +## Required Checks + +- After every implementation change, run targeted regression at minimum: + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode targeted` +- Before staging or committing, run full or pre-stage regression: + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode full` + or + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode pre-stage` +- Use pytest through the runner. Do not rely only on `unittest discover`. +- Never hide test failures. Report the exact command, exit status, and failing check. +- Never stage or commit unless the user explicitly asks. + +## Decision Guidance + +- Small localized fix: run targeted mode. +- Controller, output, formatter, schema, or validation fix: run targeted mode and make sure output-contract tests are included. 
+- Broad model, service, schema, or cross-path change: run full mode. +- Before commit or staging: run pre-stage mode. + +## Manual Smoke Reminder + +Real Hugging Face generation remains optional and manual because the automated regression suite is offline and deterministic. Mention this residual integration risk in final responses when relevant. diff --git a/scripts/dev/check_clean_state.py b/scripts/dev/check_clean_state.py new file mode 100644 index 0000000..e42526c --- /dev/null +++ b/scripts/dev/check_clean_state.py @@ -0,0 +1,318 @@ +"""Phase A cleanup guardrail for staged files. + +This script is read-only. It does not modify files, stage files, commit files, +validate CycloneDX output, or prove AIBOM correctness. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +import subprocess +import sys +from dataclasses import dataclass + + +FORBIDDEN_DOC_PREFIXES = ( + "docs/analysis/", + "docs/design/", + "docs/reference/", + "docs/requirements/", + "docs/analysis/current-output-samples/", +) + +PHASE_1_LOCAL_FILES = { + "src/models/aibom_domain.py", + "src/models/aibom_normalizer.py", + "tests/test_aibom_domain.py", + "tests/test_aibom_normalizer.py", + "scripts/dev/check_cyclonedx_capabilities.py", +} + +DEPENDENCY_FILES = { + "pyproject.toml", + "requirements.txt", + "requirements-dev.txt", + "uv.lock", + "poetry.lock", + "Pipfile", + "Pipfile.lock", + "setup.py", + "setup.cfg", +} + +SOURCE_PREFIXES = ( + "src/", +) + +TEST_PREFIXES = ( + "tests/", +) + +ALLOWED_GUARDRAIL_TEST_FILES = { + "tests/test_clean_state.py", + "tests/test_regression_runner.py", +} + +WORKSET_PREFIXES = ( + ".ai-workset/", +) + +PYTEST_CACHE_PREFIXES = ( + "pytest-cache-files-", +) + +GENERATED_OR_SYNTHETIC_MARKERS = ( + "current-output-samples/", + "synthetic", + "generated", +) + +GENERATED_OR_SYNTHETIC_PREFIXES = ( + "dist/", + "build/", + "sboms/", +) + + +@dataclass(frozen=True) +class Finding: + path: str + reason: str + + +def run_git(args: 
list[str]) -> str: + result = subprocess.run( + ["git", *args], + check=False, + capture_output=True, + text=True, + encoding="utf-8", + ) + if result.returncode != 0: + message = result.stderr.strip() or result.stdout.strip() or "git command failed" + raise RuntimeError(message) + return result.stdout + + +def normalize_path(path: str) -> str: + return path.replace("\\", "/")[2:] if path[:2] in ("./", ".\\") else path.replace("\\", "/")  # drop only a literal "./" prefix; lstrip("./") strips a character set and mangled dot-dirs such as ".ai-workset/" + + +def staged_paths() -> list[str]: + output = run_git(["diff", "--cached", "--name-only", "--diff-filter=ACMRTD"]) + return [normalize_path(line) for line in output.splitlines() if line.strip()] + + +def unstaged_or_untracked_paths() -> list[str]: + output = run_git(["status", "--porcelain=v1", "-uall"]) + paths: list[str] = [] + + for line in output.splitlines(): + if len(line) < 4: + continue + index_status = line[0] + worktree_status = line[1] + raw_path = line[3:] + + if " -> " in raw_path: + raw_path = raw_path.rsplit(" -> ", maxsplit=1)[1] + + if index_status == "?" or worktree_status not in {" ", "!"}: + paths.append(normalize_path(raw_path.strip('"'))) + + paths.extend(root_pytest_cache_dirs()) + return sorted(set(paths)) + + +def root_pytest_cache_dirs() -> list[str]: + return [ + path.name + for path in Path.cwd().iterdir() + if path.is_dir() and path.name.startswith(PYTEST_CACHE_PREFIXES) + ] + + +def is_generated_or_synthetic(path: str) -> bool: + normalized = normalize_path(path).lower() + if any(normalized.startswith(prefix) for prefix in GENERATED_OR_SYNTHETIC_PREFIXES): + return True + return any(marker in normalized for marker in GENERATED_OR_SYNTHETIC_MARKERS) + + +def is_source(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(SOURCE_PREFIXES) + + +def is_test(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(TEST_PREFIXES) + + +def is_allowed_guardrail_test(path: str) -> bool: + normalized = normalize_path(path) + return normalized in ALLOWED_GUARDRAIL_TEST_FILES + + +def 
is_readme(path: str) -> bool: + normalized = normalize_path(path).lower() + return normalized in {"readme", "readme.md"} or normalized.startswith("readme.") + + +def is_workset(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(WORKSET_PREFIXES) + + +def is_pytest_cache_files(path: str) -> bool: + normalized = normalize_path(path) + return any(normalized.startswith(prefix) for prefix in PYTEST_CACHE_PREFIXES) + + +def collect_findings( + paths: list[str], + allow_dependency_files: bool, + allow_source_files: bool, + allow_future_mode_files: bool, +) -> list[Finding]: + findings: list[Finding] = [] + + for path in paths: + normalized = normalize_path(path) + + if any(normalized.startswith(prefix) for prefix in FORBIDDEN_DOC_PREFIXES): + findings.append(Finding(normalized, "staged local analysis/design/reference/requirements material")) + + if is_generated_or_synthetic(normalized): + findings.append(Finding(normalized, "staged generated or synthetic sample material")) + + if normalized in DEPENDENCY_FILES and not allow_dependency_files: + findings.append(Finding(normalized, "staged dependency file requires explicit review")) + + if is_source(normalized) and not allow_source_files: + findings.append(Finding(normalized, "staged source file is outside Phase A guardrail scope")) + + if normalized in PHASE_1_LOCAL_FILES and not allow_future_mode_files: + findings.append(Finding(normalized, "staged local Phase 1 implementation-shaped file requires explicit review")) + + if is_test(normalized) and not is_allowed_guardrail_test(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged test file is outside Phase A guardrail scope")) + + if is_readme(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged README or end-user doc is outside Phase A guardrail scope")) + + if is_workset(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged 
workset artifact is outside Phase A guardrail scope")) + + if is_pytest_cache_files(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged pytest cache file is generated local material")) + + return findings + + +def collect_warnings(paths: list[str]) -> list[Finding]: + warnings: list[Finding] = [] + + for path in paths: + normalized = normalize_path(path) + + if any(normalized.startswith(prefix) for prefix in FORBIDDEN_DOC_PREFIXES): + warnings.append(Finding(normalized, "local analysis/design/reference/requirements material needs human review before staging")) + + if is_generated_or_synthetic(normalized): + warnings.append(Finding(normalized, "generated or synthetic sample material needs human review before staging")) + + if normalized in DEPENDENCY_FILES: + warnings.append(Finding(normalized, "dependency file needs human review before staging")) + + if normalized in PHASE_1_LOCAL_FILES: + warnings.append(Finding(normalized, "local Phase 1 implementation-shaped file needs human review before staging")) + + if is_source(normalized): + warnings.append(Finding(normalized, "source file change needs human review before staging")) + + if is_workset(normalized): + warnings.append(Finding(normalized, "workset artifact needs human review before staging")) + + if is_pytest_cache_files(normalized): + warnings.append(Finding(normalized, "pytest cache temp directory should be cleaned or ignored before cleanup fixes")) + + return warnings + + +def print_status_note() -> None: + status = run_git(["status", "--short", "-uall"]) + if status.strip(): + print("Working tree has unstaged or untracked changes:") + print(status.rstrip()) + else: + print("Working tree is clean.") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Check Phase A cleanup guardrails for staged files.") + parser.add_argument( + "--allow-dependency-files", + action="store_true", + help="Allow staged dependency files 
after explicit human review.", + ) + parser.add_argument( + "--allow-source-files", + action="store_true", + help="Allow staged source files after leaving Phase A guardrail mode.", + ) + parser.add_argument( + "--allow-future-mode-files", + action="store_true", + help="Allow staged tests, README, workset artifacts, and local investigation files after explicit future-mode review.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + + print("Phase A cleanup guardrail check") + print("Note: this script is read-only and does not validate CycloneDX output or prove AIBOM correctness.") + print() + + try: + paths = staged_paths() + findings = collect_findings( + paths, + args.allow_dependency_files, + args.allow_source_files, + args.allow_future_mode_files, + ) + warnings = collect_warnings(unstaged_or_untracked_paths()) + print_status_note() + except RuntimeError as error: + print(f"FAIL: {error}", file=sys.stderr) + return 2 + + print() + if not paths: + print("No staged files found.") + + if findings: + print("FAIL: staged files violate Phase A cleanup guardrails.") + print("Affected files:") + for finding in findings: + print(f"- {finding.path}: {finding.reason}") + return 1 + + if warnings: + print("WARN: risky unstaged or untracked files exist and need human review before staging.") + print("Affected files:") + for warning in warnings: + print(f"- {warning.path}: {warning.reason}") + print() + + print("PASS: no staged files violate Phase A cleanup guardrails.") + print("PASS does not mean the repository is clean; review any WARN lines and git status output above.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/dev/run_regression_suite.py b/scripts/dev/run_regression_suite.py new file mode 100644 index 0000000..ca9a550 --- /dev/null +++ b/scripts/dev/run_regression_suite.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import argparse +import 
shutil +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +MODEL_FILE_TESTS = [ + "tests/test_model_file_extraction.py", + "tests/test_gguf_metadata.py", + "tests/test_safetensors_metadata.py", + "tests/test_hyperparameter_wiring.py", + "tests/test_aibom_hyperparameters.py", +] + +TARGETED_TEST_MAP = { + "src/controllers/cli_controller.py": [ + "tests/test_cli_controller.py", + "tests/test_aibom_output_contract.py", + ], + "src/controllers/web_controller.py": [ + "tests/test_web_controller.py", + "tests/test_aibom_output_contract.py", + ], + "src/utils/formatter.py": [ + "tests/test_formatter.py", + "tests/test_aibom_output_contract.py", + "tests/test_functional_regression.py", + ], + "src/utils/validation.py": [ + "tests/test_validation.py", + "tests/test_aibom_output_contract.py", + ], + "src/models/service.py": [ + "tests/test_service.py", + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", + ], + "src/models/model_file_extractors.py": MODEL_FILE_TESTS, + "src/models/gguf_metadata.py": MODEL_FILE_TESTS, + "src/models/safetensors_metadata.py": MODEL_FILE_TESTS, + "src/models/config_parsing.py": MODEL_FILE_TESTS, + "src/utils/license_utils.py": [ + "tests/test_license_utils.py", + "tests/test_service.py", + ], + "src/models/aibom_domain.py": [ + "tests/test_aibom_domain.py", + ], + "src/models/aibom_normalizer.py": [ + "tests/test_aibom_normalizer.py", + ], + "scripts/dev/check_clean_state.py": [ + "tests/test_clean_state.py", + ], + "scripts/dev/run_regression_suite.py": [ + "tests/test_regression_runner.py", + ], +} + +DEFAULT_TARGETED_TESTS = [ + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", +] + +DOC_ONLY_PREFIXES = ("docs/",) +DOC_ONLY_FILES = {"README.md", "CONTRIBUTING.md"} + + +@dataclass(frozen=True) +class Selection: + changed_files: list[str] + groups: list[str] + pytest_targets: list[str] + 
run_clean_state: bool + docs_only: bool + full_recommended: bool + + +@dataclass(frozen=True) +class CommandResult: + command: list[str] + returncode: int + + +def normalize_path(path: str) -> str: + return path.strip().replace("\\", "/") + + +def unique(items: list[str]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for item in items: + if item not in seen: + seen.add(item) + result.append(item) + return result + + +def is_docs_only_file(path: str) -> bool: + normalized = normalize_path(path) + return normalized in DOC_ONLY_FILES or normalized.startswith(DOC_ONLY_PREFIXES) + + +def command_text(command: list[str]) -> str: + return " ".join(command) + + +def clean_state_command() -> list[str]: + script = "scripts/dev/check_clean_state.py" + if sys.platform.startswith("win") and shutil.which("py"): + return ["py", script] + return [sys.executable, script] + + +def pytest_command(targets: list[str]) -> list[str]: + return [sys.executable, "-m", "pytest", *targets] + + +def collect_git_files(args: list[str]) -> list[str]: + completed = subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0: + if completed.stdout: + print(completed.stdout, end="") + if completed.stderr: + print(completed.stderr, end="", file=sys.stderr) + raise SystemExit(completed.returncode) + return [normalize_path(line) for line in completed.stdout.splitlines() if line.strip()] + + +def changed_files() -> list[str]: + files: list[str] = [] + files.extend(collect_git_files(["diff", "--name-only"])) + files.extend(collect_git_files(["diff", "--cached", "--name-only"])) + files.extend(collect_git_files(["ls-files", "--others", "--exclude-standard"])) + return unique(files) + + +def select_targets(files: list[str]) -> Selection: + normalized_files = unique([normalize_path(path) for path in files]) + if normalized_files and all(is_docs_only_file(path) for path in normalized_files): + return Selection( + 
changed_files=normalized_files, + groups=["docs-only guardrail"], + pytest_targets=[], + run_clean_state=True, + docs_only=True, + full_recommended=False, + ) + + groups: list[str] = [] + pytest_targets: list[str] = [] + run_clean_state = False + + for path in normalized_files: + mapped_tests = TARGETED_TEST_MAP.get(path) + if mapped_tests: + groups.append(path) + pytest_targets.extend(mapped_tests) + if path == "scripts/dev/check_clean_state.py": + run_clean_state = True + continue + + if path.startswith("tests/test_") and path.endswith(".py"): + groups.append("changed test file") + pytest_targets.append(path) + + pytest_targets = unique(pytest_targets) + + if not pytest_targets and not run_clean_state: + groups.append("default functional/output-contract regression") + pytest_targets = DEFAULT_TARGETED_TESTS.copy() + + return Selection( + changed_files=normalized_files, + groups=unique(groups), + pytest_targets=pytest_targets, + run_clean_state=run_clean_state, + docs_only=False, + full_recommended="default functional/output-contract regression" in groups, + ) + + +def print_file_list(title: str, files: list[str]) -> None: + print(f"\n{title}") + if not files: + print("- none") + return + for path in files: + print(f"- {path}") + + +def print_selection(selection: Selection) -> None: + print_file_list("Changed files detected:", selection.changed_files) + print_file_list("Selected test groups:", selection.groups) + print_file_list("Selected pytest targets:", selection.pytest_targets) + if selection.docs_only: + print("\nDocs-only changes detected; product tests are not run by targeted mode by default.") + if selection.full_recommended: + print("\nNo targeted mapping matched; full suite is recommended before staging.") + + +def run_command(command: list[str]) -> CommandResult: + print(f"\nRunning: {command_text(command)}") + completed = subprocess.run(command, cwd=REPO_ROOT, check=False) + status = "PASS" if completed.returncode == 0 else "FAIL" + print(f"Result: 
{status} ({completed.returncode})") + return CommandResult(command=command, returncode=completed.returncode) + + +def summarize(results: list[CommandResult], next_action: str) -> int: + failed = [result for result in results if result.returncode != 0] + print("\nCommands run:") + if not results: + print("- none") + for result in results: + status = "PASS" if result.returncode == 0 else "FAIL" + print(f"- {command_text(result.command)} -> {status} ({result.returncode})") + + overall = 1 if failed else 0 + print(f"\nOverall status: {'FAIL' if failed else 'PASS'}") + print(f"Next recommended action: {next_action}") + return overall + + +def run_targeted() -> int: + selection = select_targets(changed_files()) + print_selection(selection) + + results: list[CommandResult] = [] + if selection.pytest_targets: + results.append(run_command(pytest_command(selection.pytest_targets))) + if selection.run_clean_state: + results.append(run_command(clean_state_command())) + + if selection.docs_only: + next_action = "Review docs changes, then run full or pre-stage mode before staging if product behavior changed." + elif selection.full_recommended: + next_action = "Run --mode full before staging because targeted mode used the default fallback." + else: + next_action = "Run --mode full or --mode pre-stage before staging or committing." + return summarize(results, next_action) + + +def run_full() -> int: + results = [ + run_command(pytest_command(["tests"])), + run_command(clean_state_command()), + run_command(["git", "diff", "--cached", "--name-only"]), + ] + return summarize(results, "Address any failures, then run --mode pre-stage before manual staging.") + + +def run_pre_stage() -> int: + print("\nStaging remains manual. 
This runner does not stage or commit files.") + results = [ + run_command(pytest_command(["tests"])), + run_command(clean_state_command()), + run_command(["git", "diff", "--cached", "--name-only"]), + run_command(["git", "status", "--short", "-uall"]), + ] + return summarize(results, "If all checks pass, manually review git status before staging.") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run repo regression checks.") + parser.add_argument( + "--mode", + choices=("targeted", "full", "pre-stage"), + required=True, + help="Regression mode to run.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(sys.argv[1:] if argv is None else argv) + if args.mode == "targeted": + return run_targeted() + if args.mode == "full": + return run_full() + if args.mode == "pre-stage": + return run_pre_stage() + raise AssertionError(f"Unexpected mode: {args.mode}") + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_clean_state.py b/tests/test_clean_state.py new file mode 100644 index 0000000..3f4d182 --- /dev/null +++ b/tests/test_clean_state.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys +import unittest + + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "dev" / "check_clean_state.py" + +spec = importlib.util.spec_from_file_location("check_clean_state", SCRIPT_PATH) +check_clean_state = importlib.util.module_from_spec(spec) +assert spec.loader is not None +sys.modules[spec.name] = check_clean_state +spec.loader.exec_module(check_clean_state) + + +class CleanStateGuardrailTests(unittest.TestCase): + def test_private_analysis_path_staged_fails(self) -> None: + findings = check_clean_state.collect_findings( + ["docs/analysis/local-report.md"], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + 
self.assertIn( + "staged local analysis/design/reference/requirements material", + {finding.reason for finding in findings}, + ) + + def test_private_analysis_path_unstaged_warns_only(self) -> None: + warnings = check_clean_state.collect_warnings(["docs/analysis/local-report.md"]) + findings = check_clean_state.collect_findings( + [], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + self.assertIn( + "local analysis/design/reference/requirements material needs human review before staging", + {warning.reason for warning in warnings}, + ) + + def test_dependency_file_staged_fails(self) -> None: + findings = check_clean_state.collect_findings( + ["requirements.txt"], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual( + ["staged dependency file requires explicit review"], + [finding.reason for finding in findings], + ) + + def test_dependency_file_modified_only_warns(self) -> None: + warnings = check_clean_state.collect_warnings(["uv.lock"]) + + self.assertEqual( + ["dependency file needs human review before staging"], + [warning.reason for warning in warnings], + ) + + def test_no_staged_private_files_passes(self) -> None: + findings = check_clean_state.collect_findings( + [ + "docs/dev/cleanup-guardrails.md", + "scripts/dev/check_clean_state.py", + "tests/test_clean_state.py", + ], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + + def test_regression_tooling_staged_set_passes(self) -> None: + findings = check_clean_state.collect_findings( + [ + ".codex/skills/regression-suite/SKILL.md", + "docs/dev/cleanup-guardrails.md", + "docs/dev/regression-testing.md", + "scripts/dev/check_clean_state.py", + "scripts/dev/run_regression_suite.py", + "tests/test_clean_state.py", + "tests/test_regression_runner.py", + ], + 
allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + + def test_source_file_modified_only_warns(self) -> None: + warnings = check_clean_state.collect_warnings(["src/models/service.py"]) + + self.assertEqual( + ["source file change needs human review before staging"], + [warning.reason for warning in warnings], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_regression_runner.py b/tests/test_regression_runner.py new file mode 100644 index 0000000..ef71f4c --- /dev/null +++ b/tests/test_regression_runner.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys +import unittest + + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "dev" / "run_regression_suite.py" + +spec = importlib.util.spec_from_file_location("run_regression_suite", SCRIPT_PATH) +run_regression_suite = importlib.util.module_from_spec(spec) +assert spec.loader is not None +sys.modules[spec.name] = run_regression_suite +spec.loader.exec_module(run_regression_suite) + + +class RegressionRunnerSelectionTests(unittest.TestCase): + def test_formatter_change_runs_output_contract_and_functional_tests(self) -> None: + selection = run_regression_suite.select_targets(["src/utils/formatter.py"]) + + self.assertEqual( + [ + "tests/test_formatter.py", + "tests/test_aibom_output_contract.py", + "tests/test_functional_regression.py", + ], + selection.pytest_targets, + ) + self.assertFalse(selection.docs_only) + self.assertFalse(selection.full_recommended) + + def test_docs_only_change_runs_clean_state_without_product_tests(self) -> None: + selection = run_regression_suite.select_targets( + ["README.md", "docs/dev/regression-testing.md"] + ) + + self.assertEqual([], selection.pytest_targets) + self.assertTrue(selection.run_clean_state) + self.assertTrue(selection.docs_only) + + def 
test_unmapped_source_change_uses_default_fallback(self) -> None: + selection = run_regression_suite.select_targets(["src/models/new_surface.py"]) + + self.assertEqual( + [ + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", + ], + selection.pytest_targets, + ) + self.assertTrue(selection.full_recommended) + + def test_clean_state_change_runs_its_unit_test_and_guardrail(self) -> None: + selection = run_regression_suite.select_targets(["scripts/dev/check_clean_state.py"]) + + self.assertEqual(["tests/test_clean_state.py"], selection.pytest_targets) + self.assertTrue(selection.run_clean_state) + + +if __name__ == "__main__": + unittest.main() From cee789f1a430aeeda5c970c255a5d14a9c034226 Mon Sep 17 00:00:00 2001 From: Helen Oakley <69336525+e2hln@users.noreply.github.com> Date: Tue, 23 Jun 2026 23:24:02 -0400 Subject: [PATCH 2/3] Fix cleanup output behavior and regressions --- src/controllers/cli_controller.py | 25 ++- src/controllers/web_controller.py | 17 +- src/models/service.py | 2 +- src/templates/result.html | 8 +- src/utils/formatter.py | 7 + src/utils/validation.py | 31 +++- tests/test_aibom_output_contract.py | 263 ++++++++++++++++++++++++++++ tests/test_cli_controller.py | 175 ++++++++++++++++++ tests/test_formatter.py | 68 +++++++ tests/test_functional_regression.py | 96 ++++++++++ tests/test_service.py | 17 ++ tests/test_validation.py | 15 ++ tests/test_web_controller.py | 157 +++++++++++++++++ 13 files changed, 863 insertions(+), 18 deletions(-) create mode 100644 tests/test_aibom_output_contract.py create mode 100644 tests/test_cli_controller.py create mode 100644 tests/test_formatter.py create mode 100644 tests/test_functional_regression.py create mode 100644 tests/test_web_controller.py diff --git a/src/controllers/cli_controller.py b/src/controllers/cli_controller.py index 627bdb8..a035f9e 100644 --- a/src/controllers/cli_controller.py +++ b/src/controllers/cli_controller.py @@ -10,6 +10,13 @@ logger = 
logging.getLogger(__name__) + +def _ensure_parent_dir(path: str) -> None: + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + + class CLIController: def __init__(self): self.service = AIBOMService() @@ -61,9 +68,12 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf base, ext = os.path.splitext(output_file_1_6) output_file_1_7 = f"{base.replace('_1_6', '')}_1_7{ext}" if '_1_6' in base else f"{base}_1_7{ext}" - with open(output_file_1_6, 'w') as f: + _ensure_parent_dir(output_file_1_6) + _ensure_parent_dir(output_file_1_7) + + with open(output_file_1_6, 'w', encoding="utf-8") as f: f.write(json_1_6) - with open(output_file_1_7, 'w') as f: + with open(output_file_1_7, 'w', encoding="utf-8") as f: f.write(json_1_7) # Check for validation results @@ -125,8 +135,10 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf html_content = template.render(context) html_output_file = output_file_primary.replace("_1_6.json", ".html").replace(".json", ".html") - with open(html_output_file, "w") as f: + html_temp_file = f"{html_output_file}.tmp" + with open(html_temp_file, "w", encoding="utf-8") as f: f.write(html_content) + os.replace(html_temp_file, html_output_file) logger.info("HTML Report: %s", html_output_file) @@ -174,7 +186,12 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf logger.info("License: %s", ", ".join(license_list)) except Exception as e: - logger.warning("Failed to generate HTML report: %s", e) + if "html_temp_file" in locals() and os.path.exists(html_temp_file): + try: + os.remove(html_temp_file) + except OSError: + logger.debug("Failed to remove temporary HTML report: %s", html_temp_file, exc_info=True) + logger.warning("Failed to generate HTML report: %s", e, exc_info=True) for r in reports: spec = r.get("spec_version", "1.6") diff --git a/src/controllers/web_controller.py b/src/controllers/web_controller.py index 2f4c458..502101b 
100644 --- a/src/controllers/web_controller.py +++ b/src/controllers/web_controller.py @@ -48,7 +48,7 @@ def is_valid_hf_input(input_str: str) -> bool: @router.get("/", response_class=HTMLResponse) async def root(request: Request): - return templates.TemplateResponse("index.html", { + return templates.TemplateResponse(request, "index.html", { "request": request, "sbom_count": get_sbom_count() }) @@ -67,7 +67,7 @@ async def generate_form( # Security: Validate BEFORE sanitizing to prevent bypass attacks # (e.g., → <script>org/model</script> could slip through) if not is_valid_hf_input(model_id): - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": "Invalid model ID format.", "sbom_count": get_sbom_count(), @@ -86,14 +86,14 @@ async def generate_form( loop = asyncio.get_running_loop() await loop.run_in_executor(None, lambda: HfApi().model_info(normalized_id)) except RepositoryNotFoundError: - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Model {normalized_id} not found on Hugging Face.", "sbom_count": get_sbom_count(), "model_id": normalized_id }) except Exception as e: - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Error verifying model: {e}", "sbom_count": get_sbom_count(), @@ -120,9 +120,10 @@ def _save_task(): json_1_6 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.6") json_1_7 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.7") - with open(filepath, "w") as f: + os.makedirs(OUTPUT_DIR, exist_ok=True) + with open(filepath, "w", encoding="utf-8") as f: f.write(json_1_6) - with open(filepath_1_7, "w") as f: + with open(filepath_1_7, "w", encoding="utf-8") as f: f.write(json_1_7) log_sbom_generation(sanitized_model_id) return json_1_6, json_1_7 @@ -155,11 +156,11 @@ 
def _save_task(): "result_file": f"/output/{filename}" } - return templates.TemplateResponse("result.html", context) + return templates.TemplateResponse(request, "result.html", context) except Exception as e: logger.error(f"Generation error: {e}", exc_info=True) - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Internal generation error: {e}", "sbom_count": get_sbom_count(), diff --git a/src/models/service.py b/src/models/service.py index c38ad9f..f3b676b 100644 --- a/src/models/service.py +++ b/src/models/service.py @@ -734,7 +734,7 @@ def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any] considerations["technicalLimitations"] = [metadata["technicalLimitations"]] # ethicalConsiderations if "ethicalConsiderations" in metadata: - considerations["ethicalConsiderations"] = [{"name": "Ethical Considerations", "description": metadata["ethicalConsiderations"]}] + considerations["ethicalConsiderations"] = [{"name": metadata["ethicalConsiderations"]}] if considerations: section["considerations"] = considerations diff --git a/src/templates/result.html b/src/templates/result.html index fddf4e3..32a1c47 100644 --- a/src/templates/result.html +++ b/src/templates/result.html @@ -32,10 +32,10 @@

📋  AIBOM Summary

{{ aibom.metadata.timestamp }}
- SBOM Format: + CycloneDX Exports: {{ - aibom.bomFormat }} {{ aibom.specVersion }}, + aibom.bomFormat }} 1.6, {{ aibom.bomFormat }} 1.7 @@ -823,7 +823,7 @@

Penalty Structure:

📄  Raw JSON View

-

This is the complete AIBOM components array in CycloneDX JSON format:

+

Generated components section in CycloneDX JSON format:

{{ components_json }}
@@ -842,4 +842,4 @@

📄  Raw JSON View

- \ No newline at end of file + diff --git a/src/utils/formatter.py b/src/utils/formatter.py index c170924..6dd6ae3 100644 --- a/src/utils/formatter.py +++ b/src/utils/formatter.py @@ -2,6 +2,11 @@ import copy from typing import Dict, Any +CYCLONEDX_SCHEMA_BY_SPEC_VERSION = { + "1.6": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "1.7": "http://cyclonedx.org/schema/bom-1.7.schema.json", +} + def export_aibom(aibom: Dict[str, Any], bom_type: str = "cyclonedx", spec_version: str = "1.6") -> str: """ Exports the internal AIBOM object into a specified format and specification version. @@ -13,6 +18,8 @@ def export_aibom(aibom: Dict[str, Any], bom_type: str = "cyclonedx", spec_versio if bom_type.lower() == "cyclonedx": output["bomFormat"] = "CycloneDX" output["specVersion"] = spec_version + if spec_version in CYCLONEDX_SCHEMA_BY_SPEC_VERSION: + output["$schema"] = CYCLONEDX_SCHEMA_BY_SPEC_VERSION[spec_version] # Any specific CycloneDX mappings or adjustments can be placed here over time. elif bom_type.lower() == "spdx": diff --git a/src/utils/validation.py b/src/utils/validation.py index b463908..3074d59 100644 --- a/src/utils/validation.py +++ b/src/utils/validation.py @@ -16,6 +16,35 @@ _validator = JsonValidator(SchemaVersion.V1_6) +def _format_validation_error(error: Any) -> str: + message = getattr(error, "message", None) + if message is None: + message = str(error) + else: + message = str(message) + + for path_attr in ("data_path", "path", "json_path", "absolute_path", "relative_path"): + try: + path = getattr(error, path_attr) + except Exception: + continue + + if path is None: + continue + + if isinstance(path, str): + location = path + else: + try: + location = ".".join(str(part) for part in path) + except TypeError: + location = str(path) + + return f"[{location or 'root'}] {message}" + + return message + + def validate_aibom(aibom: Dict[str, Any], strict: bool = False) -> Tuple[bool, List[str]]: """ Validate an AIBOM against the CycloneDX 1.6 schema. 
@@ -31,7 +60,7 @@ def validate_aibom(aibom: Dict[str, Any], strict: bool = False) -> Tuple[bool, L errors = _validator.validate_str(json.dumps(aibom), all_errors=True) if errors is None: return True, [] - messages = [f"[{'.'.join(str(p) for p in e.data_path) or 'root'}] {e.message}" for e in errors] + messages = [_format_validation_error(e) for e in errors] return False, messages except Exception as e: logger.warning("Validation failed unexpectedly: %s", e) diff --git a/tests/test_aibom_output_contract.py b/tests/test_aibom_output_contract.py new file mode 100644 index 0000000..068b85e --- /dev/null +++ b/tests/test_aibom_output_contract.py @@ -0,0 +1,263 @@ +import json +import unittest +from typing import Any, Dict, Iterator, List, Tuple + +from cyclonedx.schema import SchemaVersion +from cyclonedx.validation.json import JsonStrictValidator + +from src.utils.formatter import export_aibom + + +CYCLONEDX_SCHEMAS = { + "1.6": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "1.7": "http://cyclonedx.org/schema/bom-1.7.schema.json", +} + +SCHEMA_VERSIONS = { + "1.6": SchemaVersion.V1_6, + "1.7": SchemaVersion.V1_7, +} + +ALLOWED_PROPERTY_PREFIXES = ("genai:aibom:",) + +# Legacy/current-output compatibility only; prefer namespaced +# genai:aibom:* properties for any new AIBOM metadata. 
+LEGACY_PROPERTY_NAME_ALLOWLIST = set() + +KNOWN_ROOT_DEPENDENCY_REFS = {"metadata"} + + +def _sample_aibom() -> Dict[str, Any]: + model_ref = "pkg:huggingface/acme/current-contract-model@12345678" + + return { + "$schema": CYCLONEDX_SCHEMAS["1.6"], + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:12345678-1234-5678-1234-567812345678", + "version": 1, + "metadata": { + "component": { + "type": "application", + "name": "aibom-generator", + "version": "test", + "bom-ref": "aibom-generator", + "properties": [ + {"name": "genai:aibom:fixture", "value": "true"}, + ], + }, + "properties": [ + {"name": "genai:aibom:source", "value": "deterministic-test"}, + ], + }, + "components": [ + { + "type": "machine-learning-model", + "name": "current-contract-model", + "version": "12345678", + "bom-ref": model_ref, + "purl": model_ref, + "externalReferences": [ + { + "type": "website", + "url": "https://huggingface.co/acme/current-contract-model", + } + ], + "modelCard": { + "modelParameters": { + "task": "text-classification", + "modelArchitecture": "transformer", + "inputs": [{"format": "string"}], + "outputs": [{"format": "string"}], + "datasets": [ + { + "type": "dataset", + "name": "contract-fixture-dataset", + } + ], + }, + "considerations": { + "ethicalConsiderations": [ + {"name": "May reflect training-data bias."} + ], + }, + "properties": [ + {"name": "genai:aibom:model-card", "value": "present"}, + ], + }, + "properties": [ + {"name": "genai:aibom:model-id", "value": "acme/current-contract-model"}, + ], + } + ], + "dependencies": [ + {"ref": "metadata", "dependsOn": [model_ref]}, + {"ref": model_ref, "dependsOn": []}, + ], + "properties": [ + {"name": "genai:aibom:contract-version", "value": "current"}, + ], + } + + +def _exported_documents() -> Iterator[Tuple[str, Dict[str, Any]]]: + for spec_version in ("1.6", "1.7"): + yield spec_version, json.loads( + export_aibom( + _sample_aibom(), + bom_type="cyclonedx", + spec_version=spec_version, 
+ ) + ) + + +def _format_validation_error(error: Any) -> str: + message = getattr(error, "message", str(error)) + path = getattr(error, "data_path", None) or getattr(error, "path", None) + if path is None: + return str(message) + if isinstance(path, str): + location = path or "root" + else: + location = ".".join(str(part) for part in path) or "root" + return f"[{location}] {message}" + + +def _validate_cdx_schema(document: Dict[str, Any], spec_version: str) -> List[str]: + validator = JsonStrictValidator(SCHEMA_VERSIONS[spec_version]) + errors = validator.validate_str(json.dumps(document), all_errors=True) + if errors is None: + return [] + return [_format_validation_error(error) for error in errors] + + +def _find_model_components(document: Dict[str, Any]) -> List[Dict[str, Any]]: + return [ + component + for component in document.get("components", []) + if component.get("type") == "machine-learning-model" + ] + + +def _iter_properties(document: Dict[str, Any]) -> Iterator[Tuple[str, Dict[str, Any]]]: + property_locations = [ + ("root", document), + ("metadata", document.get("metadata", {})), + ("metadata.component", document.get("metadata", {}).get("component", {})), + ] + + for index, component in enumerate(document.get("components", [])): + property_locations.append((f"components[{index}]", component)) + model_card = component.get("modelCard") + if isinstance(model_card, dict): + property_locations.append((f"components[{index}].modelCard", model_card)) + + for location, container in property_locations: + properties = container.get("properties") if isinstance(container, dict) else None + if properties is None: + continue + for prop in properties: + yield location, prop + + +def _assert_current_ai_ml_contract(test_case: unittest.TestCase, document: Dict[str, Any]) -> None: + model_components = _find_model_components(document) + test_case.assertGreaterEqual(len(model_components), 1) + + model = model_components[0] + for field in ("name", "version", "purl"): + 
test_case.assertIn(field, model) + test_case.assertTrue(model.get("bom-ref") or model.get("purl")) + + model_card = model.get("modelCard") + test_case.assertIsInstance(model_card, dict) + model_parameters = model_card.get("modelParameters") + test_case.assertIsInstance(model_parameters, dict) + test_case.assertIn("task", model_parameters) + test_case.assertIn("modelArchitecture", model_parameters) + + for field in ("inputs", "outputs", "datasets"): + if field in model_parameters: + test_case.assertIsInstance(model_parameters[field], list) + + if "externalReferences" in model: + test_case.assertIsInstance(model["externalReferences"], list) + + ethical_considerations = ( + model_card.get("considerations", {}).get("ethicalConsiderations", []) + ) + for entry in ethical_considerations: + test_case.assertNotIn("description", entry) + + _assert_dependencies_reference_existing_refs(test_case, document) + + +def _assert_dependencies_reference_existing_refs( + test_case: unittest.TestCase, + document: Dict[str, Any], +) -> None: + known_refs = set(KNOWN_ROOT_DEPENDENCY_REFS) + + metadata_component = document.get("metadata", {}).get("component", {}) + if metadata_component.get("bom-ref"): + known_refs.add(metadata_component["bom-ref"]) + + for component in document.get("components", []): + if component.get("bom-ref"): + known_refs.add(component["bom-ref"]) + + for dependency in document.get("dependencies", []): + test_case.assertIn(dependency.get("ref"), known_refs) + for ref in dependency.get("dependsOn", []): + test_case.assertIn(ref, known_refs) + + +def _assert_property_taxonomy_contract(test_case: unittest.TestCase, document: Dict[str, Any]) -> None: + for location, prop in _iter_properties(document): + name = prop.get("name") + test_case.assertIsInstance(name, str, msg=location) + test_case.assertNotEqual(name.strip(), "", msg=location) + test_case.assertIn("value", prop, msg=location) + test_case.assertIsInstance(prop["value"], str, msg=name) + 
test_case.assertNotEqual(prop["value"], "", msg=name) + + allowed = name.startswith(ALLOWED_PROPERTY_PREFIXES) + legacy_allowed = name in LEGACY_PROPERTY_NAME_ALLOWLIST + test_case.assertTrue( + allowed or legacy_allowed, + msg=f"Unknown property namespace at {location}: {name}", + ) + + +class AIBOMOutputContractTests(unittest.TestCase): + def test_exported_cdx_1_6_and_1_7_outputs_are_schema_valid(self): + for spec_version, document in _exported_documents(): + with self.subTest(spec_version=spec_version): + self.assertEqual(document["bomFormat"], "CycloneDX") + self.assertEqual(document["specVersion"], spec_version) + self.assertEqual(document["$schema"], CYCLONEDX_SCHEMAS[spec_version]) + self.assertEqual(_validate_cdx_schema(document, spec_version), []) + + def test_exported_outputs_preserve_current_ai_ml_model_contract(self): + for spec_version, document in _exported_documents(): + with self.subTest(spec_version=spec_version): + _assert_current_ai_ml_contract(self, document) + + def test_emitted_properties_use_allowed_taxonomy_namespaces(self): + for spec_version, document in _exported_documents(): + with self.subTest(spec_version=spec_version): + _assert_property_taxonomy_contract(self, document) + + +# Known non-enforced gaps for future implementation work: +# - model as metadata.component when the model is the BOM subject +# - first-class data components for datasets +# - dataset bom-ref relationships +# - model file hashes +# - evidence/source/confidence metadata +# - full G7/BSI completeness +# - schema-aware CycloneDX 1.7 AI/ML mapper + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cli_controller.py b/tests/test_cli_controller.py new file mode 100644 index 0000000..2ab3c59 --- /dev/null +++ b/tests/test_cli_controller.py @@ -0,0 +1,175 @@ +import json +import shutil +import unittest +import uuid +from pathlib import Path + +from src.controllers.cli_controller import CLIController + + +def _sample_aibom() -> dict: + return { + 
"bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:12345678-1234-5678-1234-567812345678", + "version": 1, + "metadata": { + "timestamp": "2026-06-10T00:00:00Z", + "tools": { + "components": [ + {"name": "OWASP AIBOM Generator"} + ] + }, + }, + "components": [ + { + "type": "machine-learning-model", + "name": "local-cli-model", + "version": "1.0.0", + "description": "CLI HTML report unicode regression path \u2705", + "purl": "pkg:huggingface/owner/local-cli-model", + "licenses": [{"license": {"id": "Apache-2.0"}}], + } + ], + "externalReferences": [ + {"type": "website", "url": "https://huggingface.co/owner/local-cli-model"} + ], + } + + +def _sample_score() -> dict: + category_details = { + "required_fields": {"present_fields": 4, "total_fields": 4, "percentage": 100, "max_points": 20}, + "metadata": {"present_fields": 2, "total_fields": 2, "percentage": 100, "max_points": 20}, + "component_basic": {"present_fields": 4, "total_fields": 4, "percentage": 100, "max_points": 20}, + "component_model_card": {"present_fields": 0, "total_fields": 0, "percentage": 0, "max_points": 30}, + "external_references": {"present_fields": 1, "total_fields": 1, "percentage": 100, "max_points": 10}, + } + section_scores = { + "required_fields": 20, + "metadata": 20, + "component_basic": 20, + "component_model_card": 0, + "external_references": 10, + } + return { + "total_score": 70, + "subtotal_score": 70, + "completeness_profile": {"name": "Basic", "description": "Regression test profile"}, + "field_checklist": { + "bomFormat": "\u2714 present", + "specVersion": "\u2714 present", + "serialNumber": "\u2714 present", + "version": "\u2714 present", + }, + "field_types": {}, + "reference_urls": {}, + "category_details": category_details, + "category_fields_list": { + "component_model_card": [], + "external_references": [], + }, + "section_scores": section_scores, + "max_scores": { + "required_fields": 20, + "metadata": 20, + "component_basic": 20, + 
"component_model_card": 30, + "external_references": 10, + }, + "missing_counts": {"critical": 0, "important": 0, "supplementary": 0}, + "recommendations": [], + "penalty_applied": False, + "penalty_percentage": 0, + "penalty_reason": "", + "penalty_factor": 1, + "validation": {"valid": True, "issues": []}, + } + + +class _FakeService: + def __init__(self): + self._aibom = _sample_aibom() + self._score = _sample_score() + + def generate_aibom(self, model_id, include_inference=False, enable_summarization=False, metadata_overrides=None): + return self._aibom + + def get_enhancement_report(self): + return {"final_score": self._score} + + @staticmethod + def _normalise_model_id(model_id): + return model_id + + +class CLIControllerTests(unittest.TestCase): + def test_generate_creates_parent_directory_for_nested_custom_output(self): + repo_root = Path(__file__).resolve().parents[1] + output_dir = repo_root / "sboms" / f"test-cli-controller-nested-{uuid.uuid4().hex}" / "reports" + try: + self.assertFalse(output_dir.exists()) + + output_file = output_dir / "nested-cli-output.json" + controller = CLIController() + controller.service = _FakeService() + + controller.generate( + "owner/local-cli-model", + output_file=str(output_file), + ) + + json_1_6 = output_file + json_1_7 = output_dir / "nested-cli-output_1_7.json" + html_report = output_dir / "nested-cli-output.html" + + self.assertTrue(output_dir.exists()) + self.assertGreater(json_1_6.stat().st_size, 0) + self.assertGreater(json_1_7.stat().st_size, 0) + self.assertTrue(html_report.exists()) + self.assertGreater(html_report.stat().st_size, 0) + + json.loads(json_1_6.read_text(encoding="utf-8")) + json.loads(json_1_7.read_text(encoding="utf-8")) + + html = html_report.read_text(encoding="utf-8") + self.assertIn("AIBOM Summary", html) + finally: + shutil.rmtree(output_dir.parent, ignore_errors=True) + + def test_generate_writes_non_empty_utf8_html_report(self): + repo_root = Path(__file__).resolve().parents[1] + output_dir 
= repo_root / "sboms" / f"test-cli-controller-{uuid.uuid4().hex}" + output_dir.mkdir(parents=True) + try: + output_file = output_dir / "cli-html-regression.json" + controller = CLIController() + controller.service = _FakeService() + + controller.generate( + "owner/local-cli-model", + output_file=str(output_file), + ) + + json_1_6 = output_file + json_1_7 = output_dir / "cli-html-regression_1_7.json" + html_report = output_dir / "cli-html-regression.html" + + self.assertGreater(json_1_6.stat().st_size, 0) + self.assertGreater(json_1_7.stat().st_size, 0) + self.assertTrue(html_report.exists()) + self.assertGreater(html_report.stat().st_size, 0) + + html = html_report.read_text(encoding="utf-8") + self.assertIn("AIBOM Summary", html) + self.assertIn("CycloneDX 1.6", html) + self.assertIn("CLI HTML report unicode regression path \u2705", html) + + json.loads(json_1_6.read_text(encoding="utf-8")) + json.loads(json_1_7.read_text(encoding="utf-8")) + finally: + shutil.rmtree(output_dir, ignore_errors=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_formatter.py b/tests/test_formatter.py new file mode 100644 index 0000000..1682a13 --- /dev/null +++ b/tests/test_formatter.py @@ -0,0 +1,68 @@ +import json +import unittest + +from src.utils.formatter import export_aibom + + +def _sample_aibom() -> dict: + return { + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:12345678-1234-5678-1234-567812345678", + "version": 1, + "components": [ + { + "type": "machine-learning-model", + "name": "example-model", + "modelCard": { + "modelParameters": {"task": "text-classification"}, + "considerations": { + "ethicalConsiderations": [ + {"name": "May reflect training-data bias."} + ] + }, + }, + } + ], + "dependencies": [ + {"ref": "metadata", "dependsOn": ["example-model"]} + ], + } + + +class TestFormatter(unittest.TestCase): + def 
test_export_cyclonedx_1_6_pairs_schema_and_spec_version(self): + exported = json.loads(export_aibom(_sample_aibom(), spec_version="1.6")) + + self.assertEqual(exported["bomFormat"], "CycloneDX") + self.assertEqual(exported["specVersion"], "1.6") + self.assertEqual( + exported["$schema"], + "http://cyclonedx.org/schema/bom-1.6.schema.json", + ) + + def test_export_cyclonedx_1_7_pairs_schema_and_spec_version(self): + exported = json.loads(export_aibom(_sample_aibom(), spec_version="1.7")) + + self.assertEqual(exported["bomFormat"], "CycloneDX") + self.assertEqual(exported["specVersion"], "1.7") + self.assertEqual( + exported["$schema"], + "http://cyclonedx.org/schema/bom-1.7.schema.json", + ) + + def test_export_cyclonedx_1_7_does_not_change_body_mapping(self): + exported_1_6 = json.loads(export_aibom(_sample_aibom(), spec_version="1.6")) + exported_1_7 = json.loads(export_aibom(_sample_aibom(), spec_version="1.7")) + + for output in (exported_1_6, exported_1_7): + output.pop("$schema") + output.pop("specVersion") + output.pop("bomFormat") + + self.assertEqual(exported_1_7, exported_1_6) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_functional_regression.py b/tests/test_functional_regression.py new file mode 100644 index 0000000..9d0c981 --- /dev/null +++ b/tests/test_functional_regression.py @@ -0,0 +1,96 @@ +import json +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +from src.models.service import AIBOMService +from src.utils.formatter import export_aibom + + +class FunctionalRegressionTests(unittest.TestCase): + def _generate_local_aibom(self) -> dict: + local_metadata = { + "name": "local-regression-model", + "author": "local tester", + "commit": "abcdef1234567890", + "pipeline_tag": "text-classification", + "ethicalConsiderations": "May reflect training-data bias.", + } + + with ( + patch("src.models.service.EnhancedExtractor") as extractor_cls, + patch("src.models.service.ModelCard.load", 
return_value=None), + patch("src.models.service.calculate_completeness_score") as score, + ): + extractor = extractor_cls.return_value + extractor.extract_metadata.return_value = local_metadata + extractor.extraction_results = {} + score.return_value = {"total_score": 50} + + service = AIBOMService( + hf_token="fake-token", + use_inference=False, + model_file_extractors=[], + ) + service.hf_api = MagicMock() + service.hf_api.model_info.return_value = MagicMock(sha=local_metadata["commit"]) + + return service.generate_aibom("owner/local-regression-model") + + def test_local_service_generation_produces_parseable_core_aibom(self): + aibom = self._generate_local_aibom() + parsed = json.loads(json.dumps(aibom)) + + for key in ( + "bomFormat", + "specVersion", + "serialNumber", + "version", + "metadata", + "components", + ): + self.assertIn(key, parsed) + + self.assertEqual(parsed["bomFormat"], "CycloneDX") + self.assertIsInstance(parsed["components"], list) + self.assertGreaterEqual(len(parsed["components"]), 1) + self.assertEqual(parsed["components"][0]["type"], "machine-learning-model") + + def test_ethical_considerations_do_not_emit_description_field(self): + aibom = self._generate_local_aibom() + ethical_considerations = ( + aibom["components"][0]["modelCard"]["considerations"]["ethicalConsiderations"] + ) + + self.assertGreaterEqual(len(ethical_considerations), 1) + for entry in ethical_considerations: + self.assertNotIn("description", entry) + + def test_cyclonedx_exports_pair_spec_version_and_schema_and_parse(self): + aibom = self._generate_local_aibom() + + expected_schemas = { + "1.6": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "1.7": "http://cyclonedx.org/schema/bom-1.7.schema.json", + } + + for spec_version, schema in expected_schemas.items(): + with self.subTest(spec_version=spec_version): + exported = json.loads(export_aibom(aibom, spec_version=spec_version)) + + self.assertEqual(exported["bomFormat"], "CycloneDX") + 
self.assertEqual(exported["specVersion"], spec_version) + self.assertEqual(exported["$schema"], schema) + self.assertIsInstance(exported["components"], list) + + def test_result_template_uses_neutral_export_wording(self): + template_path = Path(__file__).resolve().parents[1] / "src" / "templates" / "result.html" + template_text = template_path.read_text(encoding="utf-8").lower() + + self.assertNotIn("compatibility export", template_text) + self.assertNotIn("schema-aware", template_text) + self.assertNotIn("planned separately", template_text) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_service.py b/tests/test_service.py index 432b564..3bcfeb2 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -119,6 +119,23 @@ def test_create_aibom_structure_uses_cyclonedx_outputter(self): self.assertIn("$schema", aibom) self.assertEqual(aibom["components"][0]["type"], "machine-learning-model") + def test_create_aibom_structure_uses_valid_ethical_considerations(self): + metadata = { + "name": "test-model", + "ethicalConsiderations": "May reflect bias in training data." 
+ } + + aibom = self.service._create_aibom_structure("owner/test-model", metadata) + + ethical_considerations = ( + aibom["components"][0]["modelCard"]["considerations"]["ethicalConsiderations"] + ) + self.assertEqual( + ethical_considerations, + [{"name": "May reflect bias in training data."}] + ) + self.assertNotIn("description", ethical_considerations[0]) + def test_create_minimal_aibom(self): aibom = self.service._create_minimal_aibom("owner/model") self.assertEqual(aibom["bomFormat"], "CycloneDX") diff --git a/tests/test_validation.py b/tests/test_validation.py index bc43e5d..821648f 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1,5 +1,6 @@ import json import unittest +from unittest.mock import patch from cyclonedx.model.bom import Bom from cyclonedx.model.component import Component, ComponentType from cyclonedx.output.json import JsonV1Dot6 @@ -25,6 +26,20 @@ def test_validate_aibom_invalid(self): self.assertFalse(is_valid) self.assertTrue(len(errors) > 0) + def test_validate_aibom_preserves_error_message_without_data_path(self): + class ValidatorErrorWithoutDataPath: + message = "useful validation detail" + + with patch( + "src.utils.validation._validator.validate_str", + return_value=[ValidatorErrorWithoutDataPath()], + ): + is_valid, errors = validate_aibom({"otherField": "value"}) + + self.assertFalse(is_valid) + self.assertIn("useful validation detail", "\n".join(errors)) + self.assertNotIn("data_path", "\n".join(errors)) + def test_validate_aibom_wrong_bom_format(self): aibom = _valid_aibom() aibom["bomFormat"] = "NotCycloneDX" diff --git a/tests/test_web_controller.py b/tests/test_web_controller.py new file mode 100644 index 0000000..2e69096 --- /dev/null +++ b/tests/test_web_controller.py @@ -0,0 +1,157 @@ +import json +import shutil +import unittest +import uuid +from pathlib import Path +from unittest.mock import patch + +from fastapi import FastAPI +from fastapi.testclient import TestClient +from fastapi.templating import 
Jinja2Templates + +from src.controllers import web_controller + + +def _sample_aibom() -> dict: + return { + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:12345678-1234-5678-1234-567812345678", + "version": 1, + "metadata": { + "timestamp": "2026-06-10T00:00:00Z", + "tools": { + "components": [ + {"name": "OWASP AIBOM Generator"} + ] + }, + }, + "components": [ + { + "type": "machine-learning-model", + "name": "local-web-model", + "version": "1.0.0", + "description": "Web controller UTF-8 regression path \u2713", + "purl": "pkg:huggingface/owner/local-web-model", + "licenses": [{"license": {"id": "Apache-2.0"}}], + } + ], + "externalReferences": [ + {"type": "website", "url": "https://huggingface.co/owner/local-web-model"} + ], + } + + +def _sample_score() -> dict: + category_details = { + "required_fields": {"present_fields": 4, "total_fields": 4, "percentage": 100, "max_points": 20}, + "metadata": {"present_fields": 2, "total_fields": 2, "percentage": 100, "max_points": 20}, + "component_basic": {"present_fields": 4, "total_fields": 4, "percentage": 100, "max_points": 20}, + "component_model_card": {"present_fields": 0, "total_fields": 0, "percentage": 0, "max_points": 30}, + "external_references": {"present_fields": 1, "total_fields": 1, "percentage": 100, "max_points": 10}, + } + section_scores = { + "required_fields": 20, + "metadata": 20, + "component_basic": 20, + "component_model_card": 0, + "external_references": 10, + } + return { + "total_score": 70, + "subtotal_score": 70, + "completeness_profile": {"name": "Basic", "description": "Regression test profile"}, + "field_checklist": { + "bomFormat": "\u2714 present", + "specVersion": "\u2714 present", + "serialNumber": "\u2714 present", + "version": "\u2714 present", + }, + "field_types": {}, + "reference_urls": {}, + "category_details": category_details, + "category_fields_list": { + "component_model_card": [], + "external_references": [], + }, + "section_scores": 
section_scores, + "max_scores": { + "required_fields": 20, + "metadata": 20, + "component_basic": 20, + "component_model_card": 30, + "external_references": 10, + }, + "missing_counts": {"critical": 0, "important": 0, "supplementary": 0}, + "recommendations": [], + "penalty_applied": False, + "penalty_percentage": 0, + "penalty_reason": "", + "penalty_factor": 1, + "validation": {"valid": True, "issues": []}, + } + + +class _FakeHfApi: + def model_info(self, model_id): + return {"modelId": model_id} + + +class _FakeService: + def __init__(self, use_best_practices=True): + self.use_best_practices = use_best_practices + + @staticmethod + def _normalise_model_id(model_id): + return model_id + + def generate_aibom(self, model_id, include_inference=False): + return _sample_aibom() + + def get_enhancement_report(self): + return {"final_score": _sample_score()} + + +class WebControllerTests(unittest.TestCase): + def test_generate_route_writes_utf8_json_to_patched_output_dir(self): + app = FastAPI() + app.include_router(web_controller.router) + + repo_root = Path(__file__).resolve().parents[1] + temp_root = repo_root / "sboms" / f"test-web-controller-{uuid.uuid4().hex}" + output_dir = temp_root / "nested-output" + try: + self.assertFalse(output_dir.exists()) + + with patch.object(web_controller, "OUTPUT_DIR", str(output_dir)), \ + patch.object(web_controller, "HfApi", _FakeHfApi), \ + patch.object(web_controller, "AIBOMService", _FakeService), \ + patch.object(web_controller, "log_sbom_generation", lambda model_id: None), \ + patch.object(web_controller, "get_sbom_count", lambda: "0"): + self.assertIsInstance(web_controller.templates, Jinja2Templates) + client = TestClient(app) + response = client.post( + "/generate", + data={"model_id": "owner/local-web-model"}, + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(response.template.name, "result.html") + self.assertIn("AIBOM Summary", response.text) + self.assertIn("CycloneDX 1.6", response.text) + 
self.assertIn("CycloneDX 1.7", response.text) + + json_1_6 = output_dir / "owner_local-web-model_ai_sbom_1_6.json" + json_1_7 = output_dir / "owner_local-web-model_ai_sbom_1_7.json" + + self.assertGreater(json_1_6.stat().st_size, 0) + self.assertGreater(json_1_7.stat().st_size, 0) + + json.loads(json_1_6.read_text(encoding="utf-8")) + json.loads(json_1_7.read_text(encoding="utf-8")) + finally: + shutil.rmtree(temp_root, ignore_errors=True) + + +if __name__ == "__main__": + unittest.main() From 443c2dd3534b6b8a6fdf43b55d14a3b02aae1fe0 Mon Sep 17 00:00:00 2001 From: Helen Oakley <69336525+e2hln@users.noreply.github.com> Date: Thu, 25 Jun 2026 22:29:55 -0400 Subject: [PATCH 3/3] Fix CLI output file contract --- src/controllers/cli_controller.py | 44 ++++++++++++----- tests/test_cli_controller.py | 80 +++++++++++++++++++++++++++++-- 2 files changed, 109 insertions(+), 15 deletions(-) diff --git a/src/controllers/cli_controller.py b/src/controllers/cli_controller.py index a035f9e..ffb9294 100644 --- a/src/controllers/cli_controller.py +++ b/src/controllers/cli_controller.py @@ -17,6 +17,32 @@ def _ensure_parent_dir(path: str) -> None: os.makedirs(parent, exist_ok=True) +def _strip_1_6_suffix(path_without_ext: str) -> str: + parent, name = os.path.split(path_without_ext) + if name.endswith("_1_6"): + name = name[:-len("_1_6")] + return os.path.join(parent, name) if parent else name + + +def _resolve_output_files(output_file: Optional[str], default_json_1_6: str) -> tuple[str, str, str]: + requested_output = output_file or default_json_1_6 + base, ext = os.path.splitext(requested_output) + ext_lower = ext.lower() + + if ext_lower == ".json": + output_file_1_6 = requested_output + output_base = _strip_1_6_suffix(base) + return output_file_1_6, f"{output_base}_1_7.json", f"{output_base}.html" + + if ext_lower == ".html": + return f"{base}_1_6.json", f"{base}_1_7.json", requested_output + + if not ext: + return f"{requested_output}_1_6.json", f"{requested_output}_1_7.json", 
f"{requested_output}.html" + + return requested_output, f"{base}_1_7{ext}", f"{base}.html" + + class CLIController: def __init__(self): self.service = AIBOMService() @@ -60,16 +86,15 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf # Determine output filenames normalized_id = self.service._normalise_model_id(model_id) os.makedirs("sboms", exist_ok=True) - - output_file_1_6 = output_file - if not output_file_1_6: - output_file_1_6 = os.path.join("sboms", f"{normalized_id.replace('/', '_')}_ai_sbom_1_6.json") - - base, ext = os.path.splitext(output_file_1_6) - output_file_1_7 = f"{base.replace('_1_6', '')}_1_7{ext}" if '_1_6' in base else f"{base}_1_7{ext}" + default_output_file = os.path.join("sboms", f"{normalized_id.replace('/', '_')}_ai_sbom_1_6.json") + output_file_1_6, output_file_1_7, html_output_file = _resolve_output_files( + output_file, + default_output_file, + ) _ensure_parent_dir(output_file_1_6) _ensure_parent_dir(output_file_1_7) + _ensure_parent_dir(html_output_file) with open(output_file_1_6, 'w', encoding="utf-8") as f: f.write(json_1_6) @@ -134,7 +159,6 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf } html_content = template.render(context) - html_output_file = output_file_primary.replace("_1_6.json", ".html").replace(".json", ".html") html_temp_file = f"{html_output_file}.tmp" with open(html_temp_file, "w", encoding="utf-8") as f: f.write(html_content) @@ -155,9 +179,7 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf static_dst = os.path.join(output_dir, "static") if os.path.exists(static_src): - if os.path.exists(static_dst): - shutil.rmtree(static_dst) - shutil.copytree(static_src, static_dst) + shutil.copytree(static_src, static_dst, dirs_exist_ok=True) logger.debug("Static assets copied to: %s", static_dst) else: logger.warning("Static source directory not found: %s", static_src) diff --git a/tests/test_cli_controller.py 
b/tests/test_cli_controller.py index 2ab3c59..19af382 100644 --- a/tests/test_cli_controller.py +++ b/tests/test_cli_controller.py @@ -104,6 +104,14 @@ def _normalise_model_id(model_id): class CLIControllerTests(unittest.TestCase): + def _assert_cyclonedx_export(self, exported: dict, spec_version: str): + self.assertEqual(exported["bomFormat"], "CycloneDX") + self.assertEqual(exported["specVersion"], spec_version) + self.assertTrue( + exported["$schema"].endswith(f"bom-{spec_version}.schema.json"), + exported["$schema"], + ) + def test_generate_creates_parent_directory_for_nested_custom_output(self): repo_root = Path(__file__).resolve().parents[1] output_dir = repo_root / "sboms" / f"test-cli-controller-nested-{uuid.uuid4().hex}" / "reports" @@ -129,14 +137,78 @@ def test_generate_creates_parent_directory_for_nested_custom_output(self): self.assertTrue(html_report.exists()) self.assertGreater(html_report.stat().st_size, 0) - json.loads(json_1_6.read_text(encoding="utf-8")) - json.loads(json_1_7.read_text(encoding="utf-8")) + self._assert_cyclonedx_export(json.loads(json_1_6.read_text(encoding="utf-8")), "1.6") + self._assert_cyclonedx_export(json.loads(json_1_7.read_text(encoding="utf-8")), "1.7") html = html_report.read_text(encoding="utf-8") self.assertIn("AIBOM Summary", html) finally: shutil.rmtree(output_dir.parent, ignore_errors=True) + def test_generate_extensionless_output_writes_distinct_json_and_html_files(self): + repo_root = Path(__file__).resolve().parents[1] + output_dir = repo_root / "sboms" / f"test-cli-controller-extensionless-{uuid.uuid4().hex}" + output_dir.mkdir(parents=True) + try: + output_base = output_dir / "qwythos-current" + controller = CLIController() + controller.service = _FakeService() + + controller.generate( + "owner/local-cli-model", + output_file=str(output_base), + ) + + json_1_6 = output_dir / "qwythos-current_1_6.json" + json_1_7 = output_dir / "qwythos-current_1_7.json" + html_report = output_dir / "qwythos-current.html" + + 
self.assertEqual( + len({json_1_6.resolve(), json_1_7.resolve(), html_report.resolve()}), + 3, + ) + self.assertFalse(output_base.exists()) + self.assertGreater(json_1_6.stat().st_size, 0) + self.assertGreater(json_1_7.stat().st_size, 0) + self.assertGreater(html_report.stat().st_size, 0) + self.assertTrue((output_dir / "static").is_dir()) + + json_1_6_text = json_1_6.read_text(encoding="utf-8") + self.assertFalse(json_1_6_text.lstrip().startswith("<")) + self._assert_cyclonedx_export(json.loads(json_1_6_text), "1.6") + self._assert_cyclonedx_export(json.loads(json_1_7.read_text(encoding="utf-8")), "1.7") + + html = html_report.read_text(encoding="utf-8") + self.assertIn("AIBOM Summary", html) + finally: + shutil.rmtree(output_dir, ignore_errors=True) + + def test_generate_html_output_writes_html_exactly_and_json_side_files(self): + repo_root = Path(__file__).resolve().parents[1] + output_dir = repo_root / "sboms" / f"test-cli-controller-html-output-{uuid.uuid4().hex}" + output_dir.mkdir(parents=True) + try: + html_report = output_dir / "cli-report.html" + controller = CLIController() + controller.service = _FakeService() + + controller.generate( + "owner/local-cli-model", + output_file=str(html_report), + ) + + json_1_6 = output_dir / "cli-report_1_6.json" + json_1_7 = output_dir / "cli-report_1_7.json" + + self.assertGreater(html_report.stat().st_size, 0) + self._assert_cyclonedx_export(json.loads(json_1_6.read_text(encoding="utf-8")), "1.6") + self._assert_cyclonedx_export(json.loads(json_1_7.read_text(encoding="utf-8")), "1.7") + + html = html_report.read_text(encoding="utf-8") + self.assertIn("AIBOM Summary", html) + finally: + shutil.rmtree(output_dir, ignore_errors=True) + def test_generate_writes_non_empty_utf8_html_report(self): repo_root = Path(__file__).resolve().parents[1] output_dir = repo_root / "sboms" / f"test-cli-controller-{uuid.uuid4().hex}" @@ -165,8 +237,8 @@ def test_generate_writes_non_empty_utf8_html_report(self): self.assertIn("CycloneDX 
1.6", html) self.assertIn("CLI HTML report unicode regression path \u2705", html) - json.loads(json_1_6.read_text(encoding="utf-8")) - json.loads(json_1_7.read_text(encoding="utf-8")) + self._assert_cyclonedx_export(json.loads(json_1_6.read_text(encoding="utf-8")), "1.6") + self._assert_cyclonedx_export(json.loads(json_1_7.read_text(encoding="utf-8")), "1.7") finally: shutil.rmtree(output_dir, ignore_errors=True)