From 8ecfe47f30e1915f415b90986f9df493563f8138 Mon Sep 17 00:00:00 2001 From: Helen Oakley <69336525+e2hln@users.noreply.github.com> Date: Tue, 23 Jun 2026 22:30:23 -0400 Subject: [PATCH 1/3] Add regression guardrails and runner tooling --- .codex/skills/regression-suite/SKILL.md | 26 ++ scripts/dev/check_clean_state.py | 318 ++++++++++++++++++++++++ scripts/dev/run_regression_suite.py | 299 ++++++++++++++++++++++ tests/test_clean_state.py | 110 ++++++++ tests/test_regression_runner.py | 62 +++++ 5 files changed, 815 insertions(+) create mode 100644 .codex/skills/regression-suite/SKILL.md create mode 100644 scripts/dev/check_clean_state.py create mode 100644 scripts/dev/run_regression_suite.py create mode 100644 tests/test_clean_state.py create mode 100644 tests/test_regression_runner.py diff --git a/.codex/skills/regression-suite/SKILL.md b/.codex/skills/regression-suite/SKILL.md new file mode 100644 index 0000000..653d1e4 --- /dev/null +++ b/.codex/skills/regression-suite/SKILL.md @@ -0,0 +1,26 @@ +# Regression Suite + +Use this skill after implementation changes in this repository. + +## Required Checks + +- After every implementation change, run targeted regression at minimum: + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode targeted` +- Before staging or committing, run full or pre-stage regression: + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode full` + or + `.\.venv\Scripts\python.exe scripts\dev\run_regression_suite.py --mode pre-stage` +- Use pytest through the runner. Do not rely only on `unittest discover`. +- Never hide test failures. Report the exact command, exit status, and failing check. +- Never stage or commit unless the user explicitly asks. + +## Decision Guidance + +- Small localized fix: run targeted mode. +- Controller, output, formatter, schema, or validation fix: run targeted mode and make sure output-contract tests are included. +- Broad model, service, schema, or cross-path change: run full mode. +- Before commit or staging: run pre-stage mode. + +## Manual Smoke Reminder + +Real Hugging Face generation remains optional and manual because the automated regression suite is offline and deterministic. Mention this residual integration risk in final responses when relevant. diff --git a/scripts/dev/check_clean_state.py b/scripts/dev/check_clean_state.py new file mode 100644 index 0000000..e42526c --- /dev/null +++ b/scripts/dev/check_clean_state.py @@ -0,0 +1,318 @@ +"""Phase A cleanup guardrail for staged files. + +This script is read-only. It does not modify files, stage files, commit files, +validate CycloneDX output, or prove AIBOM correctness. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +import subprocess +import sys +from dataclasses import dataclass + + +FORBIDDEN_DOC_PREFIXES = ( + "docs/analysis/", + "docs/design/", + "docs/reference/", + "docs/requirements/", + "docs/analysis/current-output-samples/", +) + +PHASE_1_LOCAL_FILES = { + "src/models/aibom_domain.py", + "src/models/aibom_normalizer.py", + "tests/test_aibom_domain.py", + "tests/test_aibom_normalizer.py", + "scripts/dev/check_cyclonedx_capabilities.py", +} + +DEPENDENCY_FILES = { + "pyproject.toml", + "requirements.txt", + "requirements-dev.txt", + "uv.lock", + "poetry.lock", + "Pipfile", + "Pipfile.lock", + "setup.py", + "setup.cfg", +} + +SOURCE_PREFIXES = ( + "src/", +) + +TEST_PREFIXES = ( + "tests/", +) + +ALLOWED_GUARDRAIL_TEST_FILES = { + "tests/test_clean_state.py", + "tests/test_regression_runner.py", +} + +WORKSET_PREFIXES = ( + ".ai-workset/", +) + +PYTEST_CACHE_PREFIXES = ( + "pytest-cache-files-", +) + +GENERATED_OR_SYNTHETIC_MARKERS = ( + "current-output-samples/", + "synthetic", + "generated", +) + +GENERATED_OR_SYNTHETIC_PREFIXES = ( + "dist/", + "build/", + "sboms/", +) + + +@dataclass(frozen=True) +class Finding: + path: str + reason: str + + +def run_git(args: list[str]) -> str: + result = subprocess.run( + ["git", *args], + check=False, + capture_output=True, + text=True, + encoding="utf-8", + ) + if result.returncode != 0: + message = result.stderr.strip() or result.stdout.strip() or "git command failed" + raise RuntimeError(message) + return result.stdout + + +def normalize_path(path: str) -> str: + return path.replace("\\", "/").lstrip("./") + + +def staged_paths() -> list[str]: + output = run_git(["diff", "--cached", "--name-only", "--diff-filter=ACMRTD"]) + return [normalize_path(line) for line in output.splitlines() if line.strip()] + + +def unstaged_or_untracked_paths() -> list[str]: + output = run_git(["status", "--porcelain=v1", "-uall"]) + paths: list[str] = [] + + for line in output.splitlines(): + if len(line) < 4: + continue + index_status = line[0] + worktree_status = line[1] + raw_path = line[3:] + + if " -> " in raw_path: + raw_path = raw_path.rsplit(" -> ", maxsplit=1)[1] + + if index_status == "?" or worktree_status not in {" ", "!"}: + paths.append(normalize_path(raw_path.strip('"'))) + + paths.extend(root_pytest_cache_dirs()) + return sorted(set(paths)) + + +def root_pytest_cache_dirs() -> list[str]: + return [ + path.name + for path in Path.cwd().iterdir() + if path.is_dir() and path.name.startswith(PYTEST_CACHE_PREFIXES) + ] + + +def is_generated_or_synthetic(path: str) -> bool: + normalized = normalize_path(path).lower() + if any(normalized.startswith(prefix) for prefix in GENERATED_OR_SYNTHETIC_PREFIXES): + return True + return any(marker in normalized for marker in GENERATED_OR_SYNTHETIC_MARKERS) + + +def is_source(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(SOURCE_PREFIXES) + + +def is_test(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(TEST_PREFIXES) + + +def is_allowed_guardrail_test(path: str) -> bool: + normalized = normalize_path(path) + return normalized in ALLOWED_GUARDRAIL_TEST_FILES + + +def is_readme(path: str) -> bool: + normalized = normalize_path(path).lower() + return normalized in {"readme", "readme.md"} or normalized.startswith("readme.") + + +def is_workset(path: str) -> bool: + normalized = normalize_path(path) + return normalized.startswith(WORKSET_PREFIXES) + + +def is_pytest_cache_files(path: str) -> bool: + normalized = normalize_path(path) + return any(normalized.startswith(prefix) for prefix in PYTEST_CACHE_PREFIXES) + + +def collect_findings( + paths: list[str], + allow_dependency_files: bool, + allow_source_files: bool, + allow_future_mode_files: bool, +) -> list[Finding]: + findings: list[Finding] = [] + + for path in paths: + normalized = normalize_path(path) + + if any(normalized.startswith(prefix) for prefix in FORBIDDEN_DOC_PREFIXES): + findings.append(Finding(normalized, "staged local analysis/design/reference/requirements material")) + + if is_generated_or_synthetic(normalized): + findings.append(Finding(normalized, "staged generated or synthetic sample material")) + + if normalized in DEPENDENCY_FILES and not allow_dependency_files: + findings.append(Finding(normalized, "staged dependency file requires explicit review")) + + if is_source(normalized) and not allow_source_files: + findings.append(Finding(normalized, "staged source file is outside Phase A guardrail scope")) + + if normalized in PHASE_1_LOCAL_FILES and not allow_future_mode_files: + findings.append(Finding(normalized, "staged local Phase 1 implementation-shaped file requires explicit review")) + + if is_test(normalized) and not is_allowed_guardrail_test(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged test file is outside Phase A guardrail scope")) + + if is_readme(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged README or end-user doc is outside Phase A guardrail scope")) + + if is_workset(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged workset artifact is outside Phase A guardrail scope")) + + if is_pytest_cache_files(normalized) and not allow_future_mode_files: + findings.append(Finding(normalized, "staged pytest cache file is generated local material")) + + return findings + + +def collect_warnings(paths: list[str]) -> list[Finding]: + warnings: list[Finding] = [] + + for path in paths: + normalized = normalize_path(path) + + if any(normalized.startswith(prefix) for prefix in FORBIDDEN_DOC_PREFIXES): + warnings.append(Finding(normalized, "local analysis/design/reference/requirements material needs human review before staging")) + + if is_generated_or_synthetic(normalized): + warnings.append(Finding(normalized, "generated or synthetic sample material needs human review before staging")) + + if normalized in DEPENDENCY_FILES: + warnings.append(Finding(normalized, "dependency file needs human review before staging")) + + if normalized in PHASE_1_LOCAL_FILES: + warnings.append(Finding(normalized, "local Phase 1 implementation-shaped file needs human review before staging")) + + if is_source(normalized): + warnings.append(Finding(normalized, "source file change needs human review before staging")) + + if is_workset(normalized): + warnings.append(Finding(normalized, "workset artifact needs human review before staging")) + + if is_pytest_cache_files(normalized): + warnings.append(Finding(normalized, "pytest cache temp directory should be cleaned or ignored before cleanup fixes")) + + return warnings + + +def print_status_note() -> None: + status = run_git(["status", "--short", "-uall"]) + if status.strip(): + print("Working tree has unstaged or untracked changes:") + print(status.rstrip()) + else: + print("Working tree is clean.") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Check Phase A cleanup guardrails for staged files.") + parser.add_argument( + "--allow-dependency-files", + action="store_true", + help="Allow staged dependency files after explicit human review.", + ) + parser.add_argument( + "--allow-source-files", + action="store_true", + help="Allow staged source files after leaving Phase A guardrail mode.", + ) + parser.add_argument( + "--allow-future-mode-files", + action="store_true", + help="Allow staged tests, README, workset artifacts, and local investigation files after explicit future-mode review.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + + print("Phase A cleanup guardrail check") + print("Note: this script is read-only and does not validate CycloneDX output or prove AIBOM correctness.") + print() + + try: + paths = staged_paths() + findings = collect_findings( + paths, + args.allow_dependency_files, + args.allow_source_files, + args.allow_future_mode_files, + ) + warnings = collect_warnings(unstaged_or_untracked_paths()) + print_status_note() + except RuntimeError as error: + print(f"FAIL: {error}", file=sys.stderr) + return 2 + + print() + if not paths: + print("No staged files found.") + + if findings: + print("FAIL: staged files violate Phase A cleanup guardrails.") + print("Affected files:") + for finding in findings: + print(f"- {finding.path}: {finding.reason}") + return 1 + + if warnings: + print("WARN: risky unstaged or untracked files exist and need human review before staging.") + print("Affected files:") + for warning in warnings: + print(f"- {warning.path}: {warning.reason}") + print() + + print("PASS: no staged files violate Phase A cleanup guardrails.") + print("PASS does not mean the repository is clean; review any WARN lines and git status output above.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/dev/run_regression_suite.py b/scripts/dev/run_regression_suite.py new file mode 100644 index 0000000..ca9a550 --- /dev/null +++ b/scripts/dev/run_regression_suite.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import argparse +import shutil +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +MODEL_FILE_TESTS = [ + "tests/test_model_file_extraction.py", + "tests/test_gguf_metadata.py", + "tests/test_safetensors_metadata.py", + "tests/test_hyperparameter_wiring.py", + "tests/test_aibom_hyperparameters.py", +] + +TARGETED_TEST_MAP = { + "src/controllers/cli_controller.py": [ + "tests/test_cli_controller.py", + "tests/test_aibom_output_contract.py", + ], + "src/controllers/web_controller.py": [ + "tests/test_web_controller.py", + "tests/test_aibom_output_contract.py", + ], + "src/utils/formatter.py": [ + "tests/test_formatter.py", + "tests/test_aibom_output_contract.py", + "tests/test_functional_regression.py", + ], + "src/utils/validation.py": [ + "tests/test_validation.py", + "tests/test_aibom_output_contract.py", + ], + "src/models/service.py": [ + "tests/test_service.py", + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", + ], + "src/models/model_file_extractors.py": MODEL_FILE_TESTS, + "src/models/gguf_metadata.py": MODEL_FILE_TESTS, + "src/models/safetensors_metadata.py": MODEL_FILE_TESTS, + "src/models/config_parsing.py": MODEL_FILE_TESTS, + "src/utils/license_utils.py": [ + "tests/test_license_utils.py", + "tests/test_service.py", + ], + "src/models/aibom_domain.py": [ + "tests/test_aibom_domain.py", + ], + "src/models/aibom_normalizer.py": [ + "tests/test_aibom_normalizer.py", + ], + "scripts/dev/check_clean_state.py": [ + "tests/test_clean_state.py", + ], + "scripts/dev/run_regression_suite.py": [ + "tests/test_regression_runner.py", + ], +} + +DEFAULT_TARGETED_TESTS = [ + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", +] + +DOC_ONLY_PREFIXES = ("docs/",) +DOC_ONLY_FILES = {"README.md", "CONTRIBUTING.md"} + + +@dataclass(frozen=True) +class Selection: + changed_files: list[str] + groups: list[str] + pytest_targets: list[str] + run_clean_state: bool + docs_only: bool + full_recommended: bool + + +@dataclass(frozen=True) +class CommandResult: + command: list[str] + returncode: int + + +def normalize_path(path: str) -> str: + return path.strip().replace("\\", "/") + + +def unique(items: list[str]) -> list[str]: + seen: set[str] = set() + result: list[str] = [] + for item in items: + if item not in seen: + seen.add(item) + result.append(item) + return result + + +def is_docs_only_file(path: str) -> bool: + normalized = normalize_path(path) + return normalized in DOC_ONLY_FILES or normalized.startswith(DOC_ONLY_PREFIXES) + + +def command_text(command: list[str]) -> str: + return " ".join(command) + + +def clean_state_command() -> list[str]: + script = "scripts/dev/check_clean_state.py" + if sys.platform.startswith("win") and shutil.which("py"): + return ["py", script] + return [sys.executable, script] + + +def pytest_command(targets: list[str]) -> list[str]: + return [sys.executable, "-m", "pytest", *targets] + + +def collect_git_files(args: list[str]) -> list[str]: + completed = subprocess.run( + ["git", *args], + cwd=REPO_ROOT, + check=False, + capture_output=True, + text=True, + ) + if completed.returncode != 0: + if completed.stdout: + print(completed.stdout, end="") + if completed.stderr: + print(completed.stderr, end="", file=sys.stderr) + raise SystemExit(completed.returncode) + return [normalize_path(line) for line in completed.stdout.splitlines() if line.strip()] + + +def changed_files() -> list[str]: + files: list[str] = [] + files.extend(collect_git_files(["diff", "--name-only"])) + files.extend(collect_git_files(["diff", "--cached", "--name-only"])) + files.extend(collect_git_files(["ls-files", "--others", "--exclude-standard"])) + return unique(files) + + +def select_targets(files: list[str]) -> Selection: + normalized_files = unique([normalize_path(path) for path in files]) + if normalized_files and all(is_docs_only_file(path) for path in normalized_files): + return Selection( + changed_files=normalized_files, + groups=["docs-only guardrail"], + pytest_targets=[], + run_clean_state=True, + docs_only=True, + full_recommended=False, + ) + + groups: list[str] = [] + pytest_targets: list[str] = [] + run_clean_state = False + + for path in normalized_files: + mapped_tests = TARGETED_TEST_MAP.get(path) + if mapped_tests: + groups.append(path) + pytest_targets.extend(mapped_tests) + if path == "scripts/dev/check_clean_state.py": + run_clean_state = True + continue + + if path.startswith("tests/test_") and path.endswith(".py"): + groups.append("changed test file") + pytest_targets.append(path) + + pytest_targets = unique(pytest_targets) + + if not pytest_targets and not run_clean_state: + groups.append("default functional/output-contract regression") + pytest_targets = DEFAULT_TARGETED_TESTS.copy() + + return Selection( + changed_files=normalized_files, + groups=unique(groups), + pytest_targets=pytest_targets, + run_clean_state=run_clean_state, + docs_only=False, + full_recommended="default functional/output-contract regression" in groups, + ) + + +def print_file_list(title: str, files: list[str]) -> None: + print(f"\n{title}") + if not files: + print("- none") + return + for path in files: + print(f"- {path}") + + +def print_selection(selection: Selection) -> None: + print_file_list("Changed files detected:", selection.changed_files) + print_file_list("Selected test groups:", selection.groups) + print_file_list("Selected pytest targets:", selection.pytest_targets) + if selection.docs_only: + print("\nDocs-only changes detected; product tests are not run by targeted mode by default.") + if selection.full_recommended: + print("\nNo targeted mapping matched; full suite is recommended before staging.") + + +def run_command(command: list[str]) -> CommandResult: + print(f"\nRunning: {command_text(command)}") + completed = subprocess.run(command, cwd=REPO_ROOT, check=False) + status = "PASS" if completed.returncode == 0 else "FAIL" + print(f"Result: {status} ({completed.returncode})") + return CommandResult(command=command, returncode=completed.returncode) + + +def summarize(results: list[CommandResult], next_action: str) -> int: + failed = [result for result in results if result.returncode != 0] + print("\nCommands run:") + if not results: + print("- none") + for result in results: + status = "PASS" if result.returncode == 0 else "FAIL" + print(f"- {command_text(result.command)} -> {status} ({result.returncode})") + + overall = 1 if failed else 0 + print(f"\nOverall status: {'FAIL' if failed else 'PASS'}") + print(f"Next recommended action: {next_action}") + return overall + + +def run_targeted() -> int: + selection = select_targets(changed_files()) + print_selection(selection) + + results: list[CommandResult] = [] + if selection.pytest_targets: + results.append(run_command(pytest_command(selection.pytest_targets))) + if selection.run_clean_state: + results.append(run_command(clean_state_command())) + + if selection.docs_only: + next_action = "Review docs changes, then run full or pre-stage mode before staging if product behavior changed." + elif selection.full_recommended: + next_action = "Run --mode full before staging because targeted mode used the default fallback." + else: + next_action = "Run --mode full or --mode pre-stage before staging or committing." + return summarize(results, next_action) + + +def run_full() -> int: + results = [ + run_command(pytest_command(["tests"])), + run_command(clean_state_command()), + run_command(["git", "diff", "--cached", "--name-only"]), + ] + return summarize(results, "Address any failures, then run --mode pre-stage before manual staging.") + + +def run_pre_stage() -> int: + print("\nStaging remains manual. This runner does not stage or commit files.") + results = [ + run_command(pytest_command(["tests"])), + run_command(clean_state_command()), + run_command(["git", "diff", "--cached", "--name-only"]), + run_command(["git", "status", "--short", "-uall"]), + ] + return summarize(results, "If all checks pass, manually review git status before staging.") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run repo regression checks.") + parser.add_argument( + "--mode", + choices=("targeted", "full", "pre-stage"), + required=True, + help="Regression mode to run.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(sys.argv[1:] if argv is None else argv) + if args.mode == "targeted": + return run_targeted() + if args.mode == "full": + return run_full() + if args.mode == "pre-stage": + return run_pre_stage() + raise AssertionError(f"Unexpected mode: {args.mode}") + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_clean_state.py b/tests/test_clean_state.py new file mode 100644 index 0000000..3f4d182 --- /dev/null +++ b/tests/test_clean_state.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys +import unittest + + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "dev" / "check_clean_state.py" + +spec = importlib.util.spec_from_file_location("check_clean_state", SCRIPT_PATH) +check_clean_state = importlib.util.module_from_spec(spec) +assert spec.loader is not None +sys.modules[spec.name] = check_clean_state +spec.loader.exec_module(check_clean_state) + + +class CleanStateGuardrailTests(unittest.TestCase): + def test_private_analysis_path_staged_fails(self) -> None: + findings = check_clean_state.collect_findings( + ["docs/analysis/local-report.md"], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertIn( + "staged local analysis/design/reference/requirements material", + {finding.reason for finding in findings}, + ) + + def test_private_analysis_path_unstaged_warns_only(self) -> None: + warnings = check_clean_state.collect_warnings(["docs/analysis/local-report.md"]) + findings = check_clean_state.collect_findings( + [], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + self.assertIn( + "local analysis/design/reference/requirements material needs human review before staging", + {warning.reason for warning in warnings}, + ) + + def test_dependency_file_staged_fails(self) -> None: + findings = check_clean_state.collect_findings( + ["requirements.txt"], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual( + ["staged dependency file requires explicit review"], + [finding.reason for finding in findings], + ) + + def test_dependency_file_modified_only_warns(self) -> None: + warnings = check_clean_state.collect_warnings(["uv.lock"]) + + self.assertEqual( + ["dependency file needs human review before staging"], + [warning.reason for warning in warnings], + ) + + def test_no_staged_private_files_passes(self) -> None: + findings = check_clean_state.collect_findings( + [ + "docs/dev/cleanup-guardrails.md", + "scripts/dev/check_clean_state.py", + "tests/test_clean_state.py", + ], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + + def test_regression_tooling_staged_set_passes(self) -> None: + findings = check_clean_state.collect_findings( + [ + ".codex/skills/regression-suite/SKILL.md", + "docs/dev/cleanup-guardrails.md", + "docs/dev/regression-testing.md", + "scripts/dev/check_clean_state.py", + "scripts/dev/run_regression_suite.py", + "tests/test_clean_state.py", + "tests/test_regression_runner.py", + ], + allow_dependency_files=False, + allow_source_files=False, + allow_future_mode_files=False, + ) + + self.assertEqual([], findings) + + def test_source_file_modified_only_warns(self) -> None: + warnings = check_clean_state.collect_warnings(["src/models/service.py"]) + + self.assertEqual( + ["source file change needs human review before staging"], + [warning.reason for warning in warnings], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_regression_runner.py b/tests/test_regression_runner.py new file mode 100644 index 0000000..ef71f4c --- /dev/null +++ b/tests/test_regression_runner.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path +import sys +import unittest + + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "dev" / "run_regression_suite.py" + +spec = importlib.util.spec_from_file_location("run_regression_suite", SCRIPT_PATH) +run_regression_suite = importlib.util.module_from_spec(spec) +assert spec.loader is not None +sys.modules[spec.name] = run_regression_suite +spec.loader.exec_module(run_regression_suite) + + +class RegressionRunnerSelectionTests(unittest.TestCase): + def test_formatter_change_runs_output_contract_and_functional_tests(self) -> None: + selection = run_regression_suite.select_targets(["src/utils/formatter.py"]) + + self.assertEqual( + [ + "tests/test_formatter.py", + "tests/test_aibom_output_contract.py", + "tests/test_functional_regression.py", + ], + selection.pytest_targets, + ) + self.assertFalse(selection.docs_only) + self.assertFalse(selection.full_recommended) + + def test_docs_only_change_runs_clean_state_without_product_tests(self) -> None: + selection = run_regression_suite.select_targets( + ["README.md", "docs/dev/regression-testing.md"] + ) + + self.assertEqual([], selection.pytest_targets) + self.assertTrue(selection.run_clean_state) + self.assertTrue(selection.docs_only) + + def test_unmapped_source_change_uses_default_fallback(self) -> None: + selection = run_regression_suite.select_targets(["src/models/new_surface.py"]) + + self.assertEqual( + [ + "tests/test_functional_regression.py", + "tests/test_aibom_output_contract.py", + ], + selection.pytest_targets, + ) + self.assertTrue(selection.full_recommended) + + def test_clean_state_change_runs_its_unit_test_and_guardrail(self) -> None: + selection = run_regression_suite.select_targets(["scripts/dev/check_clean_state.py"]) + + self.assertEqual(["tests/test_clean_state.py"], selection.pytest_targets) + self.assertTrue(selection.run_clean_state) + + +if __name__ == "__main__": + unittest.main() From cee789f1a430aeeda5c970c255a5d14a9c034226 Mon Sep 17 00:00:00 2001 From: Helen Oakley <69336525+e2hln@users.noreply.github.com> Date: Tue, 23 Jun 2026 23:24:02 -0400 Subject: [PATCH 2/3] Fix cleanup output behavior and regressions --- src/controllers/cli_controller.py | 25 ++- src/controllers/web_controller.py | 17 +- src/models/service.py | 2 +- src/templates/result.html | 8 +- src/utils/formatter.py | 7 + src/utils/validation.py | 31 +++- tests/test_aibom_output_contract.py | 263 ++++++++++++++++++++++++++++ tests/test_cli_controller.py | 175 ++++++++++++++++++ tests/test_formatter.py | 68 +++++++ tests/test_functional_regression.py | 96 ++++++++++ tests/test_service.py | 17 ++ tests/test_validation.py | 15 ++ tests/test_web_controller.py | 157 +++++++++++++++++ 13 files changed, 863 insertions(+), 18 deletions(-) create mode 100644 tests/test_aibom_output_contract.py create mode 100644 tests/test_cli_controller.py create mode 100644 tests/test_formatter.py create mode 100644 tests/test_functional_regression.py create mode 100644 tests/test_web_controller.py diff --git a/src/controllers/cli_controller.py b/src/controllers/cli_controller.py index 627bdb8..a035f9e 100644 --- a/src/controllers/cli_controller.py +++ b/src/controllers/cli_controller.py @@ -10,6 +10,13 @@ logger = logging.getLogger(__name__) + +def _ensure_parent_dir(path: str) -> None: + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + + class CLIController: def __init__(self): self.service = AIBOMService() @@ -61,9 +68,12 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf base, ext = os.path.splitext(output_file_1_6) output_file_1_7 = f"{base.replace('_1_6', '')}_1_7{ext}" if '_1_6' in base else f"{base}_1_7{ext}" - with open(output_file_1_6, 'w') as f: + _ensure_parent_dir(output_file_1_6) + _ensure_parent_dir(output_file_1_7) + + with open(output_file_1_6, 'w', encoding="utf-8") as f: f.write(json_1_6) - with open(output_file_1_7, 'w') as f: + with open(output_file_1_7, 'w', encoding="utf-8") as f: f.write(json_1_7) # Check for validation results @@ -125,8 +135,10 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf html_content = template.render(context) html_output_file = output_file_primary.replace("_1_6.json", ".html").replace(".json", ".html") - with open(html_output_file, "w") as f: + html_temp_file = f"{html_output_file}.tmp" + with open(html_temp_file, "w", encoding="utf-8") as f: f.write(html_content) + os.replace(html_temp_file, html_output_file) logger.info("HTML Report: %s", html_output_file) @@ -174,7 +186,12 @@ def generate(self, model_id: str, output_file: Optional[str] = None, include_inf logger.info("License: %s", ", ".join(license_list)) except Exception as e: - logger.warning("Failed to generate HTML report: %s", e) + if "html_temp_file" in locals() and os.path.exists(html_temp_file): + try: + os.remove(html_temp_file) + except OSError: + logger.debug("Failed to remove temporary HTML report: %s", html_temp_file, exc_info=True) + logger.warning("Failed to generate HTML report: %s", e, exc_info=True) for r in reports: spec = r.get("spec_version", "1.6") diff --git a/src/controllers/web_controller.py b/src/controllers/web_controller.py index 2f4c458..502101b 100644 --- a/src/controllers/web_controller.py +++ b/src/controllers/web_controller.py @@ -48,7 +48,7 @@ def is_valid_hf_input(input_str: str) -> bool: @router.get("/", response_class=HTMLResponse) async def root(request: Request): - return templates.TemplateResponse("index.html", { + return templates.TemplateResponse(request, "index.html", { "request": request, "sbom_count": get_sbom_count() }) @@ -67,7 +67,7 @@ async def generate_form( # Security: Validate BEFORE sanitizing to prevent bypass attacks # (e.g., → <script>org/model</script> could slip through) if not is_valid_hf_input(model_id): - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": "Invalid model ID format.", "sbom_count": get_sbom_count(), @@ -86,14 +86,14 @@ async def generate_form( loop = asyncio.get_running_loop() await loop.run_in_executor(None, lambda: HfApi().model_info(normalized_id)) except RepositoryNotFoundError: - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Model {normalized_id} not found on Hugging Face.", "sbom_count": get_sbom_count(), "model_id": normalized_id }) except Exception as e: - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Error verifying model: {e}", "sbom_count": get_sbom_count(), @@ -120,9 +120,10 @@ def _save_task(): json_1_6 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.6") json_1_7 = export_aibom(aibom, bom_type="cyclonedx", spec_version="1.7") - with open(filepath, "w") as f: + os.makedirs(OUTPUT_DIR, exist_ok=True) + with open(filepath, "w", encoding="utf-8") as f: f.write(json_1_6) - with open(filepath_1_7, "w") as f: + with open(filepath_1_7, "w", encoding="utf-8") as f: f.write(json_1_7) log_sbom_generation(sanitized_model_id) return json_1_6, json_1_7 @@ -155,11 +156,11 @@ def _save_task(): "result_file": f"/output/{filename}" } - return templates.TemplateResponse("result.html", context) + return templates.TemplateResponse(request, "result.html", context) except Exception as e: logger.error(f"Generation error: {e}", exc_info=True) - return templates.TemplateResponse("error.html", { + return templates.TemplateResponse(request, "error.html", { "request": request, "error": f"Internal generation error: {e}", "sbom_count": get_sbom_count(), diff --git a/src/models/service.py b/src/models/service.py index c38ad9f..f3b676b 100644 --- a/src/models/service.py +++ b/src/models/service.py @@ -734,7 +734,7 @@ def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any] considerations["technicalLimitations"] = [metadata["technicalLimitations"]] # ethicalConsiderations if "ethicalConsiderations" in metadata: - considerations["ethicalConsiderations"] = [{"name": "Ethical Considerations", "description": metadata["ethicalConsiderations"]}] + considerations["ethicalConsiderations"] = [{"name": metadata["ethicalConsiderations"]}] if considerations: section["considerations"] = considerations diff --git a/src/templates/result.html b/src/templates/result.html index fddf4e3..32a1c47 100644 --- a/src/templates/result.html +++ b/src/templates/result.html @@ -32,10 +32,10 @@
This is the complete AIBOM components array in CycloneDX JSON format:
+Generated components section in CycloneDX JSON format:
{{ components_json }}