From 31a5109d0983ae3efba87315c1527c1f7b176df6 Mon Sep 17 00:00:00 2001 From: skypank Date: Tue, 16 Jun 2026 18:49:32 +0530 Subject: [PATCH 1/3] feat(api): add feature-flagged /rest/v1/health endpoint Adds a lightweight deploy/uptime health probe at GET /rest/v1/health, gated behind the CRE_ENABLE_HEALTH feature flag (off by default). Behavior: - Flag off (default): endpoint returns 404, as if it does not exist. - Flag on, healthy: 200 with {ok, db_reachable, cre_count, standards_count} when the serving DB is reachable and holds a non-empty dataset. - Flag on, unhealthy: 503 when the DB is unreachable or the dataset is empty/broken (reason explains which). Node_collection.health_check() runs cheap COUNT queries over CRE and Node, never raises (connectivity errors are reported as ok=False), and treats a zero count for either as an empty dataset. Scope is intentionally limited to DB reachability + data sanity. Deeper checks (gap-analysis completeness, mapping coverage, Neo4j, Redis) are deliberately excluded by design and belong in ops tooling. --- application/database/db.py | 48 +++++++++++++++++++++++ application/feature_flags.py | 4 ++ application/tests/web_main_test.py | 63 ++++++++++++++++++++++++++++++ application/web/web_main.py | 23 ++++++++++- 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/application/database/db.py b/application/database/db.py index 25ece0be7..50528c22e 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -2264,6 +2264,54 @@ def get_root_cres(self): ) return self._hydrate_cres_batch(list(cres)) + def health_check(self) -> Dict[str, Any]: + """Lightweight liveness/readiness probe for the serving database. + + Intended for use by a deploy/uptime health endpoint, NOT for deep + operational checks (GA completeness, mapping coverage, etc.) which are + slow and belong in ops tooling. 
Performs cheap COUNT queries and never + raises: connectivity failures are reported as ``ok=False`` so the caller + can return an appropriate status code. + + Returns a dict with: + - ``ok``: True only if the DB is reachable AND holds a non-empty + dataset (at least one CRE and one standard/node). + - ``db_reachable``: True if the COUNT queries executed. + - ``cre_count`` / ``standards_count``: populated when reachable. + - ``reason``: short human-readable explanation when ``ok`` is False. + """ + try: + cre_count = self.session.query(func.count(CRE.id)).scalar() or 0 + standards_count = self.session.query(func.count(Node.id)).scalar() or 0 + except OperationalError: + return { + "ok": False, + "db_reachable": False, + "reason": "database unreachable", + } + except Exception: # pragma: no cover - defensive, never fail open + return { + "ok": False, + "db_reachable": False, + "reason": "database health query failed", + } + + if cre_count == 0 or standards_count == 0: + return { + "ok": False, + "db_reachable": True, + "cre_count": cre_count, + "standards_count": standards_count, + "reason": "empty dataset", + } + + return { + "ok": True, + "db_reachable": True, + "cre_count": cre_count, + "standards_count": standards_count, + } + def get_embeddings_by_doc_type(self, doc_type: str) -> Dict[str, List[float]]: res = {} embeddings = ( diff --git a/application/feature_flags.py b/application/feature_flags.py index 464e3238a..9c77b0e86 100644 --- a/application/feature_flags.py +++ b/application/feature_flags.py @@ -5,3 +5,7 @@ def is_cre_import_allowed() -> bool: return os.getenv("CRE_ALLOW_IMPORT", "").strip().lower() in TRUE_VALUES + + +def is_health_endpoint_enabled() -> bool: + return os.getenv("CRE_ENABLE_HEALTH", "").strip().lower() in TRUE_VALUES diff --git a/application/tests/web_main_test.py b/application/tests/web_main_test.py index c0547ed83..e112c2aad 100644 --- a/application/tests/web_main_test.py +++ b/application/tests/web_main_test.py @@ -1360,3 +1360,66 @@ 
def test_get_cre_csv(self) -> None: data.getvalue(), response.data.decode(), ) + + def test_health_disabled_by_default_returns_404(self) -> None: + os.environ.pop("CRE_ENABLE_HEALTH", None) + with self.app.test_client() as client: + response = client.get("/rest/v1/health") + self.assertEqual(404, response.status_code) + + def test_health_enabled_empty_dataset_returns_503(self) -> None: + os.environ["CRE_ENABLE_HEALTH"] = "1" + try: + with self.app.test_client() as client: + response = client.get("/rest/v1/health") + self.assertEqual(503, response.status_code) + body = json.loads(response.data.decode()) + self.assertFalse(body["ok"]) + self.assertTrue(body["db_reachable"]) + self.assertEqual("empty dataset", body["reason"]) + finally: + os.environ.pop("CRE_ENABLE_HEALTH", None) + + def test_health_enabled_populated_returns_200(self) -> None: + os.environ["CRE_ENABLE_HEALTH"] = "1" + try: + collection = db.Node_collection() + collection.add_cre( + defs.CRE(id="111-115", description="CA", name="CA", tags=["ta"]) + ) + collection.add_node( + defs.Standard( + name="s1", section="s11", subsection="s111", version="1.1.1" + ) + ) + with self.app.test_client() as client: + response = client.get("/rest/v1/health") + self.assertEqual(200, response.status_code) + body = json.loads(response.data.decode()) + self.assertTrue(body["ok"]) + self.assertTrue(body["db_reachable"]) + self.assertGreaterEqual(body["cre_count"], 1) + self.assertGreaterEqual(body["standards_count"], 1) + finally: + os.environ.pop("CRE_ENABLE_HEALTH", None) + + def test_health_db_unreachable_returns_503(self) -> None: + os.environ["CRE_ENABLE_HEALTH"] = "1" + try: + with patch.object( + db.Node_collection, + "health_check", + return_value={ + "ok": False, + "db_reachable": False, + "reason": "database unreachable", + }, + ): + with self.app.test_client() as client: + response = client.get("/rest/v1/health") + self.assertEqual(503, response.status_code) + body = json.loads(response.data.decode()) + 
self.assertFalse(body["ok"]) + self.assertFalse(body["db_reachable"]) + finally: + os.environ.pop("CRE_ENABLE_HEALTH", None) diff --git a/application/web/web_main.py b/application/web/web_main.py index df2dac79f..cfe93c548 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -22,7 +22,7 @@ from application.cmd import cre_main from application.defs import cre_defs as defs from application.defs import cre_exceptions -from application.feature_flags import is_cre_import_allowed +from application.feature_flags import is_cre_import_allowed, is_health_endpoint_enabled from application.utils import spreadsheet as sheet_utils from application.utils import mdutils, redirectors, gap_analysis @@ -584,6 +584,27 @@ def text_search() -> Any: abort(404, "No object matches the given search terms") +@app.route("/rest/v1/health", methods=["GET"]) +def health() -> Any: + """Deploy/uptime health probe (feature-flagged, off by default). + + Enable with CRE_ENABLE_HEALTH=1. Scope is intentionally narrow and fast so + it can gate deploys without failing for the wrong reason: + - 200: app up, serving DB reachable, dataset non-empty (CREs and + standards present). + - 503: DB unreachable or dataset empty/broken. + Deeper checks (gap-analysis completeness, mapping coverage, Neo4j/Redis) + are deliberately excluded and live in ops tooling instead. 
+ """ + if not is_health_endpoint_enabled(): + abort(404) + + database = db.Node_collection() + result = database.health_check() + status_code = 200 if result.get("ok") else 503 + return jsonify(result), status_code + + @app.route("/rest/v1/root_cres", methods=["GET"]) def find_root_cres() -> Any: """ From 7f10448d13422cf41896787c03d086bd090a2d80 Mon Sep 17 00:00:00 2001 From: skypank Date: Sat, 20 Jun 2026 19:26:00 +0530 Subject: [PATCH 2/3] fix: load .env in feature_flags and document CRE_ENABLE_HEALTH flag --- .env.example | 6 ++++++ README.md | 1 + application/feature_flags.py | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/.env.example b/.env.example index a4a9a6b65..a90e5ee32 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,12 @@ REDIS_NO_SSL=false FLASK_CONFIG=development INSECURE_REQUESTS=false +# Feature Flags +# Enable the deploy/uptime health probe at GET /rest/v1/health. +# Off by default; when unset the endpoint returns 404. + +CRE_ENABLE_HEALTH=false + # Embeddings NO_GEN_EMBEDDINGS=false diff --git a/README.md b/README.md index 02fb3e9dd..642e9002c 100644 --- a/README.md +++ b/README.md @@ -289,6 +289,7 @@ Then edit `.env` and provide values appropriate for your environment. * Google Auth: `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_SECRET`, `GOOGLE_SECRET_JSON`, `LOGIN_ALLOWED_DOMAINS` * GCP: `GCP_NATIVE` * Spreadsheet Auth: `OpenCRE_gspread_Auth` +* Feature flags: `CRE_ENABLE_HEALTH` (enable the `GET /rest/v1/health` deploy/uptime probe; off by default — set to `1`, `true`, or `yes` to enable, otherwise the endpoint returns 404) See `.env.example` for full list and defaults.
diff --git a/application/feature_flags.py b/application/feature_flags.py index 9c77b0e86..fe1aae2dc 100644 --- a/application/feature_flags.py +++ b/application/feature_flags.py @@ -1,5 +1,12 @@ import os +try: + from dotenv import load_dotenv # type: ignore + + load_dotenv() +except ImportError: + pass + TRUE_VALUES = {"1", "true", "yes"} From ceb189cf005365384ed2ff166725fb3535a26b44 Mon Sep 17 00:00:00 2001 From: skypank Date: Sat, 20 Jun 2026 19:34:19 +0530 Subject: [PATCH 3/3] docs(env): clarify accepted values for CRE_ENABLE_HEALTH --- .env.example | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index a90e5ee32..6e3efc0fb 100644 --- a/.env.example +++ b/.env.example @@ -26,7 +26,8 @@ INSECURE_REQUESTS=false # Feature Flags # Enable the deploy/uptime health probe at GET /rest/v1/health. -# Off by default; when unset the endpoint returns 404. +# Set to one of 1, true, yes (case-insensitive) to enable; any other value +# (including unset or false) leaves it off and the endpoint returns 404. CRE_ENABLE_HEALTH=false