From 9d37c81f8b67d94c041617b1e87a49e930963912 Mon Sep 17 00:00:00 2001
From: Brendan Ryan <1572504+brendanjryan@users.noreply.github.com>
Date: Thu, 25 Jun 2026 11:09:43 -0700
Subject: [PATCH] fix: harden generated pages and dev server paths

Enable Jinja2 autoescaping in gen_index.py and gen_problems.py; both
environments were previously created with autoescape=False.

Rework request-path handling in scripts/serve.py:
- percent-decode the request path and reject any ".." segment before
  it is mapped onto the filesystem;
- resolve the final path and map anything that falls outside pages/ or
  artifacts/ to a sentinel path (pages/__invalid_path__);
- bind the dev server to 127.0.0.1 instead of all interfaces.

Add scripts/test_serve.py covering clean_request_parts() and
resolve_under().
---
Reviewer notes:
- Line structure and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; hunk line counts were used to
  place blank lines. Re-check context lines against the tree before
  applying.
- resolve_under() now calls Path.resolve() inside the try block, so a
  ValueError raised while resolving (for example an embedded NUL byte
  from "%00") maps to INVALID_PATH as well. The post-image blob id on
  the scripts/serve.py index line has not been recomputed.

 scripts/gen_index.py    |  7 +++--
 scripts/gen_problems.py |  7 +++--
 scripts/serve.py        | 63 ++++++++++++++++++++++++++++++-----------
 scripts/test_serve.py   | 26 +++++++++++++++++
 4 files changed, 82 insertions(+), 21 deletions(-)
 create mode 100644 scripts/test_serve.py

diff --git a/scripts/gen_index.py b/scripts/gen_index.py
index 877fd3f2..ed575c0f 100644
--- a/scripts/gen_index.py
+++ b/scripts/gen_index.py
@@ -11,7 +11,7 @@
 import os
 import re
 
-from jinja2 import Environment, FileSystemLoader
+from jinja2 import Environment, FileSystemLoader, select_autoescape
 
 ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 SPECS_DIR = os.path.join(ROOT, "specs")
@@ -139,7 +139,10 @@ def build_tree(specs):
 
 
 def main():
-    env = Environment(loader=FileSystemLoader(TEMPLATES_DIR), autoescape=False)
+    env = Environment(
+        loader=FileSystemLoader(TEMPLATES_DIR),
+        autoescape=select_autoescape(default_for_string=True, default=True),
+    )
     tpl = env.get_template("index.html")
 
     specs = collect_specs()
diff --git a/scripts/gen_problems.py b/scripts/gen_problems.py
index ee9cdaa8..c9db6585 100644
--- a/scripts/gen_problems.py
+++ b/scripts/gen_problems.py
@@ -12,7 +12,7 @@
 import json
 import os
 
-from jinja2 import Environment, FileSystemLoader
+from jinja2 import Environment, FileSystemLoader, select_autoescape
 
 ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 TEMPLATES_DIR = os.path.join(ROOT, "pages", "templates")
@@ -86,7 +86,10 @@ def make_example(slug, title, http_status):
 
 
 def main():
-    env = Environment(loader=FileSystemLoader(TEMPLATES_DIR), autoescape=False)
+    env = Environment(
+        loader=FileSystemLoader(TEMPLATES_DIR),
+        autoescape=select_autoescape(default_for_string=True, default=True),
+    )
     problem_tpl = env.get_template("problem.html")
     index_tpl = env.get_template("problems_index.html")
 
diff --git a/scripts/serve.py b/scripts/serve.py
index c94744bd..7e0110c0 100755
--- a/scripts/serve.py
+++ b/scripts/serve.py
@@ -2,34 +2,63 @@
 """Dev server that serves pages/ and artifacts/ directly. No copying needed."""
 
 import http.server
-import os
+import posixpath
 import sys
+import urllib.parse
+from pathlib import Path
 
-ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-PAGES = os.path.join(ROOT, "pages")
-ARTIFACTS = os.path.join(ROOT, "artifacts")
+ROOT = Path(__file__).resolve().parents[1]
+PAGES = ROOT / "pages"
+ARTIFACTS = ROOT / "artifacts"
+INVALID_PATH = PAGES / "__invalid_path__"
+
+
+def clean_request_parts(path):
+    path = urllib.parse.unquote(path.split("?", 1)[0].split("#", 1)[0])
+    if ".." in path.lstrip("/").split("/"):
+        return None
+
+    path = posixpath.normpath(path).lstrip("/")
+    if path in ("", "."):
+        return []
+
+    parts = path.split("/")
+    if any(part in ("", ".", "..") for part in parts):
+        return None
+    return parts
+
+
+def resolve_under(root, *parts):
+    try:
+        candidate = root.joinpath(*parts).resolve()
+        candidate.relative_to(root)
+    except ValueError:
+        return INVALID_PATH
+    return candidate
 
 
 class Handler(http.server.SimpleHTTPRequestHandler):
     def translate_path(self, path):
-        path = path.split("?", 1)[0].split("#", 1)[0]
-        path = path.strip("/")
-        if path == "" or path == "index.html":
-            return os.path.join(PAGES, "index.html")
-        if path.startswith("fonts/"):
-            return os.path.join(PAGES, path)
-        if path.startswith("problems"):
+        parts = clean_request_parts(path)
+        if parts is None:
+            return str(INVALID_PATH)
+
+        if parts == [] or parts == ["index.html"]:
+            return str(PAGES / "index.html")
+        if parts[0] == "fonts":
+            return str(resolve_under(PAGES, *parts))
+        if parts[0] == "problems":
             # Serve problem pages with directory-style index.html
-            candidate = os.path.join(PAGES, path)
-            if os.path.isdir(candidate):
-                return os.path.join(candidate, "index.html")
-            return candidate
-        return os.path.join(ARTIFACTS, path)
+            candidate = resolve_under(PAGES, *parts)
+            if candidate.is_dir():
+                return str(candidate / "index.html")
+            return str(candidate)
+        return str(resolve_under(ARTIFACTS, *parts))
 
 
 if __name__ == "__main__":
     port = int(sys.argv[1]) if len(sys.argv) > 1 else 8765
-    server = http.server.HTTPServer(("", port), Handler)
+    server = http.server.HTTPServer(("127.0.0.1", port), Handler)
     print(f"Serving at http://localhost:{port}")
     print(f" pages/ -> index.html")
     print(f" artifacts/ -> everything else")
diff --git a/scripts/test_serve.py b/scripts/test_serve.py
new file mode 100644
index 00000000..ecddb2c7
--- /dev/null
+++ b/scripts/test_serve.py
@@ -0,0 +1,26 @@
+"""Tests for the local site server path handling."""
+
+import pytest
+
+from serve import ARTIFACTS, INVALID_PATH, PAGES, clean_request_parts, resolve_under
+
+
+@pytest.mark.parametrize(
+    ("path", "parts"),
+    [
+        ("/draft-example.html?download=1#section", ["draft-example.html"]),
+        ("/%2e%2e/LICENSE.md", None),
+        ("/problems/../LICENSE.md", None),
+        ("/problems/session/channel-not-found/", ["problems", "session", "channel-not-found"]),
+    ],
+)
+def test_clean_request_parts(path, parts):
+    assert clean_request_parts(path) == parts
+
+
+def test_resolve_under_accepts_child_path():
+    assert resolve_under(PAGES, "problems") == PAGES / "problems"
+
+
+def test_resolve_under_rejects_parent_escape():
+    assert resolve_under(ARTIFACTS, "..", "LICENSE.md") == INVALID_PATH