Skip to content

Commit c5badd5

Browse files
authored
Add: cache rendered pages on disk, and serve from there on cache-hit (#81)
This uses the same method we already use to detect a cache hit when an If-Modified-Since header is sent. For OpenTTD, the total on-disk cache would be ~150 MB for 10k pages.
1 parent 352cde0 commit c5badd5

3 files changed

Lines changed: 71 additions & 18 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,4 @@ COPY truewiki /code/truewiki
4747
RUN mkdir /data
4848

4949
ENTRYPOINT ["python", "-m", "truewiki"]
50-
CMD ["--bind", "0.0.0.0", "--storage", "local", "--storage-folder", "/data", "--cache-metadata-file", "/cache/metadata.json", "--user", "developer"]
50+
CMD ["--bind", "0.0.0.0", "--storage", "local", "--storage-folder", "/data", "--cache-metadata-file", "/cache/metadata.json", "--cache-page-folder", "/cache/pages", "--user", "developer"]

truewiki/__main__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
remove_session_cookie,
2525
SESSION_COOKIE_NAME,
2626
)
27+
from .views.page import click_page
2728
from .web_routes import (
2829
click_web_routes,
2930
routes,
@@ -100,6 +101,7 @@ async def wait_for_storage():
100101
@click_storage_github
101102
@click_user_session
102103
@click_user_github
104+
@click_page
103105
@click.option("--validate-all", help="Validate all mediawiki files and report all errors", is_flag=True)
104106
def main(bind, port, storage, validate_all):
105107
log.info("Reload storage ..")

truewiki/views/page.py

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
1+
import click
2+
import os
13
import time
24

35
from aiohttp import web
6+
from openttd_helpers import click_helper
47

58
from . import error
69
from .. import metadata
710
from ..content import breadcrumb
811
from ..wiki_page import WikiPage
912
from ..wrapper import wrap_page
1013

14+
CACHE_PAGE_FOLDER = None
15+
1116

1217
def _view(wiki_page, user, page: str) -> web.Response:
1318
templates = {
@@ -24,10 +29,7 @@ def _view(wiki_page, user, page: str) -> web.Response:
2429
templates["footer"] = wiki_page.add_footer(page)
2530
templates["content"] += wiki_page.add_content(page)
2631

27-
body = wrap_page(page, "Page", variables, templates)
28-
29-
status_code = 200 if wiki_page.page_exists(page) else 404
30-
return web.Response(body=body, content_type="text/html", status=status_code)
32+
return wrap_page(page, "Page", variables, templates)
3133

3234

3335
def view(user, page: str, if_modified_since) -> web.Response:
@@ -48,20 +50,69 @@ def view(user, page: str, if_modified_since) -> web.Response:
4850
f'"{page}" does not exist; did you mean [[{correct_page}]]?',
4951
)
5052

51-
# Check if we already rendered this page before. If the browser has it in
52-
# his cache, he can simply reuse that if we haven't rendered since.
53-
if (
54-
if_modified_since is not None
55-
and f"Page/{page}" in metadata.LAST_TIME_RENDERED
56-
and metadata.LAST_TIME_RENDERED[f"Page/{page}"] <= if_modified_since.timestamp()
57-
):
58-
response = web.HTTPNotModified()
53+
status_code = 200 if wiki_page.page_exists(page) else 404
54+
namespaced_page = page
55+
if not namespaced_page.startswith(("Category/", "File/", "Template/")):
56+
namespaced_page = f"Page/{namespaced_page}"
57+
58+
if CACHE_PAGE_FOLDER:
59+
cache_filename = f"{CACHE_PAGE_FOLDER}/{namespaced_page}.html"
5960
else:
60-
response = _view(wiki_page, user, page)
61-
metadata.LAST_TIME_RENDERED[f"Page/{page}"] = time.time()
61+
cache_filename = None
62+
63+
response = None
64+
65+
# Check whether we already have this page in the cache.
66+
if status_code == 200 and namespaced_page in metadata.LAST_TIME_RENDERED:
67+
if (
68+
if_modified_since is not None
69+
and metadata.LAST_TIME_RENDERED[namespaced_page] <= if_modified_since.timestamp()
70+
):
71+
# We already rendered this page before. If the browser has it in its
72+
# cache, it can simply reuse that if we haven't rendered since.
73+
response = web.HTTPNotModified()
74+
elif not user and cache_filename:
75+
# We already rendered this page to disk. Serve from there.
76+
with open(cache_filename) as fp:
77+
body = fp.read()
78+
response = web.Response(body=body, content_type="text/html", status=status_code)
79+
80+
# Cache miss; render the page.
81+
if response is None:
82+
body = _view(wiki_page, user, page)
83+
84+
if status_code == 200:
85+
metadata.LAST_TIME_RENDERED[namespaced_page] = time.time()
86+
87+
if cache_filename:
88+
# Cache the file on disk
89+
os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
90+
with open(cache_filename, "w") as fp:
91+
fp.write(body)
92+
93+
response = web.Response(body=body, content_type="text/html", status=status_code)
6294

6395
# Inform the browser under which rules it can cache this page.
64-
response.last_modified = metadata.LAST_TIME_RENDERED[f"Page/{page}"]
65-
response.headers["Vary"] = "Accept-Encoding, Cookie"
66-
response.headers["Cache-Control"] = "private, must-revalidate, max-age=0"
96+
if status_code == 200:
97+
response.last_modified = metadata.LAST_TIME_RENDERED[namespaced_page]
98+
response.headers["Vary"] = "Accept-Encoding, Cookie"
99+
response.headers["Cache-Control"] = "private, must-revalidate, max-age=0"
67100
return response
101+
102+
103+
@click_helper.extend
104+
@click.option(
105+
"--cache-page-folder",
106+
help="Folder used to cache rendered pages.",
107+
default=None,
108+
show_default=True,
109+
)
110+
def click_page(cache_page_folder):
111+
global CACHE_PAGE_FOLDER
112+
113+
if cache_page_folder and cache_page_folder.endswith("/"):
114+
cache_page_folder = cache_page_folder[:-1]
115+
if not cache_page_folder:
116+
cache_page_folder = None
117+
118+
CACHE_PAGE_FOLDER = cache_page_folder

0 commit comments

Comments
 (0)