Skip to content

Commit 593e099

Browse files
committed
Percent encode artifact names in fetch-content
Previously, trying to use fetch-content for an artifact that had a space in it would result in errors like `Download failed: URL can't contain control characters. '[...]' (found at least ' ')` This commit changes fetch-content so we encode the artifact name when building the download URL instead of putting it verbatim in the URL. Because we're later reusing the URL to get the base name to know which file to write, we now decode the basename so the resulting file doesn't contain percent encoded characters. Before: ``` $ export MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight Princess-0.2.3.apworld","extract":false}]' $ uv run python3 src/taskgraph/run-task/fetch-content task-artifacts -d /tmp/fetch-test attempt 1/5 Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld to /tmp/fetch-test/Twilight Princess-0.3.0.apworld Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld Download failed: URL can't contain control characters. '/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld' (found at least ' ') sleeping for 60.00s (attempt 1/5) ``` After: ``` $ export MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight Princess-0.2.3.apworld","extract":false}]' $ uv run python3 src/taskgraph/run-task/fetch-content task-artifacts -d /tmp/fetch-test attempt 1/5 Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld to /tmp/fetch-test/Twilight Princess-0.3.0.apworld Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld resolved to 155321 bytes with sha256 77c8ccca3da7b040b94a7c8b4c68dc5234556dbc1c5b51f65afc19655f6d00ab in 0.486s PERFHERDER_DATA: {"framework": {"name": "build_metrics"}, "suites": [{"name": "fetch_content", "value": 0.4895111569785513, "lowerIsBetter": true, "shouldAlert": false, "subtests": []}]} ``` As another workaround, users could percent encode the artifact name in the fetch config but that would result with the filename on disk still having the percent encoded characters in it and be confusing because the artifact in the config wouldn't match what the task actually has. i.e: MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight%20Princess-0.2.3.apworld","extract":false}]'
1 parent 31b2a36 commit 593e099

2 files changed

Lines changed: 78 additions & 4 deletions

File tree

src/taskgraph/run-task/fetch-content

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
626626
the destination directory.
627627
"""
628628

629-
basename = urllib.parse.urlparse(url).path.split("/")[-1]
629+
basename = urllib.parse.unquote(urllib.parse.urlparse(url).path.split("/")[-1])
630630
dest_path = dest_dir / basename
631631

632632
download_to_path(url, dest_path, sha256=sha256, size=size)
@@ -920,16 +920,17 @@ def command_task_artifacts(args):
920920
sha256 = None
921921
if fetch.get("verify-hash"):
922922
sha256 = get_hash(fetch, root_url)
923+
encoded_artifact = urllib.parse.quote(fetch["artifact"])
923924
if fetch["artifact"].startswith("public/"):
924925
path = "task/{task}/artifacts/{artifact}".format(
925-
task=fetch["task"], artifact=fetch["artifact"]
926+
task=fetch["task"], artifact=encoded_artifact
926927
)
927928
url = api(root_url, "queue", "v1", path)
928929
else:
929930
url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format(
930931
proxy_url=os.environ["TASKCLUSTER_PROXY_URL"],
931932
task=fetch["task"],
932-
artifact=fetch["artifact"],
933+
artifact=encoded_artifact,
933934
)
934935
downloads.append((url, extdir, fetch["extract"], sha256))
935936

test/test_scripts_fetch_content.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
import json
12
import os
23
import pathlib
34
import urllib.request
45
from importlib.machinery import SourceFileLoader
56
from importlib.util import module_from_spec, spec_from_loader
6-
from unittest.mock import MagicMock
7+
from unittest.mock import MagicMock, patch
78

89
import pytest
910

@@ -97,6 +98,78 @@ def getheader(field):
9798
raise
9899

99100

101+
@pytest.mark.parametrize(
102+
"artifact,expected_url_suffix",
103+
(
104+
pytest.param(
105+
"public/foo.apworld",
106+
"task/abc123/artifacts/public/foo.apworld",
107+
id="simple artifact name",
108+
),
109+
pytest.param(
110+
"public/Twilight Princess-0.2.3.apworld",
111+
"task/abc123/artifacts/public/Twilight%20Princess-0.2.3.apworld",
112+
id="artifact name with space",
113+
),
114+
),
115+
)
116+
def test_command_task_artifacts_url_encoding(
117+
monkeypatch, tmp_path, fetch_content_mod,
118+
artifact, expected_url_suffix,
119+
):
120+
fetches = [{"task": "abc123", "artifact": artifact, "extract": False}]
121+
monkeypatch.setenv("MOZ_FETCHES", json.dumps(fetches))
122+
monkeypatch.setenv("TASKCLUSTER_ROOT_URL", "https://tc.example.com")
123+
124+
captured_urls = []
125+
126+
def mock_fetch_urls(downloads):
127+
for url, dest_dir, extract, sha256 in downloads:
128+
captured_urls.append(url)
129+
130+
monkeypatch.setattr(fetch_content_mod, "fetch_urls", mock_fetch_urls)
131+
132+
args = MagicMock()
133+
args.dest = str(tmp_path)
134+
fetch_content_mod.command_task_artifacts(args)
135+
136+
assert len(captured_urls) == 1
137+
url = captured_urls[0]
138+
assert url == f"https://tc.example.com/api/queue/v1/{expected_url_suffix}"
139+
140+
141+
@pytest.mark.parametrize(
142+
"url,expected_dest_filename",
143+
(
144+
pytest.param(
145+
"https://tc.example.com/api/queue/v1/task/abc/artifacts/public/foo.apworld",
146+
"foo.apworld",
147+
id="simple",
148+
),
149+
pytest.param(
150+
"https://tc.example.com/api/queue/v1/task/abc/artifacts/public/Twilight%20Princess-0.2.3.apworld",
151+
"Twilight Princess-0.2.3.apworld",
152+
id="url-encoded space",
153+
),
154+
),
155+
)
156+
def test_fetch_and_extract_dest_filename(
157+
monkeypatch, tmp_path, fetch_content_mod, url, expected_dest_filename,
158+
):
159+
downloaded_to = []
160+
161+
def mock_download_to_path(url, path, sha256=None, size=None):
162+
downloaded_to.append(path)
163+
path.touch()
164+
165+
monkeypatch.setattr(fetch_content_mod, "download_to_path", mock_download_to_path)
166+
167+
fetch_content_mod.fetch_and_extract(url, tmp_path, extract=False)
168+
169+
assert len(downloaded_to) == 1
170+
assert downloaded_to[0].name == expected_dest_filename
171+
172+
100173
@pytest.mark.parametrize(
101174
"expected,orig,dest,strip_components,add_prefix",
102175
[

0 commit comments

Comments
 (0)