Skip to content

Commit 7b4f5a1

Browse files
committed
Be able to specify which branch of a DataFederation repo to clone
Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent 022e8c6 commit 7b4f5a1

1 file changed

Lines changed: 46 additions & 32 deletions

File tree

src/aboutcode/federated/__init__.py

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ def from_url(
566566
name: str,
567567
remote_root_url: str,
568568
local_root_dir: Path = None,
569+
branch: str = "main",
569570
) -> "DataFederation":
570571
"""
571572
Return a DataFederation loaded from a remote configuration file.
@@ -574,6 +575,7 @@ def from_url(
574575
remote_root_url=remote_root_url,
575576
federation_name=name,
576577
config_filename=cls.CONFIG_FILENAME,
578+
branch=branch,
577579
)
578580
headers = {"User-Agent": "AboutCode/FederatedCode"}
579581
response = requests.get(url=rcf_url, headers=headers)
@@ -715,6 +717,9 @@ class DataCluster:
715717
# this is the name of the cluster
716718
data_kind: str
717719

720+
# The filename used when saving data.
721+
datafile_name: str
722+
718723
# a URI template to build the path to the datafile for this data kind.
719724
# this is the path relative to the root of a cluster directory. It does not
720725
# include directory and repository.
@@ -800,6 +805,7 @@ def from_dict(cls, data: dict) -> "DataCluster":
800805

801806
return cls(
802807
data_kind=data["data_kind"],
808+
datafile_name=data.get("datafile_name"),
803809
datafile_path_template=data.get("datafile_path_template"),
804810
purl_type_configs=ptcs,
805811
data_schema_url=data.get("data_schema_url"),
@@ -812,6 +818,7 @@ def from_dict(cls, data: dict) -> "DataCluster":
812818
def to_dict(self):
813819
return dict(
814820
data_kind=self.data_kind,
821+
datafile_name=self.datafile_name,
815822
datafile_path_template=self.datafile_path_template,
816823
purl_type_configs=[pt.to_dict() for pt in self.purl_type_configs],
817824
data_schema_url=self.data_schema_url,
@@ -834,19 +841,6 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
834841
"""
835842
raise NotImplementedError()
836843

837-
purl = as_purl(purl)
838-
# FIXME: create as member
839-
purl_type_config_by_type = {ptc.purl_type: ptc for ptc in self.purl_type_configs}
840-
purl_type_config = purl_type_config_by_type(purl.type, self.default_config())
841-
842-
ppe = package_path_elements(purl, max_value=purl_type_config.number_of_dirs)
843-
purl_hash, core_path, version, extra_path = ppe
844-
845-
direct_url = None
846-
# construct a path based on path template
847-
# construct a URL
848-
return direct_url
849-
850844
def get_local_datafile(self, purl: Union[str, PackageURL]) -> LocalDataFile:
851845
"""
852846
Return a LocalDataFile of the data kind stored in this cluster given a
@@ -862,7 +856,7 @@ def get_config(self, purl_type: str) -> "PurlTypeConfig":
862856
return self._configs_by_purl_type["default"]
863857
return self._configs_by_purl_type[purl_type]
864858

865-
def get_datafile_relative_path(self, purl: Union[str, PackageURL]) -> str:
859+
def get_datafile_relative_path(self, purl: Union[str, PackageURL], datafile_name=None) -> str:
866860
"""
867861
Return the datafile path relative to the root of a cluster directory
868862
given a PURL.
@@ -874,11 +868,15 @@ def get_datafile_relative_path(self, purl: Union[str, PackageURL]) -> str:
874868
f"DataCluster '{self.data_kind}' needs PackageURL with version to generate path."
875869
)
876870

871+
if not datafile_name:
872+
datafile_name = self.datafile_name
873+
877874
template = uritemplate.URITemplate(self.datafile_path_template)
878875
return template.expand(
879876
namespace=purl.namespace,
880877
name=purl.name,
881878
version=purl.version,
879+
datafile_name=datafile_name,
882880
)
883881

884882
def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str]:
@@ -892,14 +890,14 @@ def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str]
892890
repo_hash = purl_hash - (purl_hash % ptc.numbers_of_dirs_per_repo)
893891
return f"{repo_hash:04}", purl_hashid
894892

895-
def get_datafile_repo_and_path(self, purl: Union[str, PackageURL]) -> Tuple[str, str]:
893+
def get_datafile_repo_and_path(self, purl: Union[str, PackageURL], datafile_name=None) -> Tuple[str, str]:
896894
"""
897895
Return the repository name and relative path to the datafile of the data kind stored
898896
in this cluster given a PURL.
899897
"""
900898
purl = as_purl(purl)
901899
repo_hash, dir_hash = self.get_repo_and_dir_hash(purl)
902-
relative_datafile_path = self.get_datafile_relative_path(purl)
900+
relative_datafile_path = self.get_datafile_relative_path(purl, datafile_name=datafile_name)
903901

904902
directory_name = f"{purl.type}-{dir_hash}"
905903
repository_name = f"{self.data_kind}-{purl.type}-{repo_hash}"
@@ -1143,7 +1141,8 @@ def cluster_preset():
11431141
DataCluster(
11441142
data_kind="purls",
11451143
description="List of fully qualified PURL strings for a package, sorted by version.",
1146-
datafile_path_template="{/namespace}/{name}/purls.yml",
1144+
datafile_name="purls.yml",
1145+
datafile_path_template="{/namespace}/{name}/{datafile_name}",
11471146
purl_type_configs=PurlTypeConfig.small_size_configs(),
11481147
data_schema_url="",
11491148
documentation_url="https://github.com/package-url/purl-spec/",
@@ -1155,7 +1154,8 @@ def cluster_preset():
11551154
"Each datafile path and schema is PURL type-specific "
11561155
"and not documented here.",
11571156
# FIXME: a POM is in XML, some metadata files may be code
1158-
datafile_path_template="",
1157+
datafile_name="api_package_metadata.json",
1158+
datafile_path_template="{/namespace}/{name}/{datafile_name}",
11591159
purl_type_configs=PurlTypeConfig.large_size_configs(),
11601160
data_schema_url="",
11611161
documentation_url="",
@@ -1167,7 +1167,8 @@ def cluster_preset():
11671167
"Each datafile path and schema is PURL type-specific "
11681168
"and not documented here.",
11691169
# FIXME: a POM is in XML, some metadata files may be code
1170-
datafile_path_template="",
1170+
datafile_name="api_package_version_responses.json",
1171+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
11711172
purl_type_configs=PurlTypeConfig.large_size_configs(),
11721173
data_schema_url="",
11731174
documentation_url="",
@@ -1177,7 +1178,8 @@ def cluster_preset():
11771178
data_kind="purldb",
11781179
description="PurlDB normalized metadata datafiles for each package "
11791180
"versions. Does not include fingerprints and symbols.",
1180-
datafile_path_template="{/namespace}/{name}/{version}/purldb.json",
1181+
datafile_name="purldb.json",
1182+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
11811183
purl_type_configs=PurlTypeConfig.large_size_configs(),
11821184
data_schema_url="",
11831185
documentation_url="",
@@ -1188,7 +1190,8 @@ def cluster_preset():
11881190
data_kind="vulnerabilities",
11891191
description="VulnerableCode vulnerabilities for each package. "
11901192
"Also includes a separate vulnerabilities directory/",
1191-
datafile_path_template="{/namespace}/{name}/vulnerabilities.json",
1193+
datafile_name="vulnerabilities.json",
1194+
datafile_path_template="{/namespace}/{name}/{datafile_name}",
11921195
purl_type_configs=[PurlTypeConfig.default_config()],
11931196
data_schema_url="",
11941197
documentation_url="",
@@ -1197,7 +1200,8 @@ def cluster_preset():
11971200
DataCluster(
11981201
data_kind="security_advisories",
11991202
description="VulnerableCode security advisories for each package version.",
1200-
datafile_path_template="{/namespace}/{name}/{version}/advisories.yml",
1203+
datafile_name="advisories.yml",
1204+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12011205
purl_type_configs=[PurlTypeConfig.default_config()],
12021206
data_schema_url="",
12031207
documentation_url="",
@@ -1206,7 +1210,8 @@ def cluster_preset():
12061210
DataCluster(
12071211
data_kind="scancode_toolkit_scans",
12081212
description="scancode toolkit scans for each package version.",
1209-
datafile_path_template="{/namespace}/{name}/{version}/scancode-toolkit.json",
1213+
datafile_name="scancode-toolkit.json",
1214+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12101215
purl_type_configs=PurlTypeConfig.large_size_configs(),
12111216
data_schema_url="",
12121217
documentation_url="",
@@ -1215,7 +1220,8 @@ def cluster_preset():
12151220
DataCluster(
12161221
data_kind="scancode_fingerprints",
12171222
description="scancode_fingerprints for each package version.",
1218-
datafile_path_template="{/namespace}/{name}/{version}/scancode-fingerprints.json",
1223+
datafile_name="scancode-fingerprints.json",
1224+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12191225
purl_type_configs=PurlTypeConfig.large_size_configs(),
12201226
data_schema_url="",
12211227
documentation_url="",
@@ -1224,7 +1230,8 @@ def cluster_preset():
12241230
DataCluster(
12251231
data_kind="cyclonedx14_sboms",
12261232
description="CycloneDX v1.4 sboms for each package version",
1227-
datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-14.json",
1233+
datafile_name="cyclonedx-14.json",
1234+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12281235
purl_type_configs=PurlTypeConfig.large_size_configs(),
12291236
data_schema_url="",
12301237
documentation_url="",
@@ -1233,7 +1240,8 @@ def cluster_preset():
12331240
DataCluster(
12341241
data_kind="cyclonedx15_sboms",
12351242
description="CycloneDX v1.5 sboms for each package version",
1236-
datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-15.json",
1243+
datafile_name="cyclonedx-15.json",
1244+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12371245
purl_type_configs=PurlTypeConfig.large_size_configs(),
12381246
data_schema_url="",
12391247
documentation_url="",
@@ -1242,7 +1250,8 @@ def cluster_preset():
12421250
DataCluster(
12431251
data_kind="cyclonedx16_sboms",
12441252
description="CycloneDX v1.6 sboms for each package version",
1245-
datafile_path_template="{/namespace}/{name}/{version}/cyclonedx-16.json",
1253+
datafile_name="cyclonedx-16.json",
1254+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12461255
purl_type_configs=PurlTypeConfig.large_size_configs(),
12471256
data_schema_url="",
12481257
documentation_url="",
@@ -1251,7 +1260,8 @@ def cluster_preset():
12511260
DataCluster(
12521261
data_kind="spdx2_sboms",
12531262
description="SPDX version 2.x sboms for each package version",
1254-
datafile_path_template="{/namespace}/{name}/{version}/spdx-2.json",
1263+
datafile_name="spdx-2.json",
1264+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12551265
purl_type_configs=PurlTypeConfig.large_size_configs(),
12561266
data_schema_url="",
12571267
documentation_url="",
@@ -1260,7 +1270,8 @@ def cluster_preset():
12601270
DataCluster(
12611271
data_kind="atom_slices",
12621272
description="Atom slices for each package version",
1263-
datafile_path_template="{/namespace}/{name}/{version}/atom.json",
1273+
datafile_name="atom.json",
1274+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12641275
purl_type_configs=PurlTypeConfig.large_size_configs(),
12651276
data_schema_url="",
12661277
documentation_url="",
@@ -1270,7 +1281,8 @@ def cluster_preset():
12701281
data_kind="atom_vulnerable_slices",
12711282
description="Atom vulnerable_slices for each vulnerable package version",
12721283
# FIXME: need to qualify these with an advisory / CVE?
1273-
datafile_path_template="{/namespace}/{name}/{version}/atom-vulnerable.json",
1284+
datafile_name="atom-vulnerable.json",
1285+
datafile_path_template="{/namespace}/{name}/{version}/{datafile_name}",
12741286
purl_type_configs=PurlTypeConfig.large_size_configs(),
12751287
data_schema_url="",
12761288
documentation_url="",
@@ -1280,7 +1292,8 @@ def cluster_preset():
12801292
data_kind="openssf_security_scorecards",
12811293
description="OpenSSf security_scorecards for package",
12821294
# FIXME: need to qualify these with an advisory / CVE?
1283-
datafile_path_template="{/namespace}/{name}/security_scorecard.json",
1295+
datafile_name="security_scorecard.json",
1296+
datafile_path_template="{/namespace}/{name}/{datafile_name}",
12841297
purl_type_configs=PurlTypeConfig.medium_size_configs(),
12851298
data_schema_url="",
12861299
documentation_url="",
@@ -1433,6 +1446,7 @@ def build_direct_federation_config_file_url(
14331446
remote_root_url: str,
14341447
federation_name: str,
14351448
config_filename: str,
1449+
branch:str,
14361450
):
14371451
"""
14381452
Return the URL to download a remote config file for a federation
@@ -1441,7 +1455,7 @@ def build_direct_federation_config_file_url(
14411455
root_url=remote_root_url,
14421456
repo=federation_name,
14431457
path=config_filename,
1444-
branch="main",
1458+
branch=branch,
14451459
)
14461460

14471461

0 commit comments

Comments
 (0)