@@ -566,6 +566,7 @@ def from_url(
566566 name : str ,
567567 remote_root_url : str ,
568568 local_root_dir : Path = None ,
569+ branch : str = "main" ,
569570 ) -> "DataFederation" :
570571 """
571572 Return a DataFederation loaded from a remote configuration file.
@@ -574,6 +575,7 @@ def from_url(
574575 remote_root_url = remote_root_url ,
575576 federation_name = name ,
576577 config_filename = cls .CONFIG_FILENAME ,
578+ branch = branch ,
577579 )
578580 headers = {"User-Agent" : "AboutCode/FederatedCode" }
579581 response = requests .get (url = rcf_url , headers = headers )
@@ -715,6 +717,9 @@ class DataCluster:
715717 # this is the name of the cluster
716718 data_kind : str
717719
720+ # The filename used when saving data.
721+ datafile_name : str
722+
718723 # a URI template to build the path to the datafile for this data kind.
719724 # this is the path relative to the root of a cluster directory. It does not
720725 # include directory and repository.
@@ -800,6 +805,7 @@ def from_dict(cls, data: dict) -> "DataCluster":
800805
801806 return cls (
802807 data_kind = data ["data_kind" ],
808+ datafile_name = data .get ("datafile_name" ),
803809 datafile_path_template = data .get ("datafile_path_template" ),
804810 purl_type_configs = ptcs ,
805811 data_schema_url = data .get ("data_schema_url" ),
@@ -812,6 +818,7 @@ def from_dict(cls, data: dict) -> "DataCluster":
812818 def to_dict (self ):
813819 return dict (
814820 data_kind = self .data_kind ,
821+ datafile_name = self .datafile_name ,
815822 datafile_path_template = self .datafile_path_template ,
816823 purl_type_configs = [pt .to_dict () for pt in self .purl_type_configs ],
817824 data_schema_url = self .data_schema_url ,
@@ -834,19 +841,6 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
834841 """
835842 raise NotImplementedError ()
836843
837- purl = as_purl (purl )
838- # FIXME: create as member
839- purl_type_config_by_type = {ptc .purl_type : ptc for ptc in self .purl_type_configs }
840- purl_type_config = purl_type_config_by_type (purl .type , self .default_config ())
841-
842- ppe = package_path_elements (purl , max_value = purl_type_config .number_of_dirs )
843- purl_hash , core_path , version , extra_path = ppe
844-
845- direct_url = None
846- # construct a path based on path template
847- # construct a URL
848- return direct_url
849-
850844 def get_local_datafile (self , purl : Union [str , PackageURL ]) -> LocalDataFile :
851845 """
852846 Return a LocalDataFile of the data kind stored in this cluster given a
@@ -862,7 +856,7 @@ def get_config(self, purl_type: str) -> "PurlTypeConfig":
862856 return self ._configs_by_purl_type ["default" ]
863857 return self ._configs_by_purl_type [purl_type ]
864858
865- def get_datafile_relative_path (self , purl : Union [str , PackageURL ]) -> str :
859+ def get_datafile_relative_path (self , purl : Union [str , PackageURL ], datafile_name = None ) -> str :
866860 """
867861 Return the datafile path relative to the root of a cluster directory
868862 given a PURL.
@@ -874,11 +868,15 @@ def get_datafile_relative_path(self, purl: Union[str, PackageURL]) -> str:
874868 f"DataCluster '{ self .data_kind } ' needs PackageURL with version to generate path."
875869 )
876870
871+ if not datafile_name :
872+ datafile_name = self .datafile_name
873+
877874 template = uritemplate .URITemplate (self .datafile_path_template )
878875 return template .expand (
879876 namespace = purl .namespace ,
880877 name = purl .name ,
881878 version = purl .version ,
879+ datafile_name = datafile_name ,
882880 )
883881
884882 def get_repo_and_dir_hash (self , purl : Union [str , PackageURL ]) -> Tuple [str , str ]:
@@ -892,14 +890,14 @@ def get_repo_and_dir_hash(self, purl: Union[str, PackageURL]) -> Tuple[str, str]
892890 repo_hash = purl_hash - (purl_hash % ptc .numbers_of_dirs_per_repo )
893891 return f"{ repo_hash :04} " , purl_hashid
894892
895- def get_datafile_repo_and_path (self , purl : Union [str , PackageURL ]) -> Tuple [str , str ]:
893+ def get_datafile_repo_and_path (self , purl : Union [str , PackageURL ], datafile_name = None ) -> Tuple [str , str ]:
896894 """
897895 Return the repository name and relative path to the datafile of the data kind stored
898896 in this cluster given a PURL.
899897 """
900898 purl = as_purl (purl )
901899 repo_hash , dir_hash = self .get_repo_and_dir_hash (purl )
902- relative_datafile_path = self .get_datafile_relative_path (purl )
900+ relative_datafile_path = self .get_datafile_relative_path (purl , datafile_name = datafile_name )
903901
904902 directory_name = f"{ purl .type } -{ dir_hash } "
905903 repository_name = f"{ self .data_kind } -{ purl .type } -{ repo_hash } "
@@ -1143,7 +1141,8 @@ def cluster_preset():
11431141 DataCluster (
11441142 data_kind = "purls" ,
11451143 description = "List of fully qualified PURL strings for a package, sorted by version." ,
1146- datafile_path_template = "{/namespace}/{name}/purls.yml" ,
1144+ datafile_name = "purls.yml" ,
1145+ datafile_path_template = "{/namespace}/{name}/{datafile_name}" ,
11471146 purl_type_configs = PurlTypeConfig .small_size_configs (),
11481147 data_schema_url = "" ,
11491148 documentation_url = "https://github.com/package-url/purl-spec/" ,
@@ -1155,7 +1154,8 @@ def cluster_preset():
11551154 "Each datafile path and schema is PURL type-specific "
11561155 "and not documented here." ,
11571156 # FIXME: a POM is in XML, some metadata files may be code
1158- datafile_path_template = "" ,
1157+ datafile_name = "api_package_metadata.json" ,
1158+ datafile_path_template = "{/namespace}/{name}/{datafile_name}" ,
11591159 purl_type_configs = PurlTypeConfig .large_size_configs (),
11601160 data_schema_url = "" ,
11611161 documentation_url = "" ,
@@ -1167,7 +1167,8 @@ def cluster_preset():
11671167 "Each datafile path and schema is PURL type-specific "
11681168 "and not documented here." ,
11691169 # FIXME: a POM is in XML, some metadata files may be code
1170- datafile_path_template = "" ,
1170+ datafile_name = "api_package_version_responses.json" ,
1171+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
11711172 purl_type_configs = PurlTypeConfig .large_size_configs (),
11721173 data_schema_url = "" ,
11731174 documentation_url = "" ,
@@ -1177,7 +1178,8 @@ def cluster_preset():
11771178 data_kind = "purldb" ,
11781179 description = "PurlDB normalized metadata datafiles for each package "
11791180 "versions. Does not include fingerprints and symbols." ,
1180- datafile_path_template = "{/namespace}/{name}/{version}/purldb.json" ,
1181+ datafile_name = "purldb.json" ,
1182+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
11811183 purl_type_configs = PurlTypeConfig .large_size_configs (),
11821184 data_schema_url = "" ,
11831185 documentation_url = "" ,
@@ -1188,7 +1190,8 @@ def cluster_preset():
11881190 data_kind = "vulnerabilities" ,
11891191 description = "VulnerableCode vulnerabilities for each package. "
11901192 "Also includes a separate vulnerabilities directory/" ,
1191- datafile_path_template = "{/namespace}/{name}/vulnerabilities.json" ,
1193+ datafile_name = "vulnerabilities.json" ,
1194+ datafile_path_template = "{/namespace}/{name}/{datafile_name}" ,
11921195 purl_type_configs = [PurlTypeConfig .default_config ()],
11931196 data_schema_url = "" ,
11941197 documentation_url = "" ,
@@ -1197,7 +1200,8 @@ def cluster_preset():
11971200 DataCluster (
11981201 data_kind = "security_advisories" ,
11991202 description = "VulnerableCode security advisories for each package version." ,
1200- datafile_path_template = "{/namespace}/{name}/{version}/advisories.yml" ,
1203+ datafile_name = "advisories.yml" ,
1204+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12011205 purl_type_configs = [PurlTypeConfig .default_config ()],
12021206 data_schema_url = "" ,
12031207 documentation_url = "" ,
@@ -1206,7 +1210,8 @@ def cluster_preset():
12061210 DataCluster (
12071211 data_kind = "scancode_toolkit_scans" ,
12081212 description = "scancode toolkit scans for each package version." ,
1209- datafile_path_template = "{/namespace}/{name}/{version}/scancode-toolkit.json" ,
1213+ datafile_name = "scancode-toolkit.json" ,
1214+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12101215 purl_type_configs = PurlTypeConfig .large_size_configs (),
12111216 data_schema_url = "" ,
12121217 documentation_url = "" ,
@@ -1215,7 +1220,8 @@ def cluster_preset():
12151220 DataCluster (
12161221 data_kind = "scancode_fingerprints" ,
12171222 description = "scancode_fingerprints for each package version." ,
1218- datafile_path_template = "{/namespace}/{name}/{version}/scancode-fingerprints.json" ,
1223+ datafile_name = "scancode-fingerprints.json" ,
1224+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12191225 purl_type_configs = PurlTypeConfig .large_size_configs (),
12201226 data_schema_url = "" ,
12211227 documentation_url = "" ,
@@ -1224,7 +1230,8 @@ def cluster_preset():
12241230 DataCluster (
12251231 data_kind = "cyclonedx14_sboms" ,
12261232 description = "CycloneDX v1.4 sboms for each package version" ,
1227- datafile_path_template = "{/namespace}/{name}/{version}/cyclonedx-14.json" ,
1233+ datafile_name = "cyclonedx-14.json" ,
1234+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12281235 purl_type_configs = PurlTypeConfig .large_size_configs (),
12291236 data_schema_url = "" ,
12301237 documentation_url = "" ,
@@ -1233,7 +1240,8 @@ def cluster_preset():
12331240 DataCluster (
12341241 data_kind = "cyclonedx15_sboms" ,
12351242 description = "CycloneDX v1.5 sboms for each package version" ,
1236- datafile_path_template = "{/namespace}/{name}/{version}/cyclonedx-15.json" ,
1243+ datafile_name = "cyclonedx-15.json" ,
1244+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12371245 purl_type_configs = PurlTypeConfig .large_size_configs (),
12381246 data_schema_url = "" ,
12391247 documentation_url = "" ,
@@ -1242,7 +1250,8 @@ def cluster_preset():
12421250 DataCluster (
12431251 data_kind = "cyclonedx16_sboms" ,
12441252 description = "CycloneDX v1.6 sboms for each package version" ,
1245- datafile_path_template = "{/namespace}/{name}/{version}/cyclonedx-16.json" ,
1253+ datafile_name = "cyclonedx-16.json" ,
1254+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12461255 purl_type_configs = PurlTypeConfig .large_size_configs (),
12471256 data_schema_url = "" ,
12481257 documentation_url = "" ,
@@ -1251,7 +1260,8 @@ def cluster_preset():
12511260 DataCluster (
12521261 data_kind = "spdx2_sboms" ,
12531262 description = "SPDX version 2.x sboms for each package version" ,
1254- datafile_path_template = "{/namespace}/{name}/{version}/spdx-2.json" ,
1263+ datafile_name = "spdx-2.json" ,
1264+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12551265 purl_type_configs = PurlTypeConfig .large_size_configs (),
12561266 data_schema_url = "" ,
12571267 documentation_url = "" ,
@@ -1260,7 +1270,8 @@ def cluster_preset():
12601270 DataCluster (
12611271 data_kind = "atom_slices" ,
12621272 description = "Atom slices for each package version" ,
1263- datafile_path_template = "{/namespace}/{name}/{version}/atom.json" ,
1273+ datafile_name = "atom.json" ,
1274+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12641275 purl_type_configs = PurlTypeConfig .large_size_configs (),
12651276 data_schema_url = "" ,
12661277 documentation_url = "" ,
@@ -1270,7 +1281,8 @@ def cluster_preset():
12701281 data_kind = "atom_vulnerable_slices" ,
12711282 description = "Atom vulnerable_slices for each vulnerable package version" ,
12721283 # FIXME: need to qualify these with an advisory / CVE?
1273- datafile_path_template = "{/namespace}/{name}/{version}/atom-vulnerable.json" ,
1284+ datafile_name = "atom-vulnerable.json" ,
1285+ datafile_path_template = "{/namespace}/{name}/{version}/{datafile_name}" ,
12741286 purl_type_configs = PurlTypeConfig .large_size_configs (),
12751287 data_schema_url = "" ,
12761288 documentation_url = "" ,
@@ -1280,7 +1292,8 @@ def cluster_preset():
12801292 data_kind = "openssf_security_scorecards" ,
12811293 description = "OpenSSf security_scorecards for package" ,
12821294 # FIXME: need to qualify these with an advisory / CVE?
1283- datafile_path_template = "{/namespace}/{name}/security_scorecard.json" ,
1295+ datafile_name = "security_scorecard.json" ,
1296+ datafile_path_template = "{/namespace}/{name}/{datafile_name}" ,
12841297 purl_type_configs = PurlTypeConfig .medium_size_configs (),
12851298 data_schema_url = "" ,
12861299 documentation_url = "" ,
@@ -1433,6 +1446,7 @@ def build_direct_federation_config_file_url(
14331446 remote_root_url : str ,
14341447 federation_name : str ,
14351448 config_filename : str ,
1449+ branch :str ,
14361450):
14371451 """
14381452 Return the URL to download a remote config file for a federation
@@ -1441,7 +1455,7 @@ def build_direct_federation_config_file_url(
14411455 root_url = remote_root_url ,
14421456 repo = federation_name ,
14431457 path = config_filename ,
1444- branch = "main" ,
1458+ branch = branch ,
14451459 )
14461460
14471461
0 commit comments