1515from argparse import ArgumentParser
1616from lib import SourceIndex
1717from pathlib import Path
18- from repeats import main as flag_repeats
1918import sys
2019
2120
22- def main (
23- source_path , dbsnp_path , chain_path , rmsk_path , exclusions = ["Auton2015" ], check_only = False
24- ):
25- validate_paths (dbsnp_path , chain_path , rmsk_path )
21+ def main (source_path , dbsnp_path , chain_path , exclusions = ["Auton2015" ], check_only = False ):
22+ validate_paths (dbsnp_path , chain_path )
2623 if check_only :
2724 return
2825 index = SourceIndex (source_path , dbsnp_path , chain_path , exclude = exclusions )
@@ -35,12 +32,10 @@ def main(
3532 frequencies .to_csv ("frequency.csv.gz" , index = False , float_format = "%.5f" , compression = "gzip" )
3633 index .populations .to_csv ("population.csv" , index = False )
3734 index .merges .to_csv ("merged.csv" , index = False )
38- repeats = flag_repeats (Path (rmsk_path ) / "rmsk.txt.gz" , "marker.csv" , delta = 25 )
39- repeats .to_csv ("repeats.csv" , index = False )
4035 print (index )
4136
4237
43- def validate_paths (dbsnp_path , rmsk_path , chain_path ):
38+ def validate_paths (dbsnp_path , chain_path ):
4439 paths = list ()
4540 for version in (37 , 38 ):
4641 for extension in ("vcf.gz" , "vcf.gz.tbi" , "rsidx" ):
@@ -49,7 +44,6 @@ def validate_paths(dbsnp_path, rmsk_path, chain_path):
4944 paths .append (Path (dbsnp_path ) / "refsnp-merged.csv.gz" )
5045 paths .append (Path (chain_path ) / "hg19ToHg38.over.chain.gz" )
5146 paths .append (Path (chain_path ) / "hg38ToHg19.over.chain.gz" )
52- paths .append (Path (rmsk_path ) / "rmsk.txt.gz" )
5347 files_present = [p .is_file () for p in paths ]
5448 print ("-" * 60 , "[Auxiliary data file check]\n " , "Present Path" , sep = "\n " , file = sys .stderr )
5549 for path , present in zip (paths , files_present ):
@@ -81,7 +75,6 @@ def get_parser():
8175 parser = ArgumentParser (description = "MicroHapDB database build procedure" )
8276 parser .add_argument ("dbsnp_path" )
8377 parser .add_argument ("chain_path" )
84- parser .add_argument ("rmsk_path" )
8578 parser .add_argument (
8679 "--sources" ,
8780 default = "sources" ,
@@ -107,7 +100,6 @@ def get_parser():
107100 args .sources ,
108101 args .dbsnp_path ,
109102 args .chain_path ,
110- args .rmsk_path ,
111103 exclusions = args .exclude ,
112104 check_only = args .check ,
113105 )
0 commit comments