Skip to content

Commit 422271a

Browse files
committed
plot Ae for entire candidate pool
1 parent f4df928 commit 422271a

4 files changed

Lines changed: 80 additions & 1 deletion

File tree

notebooks/panel-design/markII/Snakefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,31 @@
11
rule all:
22
input:
33
"data/results/aes_markII_vs_codis.png",
4+
"data/results/aes_pool.png",
45
"data/results/ideogram.png",
56

67

78
rule plot_aes_vs_codis:
89
input:
9-
script="code/plot_ae.py",
10+
script="code/plot_ae_panel.py",
1011
panel="data/results/final-panel.tsv",
1112
codis="data/input/codis-strs-aes.tsv",
1213
output:
1314
figure="data/results/aes_markII_vs_codis.png",
1415
shell:
1516
"python {input} {output}"
1617

18+
19+
rule plot_pool_aes:
20+
input:
21+
script="code/plot_ae_pool.py",
22+
panel="data/results/markers-passed-filter.csv",
23+
aes="data/input/marker-aes-0.12.csv",
24+
output:
25+
figure="data/results/aes_pool.png",
26+
shell:
27+
"python {input} {output}"
28+
1729
rule plot_ideogram:
1830
input:
1931
script="code/plot_ideogram.py",
File renamed without changes.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python
2+
3+
# -------------------------------------------------------------------------------------------------
4+
# Copyright (c) 2025, DHS.
5+
#
6+
# This file is part of MicroHapDB (http://github.com/bioforensics/MicroHapDB) and is licensed under
7+
# the BSD license: see LICENSE.txt.
8+
#
9+
# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National
10+
# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the
11+
# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and
12+
# Development Center.
13+
# -------------------------------------------------------------------------------------------------
14+
15+
from argparse import ArgumentParser
16+
import matplotlib
17+
from matplotlib import pyplot as plt
18+
from util import load_markers
19+
20+
21+
def main(microhaps, outfile):
22+
fig = plt.figure(figsize=(6, 4), dpi=300)
23+
microhaps = bin_by_ads(microhaps)
24+
plot_by_bins(microhaps)
25+
plt.xlim((0, 275))
26+
plt.ylim((0, 18))
27+
plt.yticks([0, 3, 6, 9, 12, 15, 18])
28+
plt.xlabel("Extent (bp)")
29+
plt.ylabel("Effective Number of Alleles ($A_e$)")
30+
ax = plt.gca()
31+
ax.set_axisbelow(True)
32+
plt.grid(True, linestyle="--", color="#eeeeee")
33+
ax.legend(loc="upper left", fontsize=8)
34+
fig.savefig(outfile, bbox_inches="tight")
35+
36+
37+
def bin_by_ads(microhaps):
38+
microhaps = microhaps[microhaps.NumVars > 1].copy()
39+
microhaps["BinByADS"] = microhaps["NumVars"].apply(lambda nv: nv if nv % 2 == 0 else nv - 1)
40+
microhaps["BinByADS"] = microhaps["BinByADS"].apply(lambda ads: ads if ads < 10 else 10)
41+
return microhaps
42+
43+
44+
def plot_by_bins(microhaps):
45+
set1 = matplotlib.colormaps["Set1"]
46+
max_num_snps = microhaps.NumVars.max()
47+
for numsnps, data in microhaps.groupby("BinByADS"):
48+
colorindex = int((numsnps / 2) - 1)
49+
color = set1(colorindex)
50+
markersizes = [nv / max_num_snps * 15 for nv in data.NumVars]
51+
label = f"{numsnps}-{numsnps+1} ADSs" if numsnps < 10 else "≥10 ADSs"
52+
marker = "." if numsnps == 2 else "o"
53+
plt.scatter(data.Extent, data.Ae, marker=marker, s=markersizes, color=color, alpha=0.9, label=label)
54+
55+
56+
def get_parser():
57+
parser = ArgumentParser()
58+
parser.add_argument("microhaps", help="path to final microhap panel in CSV format")
59+
parser.add_argument("aes", help="path to MicroHapDB Ae table in CSV format")
60+
parser.add_argument("outfile", help="output filename")
61+
return parser
62+
63+
64+
if __name__ == "__main__":
65+
args = get_parser().parse_args()
66+
markers = load_markers(args.microhaps, args.aes, objectify=False)
67+
main(markers, args.outfile)
367 KB
Loading

0 commit comments

Comments
 (0)