|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +# ------------------------------------------------------------------------------------------------- |
| 4 | +# Copyright (c) 2025, DHS. |
| 5 | +# |
| 6 | +# This file is part of MicroHapDB (http://github.com/bioforensics/MicroHapDB) and is licensed under |
| 7 | +# the BSD license: see LICENSE.txt. |
| 8 | +# |
| 9 | +# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National |
| 10 | +# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the |
| 11 | +# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and |
| 12 | +# Development Center. |
| 13 | +# ------------------------------------------------------------------------------------------------- |
| 14 | + |
| 15 | +from argparse import ArgumentParser |
| 16 | +import matplotlib |
| 17 | +from matplotlib import pyplot as plt |
| 18 | +from util import load_markers |
| 19 | + |
| 20 | + |
| 21 | +def main(microhaps, outfile): |
| 22 | + fig = plt.figure(figsize=(6, 4), dpi=300) |
| 23 | + microhaps = bin_by_ads(microhaps) |
| 24 | + plot_by_bins(microhaps) |
| 25 | + plt.xlim((0, 275)) |
| 26 | + plt.ylim((0, 18)) |
| 27 | + plt.yticks([0, 3, 6, 9, 12, 15, 18]) |
| 28 | + plt.xlabel("Extent (bp)") |
| 29 | + plt.ylabel("Effective Number of Alleles ($A_e$)") |
| 30 | + ax = plt.gca() |
| 31 | + ax.set_axisbelow(True) |
| 32 | + plt.grid(True, linestyle="--", color="#eeeeee") |
| 33 | + ax.legend(loc="upper left", fontsize=8) |
| 34 | + fig.savefig(outfile, bbox_inches="tight") |
| 35 | + |
| 36 | + |
| 37 | +def bin_by_ads(microhaps): |
| 38 | + microhaps = microhaps[microhaps.NumVars > 1].copy() |
| 39 | + microhaps["BinByADS"] = microhaps["NumVars"].apply(lambda nv: nv if nv % 2 == 0 else nv - 1) |
| 40 | + microhaps["BinByADS"] = microhaps["BinByADS"].apply(lambda ads: ads if ads < 10 else 10) |
| 41 | + return microhaps |
| 42 | + |
| 43 | + |
| 44 | +def plot_by_bins(microhaps): |
| 45 | + set1 = matplotlib.colormaps["Set1"] |
| 46 | + max_num_snps = microhaps.NumVars.max() |
| 47 | + for numsnps, data in microhaps.groupby("BinByADS"): |
| 48 | + colorindex = int((numsnps / 2) - 1) |
| 49 | + color = set1(colorindex) |
| 50 | + markersizes = [nv / max_num_snps * 15 for nv in data.NumVars] |
| 51 | + label = f"{numsnps}-{numsnps+1} ADSs" if numsnps < 10 else "≥10 ADSs" |
| 52 | + marker = "." if numsnps == 2 else "o" |
| 53 | + plt.scatter(data.Extent, data.Ae, marker=marker, s=markersizes, color=color, alpha=0.9, label=label) |
| 54 | + |
| 55 | + |
| 56 | +def get_parser(): |
| 57 | + parser = ArgumentParser() |
| 58 | + parser.add_argument("microhaps", help="path to final microhap panel in CSV format") |
| 59 | + parser.add_argument("aes", help="path to MicroHapDB Ae table in CSV format") |
| 60 | + parser.add_argument("outfile", help="output filename") |
| 61 | + return parser |
| 62 | + |
| 63 | + |
| 64 | +if __name__ == "__main__": |
| 65 | + args = get_parser().parse_args() |
| 66 | + markers = load_markers(args.microhaps, args.aes, objectify=False) |
| 67 | + main(markers, args.outfile) |
0 commit comments