-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReformat_virusSam2.pl
More file actions
99 lines (96 loc) · 3.38 KB
/
Reformat_virusSam2.pl
File metadata and controls
99 lines (96 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
use strict;
#
# Author: Xun Chen
# Email: xunchen85@gmail.com
# Date: 09/10/2020
#
# ---------------------------------------------------------------------------
# Script-wide state. Everything here is a file-scoped lexical.
# ---------------------------------------------------------------------------

# String scalars, all starting out empty.
#   $read_name - read name (column 2 of the candidate file) of the group
#                currently being accumulated.
#   $name      - key of the record currently being stored in %read.
#   $line      - not referenced anywhere else in the visible script.
my ($line, $read_name, $name) = ("") x 3;

# Numeric scalars, all starting at zero.
#   $order  - only ever compared against 1 in the candidate loop; nothing
#             visible assigns to it.
#   $order1 - not referenced anywhere else in the visible script.
my ($order, $order1) = (0, 0);

# Containers, all starting out empty.
#   @line  - whitespace-split fields of the input line being processed.
#   %read  - records collected for the current read name.
#   %virus - lookup filled from the virus list (column 1 => column 2).
#   %human - not referenced anywhere else in the visible script.
my (@line, %read, %human, %virus);

# Second command-line argument: path of the virus list file.
my $virus_list = $ARGV[1];
#####################
# Write "<input>_2": the candidate file sorted, de-duplicated with uniq, and
# re-sorted from field 2 onwards so that lines sharing the same second column
# end up adjacent for the grouping loop further down.
#
# Both paths are single-quoted for the shell so that spaces or shell
# metacharacters in the file name can neither break nor hijack the command,
# and a failed pipeline now aborts the run instead of being ignored.
{
    defined $ARGV[0] && length $ARGV[0]
        or die "$0: missing candidate file argument (ARGV[0])\n";

    # POSIX-shell single-quote escaping: close the quote, emit \', reopen.
    my ($input_q, $sorted_q) = map {
        my $path = $_;
        $path =~ s/'/'\\''/g;
        "'$path'";
    } ($ARGV[0], "$ARGV[0]_2");

    my $status = system("sort -k 2 -k 3 -k 4 -k 5 $input_q | uniq | sort -k 2 > $sorted_q");

    # NOTE: the shell reports the status of the LAST command of the pipeline,
    # so this catches a failed redirect or final sort, not every upstream error.
    die "$0: sort/uniq pipeline failed for '$ARGV[0]' (wait status $status)\n"
        if $status != 0;
}
#####################
# Load the virus lookup table into %virus: for every whitespace-separated line
# of the list file, column 1 becomes the key and column 2 the value. Any value
# containing "hbv" or "hepatitis_b" is collapsed to the single canonical label
# "hepatitis_b_virus".
#
# The list stays optional when no path is given (the original silently
# tolerated that and left %virus empty). A path that IS given but cannot be
# opened is now a fatal error rather than a silently empty table.
if (defined $virus_list && length $virus_list) {
    # Three-argument open with a lexical handle: the path is taken literally
    # and the handle is released at the end of this block.
    open my $virus_fh, '<', $virus_list
        or die "Cannot open virus list '$virus_list': $!";

    while (my $entry = <$virus_fh>) {
        my @cols = split ' ', $entry;

        # Case-sensitive substring match, same as the original string patterns.
        $cols[1] = "hepatitis_b_virus" if $cols[1] =~ /hbv|hepatitis_b/;

        $virus{$cols[0]} = $cols[1];
    }

    close $virus_fh;
}
#####################
# Walk the sorted candidate file ("<input>_2") and, for every run of
# consecutive lines sharing the same read name (column 2), print the stored
# record(s) with the highest score.
#
# Per input line (fields are 0-based, whitespace-split):
#   * field 5 is overwritten with the %virus translation of field 3, or with
#     field 3 itself when there is no translation;
#   * the line is either stored as a new record or merged into the record of
#     its mate (looked up under the key read_field3_field8).
#
# A stored record is laid out as
#   [ tag (field 0), field 3, score (field 11), field 0, field 1, ... ]
# i.e. the original fields start at index 3, so record index 6/7/11/18
# corresponds to original field 3/4/8/15. Merging inserts the new line's
# fields at index 3, sets the tag to "P" or "P2" and adds the new line's
# field 11 to the score.
#
# Changes relative to the previous version of this section:
#   * the file is opened with a checked three-argument open (an unopenable
#     file used to yield silently empty output);
#   * records tied on score are printed in key order; they used to come out
#     in hash order, which is not stable between runs;
#   * the store / merge / print code that was repeated inline is factored
#     into small helpers. The decision logic itself is unchanged.

# Store a line under $key as [tag, field 3, score, all fields...].
sub _store_hit {
    my ($hits, $key, $fields) = @_;
    $hits->{$key} = [ $fields->[0], $fields->[3], $fields->[11], @{$fields} ];
    return;
}

# Merge a line into its mate's record: insert its fields after the three
# summary slots, relabel the record and accumulate the score.
sub _merge_mate {
    my ($record, $fields, $label) = @_;
    splice @{$record}, 3, 0, @{$fields};
    $record->[0] = $label;
    $record->[2] += $fields->[11];
    return;
}

# Print every record whose score equals the maximum score in %$hits.
sub _print_top_hits {
    my ($hits) = @_;

    # Highest score first; ties broken by key so the output is reproducible.
    my @keys = sort { $hits->{$b}[2] <=> $hits->{$a}[2] || $a cmp $b }
               keys %{$hits};
    return unless @keys;

    my $best = $hits->{ $keys[0] }[2];
    for my $key (@keys) {
        last if $hits->{$key}[2] < $best;
        print "@{ $hits->{$key} }\n";
    }
    return;
}

{
    open my $candidate_fh, '<', "$ARGV[0]_2"
        or die "Cannot open sorted candidate file '$ARGV[0]_2': $!";

    my %hits;                  # records of the read currently being grouped
    my $current_read = "";     # read name (field 1) of that group

    while (my $row = <$candidate_fh>) {
        my @f = split ' ', $row;
        my $at_eof = eof($candidate_fh);

        # Replace field 5 with the translated name of field 3 when known.
        $f[5] = exists $virus{ $f[3] } ? $virus{ $f[3] } : $f[3];

        my $own_key  = $f[1] . "_" . $f[3] . "_" . $f[4];
        my $mate_key = $f[1] . "_" . $f[3] . "_" . $f[8];

        # Very first line of the file: nothing to compare against yet.
        if (!%hits) {
            _store_hit(\%hits, $own_key, \@f);
            $current_read = $f[1];
            print "@{ $hits{$own_key} }\n" if $at_eof;
            next;
        }

        # A new read name starts: flush the finished group, then start over.
        if ($f[1] ne $current_read) {
            _print_top_hits(\%hits);
            %hits = ();
            _store_hit(\%hits, $own_key, \@f);
            $current_read = $f[1];
            print "@{ $hits{$own_key} }\n" if $at_eof;
            next;
        }

        # Same read as before: try to pair this line with a stored mate.
        my $mate = exists $hits{$mate_key} ? $hits{$mate_key} : undef;
        my $opposite_tags = $mate
            && (   ($f[0] eq "PF" && $mate->[0] eq "PR")
                || ($f[0] eq "PR" && $mate->[0] eq "PF"));

        if ($opposite_tags && $f[7] eq "=") {
            # NOTE(review): the original condition here was
            #   (PF line && PR record) || (PF line && PR record)
            # i.e. the same alternative written twice. The second half was
            # probably meant to be (PR line && PF record); behaviour is kept
            # as-is because changing it alters results - please confirm.
            if (   $f[0] eq "PF" && $mate->[0] eq "PR"
                && $f[3] eq $mate->[6]      # same field 3 as the mate
                && $f[4] eq $mate->[11]     # our field 4 == mate's field 8
                && $f[8] eq $mate->[7])     # our field 8 == mate's field 4
            {
                _merge_mate($mate, \@f, "P");
            }
            elsif ($mate->[6] ne $mate->[18]) {
                # Mate's field 3 differs from its field 15: keep this line
                # as a separate record instead of merging.
                _store_hit(\%hits, $own_key, \@f);
            }
            else {
                _merge_mate($mate, \@f, "P2");
            }
        }
        else {
            # $order is never set to 1 in the visible script, so this guard
            # is currently inert; kept to preserve the original behaviour.
            next if $order eq 1;
            _store_hit(\%hits, $own_key, \@f);
        }

        # The last line of the file closes the final group.
        _print_top_hits(\%hits) if $at_eof;
    }

    close $candidate_fh;
}
# Remove the temporary sorted copy "<input>_2". unlink() is used instead of
# shelling out to rm so the path is taken literally (no word splitting or
# shell metacharacter interpretation) and no extra process is spawned.
# A failure is reported but does not abort: all output has been written.
unlink "$ARGV[0]_2"
    or warn "Could not remove temporary file '$ARGV[0]_2': $!\n";