-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_CHMMAIRRa.jl
More file actions
102 lines (85 loc) · 4.42 KB
/
run_CHMMAIRRa.jl
File metadata and controls
102 lines (85 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Script to run CHMMAIRRa on all 5 real datasets in this paper.
"""
# load packages
using Pkg
Pkg.activate("..")
using CHMMAIRRa, Glob, CSV, DataFrames
# set up args
# Options shared by every CHMMAIRRa run in this script. The file paths are relative and
# are used after changing into an individual sample folder, so they are resolved per
# sample.
# NOTE(review): "chimeric-airr" is defined here but is not passed to any of the
# `detect_chimeras_from_files` calls in this script — confirm whether that is intended.
args = Dict{String,Any}(
    "align-database" => true,
    "detailed" => true,
    "count-chimeric-segments" => true,
    "chimeric-airr" => "final/chimeric_22-1-25.tsv.gz",
    "chimeric-alignments" => "final/chimeric_22-1-25.fasta",
    "V_fasta" => "final/database/V.fasta",
    "assignments" => "final/filtered.tsv.gz",
    "out" => "final/CHMMAIRRa_out_22-1-25.tsv.gz",
)

# Resolve the data directory to an absolute path *before* the first `cd`. The script
# later calls `cd(joinpath(igdiscover_dir, folder))` repeatedly; with a relative
# `igdiscover_dir` each of those calls would be resolved against whatever sample folder
# the process happens to be in at that point and would fail from the second sample on.
igdiscover_dir = abspath("../../data/igdiscover22/")
cd(igdiscover_dir)

# Sample folders of each dataset, as paths relative to `igdiscover_dir`.
PCR_conditions_igdiscover_folders = filter(isdir, glob("PCR_conditions/D*"))
PRJNA308641_igdiscover_folders = filter(isdir, glob("PRJNA308641/Ig[MG]/SRR*"))
IML369_igdiscover_folders = filter(isdir, glob("IML369/Ig[MG]/IML*"))
PRJNA300878_igdiscover_folders = filter(isdir, glob("PRJNA300878/TR[AB]/*"))
# The four GKH_TCR loci are concatenated in TRA, TRB, TRD, TRG order; every locus is
# sorted so the processing order is deterministic and consistent across loci.
GKH_TCR_igdiscover_folders = vcat(
    sort(filter(isdir, glob("GKH_TCR/TRA/D*"))),
    sort(filter(isdir, glob("GKH_TCR/TRB/D*"))),
    sort(filter(isdir, glob("GKH_TCR/TRD/D*"))),
    sort(filter(isdir, glob("GKH_TCR/TRG/D*"))),
)
# The sample folders were globbed relative to the current working directory (set by the
# earlier `cd(igdiscover_dir)`). Anchor them to that directory's absolute path: joining
# them onto a relative directory name stops working as soon as the working directory
# changes to a sample folder.
dataset_root = pwd()

"""
    run_chmmairra(folders, receptor; root, args)

Run `CHMMAIRRa.detect_chimeras_from_files` once inside each folder of `folders`.

# Arguments
- `folders`: sample folder paths, relative to `root`.
- `receptor`: value forwarded as the `receptor` keyword (`"IG"` or `"TCR"` in this script).

# Keywords
- `root`: absolute directory the entries of `folders` are relative to.
- `args`: dictionary holding the input/output paths and option flags; the keys read are
  `"V_fasta"`, `"assignments"`, `"out"`, `"align-database"`, `"detailed"`,
  `"count-chimeric-segments"` and `"chimeric-alignments"`.

The working directory is changed to the sample folder only for the duration of each run
and restored afterwards, so the relative paths in `args` resolve per sample.
"""
function run_chmmairra(folders, receptor; root, args)
    for folder in folders
        # `cd(f, dir)` restores the previous working directory when `f` returns or throws.
        cd(joinpath(root, folder)) do
            @info "Processing $folder"
            @time CHMMAIRRa.detect_chimeras_from_files(
                args["V_fasta"], args["assignments"], args["out"];
                receptor = receptor,
                align_database = args["align-database"],
                detailed = args["detailed"],
                count_chimeric_segments = args["count-chimeric-segments"],
                chimeric_alignments = args["chimeric-alignments"],
            )
        end
    end
    return nothing
end

# Each dataset paired with the receptor type passed for it, processed in this order.
datasets = (
    (PCR_conditions_igdiscover_folders, "IG"),
    (PRJNA308641_igdiscover_folders, "IG"),
    (IML369_igdiscover_folders, "IG"),
    (PRJNA300878_igdiscover_folders, "TCR"),
    (GKH_TCR_igdiscover_folders, "TCR"),
)
for (folders, receptor) in datasets
    run_chmmairra(folders, receptor; root = dataset_root, args = args)
end
# collect results like so
# igdiscover batch collect --file final/CHMMAIRRa_out_22-1-25.tsv.gz ../ collected_CHMMAIRRa_out.tsv
# pigz -p 20 collected_CHMMAIRRa_out.tsv