2626use strict;
2727use warnings;
2828use diagnostics;
29- use bytes; # Force byte semantics for DNA sequences
29+ use bytes;
3030use Time::HiRes qw( gettimeofday tv_interval) ;
3131
32- # -----------------------------------------------------
33- # Usage message and command-line argument processing
34- # -----------------------------------------------------
35- sub usage {
36- print <<"END_USAGE" ;
37- Usage: $0 <reads_choice> <query_choice>
38-
39- <reads_choice> options:
40- 1 Use illumina_reads_40.fasta.gz
41- 2 Use illumina_reads_60.fasta.gz
42- 3 Use illumina_reads_80.fasta.gz
43- 4 Use illumina_reads_100.fasta.gz
44-
45- <query_choice> options:
46- 1 Load 1,000 queries
47- 2 Load 10,000 queries
48- 3 Load 100,000 queries
49- 4 Load 1,000,000 queries
50-
51- Example:
52- $0 2 3
53- => Use illumina_reads_60.fasta.gz and load 100,000 queries.
54-
55- END_USAGE
56- exit 1;
57- }
58-
59- # If no arguments or help flag provided, show usage.
60- if ( @ARGV != 2 or $ARGV [0] eq ' --help' or $ARGV [0] eq ' -h' ) {
61- usage();
62- }
63-
64- my ($reads_choice , $query_choice ) = @ARGV ;
65-
6632# -----------------------------------------------------
6733# Record initial time and memory usage
6834# -----------------------------------------------------
6935my $start_time = [gettimeofday()];
7036my $start_timestamp = scalar (localtime ());
7137my $start_mem = get_memory_usage();
7238
73- # -----------------------------------------------------
74- # File and Parameter Setup
75- # -----------------------------------------------------
76- my $ref_file = ' hg38_partial.fasta.gz' ;
77- my %reads_map = (
78- 1 => ' illumina_reads_40.fasta.gz' ,
79- 2 => ' illumina_reads_60.fasta.gz' ,
80- 3 => ' illumina_reads_80.fasta.gz' ,
81- 4 => ' illumina_reads_100.fasta.gz' ,
82- );
83- my @query_opts = ( 1000, 10_000, 100_000, 1_000_000 );
84- my $result_file = ' matchedseqs.txt' ;
85-
86- # Validate command-line arguments.
87- exists $reads_map {$reads_choice } or die " Invalid read file choice: $reads_choice \n " ;
88- $query_choice =~ / ^[1-4]$ / or die " Invalid query choice: $query_choice \n " ;
89- my $num_queries = $query_opts [$query_choice - 1];
39+ # ----------------------------------------
40+ # Usage: match.pl <reads.fasta.gz> <ref.fasta.gz> [<max_queries>]
41+ # ----------------------------------------
42+ sub usage {
43+ die <<"USAGE" ;
44+ Usage: $0 <reads_file> <reference_file> [<max_queries>]
45+ <reads_file> FASTA or FASTA.GZ of your markers
46+ <reference_file> FASTA or FASTA.GZ of your reference
47+ <max_queries> (optional) number of reads to load; default = all
48+ USAGE
49+ }
50+
51+ # require at least two arguments
52+ usage() if @ARGV < 2;
53+
54+ # assign files directly
55+ my ($reads_file , $ref_file , $num_queries ) = @ARGV ;
56+ $num_queries //= 0; # zero or undef -> load _all_ sequences
57+ # my $result_file = 'matchedseqs.txt';
58+ my $OUT = *STDOUT ; # send everything to STDOUT
9059
9160# -----------------------------------------------------
9261# Read Reference Genome
@@ -98,12 +67,13 @@ sub usage {
9867# Load marker sequences (queries)
9968# Now each query is stored as a hash with key 'id' and 'seq'
10069# -----------------------------------------------------
101- my $reads_file = $reads_map {$reads_choice };
102- my @queries = read_n_sequences($reads_file , $num_queries );
70+ my @queries = $num_queries
71+ ? read_n_sequences($reads_file , $num_queries )
72+ : read_n_sequences($reads_file ); # load all if no limit
10373my $actual_queries = scalar @queries ;
10474
10575# -----------------------------------------------------
106- # Detect number of cores (using external command)
76+ # Detect number of cores
10777# -----------------------------------------------------
10878my $num_cores = ` lscpu -p | grep -v '^#' | wc -l` ;
10979chomp ($num_cores );
@@ -148,9 +118,11 @@ sub usage {
148118my $end_mem = get_memory_usage();
149119
150120# -----------------------------------------------------
151- # Combine temporary files into final output file with header
121+ # Combine temporary files into final output
152122# -----------------------------------------------------
153- open (my $OUT , ' >' , $result_file ) or die " Cannot open $result_file : $! " ;
123+
124+ # open(my $OUT, '>', $result_file) or die "Cannot open $result_file: $!";
125+
154126print $OUT " # ReferenceFile: $ref_file \n " ;
155127print $OUT " # QueryFile: $reads_file \n " ;
156128print $OUT " # Total Queries Processed: $actual_queries \n " ;
@@ -176,9 +148,11 @@ sub usage {
176148 unlink $tmp_file or warn " Couldn't remove $tmp_file : $! " ;
177149 }
178150}
179- close $OUT or die " Cannot close $result_file : $! " ;
180- print " \n Done. $total_matched match(es) reported.\n " ;
181- print " See '$result_file ' for the matched markers.\n " ;
151+ # -----------------------------------------------------
152+ # close $OUT or die "Cannot close $result_file: $!";
153+ # print "\nDone. $total_matched match(es) reported.\n";
154+ # print "See '$result_file' for the matched markers.\n";
155+ # -----------------------------------------------------
182156exit 0;
183157
184158# =====================================================
@@ -195,8 +169,8 @@ sub get_memory_usage {
195169 my $mem = ` ps -o rss= -p $pid ` ;
196170 chomp ($mem );
197171 return $mem ; # in KB
198- }
199-
172+ }
173+
200174# -----------------------------------------------------
201175# Subroutine: format_memory_usage
202176# Converts a memory value in KB into a human-readable string,
@@ -260,7 +234,8 @@ sub read_n_sequences {
260234 if ($line =~ / ^>(.*)/ ) {
261235 if ($seq ne ' ' ) {
262236 push @list , { id => $id , seq => $seq };
263- last if @list == $max ;
237+ # only stop early if the caller really passed a max>0
238+ last if defined ($max ) && $max > 0 && @list == $max ;
264239 $seq = ' ' ;
265240 }
266241 $id = $1 ; # capture header (without '>')
@@ -269,7 +244,8 @@ sub read_n_sequences {
269244 $seq .= $line ;
270245 }
271246 }
272- if ($seq ne ' ' and @list < $max ) {
247+ # always push the final sequence
248+ if ($seq ne ' ' ) {
273249 push @list , { id => $id , seq => $seq };
274250 }
275251 close $fh ;
0 commit comments