v1.0.2 fixing issue #5

azufre451 · azufre451 · commit 86144c0d49a0 · 2020-09-09T18:30:47.000+02:00
diff --git a/cmseq/cmseq.py b/cmseq/cmseq.py
@@ -9,9 +9,9 @@
 from collections import defaultdict
 import pickle,os
 
-__author__ = 'Moreno Zolfo (moreno.zolfo@unitn.it),	Nicolai Karcher'
-__version__ = '1.2.2'
-__date__ = '25 July 2019'
+__author__ = 'Moreno Zolfo (moreno.zolfo@unitn.it), Nicolai Karcher, Kun Huang'
+__version__ = '1.0.2'
+__date__ = '9 September 2020'
 
 def _initt(terminating_,_consensus_bamFile,_consensus_args):
 	global terminating
@@ -363,7 +363,7 @@ def rev_pos(cur_pos, gene_start, gene_end):
 	def easy_polymorphism_rate(self,mincov=CMSEQ_DEFAULTS.mincov,minqual=CMSEQ_DEFAULTS.minqual,dominant_frq_thrsh=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh):
 
 		from Bio.Seq import Seq
-		from Bio.Alphabet import IUPAC
+		# Bio.Alphabet import dropped: the module was removed in Biopython 1.78 and Seq() no longer takes an alphabet argument
 
 		bases = self.get_base_stats_for_poly(minqual=minqual)
 		
@@ -404,8 +404,8 @@ def easy_polymorphism_rate(self,mincov=CMSEQ_DEFAULTS.mincov,minqual=CMSEQ_DEFAU
 
 			if len(codon_f1) == 3 and len(codon_f2) == 3:
 
-				codon_s1 = Seq(''.join(codon_f1),IUPAC.ambiguous_dna)
-				codon_s2 = Seq(''.join(codon_f2),IUPAC.ambiguous_dna)
+				codon_s1 = Seq(''.join(codon_f1))
+				codon_s2 = Seq(''.join(codon_f2))
 				codon_t1 = codon_s1.translate()
 				codon_t2 = codon_s2.translate()
 
diff --git a/cmseq/consensus.py b/cmseq/consensus.py
@@ -5,7 +5,6 @@
 from Bio import SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from Bio.Alphabet import IUPAC
 
 from .cmseq import CMSEQ_DEFAULTS
 from .cmseq import BamFile
@@ -39,7 +38,7 @@ def consensus_from_file():
 		sq = i.reference_free_consensus(mincov=args.mincov,minqual=args.minqual,dominant_frq_thrsh=args.dominant_frq_thrsh,noneCharacter='N',trimReads=trimParam)
 		
 		if sq is not None:
-			lst.append(SeqRecord(Seq(sq, IUPAC.IUPACAmbiguousDNA), id=i.name+"_consensus", description=''))
+			lst.append(SeqRecord(Seq(sq), id=i.name+"_consensus", description=''))
 	SeqIO.write(lst,sys.stdout,'fasta')
 
 
diff --git a/cmseq/consensus_aDNA.py b/cmseq/consensus_aDNA.py
@@ -1,9 +1,6 @@
-#!/usr/bin/env python
-from __future__ import print_function
-
-from cmseq import CMSEQ_DEFAULTS
-from cmseq import BamFile
-from cmseq import BamContig
+from .cmseq import CMSEQ_DEFAULTS
+from .cmseq import BamFile
+from .cmseq import BamContig
 import os
 import pysam
 import math
@@ -15,7 +12,11 @@
 from Bio import SeqIO
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
-from Bio.Alphabet import IUPAC
+
+
+
+
+
 
 __author__ = 'Kun D. Huang (kun.huang@unitn.it), Moreno Zolfo (moreno.zolfo@unitn.it)'
 __version__ = '1.0'
@@ -209,7 +210,23 @@ def get_base_stats(self, min_read_depth=CMSEQ_DEFAULTS.mincov, min_base_quality=
 			
 		return base_stats
 
-def consensus_from_file(args):
+def consensus_from_file():
+
+	parser = argparse.ArgumentParser(description="outputs the consensus in FASTA format. Non covered positions (or quality-trimmed positions) are reported as a dashes: -")
+	parser.add_argument('BAMFILE', help='The file on which to operate')
+	parser.add_argument('-c','--contig', help='Focus on a subset of references in the BAM file. Can be a list of references separated by commas or a FASTA file (the IDs are used to subset)', metavar="REFERENCE ID" ,default=None)
+	parser.add_argument('-f', help='If set unmapped (FUNMAP), secondary (FSECONDARY), qc-fail (FQCFAIL) and duplicate (FDUP) are excluded. If unset ALL reads are considered (bedtools genomecov style). Default: unset',action='store_true')
+	parser.add_argument('-r', '--refseq', help='Input the refrence genome sequence', type=str)
+	parser.add_argument('--sortindex', help='Sort and index the file',action='store_true')
+	parser.add_argument('--minqual', help='Minimum base quality. Bases with quality score lower than this will be discarded. This is performed BEFORE --mincov. Default: '+str(CMSEQ_DEFAULTS.minqual), type=int, default=CMSEQ_DEFAULTS.minqual)
+	parser.add_argument('--mincov', help='Minimum position coverage to perform the polymorphism calculation. Position with a lower depth of coverage will be discarded (i.e. considered as zero-coverage positions). This is calculated AFTER --minqual. Default: '+str(CMSEQ_DEFAULTS.minlen), type=int, default=CMSEQ_DEFAULTS.mincov)
+	parser.add_argument('--dominant_frq_thrsh', help='Cutoff for degree of `allele dominance` for a position to be considered polymorphic. Default: '+str(CMSEQ_DEFAULTS.poly_dominant_frq_thrsh), type=float, default=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh)
+	parser.add_argument('--minlen', help='Minimum Reference Length for a reference to be considered. Default: '+str(CMSEQ_DEFAULTS.minlen),default=CMSEQ_DEFAULTS.minlen, type=int)
+	parser.add_argument('--pos_specific_prob_tab', help='Stats_out_MCMC_correct_prob table produced from mapdamage2. It contains the position specific probability of observing a C->T or G->A due to a post-mortem damage.',default=CMSEQ_DEFAULTS_Ancient.position_specific_prob, type=str)
+	parser.add_argument('--pos_damage_prob_thrsh', help = 'Maximum post-mortem damage probability for a nucletide on a read to be considered when building consensus.', default=CMSEQ_DEFAULTS_Ancient.position_specific_prob_thrsh, type = float)
+
+	args = parser.parse_args()
+
 	si = True if args.sortindex else False
 	mode = 'all' if args.f else 'nofilter'
 
@@ -248,23 +265,9 @@ def consensus_from_file(args):
 			trimReads=None,post_damage_prob=pos_prob_thrsh,pos_prob_db=pos_stats_db, refseq_idx=RefSeq_idx)
 		
 		if sq is not None:
-			lst.append(SeqRecord(Seq(sq, IUPAC.IUPACAmbiguousDNA), id=i.name+"_consensus", description=''))
+			lst.append(SeqRecord(Seq(sq), id=i.name+"_consensus", description=''))
 	SeqIO.write(lst,sys.stdout,'fasta')
 
 
 if __name__ == "__main__":
-
-	parser = argparse.ArgumentParser(description="outputs the consensus in FASTA format. Non covered positions (or quality-trimmed positions) are reported as a dashes: -")
-	parser.add_argument('BAMFILE', help='The file on which to operate')
-	parser.add_argument('-c','--contig', help='Focus on a subset of references in the BAM file. Can be a list of references separated by commas or a FASTA file (the IDs are used to subset)', metavar="REFERENCE ID" ,default=None)
-	parser.add_argument('-f', help='If set unmapped (FUNMAP), secondary (FSECONDARY), qc-fail (FQCFAIL) and duplicate (FDUP) are excluded. If unset ALL reads are considered (bedtools genomecov style). Default: unset',action='store_true')
-	parser.add_argument('-r', '--refseq', help='Input the refrence genome sequence', type=str)
-	parser.add_argument('--sortindex', help='Sort and index the file',action='store_true')
-	parser.add_argument('--minqual', help='Minimum base quality. Bases with quality score lower than this will be discarded. This is performed BEFORE --mincov. Default: '+str(CMSEQ_DEFAULTS.minqual), type=int, default=CMSEQ_DEFAULTS.minqual)
-	parser.add_argument('--mincov', help='Minimum position coverage to perform the polymorphism calculation. Position with a lower depth of coverage will be discarded (i.e. considered as zero-coverage positions). This is calculated AFTER --minqual. Default: '+str(CMSEQ_DEFAULTS.minlen), type=int, default=CMSEQ_DEFAULTS.mincov)
-	parser.add_argument('--dominant_frq_thrsh', help='Cutoff for degree of `allele dominance` for a position to be considered polymorphic. Default: '+str(CMSEQ_DEFAULTS.poly_dominant_frq_thrsh), type=float, default=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh)
-	parser.add_argument('--minlen', help='Minimum Reference Length for a reference to be considered. Default: '+str(CMSEQ_DEFAULTS.minlen),default=CMSEQ_DEFAULTS.minlen, type=int)
-	parser.add_argument('--pos_specific_prob_tab', help='Stats_out_MCMC_correct_prob table produced from mapdamage2. It contains the position specific probability of observing a C->T or G->A due to a post-mortem damage.',default=CMSEQ_DEFAULTS_Ancient.position_specific_prob, type=str)
-	parser.add_argument('--pos_damage_prob_thrsh', help = 'Maximum post-mortem damage probability for a nucletide on a read to be considered when building consensus.', default=CMSEQ_DEFAULTS_Ancient.position_specific_prob_thrsh, type = float)
-
-	consensus_from_file(parser.parse_args())
+	consensus_from_file()
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 install_requires = ["numpy", "scipy", "pysam", "pandas", "biopython", "bcbio-gff"]
 setuptools.setup(
     name='CMSeq',
-    version='1.0.1',
+    version='1.0.2',
     author='Moreno Zolfo',
     author_email='moreno.zolfo@unitn.it',
     url='http://github.com/SegataLab/cmseq/',
@@ -15,6 +15,7 @@
         'console_scripts': [
             'breadth_depth.py = cmseq.breadth_depth:bd_from_file',
             'consensus.py = cmseq.consensus:consensus_from_file',
+            'consensus_aDNA.py = cmseq.consensus_aDNA:consensus_from_file',
             'polymut.py = cmseq.polymut:polymut_from_file',
             'poly.py = cmseq.poly:poly_from_file'
         ]