Skip to content

Commit 86144c0

Browse files
committed
v1.0.2 fixing issue #5
1 parent 20ea524 commit 86144c0

File tree

4 files changed

+36
-33
lines changed

4 files changed

+36
-33
lines changed

cmseq/cmseq.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
from collections import defaultdict
1010
import pickle,os
1111

12-
__author__ = 'Moreno Zolfo ([email protected]), Nicolai Karcher'
13-
__version__ = '1.2.2'
14-
__date__ = '25 July 2019'
12+
__author__ = 'Moreno Zolfo ([email protected]), Nicolai Karcher, Kun Huang'
13+
__version__ = '1.0.2'
14+
__date__ = '9 September 2020'
1515

1616
def _initt(terminating_,_consensus_bamFile,_consensus_args):
1717
global terminating
@@ -363,7 +363,7 @@ def rev_pos(cur_pos, gene_start, gene_end):
363363
def easy_polymorphism_rate(self,mincov=CMSEQ_DEFAULTS.mincov,minqual=CMSEQ_DEFAULTS.minqual,dominant_frq_thrsh=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh):
364364

365365
from Bio.Seq import Seq
366-
from Bio.Alphabet import IUPAC
366+
#from Bio.Alphabet import IUPAC
367367

368368
bases = self.get_base_stats_for_poly(minqual=minqual)
369369

@@ -404,8 +404,8 @@ def easy_polymorphism_rate(self,mincov=CMSEQ_DEFAULTS.mincov,minqual=CMSEQ_DEFAU
404404

405405
if len(codon_f1) == 3 and len(codon_f2) == 3:
406406

407-
codon_s1 = Seq(''.join(codon_f1),IUPAC.ambiguous_dna)
408-
codon_s2 = Seq(''.join(codon_f2),IUPAC.ambiguous_dna)
407+
codon_s1 = Seq(''.join(codon_f1))
408+
codon_s2 = Seq(''.join(codon_f2))
409409
codon_t1 = codon_s1.translate()
410410
codon_t2 = codon_s2.translate()
411411

cmseq/consensus.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from Bio import SeqIO
66
from Bio.Seq import Seq
77
from Bio.SeqRecord import SeqRecord
8-
from Bio.Alphabet import IUPAC
98

109
from .cmseq import CMSEQ_DEFAULTS
1110
from .cmseq import BamFile
@@ -39,7 +38,7 @@ def consensus_from_file():
3938
sq = i.reference_free_consensus(mincov=args.mincov,minqual=args.minqual,dominant_frq_thrsh=args.dominant_frq_thrsh,noneCharacter='N',trimReads=trimParam)
4039

4140
if sq is not None:
42-
lst.append(SeqRecord(Seq(sq, IUPAC.IUPACAmbiguousDNA), id=i.name+"_consensus", description=''))
41+
lst.append(SeqRecord(Seq(sq), id=i.name+"_consensus", description=''))
4342
SeqIO.write(lst,sys.stdout,'fasta')
4443

4544

cmseq/consensus_aDNA.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
#!/usr/bin/env python
2-
from __future__ import print_function
3-
4-
from cmseq import CMSEQ_DEFAULTS
5-
from cmseq import BamFile
6-
from cmseq import BamContig
1+
from .cmseq import CMSEQ_DEFAULTS
2+
from .cmseq import BamFile
3+
from .cmseq import BamContig
74
import os
85
import pysam
96
import math
@@ -15,7 +12,11 @@
1512
from Bio import SeqIO
1613
from Bio.Seq import Seq
1714
from Bio.SeqRecord import SeqRecord
18-
from Bio.Alphabet import IUPAC
15+
16+
17+
18+
19+
1920

2021
__author__ = 'Kun D. Huang ([email protected]), Moreno Zolfo ([email protected])'
2122
__version__ = '1.0'
@@ -209,7 +210,23 @@ def get_base_stats(self, min_read_depth=CMSEQ_DEFAULTS.mincov, min_base_quality=
209210

210211
return base_stats
211212

212-
def consensus_from_file(args):
213+
def consensus_from_file():
214+
215+
parser = argparse.ArgumentParser(description="outputs the consensus in FASTA format. Non covered positions (or quality-trimmed positions) are reported as a dashes: -")
216+
parser.add_argument('BAMFILE', help='The file on which to operate')
217+
parser.add_argument('-c','--contig', help='Focus on a subset of references in the BAM file. Can be a list of references separated by commas or a FASTA file (the IDs are used to subset)', metavar="REFERENCE ID" ,default=None)
218+
parser.add_argument('-f', help='If set unmapped (FUNMAP), secondary (FSECONDARY), qc-fail (FQCFAIL) and duplicate (FDUP) are excluded. If unset ALL reads are considered (bedtools genomecov style). Default: unset',action='store_true')
219+
parser.add_argument('-r', '--refseq', help='Input the refrence genome sequence', type=str)
220+
parser.add_argument('--sortindex', help='Sort and index the file',action='store_true')
221+
parser.add_argument('--minqual', help='Minimum base quality. Bases with quality score lower than this will be discarded. This is performed BEFORE --mincov. Default: '+str(CMSEQ_DEFAULTS.minqual), type=int, default=CMSEQ_DEFAULTS.minqual)
222+
parser.add_argument('--mincov', help='Minimum position coverage to perform the polymorphism calculation. Position with a lower depth of coverage will be discarded (i.e. considered as zero-coverage positions). This is calculated AFTER --minqual. Default: '+str(CMSEQ_DEFAULTS.minlen), type=int, default=CMSEQ_DEFAULTS.mincov)
223+
parser.add_argument('--dominant_frq_thrsh', help='Cutoff for degree of `allele dominance` for a position to be considered polymorphic. Default: '+str(CMSEQ_DEFAULTS.poly_dominant_frq_thrsh), type=float, default=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh)
224+
parser.add_argument('--minlen', help='Minimum Reference Length for a reference to be considered. Default: '+str(CMSEQ_DEFAULTS.minlen),default=CMSEQ_DEFAULTS.minlen, type=int)
225+
parser.add_argument('--pos_specific_prob_tab', help='Stats_out_MCMC_correct_prob table produced from mapdamage2. It contains the position specific probability of observing a C->T or G->A due to a post-mortem damage.',default=CMSEQ_DEFAULTS_Ancient.position_specific_prob, type=str)
226+
parser.add_argument('--pos_damage_prob_thrsh', help = 'Maximum post-mortem damage probability for a nucletide on a read to be considered when building consensus.', default=CMSEQ_DEFAULTS_Ancient.position_specific_prob_thrsh, type = float)
227+
228+
args = parser.parse_args()
229+
213230
si = True if args.sortindex else False
214231
mode = 'all' if args.f else 'nofilter'
215232

@@ -248,23 +265,9 @@ def consensus_from_file(args):
248265
trimReads=None,post_damage_prob=pos_prob_thrsh,pos_prob_db=pos_stats_db, refseq_idx=RefSeq_idx)
249266

250267
if sq is not None:
251-
lst.append(SeqRecord(Seq(sq, IUPAC.IUPACAmbiguousDNA), id=i.name+"_consensus", description=''))
268+
lst.append(SeqRecord(Seq(sq), id=i.name+"_consensus", description=''))
252269
SeqIO.write(lst,sys.stdout,'fasta')
253270

254271

255272
if __name__ == "__main__":
256-
257-
parser = argparse.ArgumentParser(description="outputs the consensus in FASTA format. Non covered positions (or quality-trimmed positions) are reported as a dashes: -")
258-
parser.add_argument('BAMFILE', help='The file on which to operate')
259-
parser.add_argument('-c','--contig', help='Focus on a subset of references in the BAM file. Can be a list of references separated by commas or a FASTA file (the IDs are used to subset)', metavar="REFERENCE ID" ,default=None)
260-
parser.add_argument('-f', help='If set unmapped (FUNMAP), secondary (FSECONDARY), qc-fail (FQCFAIL) and duplicate (FDUP) are excluded. If unset ALL reads are considered (bedtools genomecov style). Default: unset',action='store_true')
261-
parser.add_argument('-r', '--refseq', help='Input the refrence genome sequence', type=str)
262-
parser.add_argument('--sortindex', help='Sort and index the file',action='store_true')
263-
parser.add_argument('--minqual', help='Minimum base quality. Bases with quality score lower than this will be discarded. This is performed BEFORE --mincov. Default: '+str(CMSEQ_DEFAULTS.minqual), type=int, default=CMSEQ_DEFAULTS.minqual)
264-
parser.add_argument('--mincov', help='Minimum position coverage to perform the polymorphism calculation. Position with a lower depth of coverage will be discarded (i.e. considered as zero-coverage positions). This is calculated AFTER --minqual. Default: '+str(CMSEQ_DEFAULTS.minlen), type=int, default=CMSEQ_DEFAULTS.mincov)
265-
parser.add_argument('--dominant_frq_thrsh', help='Cutoff for degree of `allele dominance` for a position to be considered polymorphic. Default: '+str(CMSEQ_DEFAULTS.poly_dominant_frq_thrsh), type=float, default=CMSEQ_DEFAULTS.poly_dominant_frq_thrsh)
266-
parser.add_argument('--minlen', help='Minimum Reference Length for a reference to be considered. Default: '+str(CMSEQ_DEFAULTS.minlen),default=CMSEQ_DEFAULTS.minlen, type=int)
267-
parser.add_argument('--pos_specific_prob_tab', help='Stats_out_MCMC_correct_prob table produced from mapdamage2. It contains the position specific probability of observing a C->T or G->A due to a post-mortem damage.',default=CMSEQ_DEFAULTS_Ancient.position_specific_prob, type=str)
268-
parser.add_argument('--pos_damage_prob_thrsh', help = 'Maximum post-mortem damage probability for a nucletide on a read to be considered when building consensus.', default=CMSEQ_DEFAULTS_Ancient.position_specific_prob_thrsh, type = float)
269-
270-
consensus_from_file(parser.parse_args())
273+
consensus_from_file()

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
install_requires = ["numpy", "scipy", "pysam", "pandas", "biopython", "bcbio-gff"]
77
setuptools.setup(
88
name='CMSeq',
9-
version='1.0.1',
9+
version='1.0.2',
1010
author='Moreno Zolfo',
1111
author_email='[email protected]',
1212
url='http://github.com/SegataLab/cmseq/',
@@ -15,6 +15,7 @@
1515
'console_scripts': [
1616
'breadth_depth.py = cmseq.breadth_depth:bd_from_file',
1717
'consensus.py = cmseq.consensus:consensus_from_file',
18+
'consensus_aDNA.py = cmseq.consensus_aDNA:consensus_from_file',
1819
'polymut.py = cmseq.polymut:polymut_from_file',
1920
'poly.py = cmseq.poly:poly_from_file'
2021
]

0 commit comments

Comments
 (0)