Source code for epitopepredict.peptutils

#!/usr/bin/env python

"""
    Module implementing peptide sequence/structure utilities.
    Created March 2013
    Copyright (C) Damien Farrell
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 3
    of the License, or (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""

from __future__ import absolute_import, print_function
import os, random, csv
import pandas as pd
from Bio.Seq import Seq
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils.ProtParam import ProteinAnalysis
#from Bio.Alphabet import IUPAC
from . import utilities

AAletters = ['A', 'C', 'E', 'D', 'G', 'F', 'I', 'H', 'K', 'M', 'L', 'N', 'Q', 'P',\
             'S', 'R', 'T', 'W', 'V', 'Y']
AAcodes3 ={'V':'VAL', 'I':'ILE', 'L':'LEU', 'E':'GLU', 'Q':'GLN', \
'D':'ASP', 'N':'ASN', 'H':'HIS', 'W':'TRP', 'F':'PHE', 'Y':'TYR', \
'R':'ARG', 'K':'LYS', 'S':'SER', 'T':'THR', 'M':'MET', 'A':'ALA', \
'G':'GLY', 'P':'PRO', 'C':'CYS'}

[docs]def create_random_sequences(size=100,length=9): """Create library of all possible peptides given length""" aa = AAletters vals=[] for i in range(0, size): seq = "" for s in range(0,length): index = random.randint(0, len(aa)-1) seq += aa[index] vals.append(seq) return vals
[docs]def create_random_peptides(size=100,length=9): """Create random peptide structures of given length""" seqs = create_random_sequences(size, length) files=[] for seq in seqs: sfile = buildPeptideStructure(seq) files.append(sfile) return files
[docs]def get_fragments(seq=None, overlap=1, length=11, **kwargs): """Generate peptide fragments from a sequence. Returns: dataframe of peptides with position column. """ frags=[] for i in range(0,len(seq),overlap): if i+length>len(seq): continue frags.append([i+1,seq[i:i+length]]) df = pd.DataFrame(frags, columns=['pos','peptide']) return df
[docs]def create_fragments(protfile=None, seq=None, length=9, overlap=1, quiet=True): """generate peptide fragments from a sequence""" if protfile != None: rec = SeqIO.parse(open(protfile,'r'),'fasta').next() seq = str(rec.seq) frags=[] for i in range(0,len(seq),overlap): if i+length>len(seq): continue frags.append(seq[i:i+length]) if quiet == False: print ('created %s fragments of length %s' %(len(frags),length)) if protfile != None: cw = csv.writer(open('%s_fragments' %protfile,'w')) for f in frags: cw.writerow([f]) return frags, seq
[docs]def get_all_fragments(exp, length=11): P=pd.read_csv(exp) peptides = P['SEQ_SEQUENCE'] allseqs=[] for p in peptides: seqs, seqstr = createFragments(seq=p,length=length) allseqs.extend(seqs) cw=csv.writer(open('all_fragments.csv','w')) for i in allseqs: cw.writerow([i]) print ('got %s fragments' %len(allseqs)) return allseqs
[docs]def get_AAsubstitutions(template): """ Get all the possible sequences from substituting every AA into the given sequence at each position. This gives a total of 19 by n amino acid positions. """ aas = AAletters seqs = [] matrix = [] for pos in range(len(template)): s = template[pos] x=[] for a in aas: if a == s: continue newseq = list(template) newseq[pos] = a newseq = ''.join(newseq) seqs.append(newseq) x.append(newseq) matrix.append(x) return seqs, matrix
[docs]def get_AAfraction(seq, amino_acids=None): """Get fraction of give amino acids in a sequence""" X = ProteinAnalysis(seq) #h = X.protein_scale(ProtParam.ProtParamData.kd, len(seq), 0.4) nonpolar = ['A','V','L','F','I','W','P'] if amino_acids == None: amino_acids = nonpolar count=0 for aa, i in X.count_amino_acids().items(): if aa in amino_acids: count+=i if count == 0: return 0 frac = round(float(count)/len(seq),2) return frac
[docs]def net_charge(seq): """Get net charge of a peptide sequence""" X = ProteinAnalysis(seq) ac = 0 ba = 0 for aa, i in X.count_amino_acids().items(): if aa in ['D','E']: ac -= i elif aa in ['K','R']: ba += i return ac + ba
[docs]def compare_anchor_positions(x1, x2): """Check if anchor positions in 9-mers are mutated""" if x1 is None or x2 is None: return 0 p1 = list(get_fragments(x1, length=9).peptide) p2 = list(get_fragments(x2, length=9).peptide) #is mutation in anchor residue for a,b in zip(p1,p2): if a[1] != b[1] or a[8] != b[8]: return 1 return 0
[docs]def main(): from optparse import OptionParser parser = OptionParser() parser.add_option("-t", "--test", dest="test", action='store_true', help="test") opts, remainder = parser.parse_args() if opts.test == True: getAllFragments(opts.file,9)
if __name__ == '__main__': main()