Source code for epitopepredict.plotting

#!/usr/bin/env python

"""
    epitopepredict plotting
    Created February 2016
    Copyright (C) Damien Farrell
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 3
    of the License, or (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""


from __future__ import absolute_import, print_function
import sys, os, math
from collections import OrderedDict
try:
    import matplotlib
    matplotlib.use('agg', warn=False)
    import pylab as plt
except:
    pass
import numpy as np
import pandas as pd
from . import base

colormaps={'tepitope':'Greens','netmhciipan':'Oranges','iedbmhc2':'Pinks',
            'iedbmhc1':'Blues'}
defaultcolors = {'tepitope':'green','netmhciipan':'orange','basicmhc1':'yellow',
                 'iedbmhc1':'blue','iedbmhc2':'pink'}


[docs]def plot_heatmap(df, ax=None, figsize=(6,6), **kwargs): """Plot a generic heatmap """ if ax==None: fig=plt.figure(figsize=figsize) ax=fig.add_subplot(111) else: fig = ax.get_figure() df = df._get_numeric_data() hm = ax.pcolor(df, **kwargs) #fig.colorbar(hm, ax=ax) ax.set_xticks(np.arange(0.5, len(df.columns))) ax.set_yticks(np.arange(0.5, len(df.index))) ax.set_xticklabels(df.columns, minor=False, fontsize=14,rotation=90) ax.set_yticklabels(df.index, minor=False, fontsize=14) ax.set_ylim(0, len(df.index)) hm.set_clim(0,1) #ax.grid(True) plt.tight_layout() return ax
[docs]def get_seq_from_binders(P, name=None): """Get sequence from binder data. Probably better to store the sequences in the object?""" if P.data is None or len(P.data)==0: return if name is not None: data=P.data[P.data.name==name] else: data=P.data l = len(data.iloc[0].peptide) seqlen = data.pos.max()+l return seqlen
[docs]def get_bokeh_colors(palette='Set1'): from bokeh.palettes import brewer n = len(base.predictors) pal = brewer[palette][n] i=0 clrs={} for m in base.predictors: clrs[m] = pal[i] i+=1 return clrs
[docs]def get_sequence_colors(seq): """Get colors for a sequence""" from bokeh.palettes import brewer, viridis, plasma from Bio.PDB.Polypeptide import aa1 pal = plasma(20) pal.append('white') aa1 = list(aa1) aa1.append('-') pcolors = {i:j for i,j in zip(aa1,pal)} text = list(seq) clrs = {'A':'red','T':'green','G':'orange','C':'blue','-':'white'} try: colors = [clrs[i] for i in text] except: colors = [pcolors[i] for i in text] return colors
[docs]def bokeh_test(n=20,height=400): from bokeh.models import ColumnDataSource from bokeh.plotting import figure from bokeh.models.glyphs import Text, Rect, Circle data = {'x_values': np.random.random(n), 'y_values': np.random.random(n)} source = ColumnDataSource(data=data) tools = "pan,wheel_zoom,hover,tap,reset,save" p = figure(plot_height=height,tools=tools) c = Circle(x='x_values', y='y_values', radius=.02, line_color='black', fill_color='blue', fill_alpha=.6) #p.circle(x='x_values', y='y_values', radius=.02, line_color='black', fill_color='blue', fill_alpha=.6, source=source) p.add_glyph(source, c) return p
[docs]def bokeh_summary_plot(df, savepath=None): """Summary plot""" from bokeh.plotting import figure from bokeh.layouts import column from bokeh.models import ColumnDataSource,Range1d,HoverTool,TapTool,CustomJS,OpenURL TOOLS = "pan,wheel_zoom,hover,tap,reset,save" colors = get_bokeh_colors() df=df.rename(columns={'level_0':'predictor'}) df['color'] = [colors[x] for x in df['predictor']] p = figure(title = "Summary", tools=TOOLS, width=500, height=500) p.xaxis.axis_label = 'binder_density' p.yaxis.axis_label = 'binders' #make metric for point sizes #df['point_size'] = df.binder_density source = ColumnDataSource(data=df) p.circle(x='binder_density', y='binders', line_color='black', fill_color='color', fill_alpha=0.4, size=10, source=source, legend_group='predictor') hover = p.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([ ("name", "@name"), ("length", "@length"), ("binders", "@binders"), ("binder_density", "@binder_density"), ("top_peptide", "@top_peptide"), ("max_score", "@max_score"), ]) p.toolbar.logo = None if savepath != None: url = "http://localhost:8000/sequence?savepath=%s&name=@name" %savepath taptool = p.select(type=TapTool) taptool.callback = OpenURL(url=url) callback = CustomJS(args=dict(source=source), code=""" var data = source.data; var f = cb_obj.value data['x'] = f source.trigger('change'); source.change.emit(); """) from bokeh.layouts import widgetbox from bokeh.models.widgets import Select menu = [(i,i) for i in df.columns] select = Select(title='X', value='A', options=list(df.columns), width=8) select.js_on_change('value', callback) #layout = column(p, select, sizing_mode='scale_width') return p
[docs]def bokeh_plot_tracks(preds, title='', n=2, name=None, cutoff=.95, cutoff_method='default', width=None, height=None, x_range=None, tools=True, palette='Set1', seqdepot=None, exp=None): """ Plot binding predictions as parallel tracks of blocks for each allele. This uses Bokeh. Args: title: plot title n: min alleles to display name: name of protein to show if more than one in data Returns: a bokeh figure for embedding or displaying in a notebook """ from collections import OrderedDict from bokeh.models import Range1d, HoverTool, FactorRange, ColumnDataSource, Text, Rect from bokeh.plotting import figure if tools == True: tools="xpan, xwheel_zoom, hover, reset, save" else: tools='' if width == None: width=1000 sizing_mode='scale_width' else: sizing_mode='fixed' alls=1 seqlen=0 for P in preds: if P.data is None or len(P.data)==0: continue seqlen = get_seq_from_binders(P, name=name) #print (seqlen) alls += len(P.data.groupby('allele')) if seqlen == 0: return if height==None: height = 140+10*alls if x_range == None: x_range = Range1d(0, seqlen, bounds='auto') yrange = Range1d(start=0, end=alls+3) plot = figure(title=title, plot_width=width, sizing_mode=sizing_mode, plot_height=height, y_range=yrange, x_range=x_range, y_axis_label='allele', tools=tools) h=3 if exp is not None: plotExp(plot, exp) colors = get_bokeh_colors(palette) x=[];y=[];allele=[];widths=[];clrs=[];peptide=[] predictor=[];position=[];score=[];leg=[];seqs=[];text=[] l=80 i=0 for pred in preds: m = pred.name df = pred.data seq = base.sequence_from_peptides(df) if df is None or len(df) == 0: print('no data to plot for %s' %m) continue if name != None: df = df[df.name==name] sckey = pred.scorekey binders = pred.get_binders(name=name, cutoff=cutoff, cutoff_method=cutoff_method) #print (cutoff, n) pb = pred.promiscuous_binders(n=n, name=name, cutoff=cutoff, cutoff_method=cutoff_method) if len(pb) == 0: continue l = base.get_length(pb) grps = df.groupby('allele') alleles = grps.groups.keys() #seqs.extend([seq for i in alleles]) #t = [i for s in list(seqs) for i in s] #text.extend(t) if len(pb)==0: continue c = colors[m] leg.append(m) seqlen = df.pos.max()+l for a,g in grps: b = binders[binders.allele==a] b = b[b.pos.isin(pb.pos)] #only promiscuous b.sort_values('pos',inplace=True) scores = b[sckey].values score.extend(scores) pos = b['pos'].values position.extend(pos) x.extend(pos+(l/2.0)) #offset as coords are rect centers widths.extend([l for i in scores]) clrs.extend([c for i in scores]) y.extend([h+0.5 for i in scores]) alls = [a for i in scores] allele.extend(alls) peptide.extend(list(b.peptide.values)) predictor.extend([m for i in scores]) h+=1 i+=1 data = dict(x=x,y=y,allele=allele,peptide=peptide,width=widths,color=clrs, predictor=predictor,position=position,score=score) source = ColumnDataSource(data=data) plot.rect(x='x', y='y', source=source, width='width', height=0.8, legend_group='predictor', color='color',line_color='gray',alpha=0.7) #glyph = Text(x="x", y="y", text="text", text_align='center', text_color="black", # text_font="monospace", text_font_size="10pt") #plot.add_glyph(source, glyph) hover = plot.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([ ("allele", "@allele"), ("position", "@position"), ("peptide", "@peptide"), ("score", "@score"), ("predictor", "@predictor"), ]) plot.xaxis.major_label_text_font_size = "9pt" plot.xaxis.major_label_text_font_style = "bold" plot.ygrid.grid_line_color = None plot.xgrid.minor_grid_line_alpha = 0.1 plot.xgrid.minor_grid_line_color = 'gray' #plot.xgrid.minor_grid_line_dash = [6, 4] plot.yaxis.major_label_text_font_size = '0pt' #plot.xaxis.major_label_orientation = np.pi/4 plot.min_border = 10 plot.background_fill_color = "#fafaf4" plot.background_fill_alpha = 0.5 plot.legend.orientation = "horizontal" plot.legend.location = "bottom_right" #plot.legend.label_text_font_size = 12 plot.toolbar.logo = None plot.toolbar_location = "right" return plot
[docs]def bokeh_plot_sequence(preds, name=None, n=2, cutoff=.95, cutoff_method='default', width=1000, color_sequence=False, title=''): """Plot sequence view of binders """ from bokeh.plotting import figure from bokeh.models import ColumnDataSource, LinearAxis, Grid, Range1d, Text, Rect, CustomJS, Slider, RangeSlider, FactorRange from bokeh.layouts import gridplot, column colors = [] seqs = [] text = [] alleles = [] ylabels = [] pcolors = get_bokeh_colors() for P in preds: print (P.name) df = P.data #get sequence from results dataframe seq = base.sequence_from_peptides(df) l = base.get_length(df) b = P.get_binders(name=name, cutoff=cutoff, cutoff_method=cutoff_method) pb = P.promiscuous_binders(name=name, cutoff=cutoff, n=n, cutoff_method=cutoff_method) b = b[b.pos.isin(pb.pos)] #only promiscuous grps = b.groupby('allele') al = list(grps.groups) alleles.extend(al) ylabels.extend([P.name+' '+i for i in al]) currseq=[seq for i in al] seqs.extend(currseq) t = [i for s in currseq for i in s] text.extend(t) print (len(seqs),len(text)) for a in al: pos=[] f = list(b[b.allele==a].pos) for i in f: pos.extend(np.arange(i,i+l)) if color_sequence is True: c = plotting.get_sequence_colors(seq) else: c = ['white' for i in seq] for i in pos: c[i] = pcolors[P.name] colors.extend(c) #put into columndatasource for plotting N = len(seqs[0]) S = len(alleles) x = np.arange(1, N+1) y = np.arange(0,S,1) xx, yy = np.meshgrid(x, y) gx = xx.ravel() gy = yy.flatten() recty = gy+.5 source = ColumnDataSource(dict(x=gx, y=gy, recty=recty, text=text, colors=colors)) plot_height = len(seqs)*15+60 x_range = Range1d(0,N+1, bounds='auto') L=100 if len(seq)<100: L=len(seq) view_range = (0,L) viewlen = view_range[1]-view_range[0] fontsize="8.5pt" tools="xpan, reset, save" p = figure(title=title, plot_width=width, plot_height=plot_height, x_range=view_range, y_range=ylabels, tools=tools, min_border=0, sizing_mode='stretch_both', lod_factor=10, lod_threshold=1000) seqtext = Text(x="x", y="y", text="text", text_align='center',text_color="black", text_font="monospace", text_font_size=fontsize) rects = Rect(x="x", y="recty", width=1, height=1, fill_color="colors", line_color='gray', fill_alpha=0.6) p.add_glyph(source, rects) p.add_glyph(source, seqtext) p.xaxis.major_label_text_font_style = "bold" p.grid.visible = False p.toolbar.logo = None #preview view (no text) p1 = figure(title=None, plot_width=width, plot_height=S*3+5, x_range=x_range, y_range=(0,S), tools=[], min_border=0, sizing_mode='stretch_width', lod_factor=10, lod_threshold=10) rects = Rect(x="x", y="recty", width=1, height=1, fill_color="colors", line_color=None, fill_alpha=0.6) previewrect = Rect(x=viewlen/2,y=S/2, width=viewlen, height=S*.99, line_color='darkblue', fill_color=None) p1.add_glyph(source, rects) p1.add_glyph(source, previewrect) p1.yaxis.visible = False p1.grid.visible = False p1.toolbar_location = None #callback for slider move jscode=""" var start = cb_obj.value[0]; var end = cb_obj.value[1]; x_range.setv({"start": start, "end": end}) rect.width = end-start; rect.x = start+rect.width/2; var fac = rect.width/width; console.log(fac); if (fac>=.14) { fontsize = 0;} else { fontsize = 8.5; } text.text_font_size=fontsize+"pt"; """ callback = CustomJS( args=dict(x_range=p.x_range,rect=previewrect,text=seqtext,width=p.plot_width), code=jscode) slider = RangeSlider (start=0, end=N, value=(0,L), step=10)#, callback_policy="throttle") slider.js_on_change('value_throttled', callback) #callback for plot drag jscode=""" start = parseInt(range.start); end = parseInt(range.end); slider.value[0] = start; rect.width = end-start; rect.x = start+rect.width/2; """ #p.x_range.callback = CustomJS(args=dict(slider=slider, range=p.x_range, rect=previewrect), # code=jscode) p = gridplot([[p1],[p],[slider]], toolbar_location="below", merge_tools=False) return p
[docs]def bokeh_plot_grid(pred, name=None, width=None, palette='Blues', **kwargs): """Plot heatmap of binding results for a predictor.""" from bokeh.plotting import figure from bokeh.models import (Range1d,HoverTool,FactorRange,ColumnDataSource, LinearColorMapper,LogColorMapper,callbacks,DataRange) from bokeh.palettes import all_palettes TOOLS = "xpan, xwheel_zoom, hover, reset, save" if width == None: sizing_mode = 'scale_width' width=900 else: sizing_mode = 'fixed' P=pred df = P.data if df is None: return cols = ['allele','pos','peptide',P.scorekey] #d = df[cols].copy() b = P.get_binders(name=name,**kwargs) d = P.data.copy() #mark binders mask = d.index.isin(b.index) d['binder'] = mask l = base.get_length(df) grps = df.groupby('allele') alleles = grps.groups seqlen = get_seq_from_binders(P, name) seq = base.seq_from_binders(df) height = 300 alls = len(alleles) x_range = Range1d(0,seqlen-l+1, bounds='auto') #x_range = list(seq) y_range = df.allele.unique() val = P.scorekey cut = P.cutoff if P.name not in ['tepitope']: d['score1'] = d[val].apply( lambda x: 1-math.log(x, 50000)) val='score1' d[val][d.binder==False] = min(d[val]) source = ColumnDataSource(d) colors = all_palettes[palette][7] mapper = LinearColorMapper(palette=colors, low=d[val].max(), high=d[val].min()) p = figure(title=P.name+' '+name, x_range=x_range, y_range=y_range, x_axis_location="above", plot_width=width, plot_height=height, tools=TOOLS, toolbar_location='below', sizing_mode=sizing_mode) p.rect(x="pos", y="allele", width=1, height=1, source=source, fill_color={'field': val,'transform':mapper}, line_color='gray', line_width=.1) p.select_one(HoverTool).tooltips = [ ('allele', '@allele'), (P.scorekey, '@%s{1.11}' %P.scorekey), ('pos', '@pos'), ('peptide', '@peptide') ] p.toolbar.logo = None p.yaxis.major_label_text_font_size = "10pt" p.yaxis.major_label_text_font_style = "bold" return p
[docs]def bokeh_plot_bar(preds, name=None, allele=None, title='', width=None, height=100, palette='Set1', tools=True, x_range=None): """Plot bars combining one or more prediction results for a set of peptides in a protein/sequence""" from bokeh.models import Range1d,HoverTool,ColumnDataSource from bokeh.plotting import figure from bokeh.transform import dodge from bokeh.core.properties import value height = 180 seqlen = 0 if width == None: width=700 sizing_mode='scale_width' for P in preds: if P.data is None or len(P.data)==0: continue seqlen = get_seq_from_binders(P) if x_range == None: x_range = Range1d(0,seqlen) y_range = Range1d(start=0, end=1) if tools == True: tools="xpan, xwheel_zoom, reset, hover" else: tools=None plot = figure(title=title,plot_width=width,sizing_mode=sizing_mode, plot_height=height, y_range=y_range, x_range=x_range, y_axis_label='rank', tools=tools) colors = get_bokeh_colors(palette) data = {} mlist = [] for pred in preds: m = pred.name df = pred.data if df is None or len(df) == 0: continue if name != None: df = df[df.name==name] grps = df.groupby('allele') alleles = grps.groups.keys() if allele not in alleles: continue #print (m, alleles, allele) df = df[df.allele==allele] df = df.sort_values('pos').set_index('pos') key = pred.scorekey X = df[key] X = (X+abs(X.min())) / (X.max() - X.min()) data[m] = X.values data['pos'] = list(X.index) #data['peptide'] = df.peptide.values mlist.append(m) source = ColumnDataSource(data) w = round(1.0/len(mlist),1)-.1 i=-w/2 for m in mlist: #m = pred.name c = colors[m] plot.vbar(x=dodge('pos', i, range=plot.x_range), top=m, width=w, source=source, color=c, legend=value(m), alpha=.8) i+=w hover = plot.select(dict(type=HoverTool)) hover.tooltips = OrderedDict([ #("allele", "@allele"), ("pos", "@pos") ]) plot.min_border = 10 plot.background_fill_color = "beige" plot.background_fill_alpha = 0.5 plot.toolbar.logo = None plot.toolbar_location = "right" plot.legend.location = "top_right" plot.legend.orientation = "horizontal" return plot
[docs]def bokeh_vbar(x, height=200, title='', color='navy'): from bokeh.plotting import figure from bokeh.models import ColumnDataSource source = ColumnDataSource(data={'chr':list(x.index),'x':range(len(x)),'y':x.values}) plot = figure(title=title, x_range = list(x.index), plot_height=height, tools='save,reset') plot.vbar(x='chr',top='y', width=.8, bottom=0,source=source, color=color) plot.ygrid.grid_line_color = None plot.xgrid.grid_line_color = None plot.xaxis.major_label_orientation = np.pi/4 return plot
[docs]def bokeh_pie_chart(df, title='', radius=.5, width=400, height=400, palette='Spectral'): """Bokeh pie chart""" from bokeh.plotting import figure from bokeh.models import HoverTool,ColumnDataSource from math import pi s = df.cumsum()/df.sum() cats = s.index p=[0]+list(s) #print (p) starts = [1/2*pi-(i*2*pi) for i in p[:-1]] ends = [1/2*pi-(i*2*pi) for i in p[1:]] from bokeh.palettes import brewer n = len(s) pal = brewer[palette][6] source = ColumnDataSource( dict(x=[0 for x in s], y=[0 for x in s], radius = [radius for x in s], category= cats, starts=starts, ends=ends, colors=pal, counts = df )) plot = figure(title=title, plot_width=width, plot_height=height, tools='save,reset') plot.wedge(x='x', y='y', radius='radius', direction="clock", fill_color='colors', color='black', start_angle='starts', end_angle='ends', legend='category', source=source) plot.axis.visible = False plot.ygrid.visible = False plot.xgrid.visible = False #hover = plot.select(dict(type=HoverTool)) #hover.tooltips = [ # ('category', '@category'), # ('percents','@counts') #] return plot
[docs]def plot_tracks(preds, name, n=1, cutoff=.95, cutoff_method='default', regions=None, legend=False, colormap='Paired', figsize=None, ax=None, **kwargs): """ Plot binders as bars per allele using matplotlib. Args: preds: list of one or more predictors name: name of protein to plot n: number of alleles binder should be found in to be displayed cutoff: percentile cutoff to determine binders to show """ import matplotlib as mpl import pylab as plt from matplotlib.patches import Rectangle if ax == None: if figsize==None: h = sum([len(p.data.groupby('allele')) for p in preds]) w = 10 h = round(h*.1+2) figsize = (w,h) #plt.clf() fig = plt.figure(figsize=figsize,facecolor='white') ax = fig.add_subplot(111) p = len(preds) cmap = mpl.cm.get_cmap(colormap) colors = { preds[i].name : cmap(float(i)/p) for i in range(p) } alleles = [] leg = [] y=0 handles = [] for pred in preds: m = pred.name df = pred.data if df is None or len(df) == 0: print('no data to plot for %s' %m) continue if name != None: if name not in df.name.unique(): print ('no such sequence %s' %name) continue df = df[df.name==name] sckey = pred.scorekey binders = pred.get_binders(name=name, cutoff=cutoff, cutoff_method=cutoff_method) #print (binders) pb = pred.promiscuous_binders(binders=binders, n=n) if len(pb) == 0: continue l = base.get_length(pb) seqlen = df.pos.max()+l #print (name,m,df.pos.max(),l,seqlen) grps = df.groupby('allele') if m in colors: c=colors[m] else: c='blue' leg.append(m) order = sorted(grps.groups) alleles.extend(order) #for a,g in grps: for a in order: g = grps.groups[a] b = binders[binders.allele==a] b = b[b.pos.isin(pb.pos)] #only promiscuous b.sort_values('pos',inplace=True) pos = b['pos'].values+1 #assumes pos is zero indexed #clrs = [scmap.to_rgba(i) for i in b[sckey]] #for x,c in zip(pos,clrs): for x in pos: rect = ax.add_patch(Rectangle((x,y), l, 1, facecolor=c, edgecolor='black', lw=1.5, alpha=0.6)) y+=1 handles.append(rect) if len(leg) == 0: return ax.set_xlim(0, seqlen) ax.set_ylim(0, len(alleles)) w=20 if seqlen>500: w=100 ax.set_xticks(np.arange(0, seqlen, w)) ax.set_ylabel('allele') ax.set_yticks(np.arange(.5,len(alleles)+.5)) fsize = 14-1*len(alleles)/40. ax.set_yticklabels(alleles, fontsize=fsize ) ax.grid(b=True, which='major', alpha=0.5) ax.set_title(name, fontsize=16, loc='right') if regions is not None: r = regions[regions.name==name] coords = (list(r.start),list(r.end-r.start)) coords = zip(*coords) plot_regions(coords, ax, color='gray') if legend == True: ax.legend(handles, leg, bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3) plt.tight_layout() return ax
[docs]def plot_regions(coords, ax, color='red', label='', alpha=0.6): """Highlight regions in a prot binder plot""" from matplotlib.patches import Rectangle #l = len(seqs.head(1)['key'].max()) h = ax.get_ylim()[1] for c in coords: x,l = c ax.add_patch(Rectangle((x,0), l, h, facecolor=color, lw=.8, alpha=alpha, zorder=0)) return
[docs]def draw_labels(labels, coords, ax): """Add labels on axis""" bbox_args = dict(boxstyle='square',fc='whitesmoke') from matplotlib.transforms import blended_transform_factory tform = blended_transform_factory(ax.transData, ax.transAxes) for text, x in zip(labels,coords): xy = (x,-.05) an = ax.annotate(text, xy=xy, xycoords=tform, ha='center', va="center", size=14, zorder=10, textcoords='offset points', bbox=bbox_args) plt.subplots_adjust(bottom=0.1) return
[docs]def plot_bars(P, name, chunks=1, how='median', cutoff=20, color='black'): """ Bar plots for sequence using median/mean/total scores. Args: P: predictor with data name: name of protein sequence chunks: break sequence up into 1 or more chunks how: method to calculate score bar value perc: percentile cutoff to show peptide """ import seaborn as sns df = P.data[P.data.name==name].sort_values('pos') w = 10 l= base.get_length(df) seqlen = df.pos.max()+l funcs = {'median': np.median, 'mean': np.mean, 'sum': np.sum} grps = df.groupby('pos') key = P.scorekey X = grps.agg({key: np.median, 'peptide': base.first}) q = (1-cutoff/100.) #score quantile value cutoff = X[key].quantile(q) X[key][X[key]<cutoff] = np.nan if len(X)<20: chunks = 1 seqlist = X.peptide.apply( lambda x : x[0]) seqchunks = np.array_split(X.index, chunks) f,axs = plt.subplots(chunks,1,figsize=(15,2+2.5*chunks)) if chunks == 1: axs = [axs] else: axs = list(axs.flat) for c in range(chunks): #print (c) ax = axs[c] st = seqchunks[c][0] end = seqchunks[c][-1] p = X[st:end] #p = p[p.peptide.isin(cb.peptide)] ax.bar(p.index, p[key], width=1, color=color) ax.set_title(str(st)+'-'+str(end), loc='right') xseq = seqlist[st:end] if len(xseq)<150: fsize = 16-1*len(xseq)/20. ax.set_xlim(st,end) ax.set_xticks(p.index+0.5) ax.set_xticklabels(xseq, rotation=0, fontsize=fsize) ax.set_ylim(X[key].min(), X[key].max()) f.suptitle(name+' - '+P.name) plt.tight_layout() return axs
[docs]def plot_bcell(plot,pred,height,ax=None): """Line plot of iedb bcell results""" x = pred.data.Position y = pred.data.Score h = height y = y+abs(min(y)) y = y*(h/max(y))+3 #plot.line(x, y, line_color="red", line_width=2, alpha=0.6,legend='bcell') ax.plot(x,y,color='blue') return
[docs]def plot_seqdepot(annotation, ax): """Plot sedepot annotations - replace with generic plot coords track""" from matplotlib.patches import Rectangle y=-1.5 fontsize=12 if 'signalp' in annotation: bbox_args = dict(boxstyle='rarrow', fc='white', lw=1, alpha=0.8) pos = annotation['signalp'].values() print (pos) for x in pos: an = ax.annotate('SP', xy=(x,y), xycoords='data', ha='left', va="center", bbox=bbox_args, size=fontsize) if 'tmhmm' in annotation: vals = annotation['tmhmm'] pos = [i[0]+(i[1]-i[0])/2.0 for i in vals] widths = [i[1]-i[0] for i in vals] bbox_args = dict(boxstyle='round', fc='deepskyblue', lw=1, alpha=0.8) for x,w in zip(pos,widths): an = ax.annotate('TMHMM', xy=(x,y), xycoords='data', ha='left', va="center", bbox=bbox_args, size=fontsize) if 'pfam27' in annotation: vals = annotation['pfam27'] text = [i[0] for i in vals] pos = [i[1]+(i[2]-i[1])/2.0 for i in vals] widths = [i[2]-i[1] for i in vals] #print (pos,widths,text) bbox_args = dict(boxstyle='round', fc='white', lw=1, alpha=0.8) for x,w,t in zip(pos,widths,text): an = ax.annotate(t, xy=(x,y), xycoords='data', ha='left', va="center", bbox=bbox_args, size=fontsize) ax.set_ylim(y-1, ax.get_ylim()[1]) return
[docs]def plot_multiple(preds, names, kind='tracks', regions=None, genome=None, **kwargs): """Plot results for multiple proteins""" for prot in names: if kind == 'tracks': ax = plot_tracks(preds,name=prot,**kwargs) elif kind == 'bar': axs = plot_bars(preds[0],name=prot) ax = axs[0] if regions is not None: r = regions[regions.name==prot] print (r) #print genome[genome.locus_tag==prot] coords = (list(r.start),list(r.end-r.start)) coords = zip(*coords) plot_regions(coords, ax, color='gray') #labels = list(r.peptide) #plotting.draw_labels(labels, coords, ax) if genome is not None: p = genome[genome['locus_tag']==prot] seq = p.translation.iloc[0] from . import analysis sd = analysis.get_seqdepot(seq)['t'] plot_seqdepot(sd, ax) plt.tight_layout() plt.show() return
[docs]def plot_binder_map(P, name, values='rank', cutoff=20, chunks=1, cmap=None): """ Plot heatmap of binders above a cutoff by rank or score. Args: P: predictor object with data name: name of protein to plot values: data column to use for plot data, 'score' or 'rank' cutoff: cutoff if using rank as values chunks: number of plots to split the sequence into """ import pylab as plt import seaborn as sns df = P.data[P.data.name==name].sort_values('pos') w = 10 l= base.get_length(df) seqlen = df.pos.max()+l f,axs = plt.subplots(chunks,1,figsize=(15,3+2.5*chunks)) if chunks == 1: axs = [axs] else: axs = list(axs.flat) if values == 'score': values = P.scorekey if cmap == None: cmap='RdBu_r' X = df.pivot_table(index='allele', columns='pos', values=values) if values == P.scorekey: #normalise score across alleles for clarity zscore = lambda x: (x - x.mean()) / x.std() X = X.apply(zscore, 1) if values == 'rank': X[X > cutoff] = 0 if cmap == None: cmap='Blues' s = df.drop_duplicates(['peptide','pos']) seqlist = s.set_index('pos').peptide.apply( lambda x : x[0]) #print seqlist seqchunks = np.array_split(X.columns, chunks) for c in range(chunks): ax = axs[c] p = X[seqchunks[c]] #plot heatmap vmin=min(X.min()); vmax=max(X.max()) center = vmin+(vmax-vmin)/2 sns.heatmap(p, cmap=cmap, cbar_kws={"shrink": .5}, vmin=vmin, vmax=vmax, #center=center, ax=ax, xticklabels=20) #show sequence on x-axis st = seqchunks[c][0] end = seqchunks[c][-1] xseq = seqlist[st:end] ax.set_title(str(st)+'-'+str(end), loc='right') ax.spines['bottom'].set_visible(True) if len(seqchunks[c])<150: fsize = 16-1*len(seqchunks[c])/20. ax.set_xticks(np.arange(0,len(xseq))+0.5) ax.set_xticklabels(xseq, rotation=0, fontsize=fsize) f.suptitle(name+' - '+P.name) plt.tight_layout() return ax
[docs]def binders_to_coords(df): """Convert binder results to dict of coords for plotting""" coords = {} if not 'start' in df.columns: df=base.get_coords(df) if 'start' in df.columns: for i,g in df.groupby('name'): l = g.end-g.start coords[i] = zip(g.start,l) return coords
[docs]def plot_overview(genome, coords=None, cols=2, colormap='Paired', legend=True, figsize=None): """ Plot regions of interest in a group of protein sequences. Useful for seeing how your binders/epitopes are distributed in a small genome or subset of genes. Args: genome: dataframe with protein sequences coords: a list/dict of tuple lists of the form {protein name: [(start,length)..]} cols: number of columns for plot, integer """ import pylab as plt if type(coords) is list: coords = { i:coords[i] for i in range(len(coords)) } legend=False import matplotlib as mpl import seaborn as sns #sns.reset_orig() cmap = mpl.cm.get_cmap(colormap) t = len(coords) colors = [cmap(float(i)/t) for i in range(t)] from matplotlib.patches import Rectangle names = [coords[c].keys() for c in coords][0] df = genome[genome.locus_tag.isin(names)] rows = int(np.ceil(len(names)/float(cols))) if figsize == None: h = round(len(names)*.2+10./cols) figsize = (14,h) f,axs=plt.subplots(rows,cols,figsize=figsize) grid=axs.flat rects = {} i=0 for idx,prot in df.iterrows(): ax=grid[i] protname = prot.locus_tag seq = prot.translation if 'description' in prot: title = prot.description else: title = protname y=0 for label in coords: c = coords[label] if not protname in c: continue vals = c[protname] #print vals for v in vals: x,l = v[0],v[1] rect = ax.add_patch(Rectangle((x,y), l, .9, facecolor=colors[y], lw=1.2, alpha=0.8)) if len(v)>2: s = v[2] bbox_args = dict(fc=colors[y], lw=1.2, alpha=0.8) ax.annotate(s, (x+l/2, y), fontsize=12, ha='center', va='bottom') if not label in rects: rects[label] = rect y+=1 i+=1 slen = len(seq) w = round(float(slen)/20.) w = math.ceil(w/20)*20 ax.set_xlim(0, slen) ax.set_ylim(0, t) ax.set_xticks(np.arange(0, slen, w)) ax.set_yticks([]) ax.set_title(title, fontsize=16, loc='right') if i|2!=0 and cols>1: try: f.delaxes(grid[i]) except: pass if legend == True: f.legend(rects.values(), rects.keys(), loc=4) plt.tight_layout() return
[docs]def seqdepot_to_coords(sd, key='pfam27'): """ Convert seqdepot annotations to coords for plotting """ coords=[] if len(sd['t'])==0 or not key in sd['t']: return [] x = sd['t'][key] #print x if key in ['pfam27','pfam28']: coords = [(i[1],i[2]-i[1],i[0]) for i in x] elif key in ['gene3d','prints']: coords = [(i[2],i[3]-i[2],i[1]) for i in x] elif key == 'tmhmm': coords = [(i[0],i[1]-i[0]) for i in x] elif key == 'signalp': x = x.items() coords = [(i[1],10,'SP') for i in x] return coords
[docs]def get_seqdepot_annotation(genome, key='pfam27'): """ Get seqdepot annotations for a set of proteins in dataframe. """ from . import seqdepot annot={} for i,row in genome.iterrows(): n = row.locus_tag seq = row.translation #print n,seq sd = seqdepot.new() aseqid = sd.aseqIdFromSequence(seq) result = sd.findOne(aseqid) #for x in result['t']: # print x, result['t'][x] x = seqdepot_to_coords(result, key) annot[n] = x return annot