Source code for epitopepredict.plotting

#!/usr/bin/env python

"""
    epitopepredict plotting
    Created February 2016
    Copyright (C) Damien Farrell
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 3
    of the License, or (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""


from __future__ import absolute_import, print_function
import sys, os, math
from collections import OrderedDict
try:
    import matplotlib
    matplotlib.use('agg', warn=False)
    import pylab as plt
except:
    pass
import numpy as np
import pandas as pd
from . import base

colormaps={'tepitope':'Greens','netmhciipan':'Oranges','iedbmhc2':'Pinks',
            'iedbmhc1':'Blues'}
defaultcolors = {'tepitope':'green','netmhciipan':'orange','basicmhc1':'yellow',
                 'iedbmhc1':'blue','iedbmhc2':'pink'}


[docs]def plot_heatmap(df, ax=None, figsize=(6,6), **kwargs):
    """Plot a generic heatmap """

    if ax==None:
        fig=plt.figure(figsize=figsize)
        ax=fig.add_subplot(111)
    else:
        fig = ax.get_figure()
    df = df._get_numeric_data()
    hm = ax.pcolor(df, **kwargs)
    #fig.colorbar(hm, ax=ax)
    ax.set_xticks(np.arange(0.5, len(df.columns)))
    ax.set_yticks(np.arange(0.5, len(df.index)))
    ax.set_xticklabels(df.columns, minor=False, fontsize=14,rotation=90)
    ax.set_yticklabels(df.index, minor=False, fontsize=14)
    ax.set_ylim(0, len(df.index))
    hm.set_clim(0,1)
    #ax.grid(True)
    plt.tight_layout()
    return ax

[docs]def get_seq_from_binders(P, name=None):
    """Get sequence from binder data. Probably better to store the sequences in the object?"""

    if P.data is None or len(P.data)==0:
        return
    if name is not None:
        data=P.data[P.data.name==name]
    else:
        data=P.data
    l = len(data.iloc[0].peptide)
    seqlen = data.pos.max()+l
    return seqlen

[docs]def get_bokeh_colors(palette='Set1'):

    from bokeh.palettes import brewer
    n = len(base.predictors)
    pal = brewer[palette][n]
    i=0
    clrs={}
    for m in base.predictors:
        clrs[m] = pal[i]
        i+=1
    return clrs

[docs]def get_sequence_colors(seq):
    """Get colors for a sequence"""

    from bokeh.palettes import brewer, viridis, plasma
    from Bio.PDB.Polypeptide import aa1
    pal = plasma(20)
    pal.append('white')
    aa1 = list(aa1)
    aa1.append('-')
    pcolors = {i:j for i,j in zip(aa1,pal)}
    text = list(seq)
    clrs =  {'A':'red','T':'green','G':'orange','C':'blue','-':'white'}
    try:
        colors = [clrs[i] for i in text]
    except:
        colors = [pcolors[i] for i in text]
    return colors

[docs]def bokeh_test(n=20,height=400):

    from bokeh.models import ColumnDataSource
    from bokeh.plotting import figure
    from bokeh.models.glyphs import Text, Rect, Circle
    data = {'x_values': np.random.random(n),
            'y_values': np.random.random(n)}
    source = ColumnDataSource(data=data)
    tools = "pan,wheel_zoom,hover,tap,reset,save"
    p = figure(plot_height=height,tools=tools)
    c = Circle(x='x_values', y='y_values', radius=.02, line_color='black', fill_color='blue', fill_alpha=.6)
    #p.circle(x='x_values', y='y_values', radius=.02, line_color='black', fill_color='blue', fill_alpha=.6, source=source)
    p.add_glyph(source, c)
    return p

[docs]def bokeh_summary_plot(df, savepath=None):
    """Summary plot"""

    from bokeh.plotting import figure
    from bokeh.layouts import column
    from bokeh.models import ColumnDataSource,Range1d,HoverTool,TapTool,CustomJS,OpenURL

    TOOLS = "pan,wheel_zoom,hover,tap,reset,save"

    colors = get_bokeh_colors()
    df=df.rename(columns={'level_0':'predictor'})
    df['color'] = [colors[x] for x in df['predictor']]
    p = figure(title = "Summary", tools=TOOLS, width=500, height=500)
    p.xaxis.axis_label = 'binder_density'
    p.yaxis.axis_label = 'binders'

    #make metric for point sizes
    #df['point_size'] = df.binder_density
    source = ColumnDataSource(data=df)

    p.circle(x='binder_density', y='binders', line_color='black', fill_color='color',
             fill_alpha=0.4, size=10, source=source, legend_group='predictor')
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("name", "@name"),
        ("length", "@length"),
        ("binders", "@binders"),
        ("binder_density", "@binder_density"),
        ("top_peptide", "@top_peptide"),
        ("max_score", "@max_score"),
    ])
    p.toolbar.logo = None
    if savepath != None:
        url = "http://localhost:8000/sequence?savepath=%s&name=@name" %savepath
        taptool = p.select(type=TapTool)
        taptool.callback = OpenURL(url=url)

    callback = CustomJS(args=dict(source=source), code="""
        var data = source.data;
        var f = cb_obj.value

        data['x'] = f
        source.trigger('change');
        source.change.emit();
    """)

    from bokeh.layouts import widgetbox
    from bokeh.models.widgets import Select
    menu = [(i,i) for i in df.columns]
    select = Select(title='X', value='A', options=list(df.columns), width=8)
    select.js_on_change('value', callback)

    #layout = column(p, select, sizing_mode='scale_width')
    return p

[docs]def bokeh_plot_tracks(preds, title='', n=2, name=None, cutoff=.95, cutoff_method='default',
                width=None, height=None, x_range=None, tools=True, palette='Set1',
                seqdepot=None, exp=None):
    """
    Plot binding predictions as parallel tracks of blocks for each allele.
    This uses Bokeh.
    Args:
        title: plot title
        n: min alleles to display
        name: name of protein to show if more than one in data

    Returns: a bokeh figure for embedding or displaying in a notebook
    """

    from collections import OrderedDict
    from bokeh.models import Range1d, HoverTool, FactorRange, ColumnDataSource, Text, Rect
    from bokeh.plotting import figure

    if tools == True:
        tools="xpan, xwheel_zoom, hover, reset, save"
    else:
        tools=''
    if width == None:
        width=1000
        sizing_mode='scale_width'
    else:
        sizing_mode='fixed'
    alls=1
    seqlen=0
    for P in preds:
        if P.data is None or len(P.data)==0:
            continue
        seqlen = get_seq_from_binders(P, name=name)
        #print (seqlen)
        alls += len(P.data.groupby('allele'))
    if seqlen == 0:
        return
    if height==None:
        height = 140+10*alls
    if x_range == None:
        x_range = Range1d(0, seqlen, bounds='auto')
    yrange = Range1d(start=0, end=alls+3)

    plot = figure(title=title, plot_width=width, sizing_mode=sizing_mode,
                    plot_height=height, y_range=yrange, x_range=x_range,
                    y_axis_label='allele',
                    tools=tools)
    h=3

    if exp is not None:
        plotExp(plot, exp)

    colors = get_bokeh_colors(palette)
    x=[];y=[];allele=[];widths=[];clrs=[];peptide=[]
    predictor=[];position=[];score=[];leg=[];seqs=[];text=[]
    l=80
    i=0
    for pred in preds:
        m = pred.name
        df = pred.data
        seq = base.sequence_from_peptides(df)
        if df is None or len(df) == 0:
            print('no data to plot for %s' %m)
            continue
        if name != None:
            df = df[df.name==name]

        sckey = pred.scorekey
        binders = pred.get_binders(name=name, cutoff=cutoff, cutoff_method=cutoff_method)
        #print (cutoff, n)
        pb = pred.promiscuous_binders(n=n, name=name, cutoff=cutoff, cutoff_method=cutoff_method)
        if len(pb) == 0:
            continue
        l = base.get_length(pb)
        grps = df.groupby('allele')
        alleles = grps.groups.keys()
        #seqs.extend([seq for i in alleles])
        #t = [i for s in list(seqs) for i in s]
        #text.extend(t)
        if len(pb)==0:
            continue
        c = colors[m]

        leg.append(m)
        seqlen = df.pos.max()+l

        for a,g in grps:
            b = binders[binders.allele==a]
            b = b[b.pos.isin(pb.pos)] #only promiscuous
            b.sort_values('pos',inplace=True)
            scores = b[sckey].values
            score.extend(scores)
            pos = b['pos'].values
            position.extend(pos)
            x.extend(pos+(l/2.0)) #offset as coords are rect centers
            widths.extend([l for i in scores])
            clrs.extend([c for i in scores])
            y.extend([h+0.5 for i in scores])
            alls = [a for i in scores]
            allele.extend(alls)
            peptide.extend(list(b.peptide.values))
            predictor.extend([m for i in scores])
            h+=1
        i+=1

    data = dict(x=x,y=y,allele=allele,peptide=peptide,width=widths,color=clrs,
                predictor=predictor,position=position,score=score)
    source = ColumnDataSource(data=data)
    plot.rect(x='x', y='y', source=source, width='width', height=0.8,
             legend_group='predictor',
             color='color',line_color='gray',alpha=0.7)

    #glyph = Text(x="x", y="y", text="text", text_align='center', text_color="black",
    #             text_font="monospace", text_font_size="10pt")
    #plot.add_glyph(source, glyph)

    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        ("allele", "@allele"),
        ("position", "@position"),
        ("peptide", "@peptide"),
        ("score", "@score"),
        ("predictor", "@predictor"),
    ])

    plot.xaxis.major_label_text_font_size = "9pt"
    plot.xaxis.major_label_text_font_style = "bold"
    plot.ygrid.grid_line_color = None
    plot.xgrid.minor_grid_line_alpha = 0.1
    plot.xgrid.minor_grid_line_color = 'gray'
    #plot.xgrid.minor_grid_line_dash = [6, 4]
    plot.yaxis.major_label_text_font_size = '0pt'
    #plot.xaxis.major_label_orientation = np.pi/4
    plot.min_border = 10
    plot.background_fill_color = "#fafaf4"
    plot.background_fill_alpha = 0.5
    plot.legend.orientation = "horizontal"
    plot.legend.location = "bottom_right"
    #plot.legend.label_text_font_size = 12
    plot.toolbar.logo = None
    plot.toolbar_location = "right"
    return plot

[docs]def bokeh_plot_sequence(preds, name=None, n=2, cutoff=.95, cutoff_method='default',
                        width=1000, color_sequence=False, title=''):
    """Plot sequence view of binders """

    from bokeh.plotting import figure
    from bokeh.models import ColumnDataSource, LinearAxis, Grid, Range1d, Text, Rect, CustomJS, Slider, RangeSlider, FactorRange
    from bokeh.layouts import gridplot, column

    colors = []
    seqs = []
    text = []
    alleles = []
    ylabels = []
    pcolors = get_bokeh_colors()

    for P in preds:
        print (P.name)
        df = P.data
        #get sequence from results dataframe
        seq = base.sequence_from_peptides(df)
        l = base.get_length(df)
        b = P.get_binders(name=name, cutoff=cutoff, cutoff_method=cutoff_method)
        pb = P.promiscuous_binders(name=name, cutoff=cutoff, n=n, cutoff_method=cutoff_method)
        b = b[b.pos.isin(pb.pos)] #only promiscuous

        grps = b.groupby('allele')
        al = list(grps.groups)
        alleles.extend(al)
        ylabels.extend([P.name+' '+i for i in al])
        currseq=[seq for i in al]
        seqs.extend(currseq)
        t = [i for s in currseq for i in s]
        text.extend(t)
        print (len(seqs),len(text))
        for a in al:
            pos=[]
            f = list(b[b.allele==a].pos)
            for i in f:
                pos.extend(np.arange(i,i+l))
            if color_sequence is True:
                c = plotting.get_sequence_colors(seq)
            else:
                c = ['white' for i in seq]

            for i in pos:
                c[i] = pcolors[P.name]
            colors.extend(c)

    #put into columndatasource for plotting
    N = len(seqs[0])
    S = len(alleles)
    x = np.arange(1, N+1)
    y = np.arange(0,S,1)
    xx, yy = np.meshgrid(x, y)
    gx = xx.ravel()
    gy = yy.flatten()
    recty = gy+.5

    source = ColumnDataSource(dict(x=gx, y=gy, recty=recty, text=text, colors=colors))
    plot_height = len(seqs)*15+60
    x_range = Range1d(0,N+1, bounds='auto')
    L=100
    if len(seq)<100:
        L=len(seq)
    view_range = (0,L)
    viewlen = view_range[1]-view_range[0]

    fontsize="8.5pt"
    tools="xpan, reset, save"
    p = figure(title=title, plot_width=width, plot_height=plot_height, x_range=view_range, y_range=ylabels, tools=tools,
               min_border=0, sizing_mode='stretch_both', lod_factor=10,  lod_threshold=1000)
    seqtext = Text(x="x", y="y", text="text", text_align='center',text_color="black",
                 text_font="monospace", text_font_size=fontsize)
    rects = Rect(x="x", y="recty", width=1, height=1, fill_color="colors", line_color='gray', fill_alpha=0.6)

    p.add_glyph(source, rects)
    p.add_glyph(source, seqtext)
    p.xaxis.major_label_text_font_style = "bold"
    p.grid.visible = False
    p.toolbar.logo = None

    #preview view (no text)
    p1 = figure(title=None, plot_width=width, plot_height=S*3+5, x_range=x_range, y_range=(0,S), tools=[],
                    min_border=0, sizing_mode='stretch_width', lod_factor=10, lod_threshold=10)
    rects = Rect(x="x", y="recty",  width=1, height=1, fill_color="colors", line_color=None, fill_alpha=0.6)
    previewrect = Rect(x=viewlen/2,y=S/2, width=viewlen, height=S*.99, line_color='darkblue', fill_color=None)
    p1.add_glyph(source, rects)
    p1.add_glyph(source, previewrect)
    p1.yaxis.visible = False
    p1.grid.visible = False
    p1.toolbar_location = None

    #callback for slider move
    jscode="""
        var start = cb_obj.value[0];
        var end = cb_obj.value[1];
        x_range.setv({"start": start, "end": end})
        rect.width = end-start;
        rect.x = start+rect.width/2;
        var fac = rect.width/width;
        console.log(fac);
        if (fac>=.14) { fontsize = 0;}
        else { fontsize = 8.5; }
        text.text_font_size=fontsize+"pt";
    """
    callback = CustomJS(
        args=dict(x_range=p.x_range,rect=previewrect,text=seqtext,width=p.plot_width), code=jscode)
    slider = RangeSlider (start=0, end=N, value=(0,L), step=10)#, callback_policy="throttle")
    slider.js_on_change('value_throttled', callback)

    #callback for plot drag
    jscode="""
        start = parseInt(range.start);
        end = parseInt(range.end);
        slider.value[0] = start;
        rect.width = end-start;
        rect.x = start+rect.width/2;
    """
    #p.x_range.callback = CustomJS(args=dict(slider=slider, range=p.x_range, rect=previewrect),
    #                              code=jscode)

    p = gridplot([[p1],[p],[slider]], toolbar_location="below", merge_tools=False)
    return p

[docs]def bokeh_plot_grid(pred, name=None, width=None, palette='Blues', **kwargs):
    """Plot heatmap of binding results for a predictor."""

    from bokeh.plotting import figure
    from bokeh.models import (Range1d,HoverTool,FactorRange,ColumnDataSource,
                              LinearColorMapper,LogColorMapper,callbacks,DataRange)
    from bokeh.palettes import all_palettes
    TOOLS = "xpan, xwheel_zoom, hover, reset, save"

    if width == None:
        sizing_mode = 'scale_width'
        width=900
    else:
        sizing_mode = 'fixed'
    P=pred
    df = P.data
    if df is None:
        return

    cols = ['allele','pos','peptide',P.scorekey]
    #d = df[cols].copy()
    b = P.get_binders(name=name,**kwargs)
    d = P.data.copy()
    #mark binders
    mask = d.index.isin(b.index)
    d['binder'] = mask

    l = base.get_length(df)
    grps = df.groupby('allele')
    alleles = grps.groups
    seqlen = get_seq_from_binders(P, name)
    seq = base.seq_from_binders(df)
    height = 300
    alls = len(alleles)
    x_range = Range1d(0,seqlen-l+1, bounds='auto')
    #x_range = list(seq)
    y_range = df.allele.unique()
    val = P.scorekey
    cut = P.cutoff
    if P.name not in ['tepitope']:
        d['score1'] = d[val].apply( lambda x: 1-math.log(x, 50000))
        val='score1'
    d[val][d.binder==False] = min(d[val])

    source = ColumnDataSource(d)
    colors = all_palettes[palette][7]
    mapper = LinearColorMapper(palette=colors, low=d[val].max(), high=d[val].min())

    p = figure(title=P.name+' '+name,
               x_range=x_range, y_range=y_range,
               x_axis_location="above", plot_width=width, plot_height=height,
               tools=TOOLS, toolbar_location='below', sizing_mode=sizing_mode)
    p.rect(x="pos", y="allele", width=1, height=1,
           source=source,
           fill_color={'field': val,'transform':mapper},
           line_color='gray', line_width=.1)
    p.select_one(HoverTool).tooltips = [
         ('allele', '@allele'),
         (P.scorekey, '@%s{1.11}' %P.scorekey),
         ('pos', '@pos'),
         ('peptide', '@peptide')
    ]
    p.toolbar.logo = None
    p.yaxis.major_label_text_font_size = "10pt"
    p.yaxis.major_label_text_font_style = "bold"
    return p

[docs]def bokeh_plot_bar(preds, name=None, allele=None, title='', width=None, height=100,
                    palette='Set1', tools=True, x_range=None):
    """Plot bars combining one or more prediction results for a set of
    peptides in a protein/sequence"""

    from bokeh.models import Range1d,HoverTool,ColumnDataSource
    from bokeh.plotting import figure
    from bokeh.transform import dodge
    from bokeh.core.properties import value

    height = 180
    seqlen = 0
    if width == None:
        width=700
        sizing_mode='scale_width'
    for P in preds:
        if P.data is None or len(P.data)==0:
            continue
        seqlen = get_seq_from_binders(P)

    if x_range == None:
        x_range = Range1d(0,seqlen)
    y_range = Range1d(start=0, end=1)
    if tools == True:
        tools="xpan, xwheel_zoom, reset, hover"
    else:
        tools=None
    plot = figure(title=title,plot_width=width,sizing_mode=sizing_mode,
                    plot_height=height, y_range=y_range, x_range=x_range,
                    y_axis_label='rank',
                    tools=tools)
    colors = get_bokeh_colors(palette)
    data = {}
    mlist = []
    for pred in preds:
        m = pred.name
        df = pred.data
        if df is None or len(df) == 0:
            continue
        if name != None:
            df = df[df.name==name]
        grps = df.groupby('allele')
        alleles = grps.groups.keys()
        if allele not in alleles:
            continue
        #print (m, alleles, allele)
        df = df[df.allele==allele]
        df = df.sort_values('pos').set_index('pos')
        key = pred.scorekey
        X = df[key]
        X = (X+abs(X.min())) / (X.max() - X.min())
        data[m] = X.values
        data['pos'] = list(X.index)
        #data['peptide'] = df.peptide.values
        mlist.append(m)
    source = ColumnDataSource(data)

    w = round(1.0/len(mlist),1)-.1
    i=-w/2
    for m in mlist:
        #m = pred.name
        c = colors[m]
        plot.vbar(x=dodge('pos', i, range=plot.x_range), top=m, width=w, source=source,
                   color=c, legend=value(m), alpha=.8)
        i+=w

    hover = plot.select(dict(type=HoverTool))
    hover.tooltips = OrderedDict([
        #("allele", "@allele"),
        ("pos", "@pos") ])

    plot.min_border = 10
    plot.background_fill_color = "beige"
    plot.background_fill_alpha = 0.5
    plot.toolbar.logo = None
    plot.toolbar_location = "right"
    plot.legend.location = "top_right"
    plot.legend.orientation = "horizontal"
    return plot

[docs]def bokeh_vbar(x, height=200, title='', color='navy'):

    from bokeh.plotting import figure
    from bokeh.models import ColumnDataSource

    source = ColumnDataSource(data={'chr':list(x.index),'x':range(len(x)),'y':x.values})
    plot = figure(title=title, x_range = list(x.index), plot_height=height, tools='save,reset')
    plot.vbar(x='chr',top='y', width=.8, bottom=0,source=source, color=color)
    plot.ygrid.grid_line_color = None
    plot.xgrid.grid_line_color = None
    plot.xaxis.major_label_orientation = np.pi/4
    return plot

[docs]def bokeh_pie_chart(df, title='', radius=.5, width=400, height=400, palette='Spectral'):
    """Bokeh pie chart"""

    from bokeh.plotting import figure
    from bokeh.models import HoverTool,ColumnDataSource
    from math import pi

    s = df.cumsum()/df.sum()
    cats = s.index
    p=[0]+list(s)
    #print (p)
    starts = [1/2*pi-(i*2*pi) for i in p[:-1]]
    ends = [1/2*pi-(i*2*pi) for i in p[1:]]

    from bokeh.palettes import brewer
    n = len(s)
    pal = brewer[palette][6]
    source = ColumnDataSource(
                dict(x=[0 for x in s], y=[0 for x in s],
                  radius = [radius for x in s],
                  category= cats,
                  starts=starts,
                  ends=ends,
                  colors=pal,
                  counts = df
                  ))
    plot = figure(title=title, plot_width=width, plot_height=height, tools='save,reset')
    plot.wedge(x='x', y='y', radius='radius', direction="clock", fill_color='colors', color='black',
                start_angle='starts', end_angle='ends', legend='category', source=source)
    plot.axis.visible = False
    plot.ygrid.visible = False
    plot.xgrid.visible = False
    #hover = plot.select(dict(type=HoverTool))
    #hover.tooltips = [
    #    ('category', '@category'),
    #    ('percents','@counts')
    #]
    return plot

[docs]def plot_tracks(preds, name, n=1, cutoff=.95, cutoff_method='default', regions=None,
                legend=False, colormap='Paired', figsize=None, ax=None, **kwargs):
    """
    Plot binders as bars per allele using matplotlib.
    Args:
        preds: list of one or more predictors
        name: name of protein to plot
        n: number of alleles binder should be found in to be displayed
        cutoff: percentile cutoff to determine binders to show

    """

    import matplotlib as mpl
    import pylab as plt
    from matplotlib.patches import Rectangle
    if ax == None:
        if figsize==None:
            h = sum([len(p.data.groupby('allele')) for p in preds])
            w = 10
            h = round(h*.1+2)
            figsize = (w,h)
        #plt.clf()
        fig = plt.figure(figsize=figsize,facecolor='white')
        ax = fig.add_subplot(111)

    p = len(preds)
    cmap = mpl.cm.get_cmap(colormap)
    colors = { preds[i].name : cmap(float(i)/p) for i in range(p) }

    alleles = []
    leg = []
    y=0
    handles = []
    for pred in preds:
        m = pred.name
        df = pred.data
        if df is None or len(df) == 0:
            print('no data to plot for %s' %m)
            continue
        if name != None:
            if name not in df.name.unique():
                print ('no such sequence %s' %name)
                continue
            df = df[df.name==name]
        sckey = pred.scorekey

        binders = pred.get_binders(name=name, cutoff=cutoff,
                                   cutoff_method=cutoff_method)
        #print (binders)
        pb = pred.promiscuous_binders(binders=binders, n=n)

        if len(pb) == 0:
            continue
        l = base.get_length(pb)
        seqlen = df.pos.max()+l
        #print (name,m,df.pos.max(),l,seqlen)
        grps = df.groupby('allele')
        if m in colors:
            c=colors[m]
        else:
            c='blue'
        leg.append(m)
        order = sorted(grps.groups)
        alleles.extend(order)
        #for a,g in grps:
        for a in order:
            g = grps.groups[a]
            b = binders[binders.allele==a]
            b = b[b.pos.isin(pb.pos)] #only promiscuous
            b.sort_values('pos',inplace=True)
            pos = b['pos'].values+1 #assumes pos is zero indexed
            #clrs = [scmap.to_rgba(i) for i in b[sckey]]
            #for x,c in zip(pos,clrs):
            for x in pos:
                rect = ax.add_patch(Rectangle((x,y), l, 1, facecolor=c, edgecolor='black',
                                    lw=1.5, alpha=0.6))
            y+=1
        handles.append(rect)

    if len(leg) == 0:
        return
    ax.set_xlim(0, seqlen)
    ax.set_ylim(0, len(alleles))
    w=20
    if seqlen>500: w=100
    ax.set_xticks(np.arange(0, seqlen, w))
    ax.set_ylabel('allele')
    ax.set_yticks(np.arange(.5,len(alleles)+.5))
    fsize = 14-1*len(alleles)/40.
    ax.set_yticklabels(alleles, fontsize=fsize )
    ax.grid(b=True, which='major', alpha=0.5)
    ax.set_title(name, fontsize=16, loc='right')
    if regions is not None:
        r = regions[regions.name==name]
        coords = (list(r.start),list(r.end-r.start))
        coords = zip(*coords)
        plot_regions(coords, ax, color='gray')
    if legend == True:
        ax.legend(handles, leg, bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                  ncol=3)
    plt.tight_layout()
    return ax

[docs]def plot_regions(coords, ax, color='red', label='', alpha=0.6):
    """Highlight regions in a prot binder plot"""

    from matplotlib.patches import Rectangle
    #l = len(seqs.head(1)['key'].max())
    h = ax.get_ylim()[1]
    for c in coords:
        x,l = c
        ax.add_patch(Rectangle((x,0), l, h,
            facecolor=color, lw=.8, alpha=alpha, zorder=0))
    return

[docs]def draw_labels(labels, coords, ax):
    """Add labels on axis"""

    bbox_args = dict(boxstyle='square',fc='whitesmoke')
    from matplotlib.transforms import blended_transform_factory
    tform = blended_transform_factory(ax.transData, ax.transAxes)
    for text, x in zip(labels,coords):
        xy = (x,-.05)
        an = ax.annotate(text, xy=xy, xycoords=tform,
                   ha='center', va="center",
                   size=14,
                   zorder=10, textcoords='offset points',
                   bbox=bbox_args)
    plt.subplots_adjust(bottom=0.1)
    return

[docs]def plot_bars(P, name, chunks=1, how='median', cutoff=20, color='black'):
    """
    Bar plots for sequence using median/mean/total scores.
    Args:
        P: predictor with data
        name: name of protein sequence
        chunks: break sequence up into 1 or more chunks
        how: method to calculate score bar value
        perc: percentile cutoff to show peptide
    """

    import seaborn as sns
    df = P.data[P.data.name==name].sort_values('pos')
    w = 10
    l= base.get_length(df)
    seqlen = df.pos.max()+l

    funcs = {'median': np.median, 'mean': np.mean, 'sum': np.sum}
    grps = df.groupby('pos')
    key = P.scorekey
    X = grps.agg({key: np.median, 'peptide': base.first})
    q = (1-cutoff/100.) #score quantile value
    cutoff = X[key].quantile(q)
    X[key][X[key]<cutoff] = np.nan
    if len(X)<20:
        chunks = 1
    seqlist = X.peptide.apply( lambda x : x[0])
    seqchunks = np.array_split(X.index, chunks)

    f,axs = plt.subplots(chunks,1,figsize=(15,2+2.5*chunks))
    if chunks == 1:
        axs = [axs]
    else:
        axs = list(axs.flat)
    for c in range(chunks):
        #print (c)
        ax = axs[c]
        st = seqchunks[c][0]
        end = seqchunks[c][-1]
        p = X[st:end]
        #p = p[p.peptide.isin(cb.peptide)]
        ax.bar(p.index, p[key], width=1, color=color)
        ax.set_title(str(st)+'-'+str(end), loc='right')
        xseq = seqlist[st:end]
        if len(xseq)<150:
            fsize = 16-1*len(xseq)/20.
            ax.set_xlim(st,end)
            ax.set_xticks(p.index+0.5)
            ax.set_xticklabels(xseq, rotation=0, fontsize=fsize)
        ax.set_ylim(X[key].min(), X[key].max())
    f.suptitle(name+' - '+P.name)
    plt.tight_layout()
    return axs

[docs]def plot_bcell(plot,pred,height,ax=None):
    """Line plot of iedb bcell results"""

    x = pred.data.Position
    y = pred.data.Score
    h = height
    y = y+abs(min(y))
    y = y*(h/max(y))+3
    #plot.line(x, y, line_color="red", line_width=2, alpha=0.6,legend='bcell')
    ax.plot(x,y,color='blue')
    return

[docs]def plot_seqdepot(annotation, ax):
    """Plot sedepot annotations - replace with generic plot coords track"""

    from matplotlib.patches import Rectangle
    y=-1.5
    fontsize=12

    if 'signalp' in annotation:
        bbox_args = dict(boxstyle='rarrow', fc='white', lw=1, alpha=0.8)
        pos = annotation['signalp'].values()
        print (pos)
        for x in pos:
            an = ax.annotate('SP', xy=(x,y), xycoords='data',
                        ha='left', va="center", bbox=bbox_args,
                        size=fontsize)
    if 'tmhmm' in annotation:
        vals = annotation['tmhmm']
        pos = [i[0]+(i[1]-i[0])/2.0 for i in vals]
        widths = [i[1]-i[0] for i in vals]
        bbox_args = dict(boxstyle='round', fc='deepskyblue', lw=1, alpha=0.8)
        for x,w in zip(pos,widths):
            an = ax.annotate('TMHMM', xy=(x,y), xycoords='data',
                       ha='left', va="center", bbox=bbox_args,
                       size=fontsize)
    if 'pfam27' in annotation:
        vals = annotation['pfam27']
        text = [i[0] for i in vals]
        pos = [i[1]+(i[2]-i[1])/2.0 for i in vals]
        widths = [i[2]-i[1] for i in vals]
        #print (pos,widths,text)
        bbox_args = dict(boxstyle='round', fc='white', lw=1, alpha=0.8)
        for x,w,t in zip(pos,widths,text):
            an = ax.annotate(t, xy=(x,y), xycoords='data',
                       ha='left', va="center", bbox=bbox_args,
                       size=fontsize)

    ax.set_ylim(y-1, ax.get_ylim()[1])
    return

[docs]def plot_multiple(preds, names, kind='tracks', regions=None, genome=None, **kwargs):
    """Plot results for multiple proteins"""

    for prot in names:
        if kind == 'tracks':
            ax = plot_tracks(preds,name=prot,**kwargs)
        elif kind == 'bar':
            axs = plot_bars(preds[0],name=prot)
            ax = axs[0]
        if regions is not None:
            r = regions[regions.name==prot]
            print (r)
            #print genome[genome.locus_tag==prot]
            coords = (list(r.start),list(r.end-r.start))
            coords = zip(*coords)
            plot_regions(coords, ax, color='gray')
        #labels = list(r.peptide)
        #plotting.draw_labels(labels, coords, ax)
        if genome is not None:
            p = genome[genome['locus_tag']==prot]
            seq = p.translation.iloc[0]
            from . import analysis
            sd = analysis.get_seqdepot(seq)['t']
            plot_seqdepot(sd, ax)
        plt.tight_layout()
        plt.show()
    return

[docs]def plot_binder_map(P, name, values='rank', cutoff=20, chunks=1, cmap=None):
    """
    Plot heatmap of binders above a cutoff by rank or score.
    Args:
        P: predictor object with data
        name: name of protein to plot
        values: data column to use for plot data, 'score' or 'rank'
        cutoff: cutoff if using rank as values
        chunks: number of plots to split the sequence into
    """

    import pylab as plt
    import seaborn as sns
    df = P.data[P.data.name==name].sort_values('pos')
    w = 10
    l= base.get_length(df)
    seqlen = df.pos.max()+l
    f,axs = plt.subplots(chunks,1,figsize=(15,3+2.5*chunks))
    if chunks == 1:
        axs = [axs]
    else:
        axs = list(axs.flat)

    if values == 'score':
        values = P.scorekey
        if cmap == None: cmap='RdBu_r'
    X = df.pivot_table(index='allele', columns='pos', values=values)
    if values == P.scorekey:
        #normalise score across alleles for clarity
        zscore = lambda x: (x - x.mean()) / x.std()
        X = X.apply(zscore, 1)
    if values == 'rank':
        X[X > cutoff] = 0
        if cmap == None: cmap='Blues'
    s = df.drop_duplicates(['peptide','pos'])
    seqlist = s.set_index('pos').peptide.apply( lambda x : x[0])
    #print seqlist
    seqchunks = np.array_split(X.columns, chunks)
    for c in range(chunks):
        ax = axs[c]
        p = X[seqchunks[c]]
        #plot heatmap
        vmin=min(X.min()); vmax=max(X.max())
        center = vmin+(vmax-vmin)/2
        sns.heatmap(p, cmap=cmap, cbar_kws={"shrink": .5},
                    vmin=vmin, vmax=vmax, #center=center,
                    ax=ax, xticklabels=20)
        #show sequence on x-axis
        st = seqchunks[c][0]
        end = seqchunks[c][-1]
        xseq = seqlist[st:end]
        ax.set_title(str(st)+'-'+str(end), loc='right')
        ax.spines['bottom'].set_visible(True)
        if len(seqchunks[c])<150:
            fsize = 16-1*len(seqchunks[c])/20.
            ax.set_xticks(np.arange(0,len(xseq))+0.5)
            ax.set_xticklabels(xseq, rotation=0, fontsize=fsize)

    f.suptitle(name+' - '+P.name)
    plt.tight_layout()
    return ax

[docs]def binders_to_coords(df):
    """Convert binder results to dict of coords for plotting"""

    coords = {}
    if not 'start' in df.columns:
        df=base.get_coords(df)
    if 'start' in df.columns:
        for i,g in df.groupby('name'):
            l = g.end-g.start
            coords[i] = zip(g.start,l)
    return coords

[docs]def plot_overview(genome, coords=None, cols=2, colormap='Paired',
                  legend=True, figsize=None):
    """
    Plot regions of interest in a group of protein sequences. Useful for
    seeing how your binders/epitopes are distributed in a small genome or subset of genes.
    Args:
        genome: dataframe with protein sequences
        coords: a list/dict of tuple lists of the form {protein name: [(start,length)..]}
        cols: number of columns for plot, integer
    """

    import pylab as plt
    if type(coords) is list:
        coords = { i:coords[i] for i in range(len(coords)) }
        legend=False
    import matplotlib as mpl
    import seaborn as sns
    #sns.reset_orig()
    cmap = mpl.cm.get_cmap(colormap)
    t = len(coords)
    colors = [cmap(float(i)/t) for i in range(t)]
    from matplotlib.patches import Rectangle

    names = [coords[c].keys() for c in coords][0]

    df = genome[genome.locus_tag.isin(names)]
    rows = int(np.ceil(len(names)/float(cols)))
    if figsize == None:
        h = round(len(names)*.2+10./cols)
        figsize = (14,h)
    f,axs=plt.subplots(rows,cols,figsize=figsize)
    grid=axs.flat
    rects = {}
    i=0
    for idx,prot in df.iterrows():
        ax=grid[i]
        protname = prot.locus_tag
        seq = prot.translation
        if 'description' in prot:
            title = prot.description
        else:
            title = protname
        y=0
        for label in coords:
            c = coords[label]
            if not protname in c:
                continue
            vals = c[protname]
            #print vals
            for v in vals:
                x,l = v[0],v[1]
                rect = ax.add_patch(Rectangle((x,y), l, .9,
                                facecolor=colors[y],
                                lw=1.2, alpha=0.8))
                if len(v)>2:
                    s = v[2]
                    bbox_args = dict(fc=colors[y], lw=1.2, alpha=0.8)
                    ax.annotate(s, (x+l/2, y),
                        fontsize=12, ha='center', va='bottom')
            if not label in rects:
                rects[label] = rect
            y+=1
        i+=1
        slen = len(seq)
        w = round(float(slen)/20.)
        w = math.ceil(w/20)*20
        ax.set_xlim(0, slen)
        ax.set_ylim(0, t)
        ax.set_xticks(np.arange(0, slen, w))
        ax.set_yticks([])
        ax.set_title(title, fontsize=16, loc='right')

    if i|2!=0 and cols>1:
        try:
            f.delaxes(grid[i])
        except:
            pass
    if legend == True:
        f.legend(rects.values(), rects.keys(), loc=4)
    plt.tight_layout()
    return

[docs]def seqdepot_to_coords(sd, key='pfam27'):
    """
    Convert seqdepot annotations to coords for plotting
    """

    coords=[]
    if len(sd['t'])==0 or not key in sd['t']:
        return []
    x = sd['t'][key]
    #print x

    if key in ['pfam27','pfam28']:
        coords = [(i[1],i[2]-i[1],i[0]) for i in x]
    elif key in ['gene3d','prints']:
        coords = [(i[2],i[3]-i[2],i[1]) for i in x]
    elif key == 'tmhmm':
        coords = [(i[0],i[1]-i[0]) for i in x]
    elif key == 'signalp':
        x = x.items()
        coords = [(i[1],10,'SP') for i in x]
    return coords

[docs]def get_seqdepot_annotation(genome, key='pfam27'):
    """
    Get seqdepot annotations for a set of proteins in dataframe.
    """
    from . import seqdepot
    annot={}
    for i,row in genome.iterrows():
        n = row.locus_tag
        seq = row.translation
        #print n,seq
        sd = seqdepot.new()
        aseqid = sd.aseqIdFromSequence(seq)
        result = sd.findOne(aseqid)
        #for x in result['t']:
        #    print x, result['t'][x]
        x = seqdepot_to_coords(result, key)
        annot[n] = x
    return annot