Source code for epitopepredict.web

#!/usr/bin/env python

"""
    epitopepredict, methods for supporting web app
    Created Sep 2017
    Copyright (C) Damien Farrell
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 3
    of the License, or (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""

from __future__ import absolute_import, print_function
import sys,os,glob
from collections import OrderedDict
import pandas as pd
import numpy as np
from . import base, plotting, sequtils, analysis
from bokeh.models import ColumnDataSource, Slider
from bokeh.models.widgets import DataTable, TableColumn, Select, Button, Slider, TextInput
from bokeh.layouts import row, column, gridplot, widgetbox, layout
from bokeh.embed import components

path = 'results'
predictors = base.predictors
plotkinds = ['tracks','bar','text']

[docs]def get_readme(): f=os.path.join(base.module_path,'readme.md') lines=open(f,'r') return ''.join(lines.readlines())
[docs]def get_file_lists(path): """Get list of available prediction results in the given path. Tries to check for each possible predictor.""" if path == None or not os.path.exists(path): return [] names = [] for p in predictors: files = glob.glob(os.path.join(path, p, '*.csv')) n = [os.path.splitext(os.path.basename(i))[0] for i in files] names.extend(n) names = set(names) names = sorted(names) return names
[docs]def get_results_info(P): """Info on sequence used for prediction""" df = P.data if df is None: return '' #l = base.get_length(df) seq = sequence_from_peptides(P.data) l = len(seq) return {'length':l}
[docs]def get_alleles(preds): """get available alleles""" a = [] for P in preds: df = P.data if df is None: continue x = df.allele.unique() a.extend(x) a = list(set(a)) return a
[docs]def get_predictors(path, name=None): """Get a set of predictors under a results path for all or a specific protein. """ preds = [] for pred in base.predictors: P = base.get_predictor(pred) if name is not None: #if single file load into object respath = os.path.join(path, pred, name)+'.csv' if not os.path.exists(respath): continue #print (respath) P.load(respath) else: #multiple files keep reference to path only respath = os.path.join(path, pred) P.path = respath if P.data is not None or os.path.exists(respath): preds.append(P) return preds
[docs]def get_sequences(pred): """Get set of sequences from loaded data""" seqs = {} df = pred.data for n,df in pred.data.groupby('name'): s = sequence_from_peptides(df) seqs[n] = s seqs = pd.DataFrame(seqs.items(), columns=['name','seq']) #print (seqs) return seqs
[docs]def sequences_to_html_table(seqs, classes=''): """Convert seqs to html""" tabledata=[] tabledata.append('<th>name</th><th>sequence</th>') for i,row in seqs.iterrows(): seq = row.seq name = row['name'] seqhtml = '' for i in range(len(seq)): seqhtml += '<span style="background-color:white">%s</span>' %seq[i] row = '<tr><th>%s</th><td>%s</td></tr>' %(name, seqhtml) tabledata.append(row) table = '\n'.join(tabledata) table = '<table class="%s">\n' %classes + table + '</table>' return table
[docs]def create_sequence_html(preds, name='', classes='', **kwargs): seqs=[] tabledata=[] tabledata.append('<th>allele</th><th>sequence</th>') colors = plotting.get_bokeh_colors() for P in preds: df = P.data if df is None: continue b = P.get_binders(**kwargs) l = base.get_length(df) seq = sequence_from_peptides(df) clr = colors[P.name] grps = b.groupby('allele') for a,g in grps: pos=[] for i in g.pos: pos.extend(np.arange(i,i+l)) seqhtml = '' for i in range(len(seq)): if i in pos: seqhtml += '<span style="background-color:%s; opacity:0.8">%s</span>' %(clr,seq[i]) else: seqhtml += '<span style="background-color:white">%s</span>' %seq[i] row = '<tr><th>%s</th><td>%s</td></tr>' %(a, seqhtml) tabledata.append(row) table = '\n'.join(tabledata) table = '<table class="%s">\n' %classes + table + '</table>' return table
[docs]def sequence_to_html_grid(preds, classes='', **kwargs): """Put aligned or multiple identical rows in dataframe and convert to grid of aas as html table""" seqdf = [] bdata = {} for P in preds: df = P.data if df is None: continue b = P.get_binders(**kwargs) bdata[P.name] = b #pb = P.promiscuous_binders(binders=b,**kwargs) l = base.get_length(df) grps = b.groupby('allele') alleles = grps.groups seq = sequence_from_peptides(df) #put into new df one row per allele x = [(P.name,a,seq) for a in alleles] df = pd.DataFrame(x, columns=['pred','allele','seq']).set_index(['pred','allele']) df = df.seq.apply(lambda x: pd.Series(list(x))) seqdf.append(df) seqdf = pd.concat(seqdf) colors = plotting.get_bokeh_colors() def color(x): p, a = x.name pos = [] clr = colors[p] b = bdata[p] f = list(b[b.allele==a].pos) for i in f: pos.extend(np.arange(i,i+l)) clrs = ['' for i in x] for i in pos: clrs[i] = 'background-color: %s; opacity: .8;' %clr return clrs s = seqdf.style\ .set_table_attributes('class="%s"' %classes)\ .apply(color,1) table = s.render() return table
[docs]def create_figures(preds, name='', kind='tracks', cutoff=5, n=2, cutoff_method='default', **kwargs): """Get plots of binders for single protein/sequence""" figures = [] if kind == 'tracks': plot = plotting.bokeh_plot_tracks(preds, title=name, width=700, palette='Set1', cutoff=float(cutoff), n=int(n), cutoff_method=cutoff_method) if plot is not None: figures.append(plot) elif kind == 'grid': for p in preds: plot = plotting.bokeh_plot_grid(p, name=name, width=None ) figures.append(plot) return figures
[docs]def create_bokeh_table(path, name): """Create table of prediction data""" P = get_results(path, 'tepitope', name) if P.data is None: return df = P.data[:10] data = dict( peptide=df.peptide.values, pos=df.pos.values, score=df.score.values, allele=df.allele.values ) #print (df) source = ColumnDataSource(data) columns = [ TableColumn(field="peptide", title="peptide"), TableColumn(field="pos", title="pos"), TableColumn(field="score", title="score"), TableColumn(field="allele", title="allele"), ] table = DataTable(source=source, columns=columns, width=400, height=280) return table
[docs]def get_results_tables(path, name=None, promiscuous=True, limit=None, **kwargs): """Get binder results from a results path. Args: path: path to results name: name of particular protein/sequence view: get all binders or just promiscuous """ n=kwargs['n'] cutoff=kwargs['cutoff'] preds = get_predictors(path, name) data = {} for P in preds: binder_file = os.path.join(path,'binders_%s_%s.csv' %(P.name,cutoff)) #print (binder_file) #if we have all binders from last time use these if P.data is not None: #results for specific name if present in object b = P.get_binders(name=name, **kwargs) elif os.path.exists(binder_file): print ('cached file found') b = pd.read_csv(binder_file, index_col=0) else: #otherwise calculate binders b = P.get_binders(path=P.path, **kwargs) b.to_csv(binder_file) if promiscuous == True: b = P.promiscuous_binders(binders=b, **kwargs) b = b.reset_index(drop=True) if limit != None: b = b.loc[:limit] #print (b[:10]) data[P.name] = b return data
[docs]def get_summary_tables(path, limit=None, **kwargs): """Get binder results summary for all proteins in path. Args: path: path to results """ data={} #preds = get_predictors(path) for pred in base.predictors: sfile = os.path.join(path, 'summary_%s.csv' %pred) if not os.path.exists(sfile): continue summ = pd.read_csv(sfile, index_col=0) for c in ['translation','note']: if c in summ.columns: summ = summ.drop(columns=c) data[pred] = summ return data
[docs]def aggregate_summary(data): X = pd.concat(data).reset_index().rename(columns={'level_0':'predictor'}) a = pd.pivot_table(X,index=['locus_tag','length'],columns=['predictor'],values=['clusters','binders']) a = a.fillna('-').reset_index() return a
[docs]def get_scrollable_table(df): """Return a scrollable table as a div element to be placed in web page""" res = df.to_html(classes="tinytable sortable") div = '<div class="scrollingArea">%s</div>' %res return div
[docs]def dataframes_to_html(data, classes=''): """Convert dictionary of dataframes to html tables""" if type(data) is pd.DataFrame: data = {'data':data} tables = OrderedDict() for k in data: df = data[k] s = df.style\ .set_table_attributes('class="%s"' %classes) #.background_gradient(subset=[P.scorekey], cmap=cm) #%classes tables[k] = s.render(index=False) return tables
[docs]def dict_to_html(data): s = '' for k in data: s += '<a>%s: %s</a><br>' %(k,data[k]) return s
[docs]def column_to_url(df, field, path): """Add urls to specified field in a dataframe by prepending the supplied path.""" if len(df) == 0: return df df[field] = df.apply(lambda x: '<a href=%s target="_blank">%s</a>' %(path+x[field],x[field]),1) return df
[docs]def tabbed_html(items): """Create html for a set of tabbed divs from dict of html code, one for each tab. Uses css classes defined in static/custom.css""" name = 'tab-group' html = '<div class="tabs">\n' for t in items: html += '<div class="tab">\n' html += '<input type="radio" id="%s" name="%s" checked>\n' %(t,name) html += '<label for="%s">%s</label>\n' %(t,t) html += '<div class="content">\n' html += items[t] #html += '<p>%s</p>' %t html += '</div></div>\n' html += '</div>' #print (html) return html
[docs]def create_widgets(): select = Select(title="Name:", value="name", options=["foo", "bar"]) slider = Slider(start=0, end=100, value=5, step=.5, title="Cutoff") button = Button(label="Submit", button_type="success") #text_input = TextInput(value="default", title="Name:") return widgetbox([button,select,slider], width=200)
[docs]def test(): from bokeh.io import output_file, show path = 'results' name = 'Rv0011c' kwargs ={'cutoff_method':'default'} preds = get_predictors(path, name) plots = create_figures(preds) #table = create_bokeh_table(path, name) tables = create_binder_tables(preds, name) grid = gridplot(plots, ncols=1, merge_tools=True) widgets = create_widgets() l = layout([[ plots, widgets ]], ncols=2, nrows=1) #script, div = components(l) show(l)
if __name__ == "__main__": test()