Source code for epitopepredict.neo

#!/usr/bin/env python

"""
    Command line script for neo epitope prediction
    Created March 2018
    Copyright (C) Damien Farrell
"""

from __future__ import absolute_import, print_function
import sys, os, subprocess
import time
import pickle
import numpy as np
import pandas as pd
pd.set_option('display.width', 150)
pd.set_option('max_colwidth', 80)
from collections import OrderedDict
from epitopepredict import base, config, analysis, sequtils, peptutils, tepitope

defaultpath = os.getcwd()
sim_matrix = tepitope.get_matrix('pmbec')
metrics = ['score', 'matched_score', 'binding_diff', 'perc_rank',
           'wt_similarity', 'self_similarity', 'virus_similarity',
           'anchor', 'hydro', 'net_charge']

class NeoEpitopeWorkFlow(object):
    """Class for implementing a neo epitope workflow."""

    def __init__(self, opts={}):
        for i in opts:
            self.__dict__[i] = opts[i]
        return

    def setup(self):
        """Setup main parameters"""

        if check_imports() == False:
            return
        #check_ensembl()
        pd.set_option('display.width', 120)
        base.iedbmhc1path = self.iedbmhc1_path
        base.iedbmhc2path = self.iedbmhc2_path

        self.vcf_files = self.vcf_files.split(',')
        f = self.vcf_files[0]
        fileext = os.path.splitext(f)[1]
        if fileext == '.txt' and os.path.exists(f):
            print ('found file list')
            self.vcf_files = read_names(f)

        self.mhc1_alleles = self.mhc1_alleles.split(',')
        self.mhc2_alleles = self.mhc2_alleles.split(',')
        if len(self.mhc1_alleles)==0 and len(self.mhc2_alleles)==0:
            return False
        self.predictors = self.predictors.split(',')
        for p in self.predictors:
            if p not in base.predictors:
                print ('unknown predictor in config file. Use:')
                show_predictors()
                return False

        if self.mhc1_alleles[0] in base.mhc1_presets:
            self.mhc1_alleles = base.get_preset_alleles(self.mhc1_alleles[0])
        elif self.mhc2_alleles[0] in base.mhc2_presets:
            self.mhc2_alleles = base.get_preset_alleles(self.mhc2_alleles[0])

        if type(self.cutoffs) is int or type(self.cutoffs) is float:
            self.cutoffs = [self.cutoffs]
        else:
            self.cutoffs = [float(i) for i in self.cutoffs.split(',')]

        self.names = None
        if not os.path.exists(self.path) and self.path != '':
            os.mkdir(self.path)
        return True

    def get_file_labels(self, files):
        l = OrderedDict()
        for f in files:
            if not os.path.exists(f):
                print ('no such file %s' %f)
                continue
            label = os.path.splitext(os.path.basename(f))[0]
            l[label] = {'filename': f}
        return l

    def run(self):
        """Run workflow for multiple samples and prediction methods."""

        print ('running neoepitope predictions')
        start = time.time()
        path = self.path
        overwrite = self.overwrite
        files = self.vcf_files
        preds = self.predictors
        labels = self.get_file_labels(files)
        cutoffs = self.cutoffs
        if len(cutoffs) < len(preds):
            cutoffs = [cutoffs[0] for p in preds]

        for f in labels:
            print ('sample name: %s' %f)
            infile = labels[f]['filename']
            #file to save variants to, if present we can skip
            eff_csv = os.path.join(path, 'variant_effects_%s.csv' %f)
            eff_obj = os.path.join(path, 'variant_effects_%s.pickle' %f)
            if not os.path.exists(eff_obj) or overwrite == True:
                #get variant effects for each file and then iterate over predictors
                variants = load_variants(vcf_file=infile)
                labels[f]['variants'] = len(variants)
                print ('getting variant effects')
                effects = get_variants_effects(variants, self.verbose)
                #serialize variant effects
                effects_to_pickle(effects, eff_obj)
            else:
                #else reload from last run
                effects = pickle.load(open(eff_obj, 'rb'))

            #save effects as table
            eff_data = effects_to_dataframe(effects)
            eff_data['sample'] = f
            eff_data.to_csv(eff_csv)
            #get mutated peptides
            seqs = get_mutant_sequences(effects=effects, length=self.mhc1_length,
                                        verbose=self.verbose)
            #get similarities
            df = get_closest_matches(seqs, self.verbose, cpus=self.cpus)

            i = 0
            for predictor in self.predictors:
                outfile = os.path.join(path, 'results_%s_%s.csv' %(f, predictor))
                if os.path.exists(outfile) and overwrite == False:
                    continue
                if predictor in base.mhc1_predictors:
                    alleles = self.mhc1_alleles
                else:
                    alleles = self.mhc2_alleles
                res = predict_binding(df, alleles=alleles, predictor=predictor,
                                      verbose=self.verbose, cpus=self.cpus)
                res['label'] = f
                res.to_csv(outfile, index=False)
                #gets promiscuous binders based on the cutoff
                #P = base.get_predictor(predictor)
                #P.data = res
                #pb = P.promiscuous_binders(n=1, keep_columns=True, cutoff=cutoffs[i])
                #pb['label'] = f
                #print (pb[:20])
                #pb.to_csv(os.path.join(path, 'binders_%s_%s.csv' %(f,predictor)), index=False)
                i += 1
            #combine results if multiple predictors?
            #combine_results()
            #combine results for multiple samples?

        #save sample labels
        pd.DataFrame(labels).T.to_csv(os.path.join(path, 'sample_labels.csv'))
        print ('finished, results saved to %s' %path)
        end = round(time.time()-start, 1)
        print ('took %s seconds' %end)
        return

    def combine_samples(self, labels):
        """Put peptides from multiple files in one table. labels is the
        sample labels table with a filename column."""

        res = []
        for i,r in labels.iterrows():
            df = pd.read_csv('results_%s_tepitope.csv' %r.filename)
            res.append(df)
        res = pd.concat(res)
        X = pd.pivot_table(res, index=['peptide'], columns=['label'], values='score')
        return X

def pbmec_score(seq1, seq2):
    """Score with the PMBEC matrix. Sums the matrix entry over every pair of
    residues in the two sequences."""

    x = 0
    try:
        for i in seq1:
            for j in seq2:
                x += sim_matrix[i][j]
    except KeyError:
        #non-standard residue not present in the matrix
        return -1
    return x
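
#Illustrative sketch (not part of the original module): pbmec_score sums the
#PMBEC matrix entry over every residue pair of the two sequences, so the raw
#value grows with peptide length. The peptides below are arbitrary examples.
def _example_pbmec_score():
    mut = 'SIINFEKL'
    wt = 'SIINFEKI'
    print ('pairwise PMBEC score for %s vs %s: %s' %(mut, wt, pbmec_score(mut, wt)))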

def get_alleles(f):
    """Get input alleles"""

    fileext = os.path.splitext(f)[1]
    if fileext == '.txt' and os.path.exists(f):
        items = read_names(f)
    else:
        items = f.split(',')
    return items

def read_names(filename):
    """Read plain text file of items"""

    with open(filename) as f:
        p = f.readlines()
    p = [x.strip() for x in p]
    p = list(filter(None, p))
    return p

def variants_from_csv(csv_file, sample_id=None, reference=None):
    """Variants from csv file.
    Args:
        csv_file: csv file with the column names CHROM, POS, REF, ALT and
            optionally sample_id
        sample_id: if provided, select variants only for this id
        reference: ref genome used for variant calling
    """

    from pyensembl import ensembl_grch38
    import varcode
    from varcode import Variant
    df = pd.read_csv(csv_file)
    variants = []
    if sample_id is not None and 'sample_id' in df.columns:
        df = df[df.sample_id == sample_id]
    df = df.drop_duplicates(['POS','REF','ALT'])
    for i,r in list(df.iterrows()):
        v = Variant(contig=r.CHROM, start=r.POS, ref=r.REF, alt=r.ALT,
                    ensembl=ensembl_grch38)
        variants.append(v)
    varcl = varcode.variant_collection.VariantCollection(variants)
    return varcl
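
#Usage sketch for variants_from_csv (assumes a hypothetical 'calls.csv' with
#CHROM, POS, REF and ALT columns and a downloaded GRCh38 pyensembl cache):
def _example_variants_from_csv():
    varcl = variants_from_csv('calls.csv', sample_id='patient1')
    print ('%s variants' %len(varcl))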

def dataframe_to_vcf(df, outfile):
    """Write a dataframe of variants to a simple vcf file. The dataframe
    requires the columns 'CHROM','POS','REF','ALT'.
    """

    df = df.rename(columns={'CHROM':'#CHROM'})
    df['ID'] = '.'
    df['FILTER'] = 'PASS'
    df['QUAL'] = 60
    df['INFO'] = '.'
    df['FORMAT'] = 'GT'
    df['sample'] = '0/1'
    cols = ['#CHROM','POS','ID','REF','ALT','QUAL','FILTER','INFO','FORMAT','sample']
    with open(outfile, 'w') as f:
        f.write('##fileformat=VCFv4.0\n')
        f.write('##reference=GRCh38.fa\n')
        f.write('##source=csv\n')
        f.write('##FILTER=<ID=PASS,Description="Passed all filters">\n')
        f.write('##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n')
        df[cols].to_csv(f, sep='\t', index=False)
    return
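
#Minimal sketch for dataframe_to_vcf: a one-row variant table written out as
#vcf. The coordinates are made-up example values.
def _example_dataframe_to_vcf():
    df = pd.DataFrame([{'CHROM':'1', 'POS':100000, 'REF':'A', 'ALT':'T'}])
    dataframe_to_vcf(df, 'test.vcf')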

def get_variant_class(effect):
    import varcode
    v = effect.variant
    if v.is_deletion:
        return 'deletion'
    elif v.is_insertion:
        return 'insertion'
    elif v.is_snv:
        return 'snv'
    elif v.is_indel:
        return 'indel'
    elif type(effect) is varcode.effects.effect_classes.FrameShift:
        return 'frameshift'

def effects_to_pickle(effects, filename):
    """Serialize variant effects collections"""

    with open(filename, "wb") as f:
        pickle.dump(effects, f, protocol=2)
    return

def effects_to_dataframe(effects):
    x = []
    for eff in effects:
        if eff is None:
            continue
        d = OrderedDict()
        d['gene_name'] = eff.gene_name
        d['transcript_id'] = eff.transcript_id
        d['aa_change'] = eff.short_description
        d['mutation'] = eff.variant.short_description
        d['variant_class'] = get_variant_class(eff)
        #d['wt_sequence'] = eff.original_protein_sequence
        #d['mut_sequence'] = eff.mutant_protein_sequence
        x.append(d)
    df = pd.DataFrame(x)
    df['chr'] = df.apply(lambda x: x.mutation.split(' ')[0], 1)
    return df
'''def filter_variant_effects(effects, verbose=False):
    """Get filtered list of effects. Omits silent and noncoding effects.
    Returns: list of varcode variant effect objects
    """

    effs = effects.drop_silent_and_noncoding()
    filt = []
    for eff in effs:
        v = eff.variant
        mut = eff.mutant_protein_sequence
        if mut is None:
            continue
        vloc = eff.aa_mutation_start_offset
        if vloc is None or len(v.coding_genes) == 0:
            continue
        if verbose == True:
            print (v.gene_names, type(eff))
            print (eff.transcript_id, eff.short_description, eff.variant.ref)
        filt.append(eff)
    return filt'''

def get_variants_effects(variants, verbose=False, gene_expression_dict=None):
    """Get all effects from a list of variants.
    Returns: list of varcode variant effect objects"""

    from varcode.effects import Substitution, Insertion, Deletion
    effects = variants.effects()
    effects = effects.drop_silent_and_noncoding()
    effects = effects.filter_by_effect_priority(Substitution)
    if gene_expression_dict is not None:
        effects = effects.filter_by_gene_expression(gene_expression_dict)
    print ('%s effects from %s variants' %(len(effects), len(variants)))
    return effects

def peptides_from_effect(eff, length=11, peptides=True, verbose=False):
    """Get mutated peptides from a single effect object.
    Returns:
        dataframe with peptides and variant info
    """

    import varcode
    pad = length-1
    if eff is None:
        return
    gene = eff.gene_name
    varclass = get_variant_class(eff)
    orig = eff.original_protein_sequence
    mut = eff.mutant_protein_sequence
    vloc = eff.aa_mutation_start_offset
    if vloc is None or mut is None:
        return
    st = vloc-pad
    end = vloc+pad+1
    if st < 0:
        st = 0
    if type(eff) is varcode.effects.effect_classes.FrameShift:
        #a frameshift changes all downstream sequence so there is no wt peptide
        mutpep = eff.shifted_sequence
        wt = None
    else:
        mutpep = mut[st:end]
        if varclass == 'snv':
            wt = orig[st:end]
        else:
            wt = None
    if len(mutpep) < length:
        #peptide too short to cut fragments of the requested length
        return
    if peptides is True:
        df = peptutils.get_fragments(seq=mutpep, length=length)
        df['pos'] = pd.Series(range(st, end))
        df['prot_length_ratio'] = len(mut)/float(len(orig))
        if wt is not None:
            wdf = peptutils.get_fragments(seq=wt, length=length)
            df['wt'] = wdf.peptide
        else:
            df['wt'] = None
    else:
        #just return the mutated protein
        df = pd.DataFrame.from_dict([{'wt_sequence':orig, 'mutant_sequence':mut}])
        df['pos'] = vloc
    df['name'] = gene
    df['transcript_id'] = eff.transcript_id
    df['aa_change'] = eff.short_description
    df['mutation'] = eff.variant.short_description
    df['variant_class'] = varclass
    return df

def get_mutant_sequences(variants=None, effects=None, reference=None, peptides=True,
                         drop_duplicates=True, length=11, verbose=False):
    """Get mutant proteins or peptide fragments from vcf or maf file.
    Args:
        variants: varcode variant collection
        effects: non-synonymous effects, alternative to variants
        peptides: get peptide fragments around mutation
    Returns:
        pandas dataframe with mutated peptide sequence and source information
    """

    res = []
    if variants is not None:
        effects = get_variants_effects(variants, verbose)
    if effects is None:
        print ('no variant information')
        return
    for eff in effects:
        peps = peptides_from_effect(eff, length=length, peptides=peptides,
                                    verbose=verbose)
        if peps is None:
            continue
        res.append(peps)
    res = pd.concat(res).reset_index(drop=True)
    #remove rows where mutant peptide is the same as wt
    res = res[res.peptide != res.wt]
    if drop_duplicates == True:
        res = res.drop_duplicates('peptide')
    print ('%s sequences/peptides from %s effects' %(len(res), len(effects)))
    return res

def load_variants(vcf_file=None, maf_file=None, max_variants=None):
    """Load variants from a vcf or maf file"""

    import varcode
    if vcf_file is not None:
        variants = varcode.load_vcf(vcf_file, allow_extended_nucleotides=True,
                                    max_variants=max_variants)
        f = vcf_file
    elif maf_file is not None:
        variants = varcode.load_maf(maf_file)
        f = maf_file
    else:
        print ('provide a vcf or maf file')
        return
    print ('%s variants read from %s' %(len(variants), f))
    return variants
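
#End-to-end sketch of the variant-to-peptide steps above, assuming a
#hypothetical 'sample.vcf' and installed varcode/pyensembl reference data:
def _example_mutant_peptides():
    variants = load_variants(vcf_file='sample.vcf')
    effects = get_variants_effects(variants)
    #9-mers around each mutation, with matched wt peptides where possible
    peps = get_mutant_sequences(effects=effects, length=9)
    print (peps[:10])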

def get_closest_matches(df, verbose=False, cpus=1):
    """Find peptide similarity metrics"""

    if verbose == True:
        print ('finding matches to self proteome')
    #find matches to self proteome, adds penalty score column to df
    #we should only blast non-duplicates....
    df = self_matches(df, cpus=cpus)
    if verbose == True:
        print ('finding matches to viral proteomes')
    df = virus_matches(df, cpus=cpus)
    #get similarity scores for wt and closest match to proteome
    matrix = 'pmbec'
    df['wt_similarity'] = df.apply(lambda x: wt_similarity(x, matrix=matrix), 1)
    df['self_similarity'] = df.apply(lambda x: self_similarity(x, matrix=matrix), 1)
    df['virus_similarity'] = df.apply(lambda x: virus_similarity(x, matrix=matrix), 1)
    #get closest peptide in another column, either wt or nearest self
    df['closest'] = df.apply(get_closest_match, 1)
    df['length'] = df.peptide.str.len()
    #exclude exact matches to self?
    if verbose == True:
        print ('%s peptides with exact matches to self' %len(df[df.self_mismatches==0]))
    return df

def predict_binding(df, predictor='netmhcpan', alleles=[], verbose=False,
                    cpus=1, cutoff=.95, cutoff_method='default'):
    """Predict binding scores for mutated and wt peptides (if present) from
    supplied variants.
    Args:
        df: pandas dataframe with peptide sequences, requires at least 2 columns
            'peptide' - the mutant peptide
            'wt' - a corresponding wild type peptide
            this data could be generated from get_mutant_sequences or from an
            external program
        predictor: mhc binding prediction method
        alleles: list of alleles
    Returns:
        dataframe with mutant and wt binding scores for all alleles
    """

    P = base.get_predictor(predictor, scoring='ligand')
    print (P)
    print ('predicting mhc binding for %s peptides with %s' %(len(df), P.name))
    peps = list(df.peptide)
    res = P.predict_peptides(peps, alleles=alleles, cpus=cpus,
                             cutoff=cutoff, cutoff_method=cutoff_method,
                             drop_columns=True)
    if res is None:
        print ('no binding predictions!')
        return
    #predict closest matching peptide affinity
    if verbose == True:
        print ('predicting wt peptides')
    wtpeps = list(df.closest)
    b_wt = P.predict_peptides(wtpeps, alleles=alleles, cpus=cpus,
                              cutoff=cutoff, cutoff_method=cutoff_method,
                              drop_columns=True)
    #combine mutant and matching binding predictions
    res = combine_wt_scores(res, b_wt, P.scorekey)
    res = res.drop(['pos','name'], 1)
    #combine binding results with main dataframe
    res = df.merge(res, on='peptide')
    res['binding_diff'] = res[P.scorekey]/res.matched_score
    #anchor position mutated in any 9-mers
    res['anchor'] = res.apply(anchor_mutated, 1)
    #hydrophobicity and net charge
    res = analysis.peptide_properties(res, 'peptide')
    res['length'] = res.peptide.str.len()
    #merge promiscuity measure into results
    #if len(pb) > 0:
    #    res = res.merge(pb[['peptide','alleles']], on='peptide', how='left')
    #else:
    #    res['alleles'] = 0
    #rename some columns
    res = res.rename(columns={'rank':'binding_rank','alleles':'promiscuity'})
    res = res.sort_values('binding_rank', ascending=True)
    return res
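
#Usage sketch for predict_binding. tepitope is the built-in MHC-II method so
#no external software is needed for the binding step itself; the allele names
#are arbitrary examples. The input normally comes from get_mutant_sequences,
#and get_closest_matches must run first since predict_binding also scores the
#'closest' column (that step needs blast+ and downloads the proteome dbs).
def _example_predict_binding(seqs):
    df = get_closest_matches(seqs)
    res = predict_binding(df, predictor='tepitope',
                          alleles=['HLA-DRB1*01:01','HLA-DRB1*04:01'])
    return res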

def score_peptides(df, rf=None):
    """Score peptides with a classifier. Returns a prediction probability."""

    if rf is None:
        try:
            import joblib
        except ImportError:
            #sklearn.externals.joblib was removed in newer scikit-learn
            from sklearn.externals import joblib
        rf = joblib.load(os.path.join(base.datadir, 'rf_model.joblib'))
    X = df[metrics]
    X = X.fillna(X.mean())
    X = X.replace(np.inf, .1)
    sc = rf.predict_proba(X)[:,1]
    return sc
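
#Usage sketch for score_peptides: the classifier expects the columns in the
#module-level `metrics` list, i.e. predict_binding output merged with the
#similarity measures, and assumes the bundled rf_model.joblib is present.
def _example_score_peptides(res):
    res['prob'] = score_peptides(res)
    return res.sort_values('prob', ascending=False)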

def combine_wt_scores(x, y, key):
    """Combine mutant peptide and matching wt/self binding scores from a set
    of predictions. Assumes both dataframes were run with the same alleles.
    Args:
        x,y: pandas dataframes with matching prediction results
        key: name of the score column to copy over (the predictor scorekey)
    """

    x = x.sort_values(['pos','allele']).reset_index(drop=True)
    y = y.sort_values(['pos','allele']).reset_index(drop=True)
    x['matched_score'] = y[key]
    return x
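
#Toy demonstration of the positional alignment combine_wt_scores relies on:
#both frames must come from the same peptides/alleles so that sorting by
#(pos, allele) lines rows up one-to-one. The score values are made up.
def _example_combine_wt_scores():
    x = pd.DataFrame({'pos':[0,1], 'allele':['HLA-A*02:01']*2, 'score':[0.9,0.4]})
    y = pd.DataFrame({'pos':[0,1], 'allele':['HLA-A*02:01']*2, 'score':[0.2,0.3]})
    return combine_wt_scores(x, y, 'score')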

def make_blastdb(url, name=None, filename=None, overwrite=False):
    """Download protein sequences and make a blast db. Uses datacache module."""

    import datacache
    cachedir = datacache.get_data_dir()
    blastdb = os.path.join(cachedir, name)
    if os.path.exists(blastdb+'.phr') and overwrite == False:
        #blast files found
        return blastdb
    filename = datacache.fetch_file(url, filename=filename, decompress=True,
                                    subdir=None)
    cmd = 'makeblastdb -dbtype prot -in %s -out %s' %(filename, blastdb)
    tmp = subprocess.check_output(cmd, shell=True)
    return blastdb
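
#Sketch for building a custom db with make_blastdb. The url is hypothetical;
#any protein fasta works, and the NCBI blast+ tools (makeblastdb) must be on
#the path. name becomes the db file prefix in the datacache directory.
def _example_make_blastdb():
    url = 'http://example.org/my_proteome.fa.gz'
    return make_blastdb(url, name='my_proteome', filename='my_proteome.fa.gz')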

def make_human_blastdb():
    """Human proteome blastdb"""

    url = 'ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/pep/Homo_sapiens.GRCh38.pep.all.fa.gz'
    filename = 'Homo_sapiens.GRCh38.pep.all.fa.gz'
    blastdb = make_blastdb(url, name='GRCh38', filename=filename)
    return blastdb

def make_virus_blastdb():
    """Human virus blastdb"""

    url = 'http://www.uniprot.org/uniprot/?sort=score&desc=&compress=no&query=taxonomy:%22Viruses%20[10239]%22%20'\
          'keyword:%22Reference%20proteome%20[KW-1185]%22%20host:%22Homo%20sapiens%20(Human)%20[9606]%22'\
          '&fil=&force=no&preview=true&format=fasta'
    filename = 'uniprot_human_virus_proteome.fa.gz'
    blastdb = make_blastdb(url, name='human_virus', filename=filename)
    return blastdb

def self_matches(df, **kwargs):
    blastdb = make_human_blastdb()
    x = find_matches(df, blastdb, **kwargs)
    x = x.rename(columns={'sseq':'self_match','mismatch':'self_mismatches'})
    return x

def virus_matches(df, **kwargs):
    blastdb = make_virus_blastdb()
    x = find_matches(df, blastdb, **kwargs)
    if 'sseq' in x.columns:
        x = x.rename(columns={'sseq':'virus_match','mismatch':'virus_mismatches'})
    else:
        x['virus_match'] = None
    return x

def find_matches(df, blastdb, cpus=4, verbose=False):
    """Get similarity measures for peptides to a self proteome. Does a local
    blast to the proteome and finds most similar matches. These can then be
    scored.
    Args:
        df: dataframe of peptides
        blastdb: path to protein blastdb
    Returns:
        dataframe with extra columns: 'sseq','mismatch'
    """

    if verbose == True:
        print ('blasting %s peptides' %len(df))
    length = df.peptide.str.len().max()

    def check_mm(x):
        #corrected mismatches for shorter hits
        if x.length < length:
            return length-x.length+x.mismatch
        else:
            return x.mismatch

    bl = sequtils.blast_sequences(blastdb, df.peptide, evalue=200000,
                                  cpus=cpus, ungapped=True, gapopen=10, gapextend=2,
                                  qcov_hsp_perc=100, comp_based_stats=0)
    if len(bl) == 0:
        if verbose == True:
            print ('no hits found!')
        return df
    if verbose == True:
        print ('%s hits' %len(bl))
    cols = ['qseqid','sseq','mismatch']
    #ignore any hits with gaps
    bl = bl[(bl.gapopen==0)]
    #take longest hit with lowest e-value for each query
    bl = bl.sort_values(['qseqid','length','evalue'], ascending=(True,False,True))
    bl = bl.groupby(['qseqid'], as_index=False).first()
    #correct mismatches to account for shorter hits
    bl['mismatch'] = bl.apply(check_mm, 1)
    bl = bl[cols]
    #merge results
    x = df.merge(bl, left_on='peptide', right_on='qseqid', how='left')
    x = x.sort_values(by='mismatch', ascending=True)
    x = x.drop(['qseqid'], 1)
    #x['exact_match'] = x.mismatch.clip(0,1).fillna(1)
    return x
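
#Usage sketch for find_matches against the human proteome db built above.
#The input only needs a 'peptide' column; the peptides here are arbitrary
#examples. Requires blast+ and will download the proteome on first use.
def _example_find_matches():
    df = pd.DataFrame({'peptide': ['SIINFEKL','KVAELVHFL']})
    blastdb = make_human_blastdb()
    return find_matches(df, blastdb, cpus=2)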

def wt_similarity(x, matrix='blosum62'):
    if pd.isnull(x.wt):
        #no wt peptide (e.g. indels/frameshifts)
        return
    matrix = tepitope.get_matrix(matrix)
    return tepitope.similarity_score(matrix, x.peptide, x.wt)

def self_similarity(x, matrix='blosum62'):
    if pd.isnull(x.self_match):
        #the blast merge leaves NaN rather than None for missing matches
        return
    matrix = tepitope.get_matrix(matrix)
    return tepitope.similarity_score(matrix, x.peptide, x.self_match)

def virus_similarity(x, matrix='blosum62'):
    if pd.isnull(x.virus_match):
        return
    matrix = tepitope.get_matrix(matrix)
    return tepitope.similarity_score(matrix, x.peptide, x.virus_match)

def get_closest_match(x):
    """Create column with the closest matching peptide. If there is no wt
    peptide use the self match. Vector method for apply."""

    if pd.isnull(x.wt):
        return x.self_match
    else:
        return x.wt

def anchor_mutated(x):
    return peptutils.compare_anchor_positions(x.wt, x.peptide)

def summary_plots(df):
    """Summary plots for testing results"""

    import matplotlib.pyplot as plt
    f,axs = plt.subplots(2,2,figsize=(10,10))
    axs = axs.flat
    g = df.groupby(['name']).size().sort_values(ascending=False)[:20]
    g.plot(kind='barh',ax=axs[0],color='gray')
    axs[0].set_title('peptide counts')
    df.variant_class.value_counts().plot(kind='pie',autopct='%.1f',ax=axs[1])
    axs[1].set_title('variant classes')
    df.self_mismatches.value_counts().sort_index().plot(kind='bar',ax=axs[2])
    axs[2].set_title('mismatches to self')
    #df.wt_similarity.hist(ax=axs[3])
    #df.plot('wt_similarity','self_similarity',kind='scatter',ax=axs[3])
    df.plot('score','matched_score',kind='scatter',ax=axs[3])
    return

def show_predictors():
    for p in base.predictors:
        print (p)

def check_imports():
    try:
        import varcode
    except Exception as e:
        print (e)
        print ('varcode required. please run pip install varcode')
        return False
    return True

def fetch_ensembl_release(path=None, release='75'):
    """Get pyensembl genome files"""

    from pyensembl import Genome, EnsemblRelease
    #this call should download the files
    genome = EnsemblRelease(release, species='human')
    genome.download(overwrite=False)
    genome.index(overwrite=False)
    genome.cache_directory_path = path
    print ('pyensembl genome files cached in %s' %genome.cache_directory_path)
    #run_pyensembl_install()
    return

def check_ensembl(release='75'):
    """Check pyensembl ref genome is cached. Needed for running in a snap"""

    #check if running inside a snap package so we can download
    #the genome files for pyensembl
    cache_dir = None
    if base.check_snap() is True:
        #home = os.path.join('/home', os.environ['USER'])
        home = os.environ['SNAP_USER_COMMON']
        cache_dir = os.path.join(home, '.cache')
        os.environ['PYENSEMBL_CACHE_DIR'] = cache_dir
    print ('checking for ref human genome')
    fetch_ensembl_release(cache_dir, release)
    return

def run_vep(vcf_file, out_format='vcf', assembly='GRCh38', cpus=4, path=None):
    """Run ensembl VEP on a vcf file for use with pvacseq.
    see https://www.ensembl.org/info/docs/tools/vep/script/index.html
    """

    fname = os.path.splitext(vcf_file)[0]
    out = fname+'.vep.%s' %out_format
    if path is None:
        path = '/local/ensembl-vep/'
    path = os.path.join(path, './vep')
    cmd = '{p} --input_file {i} --pick --force_overwrite \
--assembly {a} --fork {c} \
--symbol --terms SO --output_file {o} \
--plugin Downstream --plugin Wildtype \
--cache --offline'.format(o=out, i=vcf_file, a=assembly, c=cpus, p=path)
    if out_format == 'vcf':
        cmd += ' --format vcf --vcf'
    print (cmd)
    tmp = subprocess.check_output(cmd, shell=True)
    return
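
#Usage sketch for run_vep, assuming VEP is installed under the given path
#with its offline cache plus the Downstream and Wildtype plugins available:
def _example_run_vep():
    run_vep('sample.vcf', assembly='GRCh38', cpus=4, path='/local/ensembl-vep/')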

def plot_variant_summary(data):
    #note: bokeh.charts was removed from bokeh and now requires the
    #legacy bkcharts package
    from bokeh.plotting import figure
    from bokeh.charts import Donut
    d = Donut(data, label=['abbr', 'medal'], values='medal_count',
              text_font_size='8pt', hover_text='medal_count')
    return d

def test_run():
    """Test run for sample vcf file"""

    print ('neoepitope workflow test')
    path = os.path.dirname(os.path.abspath(__file__))
    options = config.baseoptions
    options['base']['predictors'] = 'netmhcpan,tepitope'
    options['base']['mhc1_alleles'] = 'HLA-A*02:01'
    options['base']['path'] = 'neo_test'
    options['base']['overwrite'] = True
    #options['base']['mhc2_length'] = 11
    #options['base']['verbose'] = True
    options['base']['cpus'] = 2
    options['neopredict']['vcf_files'] = os.path.join(path, 'testing', 'input.vcf')
    options['neopredict']['release'] = '75'
    options = config.check_options(options)
    W = NeoEpitopeWorkFlow(options)
    check_ensembl(release='75')
    st = W.setup()
    W.run()

def varcode_test():
    path = os.path.dirname(os.path.abspath(__file__))
    infile = os.path.join(path, 'testing', 'input.vcf')
    variants = load_variants(vcf_file=infile)
    get_variants_effects(variants)
    return