\documentclass{beamer}
% Slides for the Nov 30 - Dec 2, 2011 FBA tutorial at SRI,
% based on the slides for the SRI 21-22 June FBA tutorial. 

% Setup appearance:

\usetheme{Darmstadt}
\usefonttheme[onlylarge]{structurebold}
\setbeamerfont*{frametitle}{size=\normalsize,series=\bfseries}
\setbeamertemplate{navigation symbols}{}
\usepackage{graphicx}

% Standard packages

\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage[T1]{fontenc}

% Setup TikZ

\usepackage{tikz}
%\usetikzlibrary{arrows}

% The main document

\begin{document}

\tikzstyle{block}=[draw opacity=0.7,line width=1.4cm]

% Author, Title, etc.

\title[MetaFlux] 
{%
  MetaFlux in Pathway Tools 
}

\author[Latendresse]
{
  Mario Latendresse\\
  Markus Krummenacker
}

\institute[SRI International]
{
 SRI International
}

\date[Jan 22--23, 2013]{Jan 22--23, 2013}


\begin{frame}{}
  \titlepage
\end{frame}

\begin{frame}{Outline}
  \tableofcontents
\end{frame}

\section{Overview of MetaFlux}

\begin{frame}{The FBA Tool in Pathway Tools}

\begin{enumerate}
\item The FBA Tool, MetaFlux, was introduced in version 15.0 of Pathway Tools (Feb 2011)

\item MetaFlux has three modes: solving, development, and gene knockout

\item Solving mode: compute the fluxes of reactions to produce the biomass

\item Development mode: trying different biomass, nutrients,
  secretions, and reactions to create a model

\item Gene knockout: deactivating gene(s) from the model and
  see the effect on growth (testing a model)
\end{enumerate}

\end{frame}

%%%
\begin{frame}{What is a Flux of a Reaction?}

\begin{itemize}
\item Fluxes are rates of reactions, typically expressed as mmol per gram dry weight per hour, denoted mmol/gDW/hr

\item Other units could be used (e.g., mol instead of mmol)

\item The FBA tool does not assume any unit for the fluxes as this is not needed to get valid results

\item Solving an FBA model gives the fluxes of all reactions that are needed to create a non-zero flux for the biomass (set of metabolites necessary for growth)
\end{itemize}
\end{frame}


%%%
\begin{frame}{Creating an FBA Model vs Solving an FBA Model (1)}

\begin{block}{Creating a Flux Balance Analysis (FBA) Model}
Creating an FBA model consists in curating the
description of an organism such that it represents 
as accurately as possible the {\it in vivo} reaction fluxes 
under certain conditions.
\end{block}

\begin{block}{Solving an FBA Model}
Solving an FBA model computes the reaction fluxes 
under certain conditions. The model could be infeasible: no biomass
produced.
\end{block}

\begin{block}{Gene Knockout}
A gene knockout deactivates the reactions catalyzed by a
gene and then solves the resulting model. We can verify an FBA model by comparing
the results (growth/no growth) of a gene deletion with experimental data.
\end{block}

\end{frame}

%%%%%%%%%%
\begin{frame}{Creating an FBA Model vs Solving an FBA Model (2)}
\begin{center}
\includegraphics[angle=0,scale=0.45]{fbaGenSolvingMode.png}
\end{center}
\end{frame}

%%%%%%%%%%%
\section{Introduction to Flux Balance Analysis}

%%%
\begin{frame}{What is Flux Balance Analysis (FBA)?}

\begin{block}{Computing Fluxes of Reactions for Organism Growth}

Given a network of biochemical reactions, nutrients and secretions,
assign a flux (a numerical value) to every reaction to produce a set
of biomass metabolites for growth. Maximize the biomass.

\end{block}

%%%
\begin{block}{Main Assumptions}
\begin{itemize}
\item The system is in a steady state (metabolite concentrations do not vary)
\item Regulation is ignored 
\item Cofactors are ignored
\item Compartments are not completely taken care of
\item Some transport reactions must be explicitly specified (e.g., ATP synthase)
\end{itemize}
\end{block}

\end{frame}

%%%
\subsection{Standard LP Formulation}
\begin{frame}{Standard FBA Mathematical Formulation}

\begin{block}{Main Formulation}
\begin{align*}
  &\mbox{Max}\, b_{\mbox{\tiny biomass}} \\ 
  &\mathbf{S}\mathbf{v} = 0  \\
  &\mbox{where} \,\mathbf{S}\, \mbox{is the stoichiometric matrix}
\end{align*}
\end{block}

$\mathbf S$ is a matrix where
each row represents a metabolite and each column represents a reaction.

$ b_{\mbox{\tiny biomass}}$ is the flux for the biomass reaction.

$\mathbf v$ is a vector of variables representing the fluxes (real numbers).
\ \\
Reactions must be mass balanced.

\end{frame}

%%%%%%%%%%%
\begin{frame}{An Example of an LP Formulation}

\begin{block}{The Reactions, Biomass,  Nutrients C, D, E}
$\begin{array}{ll}
R_1 :& A + 2 B \rightarrow Biomass \\
R_2 :& C + D \rightarrow 2 A \\
R_3 :& D + E \rightarrow B
\end{array}
$
\end{block}

\begin{block}{The Linear Program (LP)}
$\begin{array}{ll}
A: & 2 R_2 - R_1 = 0 \\
B: & R_3 - 2 R_1 = 0 \\
C: & R_C - R_2 = 0 \\
D: & R_D -R_2 - R_3 = 0 \\
E: & R_E -R_3 = 0\\
& Maximize\; R_1 \\
&0 \le R_C, R_D, R_E \le  100
\end{array}
$
\end{block}

\end{frame}


%%%
\begin{frame}{The Biomass Reaction}

It is a virtual reaction representing a set of metabolites to be produced to enable growth.

\begin{block}{}
$$c_1 B_1 + c_2 B_2 + \cdots + c_n B_n \rightarrow Biomass$$
\end{block}

The coefficients $c_i$ are integers (positive or negative).\\
That reaction is in the $\mathbf S$ matrix (as any other reaction).\\

Note: {\bf all $B_i$ metabolites must be produced by some other
  reactions in the organism to satisfy the LP formulation with a
  non-zero biomass}.

\end{frame}

%%%
\begin{frame}{Solving the FBA Formulation}

\begin{itemize}
\item Solving such a formulation is done by a Linear Programming (LP)
solver

\item There are many open source and commercial LP solvers: CPLEX,
  GLPK, SCIP, Gurobi, and more 
  
\item Pathway Tools 15.0 uses SCIP

\item Even with thousands of reactions, typical FBA/LP formulation can
  be solved in a few seconds
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Submitting an FBA Input File}

%%%
\begin{frame}{Syntax of the MetaFlux Input File}

\begin{itemize}
\item The input to MetaFlux is a text file (a \texttt{.fba} file)

\item Metabolites are specified as a list of names or frame-ids, or by using \texttt{all-compounds}

\item Reactions are specified as frame-ids (common) or reaction equations (rare)

\item The keyword \texttt{metab-all} is the set of all metabolic reactions in the PGDB

\item The keyword \texttt{metacyc-metab-all} is the set of all
  metabolic reactions in MetaCyc

\item Full documentation in the FBA Chapter of the Pathway Tools' User Guide

\item Let's look at the \texttt{bsubcyc1.fba} file
\end{itemize}
\end{frame}

%%%
\begin{frame}{}
\begin{itemize}

\item A graphical user interface (GUI) is used to submit an FBA input file

\item The output files (e.g., {\tt .sol}) are displayed as a text file via a browser

\item The Cellular Omics Viewer can be invoked via one click to show the resulting fluxes

\item The invocation of the MetaFlux GUI is under the Tools menu 

\item GUI demo using the {\tt bsubcyc1.fba} file
\end{itemize}
\end{frame}


\subsection{Output Produced}
%%%
\begin{frame}{Four Files Generated}

Four files are generated when submitting a model.

\begin{itemize}
\item A {\tt .lp} file: the input to the SCIP solver
\item A {\tt .log} file: a trace of the output of the generation phase and of the SCIP solver. It contains
the quality of the solution and the reactions that were filtered out (e.g., unbalanced or might be unbalanced).
\item A {\tt .sol} file: a summary of the solution found as a text file. {\bf This is the file to look at first}. Contains
 metabolites produced, nutrients and secretions used, added reactions, fluxes for all
 reactions, and reactions with zero flux.
\item A {\tt .dat} file: an omics data file for the Pathway Tools Cellular Overview. Contains
only reactions with non-zero flux. Gap-filled reactions are not included.
\end{itemize}
 
\end{frame}

%%%
\begin{frame}{The Solution File}

The solution file lists the:

\begin{itemize}
\item fixed- and try-biomass metabolites that could be produced
\item fixed- and try-nutrients used
\item fixed- and try-secretions produced
\item added reactions from MetaCyc (reversed or not)
\item added reversed reactions from the PGDB
\item reactions with their non-zero flux
\item remaining reactions that have zero flux
\end{itemize}

\end{frame}

\begin{frame}{The {\tt .log} File}
\begin{itemize}
\item Contains warnings and possibly error messages about unbalanced and instantiated reactions

\item Each reaction that is unbalanced or might be unbalanced is not included in the model. The list of these reactions is given in that file.

\item The process of instantiation of reactions is summarized in that file

\item More on instantiation of reactions in Markus' presentation

\end{itemize}
\end{frame}


\begin{frame}{The {\tt .dat} File}
\begin{itemize}
\item  Each reaction having a flux is listed in that file

\item The file can be used as is by the Cellular Omics Viewer of Pathway Tools

\item The file can be used in Web or Desktop mode

\item The Cellular Omics Viewer is directly accessible from the MetaFlux GUI

\end{itemize}
\end{frame}

\begin{frame}{The {\tt .lp} File}

\begin{itemize}
\item The {\tt .lp} file is the input of the solver. The complete specification of the model is in that file.

\item The file has three parts: the objective function ({\tt maximize obj}), the constraints ({\tt Subject to}), the list of variables with their lower and upper bounds ({\tt bounds}).

\item There are lots of comments in that file

\item Each metabolite produces one constraint: the constraint contains all reactions where that metabolite is either produced or consumed. Each constraint says that production and consumption of a specific metabolite must balance

\item For MILP, the objective function can be very large (over 10000 terms); for LP, this is a single term
 
\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Genes and Reactions Knockout}

\begin{frame}{Testing a Model Using Genes Knockout}
\begin{block}{Knocking Out One Gene}
\begin{itemize}
\item Knocking out a gene means to deactivate the reactions catalyzed by
  that gene

\item Isozymes are taken into account
\end{itemize}
\end{block}

\begin{block}{Multiple Knockouts}
More than one gene might be knocked out simultaneously
\end{block}   

\begin{block}{Batch Knockouts}
Typically, MetaFlux is used to run a batch of gene knockouts (e.g., all genes)  
\end{block}   
\end{frame}

%%%%%%%%%%%%%
\begin{frame}{Knockout Parameters in FBA Input File (1)}

\begin{itemize}

\item The parameter \texttt{knockout-genes} gives the genes (names or frame ids) to
  knock out, not necessarily simultaneously

\item We can specify \texttt{metab-genes}
  for all genes that are involved in metabolic reactions of the PGDB

\item The parameter \texttt{knockout-nb-genes} gives the number of
  genes to knock out from the set \texttt{knockout-genes}

\item If \texttt{knockout-nb-genes} is 1, it is a single-gene
  knockout; if it is 2, it is a double-gene knockout, and so on 

\item Note that a double-gene knockout for all genes is about 500 thousand knockout experiments if you have, say, 1000 genes
 
\end{itemize}

\end{frame}

%%%%%%%%%%%%%
\begin{frame}{Knockout Parameters in FBA Input File (2)}

\begin{itemize}
\item Parameter \texttt{knockout-summary-only} controls the number of
  \texttt{.sol} files generated

\item If it is \texttt{yes} then only one \texttt{.sol} is generated
  summarizing the gene knockout solutions

\item If it is \texttt{no} then a \texttt{.sol} file is generated for
  each gene knockout run, plus the summary

\item Each file has the suffix \texttt{knockout-n}, where n is an integer
  starting with 0

\item Saying \texttt{no} could generate thousands of \texttt{.sol} files
when using \texttt{metab-genes} for \texttt{knockout-genes} 
 
\end{itemize}
\end{frame}

%%%%%%%%%%
\begin{frame}{Knockout Parameters in FBA Input File (3)}

\begin{itemize}
\item The parameter \texttt{knockout-reactions} can be used to specify explicitly the reactions to deactivate without specifying genes

\item The parameter \texttt{knockout-nb-reactions} is used to specify the number of reactions to deactivate simultaneously

\item These two parameters can be combined with the parameters for knocking out genes
\end{itemize}

\end{frame}

%%%%%%%%%%%%%
\begin{frame}{Knockout Parameters in FBA Input File (4)}
\begin{itemize}

\item Examples of gene knockout run on EcoCyc for

\begin{enumerate}
\item A few genes: cysN, cysD, gltX

\item All metabolic genes with summary solution file only (takes about one minute)

\item All metabolic genes with all solution files generated (takes more than one minute)
\end{enumerate}

\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction to Development Mode}

\begin{frame}{Minimal Nutrient Sets}
\begin{block}{Definition of a Minimal Nutrient Set}
A minimal nutrient set is a sufficient set of nutrients to have growth
and from which any nutrient removed results in no growth.
It is likely that there are many minimal nutrient sets, of various sizes, for one
organism and one biomass reaction.
\end{block}

\begin{block}{Minimum Set of Nutrients}
A minimal nutrient set is not necessarily the smallest set of
nutrients. A minimum set of nutrients is a minimal set having the
smallest number of nutrients among all minimal sets.
\end{block}


\end{frame}

%%%%%%%%%%
\begin{frame}{Verifying Minimal Sets for EcoCyc}

\begin{itemize}
\item Let's assume a simple biomass reaction, and the current set of
  reactions in EcoCyc: we will not add reactions to EcoCyc

\item We can infer from biological knowledge some small sets of
  nutrients

\item We will need to find out which secretions are necessary to use
  these minimal sets
\end{itemize}

\end{frame}

%%%%%%%%%%
\begin{frame}{Finding the Right Secretions}

\begin{itemize}

\item The \texttt{try-secretions} parameter can specify a set of
  secretions to try. We can specify \texttt{all-compounds} to try them
  all.

\item The \texttt{try-secretions-weight} should be negative, say $-10$,
  so that it {\it costs} something to add a secretion to the model

\item MetaFlux will report which secretions are needed to have growth,
  given the biomass reaction, the nutrients, and the set of reactions
  of EcoCyc

\item MetaFlux could have no feasible solution, that is, no set of
  secretions that it can add to generate growth
\end{itemize}

\end{frame}

%%%
\begin{frame}{Infeasible Formulation}

\begin{itemize}
\item FBA/LP formulation is infeasible: no solution or the only solution
is a zero flux for the biomass 

\item Infeasibility is likely due to some metabolites in the biomass that cannot be
produced 

\item This might be due to missing reactions, nutrients, secretions, or a
combination of these

\item {\bf Gap-filling proposes model modifications of minimal cost}
\end{itemize}

\end{frame}

%%%
\begin{frame}{If Infeasible with All Secretions Tried}

\begin{itemize}
\item Assuming that even with all secretions tried, no feasible solution is found

\item We could find out which subset of biomass metabolites can be produced

\item Specify all the biomass metabolites as \texttt{try-biomass}

\item Specify a large positive weight (a gain) for \texttt{try-biomass-weight}, say 1000

\item {\bf MetaFlux can apply multiple gap-filling simultaneously}
\end{itemize}

\end{frame}

%%%%%%%%%%%
\begin{frame}{Controlling the Weight Parameters}

\begin{itemize}
\item  A weight is a positive or negative integer 

\item A positive weight is a gain, whereas a negative weight is a cost

\item MetaFlux always tries to maximize an objective function: if
  something (e.g., secretions) has a cost, MetaFlux minimizes its
  use; if it has a gain, it maximizes its use or production
  (biomass)

\item The absolute value of a weight is not very important, but the relative weight values are important

\item Consider a gain of 1000 for one biomass metabolite vs the cost
  of 10 (weight $-10$) for a secretion. Consider a gain of 10 for one
  biomass metabolite. What is the difference in possible solutions?

\item Let's look at a .lp MILP file to better see what is mathematically happening 
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Generating a Model}

%%%%%%%%%%%%
\subsection{Single and Multiple Gap-Filling}

%%%
\begin{frame}{Single and Multiple Gap-Filling}
\begin{block}{Typically ``Gap-Filling'' Means ``Completing the Reaction Network''}
\begin{itemize}
\item  Gap-filling adds reactions from a reference database (e.g., MetaCyc)
to the FBA model to produce missing biomass 

\item Model might still be infeasible due to a lack of reactions in MetaCyc, or lack of nutrients, or secretions
\end{itemize}
\end{block}

\begin{block}{Solution: Gap-Filling Extended to Important Metabolites}
 Nutrients, secretions, and biomass metabolites can also be added or removed. For biomass metabolites, we
 try to include as many as possible while still getting a feasible solution.
\end{block}   
\end{frame}

%%%
\begin{frame}{Multiple Gap-Filling}

\begin{block}{Multiple Gap-Filling}
Multiple gap-filling is done on reactions, nutrients, secretions, and biomass metabolites {\bf at the same
time}.
\end{block}

\begin{block}{Objective} Try to add as many biomass metabolites as possible by adding a minimum
number of nutrients, secretions, and reactions; and still get a feasible solution.
\end{block}

\begin{block}{Usage}
Speeds curation of a PGDB.
It is a technique to complete a PGDB to do standard FBA analysis.
\end{block}

\begin{block}{}
Our multiple gap-filling extends the reaction gap-filling idea developed by Costas Maranas.
\end{block}

\end{frame}

\section{The MILP Formulation}

%%%
\begin{frame}{Linear Programming Becomes Mixed-Integer Linear Programming (MILP)}

The LP formulation becomes a Mixed Integer Linear Program (MILP):
binary variables control the addition of reactions, nutrients, biomass,
and secretions.

A constraint to control the flux $r_i$ of a reaction with a binary variable $s_i$:

$$  r_i - s_i 1000 \le 0 $$

When $s_i$ is 1, the reaction $r_i$ can have a non-zero flux. And that
$s_i$ adds a cost or gain in the objective function to maximize.

The biomass, secretions and nutrients can be converted into virtual reactions.

Each biomass metabolite is controlled by one reaction: {\bf no more major constraint as in the LP formulation}.

\end{frame}


\subsection{User Input: Fixed and Try Sets, Weights}

%%%
\begin{frame}{Fixed Sets for Multiple Gap-filling}

The user provides fixed sets of reactions and metabolites ``at no cost or gain''.

\begin{itemize}
\item Set of fixed reactions to use at no cost: typically all metabolic reactions of the PGDB
are used 

\item Sets of nutrient and secreted metabolites that can be used at no cost 

\item Set of metabolites that must be produced in the biomass and for which no gain
is given

\item Any or all of these sets might be empty 

\item {\bf It is recommended to start with an empty set of fixed biomass metabolites.}

\end{itemize}

\end{frame}

%%%
\begin{frame}{Try-Sets and Weights for Multiple Gap-filling}

The user provides four try-sets and weights to control the generation of the model.

\begin{itemize}
\item Set of reactions to try to add at a cost: typically all metabolic reactions of MetaCyc

\item Sets of nutrients, secretions and biomass metabolites to try to
add to the model 

\item Weights, as integers for gain and cost, for the reactions, nutrients, secretions and biomass metabolites

\item Typically, adding a biomass metabolite is a gain, but adding a
  reaction or a nutrient is a cost. We have different weights for
  different types of reactions (e.g., spontaneous, in the taxonomic
  range, etc.)

\end{itemize}

\end{frame}


%%%
\begin{frame}{The Objective Function for Multiple Gap-Filling (1)}

The objective function to maximize is
\begin{align*}
&\sum_i w_t R_i + \sum_i w_o R_i + \sum_i w_u R_i   + \sum_i w_s R_i + \sum_i w_r R_i + \sum_i w_{rm} R_i \\
&{}+ \sum_i w_b B_i + \sum_i w_{sec} S_i + \sum_i w_n N_i   - \sum_j F_j  \\
&\mbox{\bf where}\, w_t, w_o, w_u, w_s, w_r, w_{rm}\, \mbox{are weights for reactions} \\
&\;\mbox{in taxonomic range, outside taxonomic range,}\\
&\;\mbox{unknown taxonomic range, spontaneous, reversed from PGDB},\\ 
&\;\mbox{and reversed from MetaCyc}\\
&\mbox{\bf where}\, w_b , w_{sec}, w_n\, \mbox{are weights for biomass, secretions, and nutrients} \\
&\mbox{\bf where}\, B_i, R_i, S_i, N_i\, \mbox{are binary variables} \\
&\mbox{\bf where}\, F_j\, \mbox{are the fluxes of reactions}.
\end{align*}


\end{frame}

%%%
\begin{frame}{The Objective Function for Multiple Gap-Filling (2)}

\begin{itemize}
\item Different weights $w_t, w_o, w_u, w_s, w_r, w_{rm}$  for reactions 

\item  The $w_t$ are
for reactions from MetaCyc being in the taxonomic range of the PGDB
whereas $w_o$ are for reactions outside the taxonomic range and $w_u$
for reactions of unknown taxonomic range

\item The weight $w_s$ for spontaneous reactions should be a low
negative number and not zero to avoid bringing them all in the model

\item  The $w_r$ are for reversed reactions from the PGDB that are not reversible

\item  Similarly for  $w_{rm}$ for reversed reactions from MetaCyc

\item The term $- \sum_j F_j$ forces removal of the high-flux loops 

\item At that stage, the real fluxes of the reactions are not optimized. Solving the generated
FBA model would give the optimized fluxes.
\end{itemize}

\end{frame}

%%%
\begin{frame}{Mixed Integer Linear Programming (MILP)}

\begin{itemize}

\item A MILP formulation is typically more difficult to solve exactly than an LP formulation
due to the integer and binary variables. Essentially, the integer and binary variables require
the solver to try to solve many (e.g., thousands of) LP cases.

\item The solver might take forever to find the optimal solution

\item We typically set a time limit to the solver, say 5 minutes

\item MILP solvers vary widely in their performance and capabilities
\end{itemize}

\end{frame}


%%%%%%%%%%%%%%%%%%%%%%
\section{Development Mode}


\subsection{Methodology}

%%%
\begin{frame}{The Weights: Costs and Gains}
\begin{block}{Typical Weights}
\begin{itemize}
\item Adding a biomass metabolite to the model is a {\bf gain}. 

\item Adding any reaction, secretion, or nutrient has a {\bf cost}. 

\item That corresponds to the usual goal: generating as many biomass metabolites as possible
with the minimum number of nutrients, secretions, and added reactions
\end{itemize}
\end{block}

\begin{block}{Variations}
But other scenarios are useful: use as many nutrients and secretions as possible
\end{block}

\begin{block}{Selecting the Right Weights for Reactions}
There are many different weights for the reactions: taxonomic range, reversed, and more
\end{block}
\end{frame}

\begin{frame}{Balancing Out Costs and Gains (1)}
\begin{itemize}
\item Example: 1000 for a biomass metabolite, -10 for a reaction, -2 for a nutrient, -1 for a secretion

\item The solver could add as many as 100 reactions to produce one biomass metabolite

\item The solver would not add 101 reactions since there would be a net loss

\item Setting the gain at $10^6$ for one biomass metabolite would certainly add as many reactions
as possible  to produce every biomass metabolite since MetaCyc will
have less than 100,000 reactions for the foreseeable future
\end{itemize}

\end{frame}

%%%
\begin{frame}{Balancing Out Costs and Gains (2)}
\begin{itemize}
\item The ratio between the weight (cost) for a reaction and a nutrient should also
be wisely selected

\item Example: -10 for a reaction and -6 for a nutrient would bring in some reactions
capable of replacing any two nutrients. This is typically undesirable. But you might
be interested to see if this is possible.

\end{itemize}
\end{frame}

%%%
\begin{frame}{The Reaction Weights}
\begin{itemize}
\item The basic weight for a reaction from MetaCyc 
  \begin{itemize}
  \item {\bf outside the taxonomic range} of the PGDB is given by {\tt try-reactions-weight}
  \item {\bf in the taxonomic range} of the PGDB is given by {\tt try-reactions-in-taxa-weight}
  \item {\bf of unknown taxonomic range} is given by {\tt try-reactions-unknown-taxa-weight}
  \end{itemize}

\item A reversed reaction from MetaCyc is added with the {\it additional} weight given by {\tt try-reactions-reverse-try-weight}.
This is an additional weight to the basic weight.

\item A reaction from the PGDB, but reversed, is added with the weight given by {\tt try-reactions-reverse-weight}
\end{itemize}

\end{frame}

%%%
\begin{frame}{Suggested Simple {\tt .fba} Settings}

\begin{itemize}
\item The entire biomass reaction is specified as a list of metabolites in the try-biomass section.
No metabolites specified in the biomass fixed set.

\item Similarly for nutrients and secretions: all metabolites are specified in the try-sets

\item Parameter try-biomass-weight is set to a high value, say 10000. All other weights are negative with small values 
($-1$ to $-100$).

\item No coefficients for the metabolites (biomass, nutrients, or secretions) 

\item The reaction section says \texttt{metab-all}. The try-reactions section is empty.

\item We have \texttt{try-add-reverse-rxns: no}

\item Execute this \texttt{.fba} file and see how many try-biomass metabolites can be produced 

\end{itemize}

\end{frame}

%%%
\begin{frame}{Settings When not all Biomass is Produced}

\begin{itemize}
\item You will rarely get all the try biomass metabolites produced the
  first time 
\item In this case, add the keyword \texttt{metacyc-metab-all} to section \texttt{try-reactions}
\item Execute
\item More biomass metabolites could be produced with suggested reactions to add
\item Analyze the suggested reactions to add: are they in the same taxonomic range as the organism of the PGDB, do they form a pathway, etc.?
\item If the number of suggested reactions to add is overwhelming, decrease the list of metabolites to try in the biomass reaction
\end{itemize}

\end{frame}

%%%
\begin{frame}{Other Suggested {\tt .fba} Settings}

\begin{itemize}
\item If the previous settings do not provide more biomass produced, add \texttt{try-add-reverse-rxns:yes} and \texttt{try-add-reverse-try-rxns:yes}

\item You could also try reversing only the reactions of your
  PGDB, without considering the MetaCyc reactions

\item Make sure the list of metabolites to secrete is not too small: add secretions, to the try-secretion set, that you think might be missing

\item Execute

\item Really add, to your PGDB, some of the suggested reactions to add
  that you think are indeed missing

\item Really change the directionality of some of the reactions as
  suggested by MetaFlux
\end{itemize}

\end{frame}

%%%
\begin{frame}{Controlling Which Reactions Can Be Added}

\begin{itemize}
\item Most of the reactions are added from the set given by {\tt try-reactions}

\item Reversed reactions from MetaCyc might be added only if {\tt try-add-reverse-try-rxns} is {\tt yes}

\item Reversed reactions from the PGDB might be added only if {\tt try-add-reverse-rxns} is {\tt yes}

\end{itemize}

\end{frame}

%%%
\begin{frame}{Move Try Metabolites into Fixed Sets}
\begin{itemize}

\item When it is clear that some biomass metabolites can be
  produced, they can be listed in the {\tt biomass} section, that is
  as fixed biomass metabolites

\item Similarly for secretions and nutrients, they should eventually be listed as fixed metabolites in the {\tt .fba} file 

\end{itemize}
\end{frame}

%%%

%%%
\begin{frame}{Iterative Tuning}

\begin{itemize}
\item The preceding suggestions will have to be repeated until a satisfactory set of biomass metabolites is produced

\item It will require a careful analysis of the suggested reactions to add

\item Typically, this process requires from several days to several weeks of work

\end{itemize}

\begin{block}{FBA Model}
The goal of this entire process is to get only fixed sets so that it describes a desired FBA model
\end{block}

\begin{itemize}
\item At some point it might be useful to change the weights to
  increase the speed of the solver 
\end{itemize}
\end{frame}

%%%
\begin{frame}{Checking a Model}

\begin{itemize}

\item A complete FBA model will require proper coefficients for the biomass reaction

\item These coefficients could come from {\it in vivo} experiments

\item The solving of a model will verify that the biomass flux is similar to {\it in vivo} experiments

\item The completed model can also be verified against gene knockout experiments

\end{itemize}

\end{frame}


%%%%%%%

\section{The Weights and Gap: Fine Control}

%%%
\begin{frame}{Suboptimal Solutions}

\begin{itemize}
\item If the SCIP solver terminates due to the time limit, a suboptimal solution has been found. 

\item An optimal solution is sometimes needed to have the correct reactions to add.

\item The quality of the solution is given by the ``gap'' percentage given in the terminal output alongside all output of the SCIP solver.

\item This gap value must be interpreted based on the weights used. Typically, we expect a gap of less than 5\% to call it good enough. 

\item In the first stages of generating a model, we limit the solver to 5 minutes. At some steps it might be useful to increase it: 20 minutes, 30 minutes, one hour, or more.

\end{itemize}
\end{frame}
%%%
\begin{frame}{How To Interpret the Gap}

\begin{itemize}
\item It is simpler to analyze the difference between the last two values under the headings ``Dualbound'' and ``Primalbound'' of the SCIP output

\item Let's assume the gap is not 0\%. And the suboptimal solution suggests three reactions to add.

\item Assume that Primalbound is 319900, Dualbound is 320000. The difference is 100. 

\item Assuming that the cost for adding each reaction is 50, it means that it is possible that SCIP will find an optimal solution removing two of the suggested reactions to add

\item If you doubt that the suggested reactions to add are necessary, rerun with more time for the solver

\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%

\end{document}


