[1]:
# the boring stuff:
import numpy
import pandas
import scipy
from matplotlib import pyplot
from IPython.display import display, Image

# probabilistic programming:
import arviz
import pymc
import sunode
import pytensor
import pytensor.tensor as pt
import pytensor.printing

# modeling:
import calibr8
import murefi

Differentiable ODE models with murefi and sunode

This example builds on top of the basic Michaelis-Menten example to showcase how a murefi ODE model can be auto-differentiated with sunode. As we will see at the end of this example, this enables efficient MCMC sampling with NUTS.

Preparing Data & Models

The next cell is a condensed version of the corresponding chapter from the basic example (Example_03_MichaelisMenten.ipynb).

[2]:
class ProductAssayModel(calibr8.BasePolynomialModelT):
    def __init__(self):
        super().__init__(
            independent_key='P',
            dependent_key='A570',
            mu_degree=1,
            scale_degree=0,
        )

def observe_with_true_parameters(x):
    return scipy.stats.t.rvs(loc=0.17 + 0.12 * x, scale=0.03, df=3)

# generate fake calibration data with a linear relationship
numpy.random.seed(202103)
N = 48
X = numpy.linspace(0.1, 10, N)
Y = observe_with_true_parameters(X)

cm_product = ProductAssayModel()
theta_fit, _ = calibr8.fit_scipy(
    cm_product,
    independent=X, dependent=Y,
    theta_guess=[0, 0.2, 0.05, 5],
    theta_bounds=[(-2, 2), (0.001, 0.5), (0.001, 0.5), (1, 10)]
)
fig, axs = calibr8.plot_model(cm_product);
for ax in axs:
    ax.set_xlabel('product concentration   [mmol/L]')
axs[0].set_ylabel('$A_{570}$   [a.u.]')
axs[1].set_ylabel("")
axs[0].set_ylim(bottom=0)
axs[1].set_ylim(bottom=0)
fig.tight_layout()
pyplot.show()

class MichaelisMentenModel(murefi.BaseODEModel):
    def __init__(self):
        self.guesses = dict(S_0=5, P_0=0, v_max=0.1, K_S=1)
        self.bounds = dict(
            S_0=(1, 20),
            P_0=(0, 10),
            v_max=(0.0001, 5),
            K_S=(0.01, 10),
        )
        super().__init__(
            independent_keys=['S', 'P'],
            parameter_names=["S_0", "P_0", "v_max", "K_S"],
        )

    def dydt(self, y, t, theta):
        S, P = y
        v_max, K_S = theta

        dPdt = v_max * S / (K_S + S)
        return [
            -dPdt,
            dPdt,
        ]

model = MichaelisMentenModel()

# generate ground truth data at 10 time points
theta_true = (8, 0, 0.16, 2.5)
rep_groundtruth = model.predict_replicate(
    # S_0, P_0, v_max, K_S
    parameters=theta_true,
    template=murefi.Replicate.make_template(tmin=0, tmax=180, independent_keys='SP', rid='A01', N=10)
)

# use error model to make noisy observations of the ground truth
rep_observed = murefi.Replicate(rid='A01')
rep_observed['A570'] = murefi.Timeseries(
    t=rep_groundtruth['P'].t,
    y=observe_with_true_parameters(rep_groundtruth['P'].y),
    independent_key='P',
    dependent_key='A570'
)

# pack generated data into Dataset
dataset = murefi.Dataset()
dataset[rep_observed.rid] = rep_observed


fig, ax = pyplot.subplots(dpi=90)
ax.scatter(rep_observed['A570'].t, rep_observed['A570'].y, label='observations')
# use error model to project groundtruth onto dependent variable axis
ax.plot(rep_observed['A570'].t, cm_product.predict_dependent(rep_groundtruth["P"].y)[0], label='groundtruth')
ax.set(xlabel='time   [s]', ylabel='$A_{570}$   [a.u.]')
ax.legend()
pyplot.show()
_images/Example_04_MichaelisMenten_sunode_3_1.png
_images/Example_04_MichaelisMenten_sunode_3_2.png
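Since both the calibration data and the observations were simulated, we can quickly compare the fitted calibration parameters against the values used in observe_with_true_parameters. This is a small sketch that was not part of the original notebook; it assumes the parameter order (intercept, slope, scale, df) of the linear model with Student-t noise fitted above.

# compare fitted calibration parameters against the simulation ground truth
for name, fitted, true in zip(["intercept", "slope", "scale", "df"], theta_fit, [0.17, 0.12, 0.03, 3]):
    print(f"{name:>9}: fitted={fitted:.3f}   true={true}")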

Objective function and gradients

As introduced in the basic example, we need a ParameterMapping to specify how we want to apply the model to the dataset.

[3]:
df_mapping = pandas.DataFrame(columns='rid,S_0,P_0,v_max,K_S'.split(',')).set_index('rid')
# we'll fix S_0=8.0 and fit only the remaining parameters
df_mapping.loc['A01'] = (8.0, 'P_0', 'v_max', 'K_S')

# create the ParameterMapping object
pm = murefi.ParameterMapping(
    df_mapping,
    guesses=model.guesses,      # dict keyed by parameter name
    bounds=model.bounds         # dict keyed by parameter name, values are (lower, upper) tuples
)
display(pm.as_dataframe())
display(pm)
      S_0    P_0  v_max  K_S
rid
A01   8.0    P_0  v_max  K_S

ParameterMapping(1 replicates, 4 inputs, 3 free parameters)
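The free parameters and their order matter, because this is the order in which the objective created below expects its inputs. A quick way to inspect them, using attributes that also appear later in this example:

# free parameters in the order the objective expects them
print(list(pm.parameters.keys()))
print(pm.guesses)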

The objective function is created in the same way:

[4]:
obj = murefi.objectives.for_dataset(
    dataset,
    model,
    pm,
    # no substrate calibration model is needed, because the dataset contains no substrate observations
    calibration_models=[cm_product]
)

# The objective function can be evaluated at the initial guess to see if everything works as expected:
print(f'Negative log-likelihood at initial guess: {obj(pm.guesses)}')
Negative log-likelihood at initial guess: -11.26583475496314

With PyTensor installed, the objective can be called with TensorVariables to create a computation graph for the likelihood.

[5]:
K_S = pt.scalar("K_S")
P_0 = pt.scalar("P_0")
v_max = pt.scalar("v_max")
theta = [K_S, P_0, v_max]

L = obj([K_S, P_0, v_max])
# If `sunode` is installed we can auto-differentiate:
L_grad = pytensor.grad(L, wrt=theta)

# We can compile functions to evaluate L and L_grad:
f_L = pytensor.function(inputs=theta, outputs=[L])
f_L_grad = pytensor.function(inputs=theta, outputs=L_grad)

parameters = pm.guesses
[6]:
print(f"""
Evaluations at pm.guesses:
obj      : {obj(parameters)}
f_L      :  {f_L(*parameters)[0]}
f_L_grad : {f_L_grad(*parameters)}
""")

Evaluations at pm.guesses:
obj      : -11.26583475496314
f_L      :  11.265835029666126
f_L_grad : [array(-6.7969403), array(16.5343305), array(442.18509082)]
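As a sanity check (not part of the original notebook), the auto-differentiated gradient can be compared against a central finite-difference approximation built from f_L alone:

def finite_difference_grad(func, x, eps=1e-6):
    # central finite differences around x; func returns a list, like f_L does
    x = numpy.asarray(x, dtype=float)
    grad = numpy.zeros_like(x)
    for i in range(len(x)):
        up, down = x.copy(), x.copy()
        up[i] += eps
        down[i] -= eps
        grad[i] = (func(*up)[0] - func(*down)[0]) / (2 * eps)
    return grad

print(finite_difference_grad(f_L, parameters))
print(f_L_grad(*parameters))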

[7]:
# This cell needs graphviz and pydot:
# > mamba install -c conda-forge graphviz
# > pip install pydot
pytensor.printing.pydotprint(f_L, outfile="graph.png")
Image("graph.png")
The output file is available at graph.png
[7]:
_images/Example_04_MichaelisMenten_sunode_11_1.png
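If graphviz and pydot are not available, the same graph can also be inspected as plain text (an alternative, not shown in the original notebook):

# text representation of the compiled function's graph
pytensor.dprint(f_L)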

Benchmarking

The sunode-based compiled likelihood function should be competitive with the odeint-based obj. More importantly, the auto-differentiated gradient should be much cheaper than a finite-difference approximation, which needs two extra likelihood evaluations per parameter.

[8]:
%%timeit
obj(parameters)
1.1 ms ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[9]:
%%timeit
f_L(*parameters)
1.47 ms ± 34.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
[10]:
%%timeit
f_L_grad(*parameters)
1.45 ms ± 14.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
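To put these numbers into perspective, one can also time the finite-difference gradient sketched earlier against the compiled gradient (a hypothetical addition, reusing the finite_difference_grad helper from above):

import timeit

n = 100
t_fd = timeit.timeit(lambda: finite_difference_grad(f_L, parameters), number=n) / n
t_ad = timeit.timeit(lambda: f_L_grad(*parameters), number=n) / n
print(f"finite differences: {t_fd * 1e3:.2f} ms/call    autodiff: {t_ad * 1e3:.2f} ms/call")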

Building a Bayesian ODE model with murefi, calibr8, sunode and pymc

So far we have seen how an ODE model built with murefi and calibr8 can be compiled with PyTensor and auto-differentiated via sunode. These steps are good to know for anyone who wants to leverage the fast likelihoods and gradients in other methods, but when building a PyMC model, all of this happens behind the scenes.

Here we only have to come up with priors for the model parameters and can then press the inference button™.

[11]:
# first build the model
with pymc.Model() as pmodel:
    t_theta = {
        'S_0': 8,
        'P_0': pymc.Exponential('P_0', lam=4),
        'v_max': pymc.LogNormal('v_max', mu=numpy.log(3/25), sigma=1),
        'K_S': pymc.LogNormal('K_S', mu=numpy.log(4), sigma=1)
    }
    # Just pass the priors into the objective.
    # PyMC, sunode, murefi and calibr8 take care of the rest.
    obj(t_theta)

pymc.model_to_graphviz(pmodel)
[11]:
_images/Example_04_MichaelisMenten_sunode_17_0.svg
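Before sampling, it can be reassuring to check that the likelihood contributed by obj was actually registered in the model. One optional check (not in the original notebook) is to evaluate the per-variable log-probability terms at the model's initial point:

with pmodel:
    # log-probabilities at the initial point, including the term added by the objective
    print(pmodel.point_logps())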
[12]:
# then hit the inference button
with pmodel:
    idata = pymc.sample(cores=1)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [P_0, v_max, K_S]
100.00% [2000/2000 00:38<00:00 Sampling chain 0, 0 divergences]
100.00% [2000/2000 00:39<00:00 Sampling chain 1, 0 divergences]
Sampling 2 chains for 1_000 tune and 1_000 draw iterations (2_000 + 2_000 draws total) took 78 seconds.
[13]:
arviz.plot_trace(idata);
_images/Example_04_MichaelisMenten_sunode_19_0.png
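In addition to the trace plot, a numeric posterior summary with convergence diagnostics can be printed (optional; this output was not part of the original run):

# posterior means, HDIs and sampler diagnostics (ess, r_hat)
display(arviz.summary(idata, var_names=["P_0", "v_max", "K_S"]))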
[14]:
dataset_pred = model.predict_dataset(
    template=murefi.Dataset.make_template_like(dataset, independent_keys='SP'),
    parameter_mapping=pm,
    parameters={
        k : idata.posterior[k].stack(sample=('chain', 'draw')).values
        for k in pm.parameters.keys()
    }
)
dataset_pred
[14]:
Dataset([('A01', Replicate(S[:200], P[:200]))])
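The predicted Replicate now holds one trajectory per posterior sample, which is why plot_gp_dist can draw posterior bands below. A quick shape check (hypothetical; the exact shapes depend on the number of posterior draws):

ts_P = dataset_pred['A01']['P']
print(ts_P.t.shape, ts_P.y.shape)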
[15]:
fig, ax = pyplot.subplots(dpi=90)
colors = dict(P='blue', S='orange')

pymc.gp.util.plot_gp_dist(
    ax=ax,
    x=dataset_pred['A01']['S'].t,
    samples=dataset_pred['A01']['S'].y,
    palette='Reds',  plot_samples=False,
)
pymc.gp.util.plot_gp_dist(
    ax=ax,
    x=dataset_pred['A01']['P'].t,
    samples=dataset_pred['A01']['P'].y,
    palette='Blues', plot_samples=False,
)

# plot data, transformed via the error model into the independent unit
x_obs = rep_observed['A570'].t
y_obs = cm_product.predict_independent(rep_observed['A570'].y)
ax.scatter(x_obs, y_obs, label='observations', color=colors['P'])

# plot all timeseries of the groundtruth for comparison
for ykey, ts in reversed(rep_groundtruth.items()):
    ax.plot(ts.t, ts.y, label=ykey + ' (true)', linestyle=':', color=colors[ykey])

ax.set_xlabel('time   [s]')
ax.set_ylabel('P, S   [mmol/L]')
ax.legend()
pyplot.show()
_images/Example_04_MichaelisMenten_sunode_21_0.png
[16]:
%load_ext watermark
%watermark -n -u -v -iv -w
Last updated: Fri Dec 16 2022

Python implementation: CPython
Python version       : 3.9.15
IPython version      : 8.7.0

matplotlib: 3.6.2
pymc      : 5.0.0
murefi    : 5.2.0
pytensor  : 2.8.10
pandas    : 1.5.2
sunode    : 0.4.0
calibr8   : 7.0.0
scipy     : 1.9.3
arviz     : 0.14.0
numpy     : 1.23.5

Watermark: 2.3.1
