Reputation: 110
I have three coupled ODEs that describe a biomass growth, substrate consumption and product formation. I have experimental findings for all 3 curves.
I have successfully used curve_fit from scipy.optimize to find optimal parameters for each curve separately, but I can't find a way to fit all 3 curves.
In the code I have tried to mimic solution from a similar question, but that code is for a simpler case and does not work for me.
When I run my code the ODR tells that my function is returning a wrong-shaped array. Since the mathematical model is able to reproduce the characteristics of the process and I have experimental findings of all 3 concentrations, I believe I just don't know how to feed my data into ODR.
How can I fix my code? Or maybe there are betters ways to curve fit my model?
from scipy import integrate
import numpy as np
from scipy.interpolate import Akima1DInterpolator
from operator import itemgetter
from scipy.odr import Model, Data, ODR
## The Model
def fukuda_solution(beta, t):
def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
biomass = X[0]
substrate = X[1]
ethanol = X[2]
W = 0.4 # not relevant in my case
F = 0.0 # not relevant in my case
Sf= 0.4 # not relevant in my case
miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))
if substrate >= Scr:
Rep = Aep * (substrate - Scr)
else:
Rep = 0.0
if substrate >= Scr or ethanol <= 0:
Rec = 0.0
else:
Rec = Aec * (Scr - substrate)
dX = (miu + (Rec/Yex) - F/W) * biomass
dS = -((miu/Yxs) + (Rep/Yes)) * biomass + (F/W)*(Sf-substrate)
dE = (Rep-Rec) * biomass - (F/W)*ethanol
return np.array([dX,dS,dE], dtype=float)
miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
X0 = np.array([0.85, 71.8, 3.57], dtype=float) # initial concentrations: biomass, glucose, ethanol
# X_calculated, infodict = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec), full_output=True)
X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))
print "Shape ravel"
print np.shape(X_calculated.ravel())
print "Shape X_calculated"
print np.shape(X_calculated)
return X_calculated.ravel()
## Data
## Measurements from lab. experiments
## The glucose is consumed by biomass. The ethanol is produced, but is later consumed
## when glucose concentration decreases,
## Glucose
t_gly = np.array([0. , 2.5, 8, 11, 14, 24], dtype=float)
a1_gly= np.array([71.8, 56.75, 9.74, 5.87, 2.57, 0.001], dtype=float)
## Biomass
t_bio = np.array([0.0 , 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_bio= np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.60, 11.5, 11.92], dtype=float)
## Ethanol
t_eth = np.array([0.0 , 2.5, 5.0, 8.0, 11.0, 14.0, 18.0, 22.0, 24.0], dtype=float)
a1_eth= np.array([3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
t = np.arange(0, 24., 0.01)
## Here I interpolate data from experiments.
## I just need more data points for curve fitting.
g_a1_bio = Akima1DInterpolator(t_bio,a1_bio)
fit_a1_bio = g_a1_bio(t)
g_a1_gly = Akima1DInterpolator(t_gly,a1_gly)
fit_a1_gly = g_a1_gly(t)
g_a1_eth = Akima1DInterpolator(t_eth,a1_eth)
fit_a1_eth = g_a1_eth(t)
## My coupled ODEs have 9 parameters.
## This is the best guess I could get by manually experimenting with parameters.
guess1 = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
I expect that the problem is hiding somewhere here, but I am not certain.
## Preparing experimental findings for ODR
data_time = np.repeat(t, 3)
data_experiments = np.array([fit_a1_bio,fit_a1_gly,fit_a1_eth])
data = Data(data_time, data_experiments.ravel())
model = Model(fukuda_solution)
Here the code fails:
odr = ODR(data, model, guess1)
odr.set_job(2)
out = odr.run()
out.pprint()
print out.beta
print out.sd_beta
Because of additional comments my code appears in multiple blocks here, but it is actually one file.
Upvotes: 0
Views: 1510
Reputation: 110
I am now providing an answer to my own question. The problem was that I was fighting the wrong problem. Since I have three curves, this is not a curve fitting problem. This is a basic parameter identification (or parameter estimation) problem.
This is how one should solve similar problems:
basinhopping
from scipy.optimize
.Here is the minimal working example/solution. The first file 'ethanol.py' is main program, the second file 'ethanol_model.py' is imported by the first file and contains the model, objective function and three utility functions. Fukuda is the model
ethanol.py
import numpy as np
import matplotlib.pyplot as plt
from ethanol_model import fukuda_solution
from ethanol_model import fukuda_fit
from scipy.optimize import basinhopping
from copy import deepcopy
## Measurements data
# Biomass data
data_X = np.array([0.85, 2.24, 6.11, 9.41, 10.36, 11.32, 11.6, 11.5, 11.92], dtype=float)
# Substrate data with some missing points
data_S = np.array([ 8.44800000e+01, 5.67500000e+01, np.nan, 9.38000000e+00, 5.51000000e+00, 2.21000000e+00, np.nan, np.nan, 1.00000000e-04], dtype=float)
# Product data
data_E = np.array([ 3.57, 6.86, 20.49, 25.23, 19.83, 14.84, 9.56, 5.37, 3.57], dtype=float)
## time points when measurements were taken
data_t = np.array([ 0., 2.5, 5., 8., 11., 14., 18., 22., 24. ], dtype=float)
# This is a good initial guess, but it works with bad guess also
guess = [0.4575, 0.5, 0.36, 0.45, 6.3599, 1.543, 19.694, 0.02, 0.23]
parameters = deepcopy(guess)
minimizer_kwargs = {"method": "BFGS", "args": (data_X, data_S, data_E, data_t)}
ret = basinhopping(fukuda_fit, parameters, minimizer_kwargs=minimizer_kwargs, niter=200)
print(ret)
paramaters = ret.x
retfun = fukuda_fit(parameters, data_X, data_S, data_E, data_t)
print "Fitness2: {0}". format(retfun)
## The rest is only for visual plotting
t = np.arange(0, 24., 0.01)
results = fukuda_solution(parameters, t, start_X=data_X[0], start_S = data_S[0], start_E=data_E[0])
biomass, glycose, ethanol = results
plt.plot(t, biomass, 'g-', label='Biomass', linewidth=1)
plt.plot(t, glycose, 'y-', label='Substrate', linewidth=1)
plt.plot(t, ethanol, 'b-', label='Ethanol', linewidth=1)
plt.plot(data_t, data_X, 'go', label='Biomass data')
plt.plot(data_t, data_S, 'ys', label='Substrate data')
plt.plot(data_t, data_E, 'b^', label='Ethanol data')
plt.legend(loc='best')
plt.show()
print("-------------------------")
ethanol_model.py
from scipy import integrate
import numpy as np
def fukuda_solution(beta, t, **kwargs):
def fukuda(X, t, miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec):
biomass = X[0]
substrate = X[1]
ethanol = X[2]
very_low = 1e-08 # very small number
miu = miu_max * (substrate/(Ks+substrate)) * (Ki/(Ki+ethanol))
Rep = (Aep * (substrate - Scr) if substrate >= Scr else 0.0 )
Rec = (0.0 if substrate >= Scr or ethanol <= 0 else Aec * (Scr - substrate))
# here I'm artificially limiting concentrations to positive values
dS = (-((miu/Yxs) + (Rep/Yes)) * biomass if substrate > very_low else 0)
dX = ((miu + (Rec/Yex)) * biomass if biomass > very_low else 0)
dE = ((Rep-Rec) * biomass if ethanol > very_low else 0)
return np.array([dX,dS,dE], dtype=float)
miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec = beta
X0 = np.array([kwargs['start_X'], kwargs['start_S'], kwargs['start_E']], dtype=float)
X_calculated = integrate.odeint(fukuda, X0, t, args=(miu_max, Scr, Yxs, Yes, Yex, Ks, Ki, Aep, Aec))
return np.transpose(X_calculated)
def gmax(*args):
"""Find max in many arrays"""
lmax = []
for l in args:
lmax.append(max(l))
return max(lmax)
def gmin(*args):
"""Find min in many arrays"""
lmin = []
for l in args:
lmin.append(min(l))
return min(lmin)
def resc(a,r): # a is array, r is related array
"""Normalize (rescale) an array"""
b = []
for i in a:
b.append( (i-gmin(a,r))/(gmax(a,r) - gmin(a,r)) )
return np.hstack(b)
def fukuda_fit(beta, data_X, data_S, data_E, data_t):
"""Objective function for Fukuda model"""
start_X = data_X[0]
start_S = data_S[0]
start_E = data_E[0]
result = fukuda_solution(beta,data_t, start_X=start_X, start_S=start_S, start_E=start_E)
X = result[0]
S = result[1]
E = result[2]
# remove NaN values
nans = np.isnan(data_X)
data_X = np.compress(~nans,data_X)
X = np.compress(~nans,X)
nans = np.isnan(data_S)
data_S = np.compress(~nans,data_S)
S = np.compress(~nans,S)
nans = np.isnan(data_E)
data_E = np.compress(~nans,data_E)
E = np.compress(~nans,E)
residX = np.sum( np.power(resc(X,data_X)-resc(data_X,X), 2) )
residS = np.sum( np.power(resc(S,data_S)-resc(data_S,S), 2) )
residE = np.sum( np.power(resc(E,data_E)-resc(data_E,E), 2) )
resid = residX + residS + residE
return 1./(len(data_X) + len(data_S) + len(data_E)) * resid
And yes, interpolation is completely unnecessary in solving these types of problems.
Upvotes: 1