Source code for harissa.inference.kinetics
"""
Inference of basal parameters
"""
import numpy as np
from numpy import log
from scipy.special import psi, polygamma
[docs]
def estim_gamma(x):
"""
Estimate the parameters of a gamma distribution using
the method of moments. The output is (a,b) for the distribution
f(x) = x**(a-1)*exp(-b*x)/(gamma(a)/b**a).
"""
m = np.mean(x)
v = np.var(x)
if v == 0: return 0, 1
else: return m*m/v, m/v
[docs]
def estim_gamma_poisson(x):
"""
Estimate parameters a and b of the Gamma-Poisson(a,b) distribution,
a.k.a. negative binomial distribution, using the method of moments.
"""
m1 = np.mean(x)
m2 = np.mean(x*(x-1))
if m1 == 0: return 0, 1
r = m2 - m1**2
if r > 0: b = m1/r
else:
v = np.var(x)
if v == 0: return 0, 1
b = m1/v
a = m1 * b
return a, b
[docs]
def infer_kinetics(x, times, tol=1e-5, max_iter=100, verb=False):
"""
Infer parameters a[0], ..., a[m-1] and b of a Gamma-Poisson model
with time-dependant a and constant b for a given gene at m time points.
Parameters
----------
x : ndarray
`x[k]` is the gene expression of cell `k`
times : ndarray
`times[k]` is the time point of cell `k`
"""
t = np.sort(list(set(times)))
m = t.size
n = np.zeros(m) # Number of cells for each time point
a = np.zeros(m)
b = np.zeros(m)
# Initialization of a and b
for i in range(m):
cells = (times == t[i])
n[i] = np.sum(cells)
a[i], b[i] = estim_gamma_poisson(x[cells])
b = np.mean(b)
# Newton-like method
k, c = 0, 0
sx = np.sum(x)
while (k == 0) or (k < max_iter and c > tol):
da = np.zeros(m)
for i in range(m):
if a[i] > 0:
cells = (times == t[i])
z = a[i] + x[cells]
p0 = np.sum(psi(z))
p1 = np.sum(polygamma(1, z))
d = n[i]*(log(b)-log(b+1)-psi(a[i])) + p0
h = p1 - n[i]*polygamma(1, a[i])
da[i] = -d/h
anew = a + da
if np.sum(anew < 0) == 0: a[:] = anew
else:
max_test = 5
test = 0
da *= 0.5
while (np.sum(a + da < 0) > 0) and (test < max_test):
da *= 0.5
test += 1
if test < max_test: a[:] = a + da
else: print('Warning: parameter a not improved')
if np.sum(a == 0) == 0:
b = np.sum(n*a)/sx
else: b = 1
c = np.max(np.abs(da))
k += 1
if (k == max_iter) and (c > tol):
# print('Warning: bad convergence (b = {})'.format(b))
a, b = a/b, 1
# if verb: print('Estimation done in {} iterations'.format(k))
if np.sum(a < 0) > 0: print('WARNING: a < 0')
if b < 0: print('WARNING: b < 0')
if np.all(a == 0): print('WARNING: a == 0')
# if k > 20 and np.max(a/b) > 2: print(k, np.max(a/b))
return a, b
# Tests
if __name__=='__main__':
x = np.array([2,0,10,5,0,7])
times = np.array([0,0,0,1,1,1])
a, b = infer_kinetics(x, times, verb=True)
print(a, b)