Source code for harissa.inference.kinetics

"""
Inference of basal parameters
"""
import numpy as np
from numpy import log
from scipy.special import psi, polygamma

[docs] def estim_gamma(x): """ Estimate the parameters of a gamma distribution using the method of moments. The output is (a,b) for the distribution f(x) = x**(a-1)*exp(-b*x)/(gamma(a)/b**a). """ m = np.mean(x) v = np.var(x) if v == 0: return 0, 1 else: return m*m/v, m/v
[docs] def estim_gamma_poisson(x): """ Estimate parameters a and b of the Gamma-Poisson(a,b) distribution, a.k.a. negative binomial distribution, using the method of moments. """ m1 = np.mean(x) m2 = np.mean(x*(x-1)) if m1 == 0: return 0, 1 r = m2 - m1**2 if r > 0: b = m1/r else: v = np.var(x) if v == 0: return 0, 1 b = m1/v a = m1 * b return a, b
[docs] def transform(x): """ Replace x by the conditional expectation given x of the underlying Gamma distribution, within the Gamma-Poisson model inferred from x. NB: this simply corresponds to a linear transformation with offset. """ a, b = estim_gamma_poisson(x) if not (a > 0 and b > 0): print(('Warning: you should check whether x is not ' 'almost zero (sum(x) = {}).').format(np.sum(x))) a, b = np.abs(a), np.abs(b) return (a + x)/(b + 1)
[docs] def infer_kinetics(x, times, tol=1e-5, max_iter=100, verb=False): """ Infer parameters a[0], ..., a[m-1] and b of a Gamma-Poisson model with time-dependant a and constant b for a given gene at m time points. Parameters ---------- x : ndarray `x[k]` is the gene expression of cell `k` times : ndarray `times[k]` is the time point of cell `k` """ t = np.sort(list(set(times))) m = t.size n = np.zeros(m) # Number of cells for each time point a = np.zeros(m) b = np.zeros(m) # Initialization of a and b for i in range(m): cells = (times == t[i]) n[i] = np.sum(cells) a[i], b[i] = estim_gamma_poisson(x[cells]) b = np.mean(b) # Newton-like method k, c = 0, 0 sx = np.sum(x) while (k == 0) or (k < max_iter and c > tol): da = np.zeros(m) for i in range(m): if a[i] > 0: cells = (times == t[i]) z = a[i] + x[cells] p0 = np.sum(psi(z)) p1 = np.sum(polygamma(1, z)) d = n[i]*(log(b)-log(b+1)-psi(a[i])) + p0 h = p1 - n[i]*polygamma(1, a[i]) da[i] = -d/h anew = a + da if np.sum(anew < 0) == 0: a[:] = anew else: max_test = 5 test = 0 da *= 0.5 while (np.sum(a + da < 0) > 0) and (test < max_test): da *= 0.5 test += 1 if test < max_test: a[:] = a + da else: print('Warning: parameter a not improved') if np.sum(a == 0) == 0: b = np.sum(n*a)/sx else: b = 1 c = np.max(np.abs(da)) k += 1 if (k == max_iter) and (c > tol): # print('Warning: bad convergence (b = {})'.format(b)) a, b = a/b, 1 # if verb: print('Estimation done in {} iterations'.format(k)) if np.sum(a < 0) > 0: print('WARNING: a < 0') if b < 0: print('WARNING: b < 0') if np.all(a == 0): print('WARNING: a == 0') # if k > 20 and np.max(a/b) > 2: print(k, np.max(a/b)) return a, b
# Tests if __name__=='__main__': x = np.array([2,0,10,5,0,7]) times = np.array([0,0,0,1,1,1]) a, b = infer_kinetics(x, times, verb=True) print(a, b)