In [1]:
def gauss(mu, sigma, x):
""" fonction calculant la probabilité d'appartenance d'un point à une gaussienne (sorte de distance)
attention, en dimension 2 cette fois
"""
return 1/(sqrt(det(sigma)*2*pi**2))*exp(-0.5*(dot(dot((x-mu).T, inv(sigma)), (x-mu))))
In [2]:
# a quoi ressemble une gaussienne type
mu = array([0.0, 0.0])
sigma = array([[1.0, 0.0], [0.0, 1.0]])
G = [mu, sigma]
figure(figsize=(10,10))
X = arange(-5, 5, 0.25)
Y = arange(-5, 5, 0.25)
Z = empty((X.shape[0], Y.shape[0]))
i = 0
for x in X:
j = 0
for y in Y:
Z[i,j] = gauss(mu, sigma, array([x,y]))
j+=1
i+=1
X, Y = np.meshgrid(X, Y)
from mpl_toolkits.mplot3d import Axes3D
ax = gca(projection='3d')
contour(X, Y, Z) #, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
#plot(linspace(-5, 5, 1000), [gauss(mu, sigma, t) for t in linspace(-5, 5, 1000)])
Out[2]:
on génére dix valeurs centrées sur (0,0) et dix valeurs centrée sur (2,2) pour obtenir un modèle bimodale
In [3]:
V1 = randn(20,2)
V2 = randn(20,2)*2 + 2
print V1
print V2
On représente toutes les données par une seule gaussienne
In [4]:
mu = mean(V1.tolist()+V2.tolist())
print mu
In [5]:
#help(cov)
V3 = array(V1.tolist()+V2.tolist()).T
sigma = cov(V3)
print sigma
In [6]:
figure(figsize=(10,10))
X = arange(-5, 5, 0.25)
Y = arange(-5, 5, 0.25)
Z = empty((X.shape[0], Y.shape[0]))
i = 0
for x in X:
j = 0
for y in Y:
Z[i,j] = gauss(mu, sigma, array([x,y]))
j+=1
i+=1
X, Y = np.meshgrid(X, Y)
from mpl_toolkits.mplot3d import Axes3D
ax = gca(projection='3d')
contour(X,Y,Z)
#ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
ax.scatter(V3[0,:], V3[1,:], 0, c='r')
Out[6]:
Maintenant on va essayer d'apprendre un modèle à 2 gaussiennes. Pour cela on part de la mono-gaussienne précédente et on décale leur centroide de pas grand chose
In [7]:
mu_1 = mu - array([sigma[0,0], sigma[1,1]])
sigma_1 = sigma
mu_2 = mu + array([sigma[0,0], sigma[1,1]])
sigma_2 = sigma
G1 = [mu_1, sigma_1]
G2 = [mu_2, sigma_2]
In [8]:
# on affiche cette modélisation initiale
figure(figsize=(10,10))
X = arange(-10, 10, 0.25)
Y = arange(-10, 10, 0.25)
Z1 = empty((X.shape[0], Y.shape[0]))
Z2 = empty((X.shape[0], Y.shape[0]))
i = 0
for x in X:
j = 0
for y in Y:
Z1[i,j] = gauss(mu_1, sigma_1, array([x,y]))
Z2[i,j] = gauss(mu_2, sigma_2, array([x,y]))
j+=1
i+=1
X, Y = np.meshgrid(X, Y)
from mpl_toolkits.mplot3d import Axes3D
ax = gca(projection='3d')
contour(X,Y,Z1)
contour(X,Y,Z2)
#ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
ax.scatter(V3[0,:], V3[1,:], 0, c='r')
ylim((-10,10))
xlim((-10,10))
Out[8]:
pour chaque valeur échantillon on calcul la distance avec chacune des gaussiennes
In [9]:
d_1_2 = {(x[0],x[1]):(gauss(mu_1, sigma_1, x), gauss(mu_2, sigma_2, x)) for x in V3.T.tolist()}
on affecte les points à la gaussienne la plus proche.
In [10]:
X_1 = [x for x in V3.T.tolist() if d_1_2[(x[0], x[1])][0] < d_1_2[(x[0], x[1])][1]]
X_2 = [x for x in V3.T.tolist() if d_1_2[(x[0], x[1])][0] >= d_1_2[(x[0], x[1])][1]]
In [11]:
print str(len(X_1)) + " " + str(len(X_2))
On recalcul les moyennes et ecart-types
In [12]:
mu_1 = mean(X_1)
sigma_1 = cov(array(X_1).T)
mu_2 = mean(X_2)
sigma_2 = cov(array(X_2).T)
on affiche les nouvelles gaussiennes
In [13]:
figure(figsize=(10,10))
X = arange(-10, 10, 0.25)
Y = arange(-10, 10, 0.25)
Z1 = empty((X.shape[0], Y.shape[0]))
Z2 = empty((X.shape[0], Y.shape[0]))
i = 0
for x in X:
j = 0
for y in Y:
Z1[i,j] = gauss(mu_1, sigma_1, array([x,y]))
Z2[i,j] = gauss(mu_2, sigma_2, array([x,y]))
j+=1
i+=1
X, Y = np.meshgrid(X, Y)
from mpl_toolkits.mplot3d import Axes3D
ax = gca(projection='3d')
contour(X,Y,Z1)
contour(X,Y,Z2)
#ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
ax.scatter(V3[0,:], V3[1,:], 0, c='r')
ylim((-10,10))
xlim((-10,10))
Out[13]:
Et on recommence ... au moins 4 fois pour voir
In [20]:
# l'initialisation est un peu différente de la façon précédente, mais pour que la convergence soit moins rapide.
mu_1 = mu - 0.0003*array([sigma[1,1], sigma[0,0]])
sigma_1 = sigma
mu_2 = mu + 0.0003*array([sigma[1,1], sigma[0,0]])
sigma_2 = sigma
G1 = [mu_1, sigma_1]
G2 = [mu_2, sigma_2]
for k in xrange(4):
figure(figsize=(10,10))
d_1_2 = {(x[0],x[1]):(gauss(mu_1, sigma_1, x), gauss(mu_2, sigma_2, x)) for x in V3.T.tolist()}
X_1 = [x for x in V3.T.tolist() if d_1_2[(x[0], x[1])][0] < d_1_2[(x[0], x[1])][1]]
X_2 = [x for x in V3.T.tolist() if d_1_2[(x[0], x[1])][0] >= d_1_2[(x[0], x[1])][1]]
mu_1 = mean(X_1)
sigma_1 = cov(array(X_1).T)
mu_2 = mean(X_2)
sigma_2 = cov(array(X_2).T)
X = arange(-10, 10, 0.25)
Y = arange(-10, 10, 0.25)
Z1 = empty((X.shape[0], Y.shape[0]))
Z2 = empty((X.shape[0], Y.shape[0]))
i = 0
for x in X:
j = 0
for y in Y:
Z1[i,j] = gauss(mu_1, sigma_1, array([x,y]))
Z2[i,j] = gauss(mu_2, sigma_2, array([x,y]))
j+=1
i+=1
X, Y = np.meshgrid(X, Y)
from mpl_toolkits.mplot3d import Axes3D
ax = gca(projection='3d')
contour(X,Y,Z1)
contour(X,Y,Z2)
#ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0, antialiased=False)
ax.scatter(V3[0,:], V3[1,:], 0, c='r')
ylim((-10,10))
xlim((-10,10))
In [ ]: