Travaux pratiques : apprentissage et ZCR

by Joseph Razik, last modified on 2020-11-18
i322_tp
In [1]:
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [2]:
import struct
In [3]:
# Download the lab archive into /tmp and unpack it there. As the tar listing
# in the cell output shows, this creates /tmp/I322/RAW/<label>.raw, one raw
# sound file per label (a-z and 0-9).
!cd /tmp/; wget http://razik.univ-tln.fr/misc/I322/I322_tp.tgz ; tar xzvf /tmp/I322_tp.tgz
--2020-11-23 10:20:35--  http://razik.univ-tln.fr/misc/I322/I322_tp.tgz
Résolution de razik.univ-tln.fr (razik.univ-tln.fr)… 193.49.96.234
Connexion à razik.univ-tln.fr (razik.univ-tln.fr)|193.49.96.234|:80… connecté.
requête HTTP transmise, en attente de la réponse… 200 OK
Taille : 331711 (324K) [application/x-gzip]
Sauvegarde en : « I322_tp.tgz »

I322_tp.tgz         100%[===================>] 323,94K  1,56MB/s    ds 0,2s    

2020-11-23 10:20:35 (1,56 MB/s) — « I322_tp.tgz » sauvegardé [331711/331711]

I322/
I322/RAW/
I322/RAW/b.raw
I322/RAW/t.raw
I322/RAW/p.raw
I322/RAW/1.raw
I322/RAW/0.raw
I322/RAW/f.raw
I322/RAW/i.raw
I322/RAW/9.raw
I322/RAW/j.raw
I322/RAW/y.raw
I322/RAW/c.raw
I322/RAW/5.raw
I322/RAW/m.raw
I322/RAW/g.raw
I322/RAW/o.raw
I322/RAW/a.raw
I322/RAW/l.raw
I322/RAW/4.raw
I322/RAW/7.raw
I322/RAW/d.raw
I322/RAW/3.raw
I322/RAW/v.raw
I322/RAW/w.raw
I322/RAW/z.raw
I322/RAW/6.raw
I322/RAW/8.raw
I322/RAW/r.raw
I322/RAW/2.raw
I322/RAW/e.raw
I322/RAW/n.raw
I322/RAW/x.raw
I322/RAW/u.raw
I322/RAW/k.raw
I322/RAW/h.raw
I322/RAW/q.raw
I322/RAW/s.raw
In [16]:
# Recording used to train the model of the letter "a".
# The archive is unpacked under /tmp/I322/RAW/ (see the tar listing), so the
# file has to be read from there; "/tmp/a.raw" only exists if it was copied
# by hand, which breaks a fresh Restart & Run All.
# f_name = "/home/partage/I322/RAW/a.raw"
f_name = "/tmp/I322/RAW/a.raw"
In [5]:
def lire_son(f_name):
    """Read a headerless (raw) sound file as 16-bit signed samples.

    Parameters
    ----------
    f_name : str or path-like
        Path of the raw file to read.

    Returns
    -------
    numpy.ndarray
        One entry per sample, in file order. The array is built from Python
        ints, so it gets numpy's default integer type rather than int16;
        products of two samples (such as ``v*old_val`` in ``zcr``) are
        therefore not computed on 16 bits.

    Raises
    ------
    OSError
        If the file cannot be opened or read.
    """
    # Read everything at once instead of issuing one read() per sample.
    with open(f_name, 'rb') as fichier:
        donnees = fichier.read()
    # Each sample is decoded with the 'h' format (signed short, native byte
    # order) from 2 bytes. A file with an odd number of bytes would make the
    # decoding fail on the last, incomplete sample: ignore that dangling byte.
    longueur_utile = len(donnees) - len(donnees) % 2
    valeurs = [v for (v,) in struct.iter_unpack('h', donnees[:longueur_utile])]
    return array(valeurs)
In [17]:
valeurs = lire_son(f_name)
In [18]:
# Plot the raw waveform, without axes.
# figsize() only changes the default size applied to figures created
# afterwards, so it must be called before subplots() for this figure to be
# 12x5 on a fresh kernel.
figsize(12, 5)
f, ax = subplots()
ax.set_axis_off()
ax.plot(valeurs)
Out[18]:
[<matplotlib.lines.Line2D at 0x7f5a91aae490>]
In [8]:
def zcr(fenetre):
    """Count the sign changes between consecutive samples of a window.

    A crossing is counted when the product of two neighbouring samples is
    strictly negative; a pair that contains an exact 0 is therefore not
    counted.

    Parameters
    ----------
    fenetre : sequence of numbers
        Samples of the window (list, tuple or 1-D array). May be empty.

    Returns
    -------
    int
        Number of sign changes; 0 for an empty or single-sample window.
    """
    # Pairing each sample with its successor produces no pair at all for an
    # empty or one-sample window, so no special case (and no IndexError on
    # fenetre[0]) is needed.
    return sum(
        1
        for precedent, courant in zip(fenetre, fenetre[1:])
        if precedent * courant < 0
    )
In [76]:
zcr(valeurs)
Out[76]:
3268
In [10]:
len(valeurs)
Out[10]:
14400
In [11]:
range(220, len(valeurs), 220)
Out[11]:
range(220, 14400, 220)
In [100]:
# ZCR of each consecutive, non-overlapping window of 220 samples.
# len(valeurs) is 14400, which is not a multiple of 220, so the last window
# is shorter than the others.
les_zcr = [zcr(valeurs[deb:deb+220]) for deb in range(0, len(valeurs), 220)]
In [13]:
plot(les_zcr)
Out[13]:
[<matplotlib.lines.Line2D at 0x7f07add49b20>]
In [14]:
plot(les_zcr)
Out[14]:
[<matplotlib.lines.Line2D at 0x7f07adca9ac0>]
In [15]:
hist(les_zcr)
Out[15]:
(array([ 1.,  1.,  2., 11., 16.,  5., 10.,  3.,  4., 13.]),
 array([ 0. ,  8.3, 16.6, 24.9, 33.2, 41.5, 49.8, 58.1, 66.4, 74.7, 83. ]),
 <BarContainer object of 10 artists>)
In [16]:
hist(les_zcr)
Out[16]:
(array([ 1.,  1.,  2., 11., 16.,  5., 10.,  3.,  4., 13.]),
 array([ 0. ,  8.3, 16.6, 24.9, 33.2, 41.5, 49.8, 58.1, 66.4, 74.7, 83. ]),
 <BarContainer object of 10 artists>)
In [17]:
mean(les_zcr)
Out[17]:
49.303030303030305
In [18]:
std(les_zcr)
Out[18]:
20.222227267681184
In [19]:
def gauss(x, mu, sigma):
    """Density at x of the normal distribution N(mu, sigma**2).

    mu is the mean and sigma the standard deviation (not the variance).
    """
    normalisation = 1/(sqrt(2*pi)*sigma)
    exposant = -0.5*(x - mu)**2/sigma**2
    return normalisation * exp(exposant)
In [20]:
# Gaussian with the empirical mean and standard deviation of les_zcr,
# evaluated at the integer ZCR values 0..100. The statistics are computed
# from the data rather than copied by hand from the outputs of mean() and
# std(), so the curve stays right if the recording changes.
mu_zcr, sigma_zcr = mean(les_zcr), std(les_zcr)
Y = [gauss(x, mu_zcr, sigma_zcr) for x in range(101)]
In [21]:
plot(Y)
plot(les_zcr, [0]*len(les_zcr), 'ro')
Out[21]:
[<matplotlib.lines.Line2D at 0x7f07adb747c0>]
In [22]:
from sklearn import mixture
In [103]:
g = mixture.GaussianMixture(1, covariance_type='diag')
In [24]:
g.fit(array(les_zcr).reshape(-1,1))
Out[24]:
GaussianMixture(covariance_type='diag')
In [25]:
g.means_
Out[25]:
array([[49.3030303]])
In [26]:
g.covariances_
Out[26]:
array([[408.93847667]])
In [27]:
g.weights_
Out[27]:
array([1.])
In [28]:
Z = [exp(g.score(x.reshape(-1,1))) for x in arange(101)]
In [29]:
plot(Z)
Out[29]:
[<matplotlib.lines.Line2D at 0x7f07926bae20>]
In [104]:
# Two-component mixture for the "a" recording.
# The seed is fixed because the fit starts from a randomised initialisation:
# without it, the order of the components and the fitted values may change
# from one run to the next.
a_g2 = mixture.GaussianMixture(2, covariance_type='diag', random_state=0)
In [45]:
a_g2.fit(array(les_zcr).reshape(-1,1))
Out[45]:
GaussianMixture(covariance_type='diag', n_components=2)
In [46]:
a_g2.means_
Out[46]:
array([[79.11111498],
       [40.90125872]])
In [47]:
a_g2.covariances_
Out[47]:
array([[ 10.77008664],
       [200.13660043]])
In [48]:
a_g2.weights_
Out[48]:
array([0.21988493, 0.78011507])
In [49]:
Z = [exp(a_g2.score(x.reshape(-1,1))) for x in arange(101)]
In [50]:
plot(Z)
Out[50]:
[<matplotlib.lines.Line2D at 0x7f079053cfd0>]
In [51]:
test_f_name = "/tmp/I322/RAW/0.raw"
In [52]:
test_valeurs = lire_son(test_f_name)
In [53]:
# ZCR of each consecutive, non-overlapping 220-sample window of the test
# recording (same windowing as for les_zcr).
test_les_zcr = [zcr(test_valeurs[deb:deb+220]) for deb in range(0, len(test_valeurs), 220)]
In [54]:
plot(test_les_zcr)
Out[54]:
[<matplotlib.lines.Line2D at 0x7f079051a6d0>]
In [56]:
len(test_les_zcr)
Out[56]:
31
In [55]:
hist(test_les_zcr)
Out[55]:
(array([ 3.,  1.,  1.,  0.,  9., 11.,  3.,  2.,  0.,  1.]),
 array([ 0. ,  6.2, 12.4, 18.6, 24.8, 31. , 37.2, 43.4, 49.6, 55.8, 62. ]),
 <BarContainer object of 10 artists>)
In [58]:
a_g2.score(array([test_les_zcr[0]]).reshape(-1,1))
Out[58]:
-7.996180299159372
In [61]:
test_les_zcr[0]
Out[61]:
0
In [65]:
def vraisemblance(x):
    """Likelihood of x under a two-component 1-D Gaussian mixture.

    The weights, means and variances are literal constants; they are the
    values displayed for a_g2 (weights_, means_, covariances_). gauss()
    expects a standard deviation, hence the sqrt() of each variance.
    """
    composante_1 = 0.21988493*gauss(x, 79.11111498 , sqrt(10.77008664))
    composante_2 = 0.78011507*gauss(x, 40.90125872, sqrt(200.13660043))
    return composante_1 + composante_2
In [66]:
# likelihood of the first ZCR value
# c_g1*gauss(x, mu_g1, sigma_g1) + c_g2*gauss(x, mu_g2, sigma_g2)

# v = 0.21988493*gauss(0, 79.11111498 , sqrt(10.77008664)) + 0.78011507*gauss(0, 40.90125872, sqrt(200.13660043))
v = vraisemblance(test_les_zcr[0])
print(v)
# log(v) can be compared with a_g2.score() on the same single value:
# both print about -7.99618.
print(log(v))
0.0003367464450031686
-7.996180299512988
In [67]:
# Joint likelihood of the test recording: product of the likelihoods of its
# windows (each window is treated as an independent observation).
# The product is extremely small (about 2e-61 for 31 values), which is why
# the log-likelihood is used afterwards.
P = prod(array([vraisemblance(x) for x in test_les_zcr]))
print(P)
2.2866752681968918e-61
In [69]:
log(P)
Out[69]:
-139.63059175797204
In [70]:
# Same quantity as log(P), computed as a sum of per-window log-likelihoods
# instead of the log of a product of very small numbers.
P_log = sum(array([log(vraisemblance(x)) for x in test_les_zcr]))
print(P_log)
-139.630591757972
In [72]:
# score() returns the average log-likelihood per sample, not the total:
# the value displayed here multiplied by the 31 windows gives back the
# total of about -139.63 printed for P_log.
a_g2.score(array(test_les_zcr).reshape(-1,1))
Out[72]:
-4.504212637677535
In [79]:
P_a = prod(array([vraisemblance(x) for x in les_zcr]))
print(log(P_a))
-278.7023749118592
In [105]:
# Two-component mixture for the "0" recording.
# The seed is fixed because the fit starts from a randomised initialisation:
# without it, the order of the components and the fitted values may change
# from one run to the next.
zero_g2 = mixture.GaussianMixture(2, covariance_type='diag', random_state=0)
In [86]:
# Train the "0" model on the ZCR values of 0.raw (test_les_zcr).
# les_zcr holds the ZCR values of the "a" recording: fitting on it would just
# give a second model of "a", whereas the parameters shown for zero_g2
# (means around 30) correspond to the "0" data.
zero_g2.fit(array(test_les_zcr).reshape(-1,1))
Out[86]:
GaussianMixture(covariance_type='diag', n_components=2)
In [88]:
zero_g2.means_
Out[88]:
array([[31.21117743],
       [29.43841612]])
In [89]:
zero_g2.covariances_
Out[89]:
array([[  6.64596133],
       [344.80709667]])
In [90]:
zero_g2.weights_
Out[90]:
array([0.53514291, 0.46485709])
In [93]:
Z = [exp(zero_g2.score(x.reshape(-1,1))) for x in arange(101)]
In [94]:
plot(Z)
Out[94]:
[<matplotlib.lines.Line2D at 0x7f078ee90af0>]
In [95]:
def vraisemblance_0(x):
    """Likelihood of x under a two-component 1-D Gaussian mixture.

    The weights, means and variances are literal constants; they are the
    values displayed for zero_g2 (weights_, means_, covariances_). gauss()
    expects a standard deviation, hence the sqrt() of each variance.
    """
    composante_1 = 0.53514291*gauss(x, 31.21117743 , sqrt(6.64596133))
    composante_2 = 0.46485709*gauss(x, 29.43841612, sqrt(344.80709667))
    return composante_1 + composante_2
In [102]:
P_0 = prod(array([vraisemblance_0(x) for x in test_les_zcr]))
print(log(P_0))
-114.11049941907352
In [101]:
P_0 = prod(array([vraisemblance_0(x) for x in les_zcr]))
print(log(P_0))
-346.219561095101
In [ ]:
# remaining work
# 1. build a model for each class (a ... z, 0 ... 9)
# 2. test every sound file against all the models (decision making)
# 3. build the confusion matrix (from the previous results)