Full corpus

Loading

# Read the full frequency table (semicolon-separated, header row, first
# column used as row names) and turn it into a matrix.
# Later code treats rows as word forms and columns as texts.
gesterestr <- as.matrix(
  read.csv(
    file = "frequencies_Geste_full.csv",
    sep = ";",
    header = TRUE,
    row.names = 1,
    quote = '\"'
  )
)
# Working copy used by the descriptive sections below.
monCorpus <- gesterestr

Distributions

Some general description

# Total number of tokens in the corpus (sum of every cell)
sum(gesterestr)
## [1] 1104296
# Total number of distinct forms (one row per form)
nrow(gesterestr)
## [1] 52202
# Total frequency of the first row -- the most frequent form, assuming rows
# are sorted by decreasing total frequency (TODO confirm against the CSV)
sum(gesterestr[1,])
## [1] 34115
# Zipf check: total frequency of the tenth row
sum(gesterestr[10,])
## [1] 13712
# Number of hapaxes (forms whose total frequency is exactly 1).
# The logical mask is counted directly: subsetting the matrix and calling
# nrow() would return NULL if exactly one row matched, because a one-row
# subset collapses to a plain vector.
sum(rowSums(gesterestr) == 1)
## [1] 25811
# Percentage of hapaxes among all forms
sum(rowSums(gesterestr) == 1) / nrow(gesterestr) * 100
## [1] 49.44447

Witnesses (individuals)

monCorpus <- gesterestr
# Scatter plot of the column totals (number of words per text).
# NOTE(review): nomCorpus is not defined in the visible part of this file;
# it is assumed to be set beforehand -- confirm.
plot(
  colSums(monCorpus),
  ylab = "Number of words",
  main = "Scatter plot",
  sub = nomCorpus
)

# Histogram of the same column totals
hist(
  colSums(monCorpus),
  main = "Number of words",
  sub = nomCorpus,
  xlab = "Number of words",
  ylab = "Frequency"
)

#Histogramme un peu plus configuré
#hist(colSums(monCorpus), breaks=seq(1000,30000,1000), main = "Nombre de mots par texte", sub=nomCorpus, xlab = "Nombre de mots", ylab = "Fréquence")

Description:

# Five-number summary and mean of the number of words per text (column totals)
summary(colSums(monCorpus))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     387    7016   11490   22086   28366  217942
# Standard deviation of the number of words per text
sd(colSums(monCorpus))
## [1] 32507.24
# Variance of the number of words per text
var(colSums(monCorpus))
## [1] 1056720966

Power-law distribution.

# Geometric mean of the number of words per text:
# exponential of the mean of the logged column totals
exp(mean(log(colSums(monCorpus))))
## [1] 12016.18

Boxplot:

# Boxplot of the column totals (number of words per text).
# NOTE(review): nomCorpus is not defined in the visible part of this file -- confirm.
boxplot(colSums(monCorpus), main = "Number of words per text", ylab="Number of words", sub=nomCorpus)

Texts ranked

# Column totals (words per text) listed from the largest text to the smallest
colSums(monCorpus)[order(colSums(monCorpus), decreasing = TRUE)]
##               lorr_1387pm13_pic_1365ca_BaudSebC 
##                                          217942 
##                pic_1213pm13_pic_1190pm10_AliscW 
##                                           71752 
##             pic_1213pm13_pic_1190pm10_MonRaincB 
##                                           53557 
##                pic_1275pm25_pic_1210pm10_Aiol2N 
##                                           49242 
##            art_1295_picmérid_1180ca_MonGuill1C2 
##                                           48970 
##                nil_1450pm10_pic_1300ca_EnfGarB. 
##                                           42516 
##                   pic_1275pm25_pic_1160caAiol1N 
##                                           41408 
##                 agn_1225pm25_agn_1170ca_HornP.C 
##                                           39489 
##               nil_1250pm50_nil_1230ca_GuiBourgG 
##                                           35800 
##               bourg_1270ca_nil_1210pm10_AimeriD 
##                                           33433 
##            Paris_1290pm10_flandr_1275_AdenBuevH 
##                                           33077 
##              agn_1137pm13_Nord.Ouest_1100caRolS 
##                                           29123 
##              bourg_1270ca_nil_1213pm13_MortAymC 
##                                           28997 
##              agn_1250pm10_agn_1150pm16_ChGuillM 
##                                           26472 
##              lorrsept_1275pm25_nil_1200ca_AmAmD 
##                                           25291 
##                 pic_1275pm25_pic_1190pm10ElieB. 
##                                           24052 
##          bourg_1325pm25_Sud.Est_1190pm10_FloovG 
##                                           21392 
##           frc_1262pm13_nil_1150pm16_CourLouisLe 
##                                           19632 
##         Nord_1275pm25_Nord.Est_1190pm10RCambr2M 
##                                           17913 
##             lorr_1290pm10_picmérid_1225caOrsonP 
##                                           17084 
##                     agn_1275pm25_nil_nil_Otin_B 
##                                           13607 
## Nord.Est_1262pm13_Nord.Est_1190pm10_PriseOrabR1 
##                                           13561 
##           agn_1290pm10_agn_1250pm10_DestrRomeF2 
##                                           13269 
##                    StBrieuc_1317_nil_nil_Otin_A 
##                                           12140 
##         Paris_1335ca_nil_1150pm17CharroiSch_B1. 
##                                           11685 
##         Paris_1335ca_nil_1150pm17CharroiSch_B2. 
##                                           11295 
##          pic_1225pm25_Nord.Est_1190pm10RCambr1M 
##                                           11053 
##    lorrmérid_1275pm25_nil_1150pm20CharroiSch_D. 
##                                           10832 
##              art_1295_nil_1150pm17CharroiSch_C. 
##                                           10798 
##         frc_1263pm13_nil_1150pm17CharroiSch_A2. 
##                                           10688 
##         frc_1283pm17_nil_1150pm17CharroiSch_A4. 
##                                           10542 
##         frc_1263pm13_nil_1150pm17CharroiSch_A1. 
##                                            9574 
##        lorr_1325pm25_nil_1150pm17CharroiSch_A3. 
##                                            8743 
##             agn_1290pm10_agn_1175pm25_PelCharlB 
##                                            7588 
##       bourg_1270ca_champmérid_1210pm10GirVianeE 
##                                            7403 
##                      nil_1300ca_norm_1200caAyeB 
##                                            7176 
##            Meuse_1262pm13_lorr_1200caPriseCordD 
##                                            7117 
##                Est_1300ca_pic_1213pm13FlorenceW 
##                                            6983 
##      pic_1213pm13_picmérid_1150pm16_MonGuill1C1 
##                                            6927 
##                agn_1335ca_agn_1190pm10_AmAmOctF 
##                                            6888 
##                 agn_1250pm10_agn_1170ca_HornP.O 
##                                            6592 
##              bourg_1270ca_frc_1210pm10GuibAndrM 
##                                            5725 
##            pic_1290pm10_pic_1275pm25FlorOctOctV 
##                                            4589 
##                  agn_1213pm13_frc_1125pm25GormB 
##                                            3798 
##             agn_1200pm20_agn_1180pm10_Asprem.P4 
##                                            2369 
##                     agn_1200pm20_nil_nil_Otin_M 
##                                            2085 
##                 StBrieuc_1317_nil_1190ca_Fier.V 
##                                            1682 
##           agn_1250pm50_nil_1250pm50_MacaireAl2B 
##                                            1179 
##      nil_1250pm50_nil_1150pm17CharroiSch_fragm. 
##                                             879 
##              agn_1250pm16_agn_1180pm10_Asprem.C 
##                                             387

Word frequencies

# Scatter plot of the row totals (total number of occurrences of each form).
# NOTE(review): nomCorpus is not defined in the visible part of this file -- confirm.
plot(rowSums(monCorpus), ylab = "Number of occurrences", main="Scatter plot", sub = nomCorpus)

# Histogram of the row totals with 10000 requested breaks; the x axis is
# restricted to 1-200 so the low-frequency end is readable
hist(rowSums(monCorpus), breaks=10000, main="Word frequencies", sub=nomCorpus, xlab = "Number of occurrences", xlim=c(1,200))

# Five-number summary and mean of the per-form frequencies
summary(rowSums(monCorpus))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1.00     1.00     2.00    21.15     4.00 34115.00
# Standard deviation of the per-form frequencies
sd(rowSums(monCorpus))
## [1] 364.8223
# Variance of the per-form frequencies
var(rowSums(monCorpus))
## [1] 133095.3

Power-law type distribution. Frequencies on a logarithmic scale:

# Frequency spectrum: for every observed total frequency, the number of
# forms that have exactly that frequency.
tableFreq <- table(rowSums(monCorpus))
# Two-column matrix: column 1 = frequency value, column 2 = number of forms
freqCounts <- cbind(
  as.numeric(names(tableFreq)),
  as.vector(tableFreq)
)
plot(
  freqCounts[, 1], freqCounts[, 2],
  xlab = "word freq.",
  ylab = "nb. of forms with this freq."
)

# Same plot with both axes on a logarithmic scale
plot(
  freqCounts[, 1], freqCounts[, 2],
  xlab = "word freq.",
  ylab = "nb. of forms with this freq.",
  log = "xy"
)

Geometric mean:

# Geometric mean of the per-form total frequencies
# (exponential of the mean of the logged row totals)
exp(mean(log(rowSums(monCorpus))))
## [1] 2.565113

Some analysis on MFW:

# Total frequency of the form in row 1
# (row index is used as frequency rank; assumes rows are sorted by
# decreasing total frequency -- TODO confirm)
sum(gesterestr[1,])
## [1] 34115
# Total frequency, rank 600
sum(gesterestr[600,])
## [1] 201
# Total frequency, rank 1000
sum(gesterestr[1000,])
## [1] 114
# Total frequency, rank 1200
sum(gesterestr[1200,])
## [1] 94
# Total frequency, rank 2000
sum(gesterestr[2000,])
## [1] 52
# Total frequency, rank 2500
sum(gesterestr[2500,])
## [1] 40
# Total frequency, rank 3000
sum(gesterestr[3000,])
## [1] 32
# Mean number of occurrences per text for the form at rank 600
# (total frequency divided by the number of texts, i.e. columns)
sum(gesterestr[600,]) / ncol(gesterestr)
## [1] 4.02
# Mean number of occurrences per text, rank 1000
sum(gesterestr[1000,]) / ncol(gesterestr)
## [1] 2.28
# Mean number of occurrences per text, rank 2000
sum(gesterestr[2000,]) / ncol(gesterestr)
## [1] 1.04

Analysis on MFW with proper names removed

Preparing corpus

# Read the frequency table from which proper names were removed
# (semicolon-separated, first column used as row names) as a matrix.
monCorpus2 <- as.matrix(
  read.csv(
    file = "frequencies_Geste_ProperNamesRemoved_for1-4000.csv",
    sep = ";",
    header = TRUE,
    row.names = 1,
    quote = '\"'
  )
)

Relative frequencies for the 600 MFW

# Vary the number below to try other selections.
# Keep the first 600 rows (the 600 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm). drop = FALSE keeps a
# matrix even if the corpus has a single text.
monCorpusSelect <- monCorpus2[1:600, , drop = FALSE]
# Turn counts into relative frequencies: each column (text) is divided by
# its own total, so every column sums to 1. This replaces the explicit
# 1:ncol() loop, which misbehaves on a zero-column matrix.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")

Analysis

Factor analysis

library("FactoMineR")
# One row per text, one column per word: PCA() treats texts as individuals
# and word relative frequencies as variables.
maBase <- t(monCorpusSelect)
monACP <- PCA(maBase)

monACP$eig
##         eigenvalue percentage of variance
## comp 1  72.9928920            12.16548200
## comp 2  45.4577306             7.57628843
## comp 3  34.4948518             5.74914197
## comp 4  30.0493978             5.00823296
## comp 5  28.4858245             4.74763742
## comp 6  24.3679325             4.06132208
## comp 7  23.1597676             3.85996127
## comp 8  21.1972684             3.53287807
## comp 9  20.5741798             3.42902996
## comp 10 17.6059818             2.93433030
## comp 11 16.1896124             2.69826873
## comp 12 15.1803819             2.53006365
## comp 13 14.1698426             2.36164044
## comp 14 13.0110835             2.16851392
## comp 15 12.2731525             2.04552542
## comp 16 11.9545830             1.99243050
## comp 17 11.4615260             1.91025434
## comp 18 11.1876788             1.86461313
## comp 19 10.7945443             1.79909071
## comp 20 10.6250330             1.77083884
## comp 21 10.3052821             1.71754702
## comp 22  9.8947067             1.64911778
## comp 23  9.2025950             1.53376584
## comp 24  8.9688082             1.49480137
## comp 25  8.7942087             1.46570146
## comp 26  8.1482268             1.35803781
## comp 27  7.8933084             1.31555139
## comp 28  6.9736568             1.16227614
## comp 29  6.7999545             1.13332575
## comp 30  6.6952829             1.11588049
## comp 31  6.2335393             1.03892321
## comp 32  6.0701630             1.01169384
## comp 33  5.8794697             0.97991162
## comp 34  5.5426312             0.92377187
## comp 35  5.4544902             0.90908170
## comp 36  5.0988085             0.84980142
## comp 37  4.6157259             0.76928765
## comp 38  4.5431727             0.75719545
## comp 39  4.1871575             0.69785959
## comp 40  3.8282652             0.63804420
## comp 41  3.6753772             0.61256286
## comp 42  3.3125479             0.55209131
## comp 43  3.2549854             0.54249756
## comp 44  2.4111989             0.40186648
## comp 45  2.0952063             0.34920105
## comp 46  1.5706623             0.26177705
## comp 47  1.5183192             0.25305320
## comp 48  1.2404399             0.20673998
## comp 49  0.5585449             0.09309081
##         cumulative percentage of variance
## comp 1                           12.16548
## comp 2                           19.74177
## comp 3                           25.49091
## comp 4                           30.49915
## comp 5                           35.24678
## comp 6                           39.30810
## comp 7                           43.16807
## comp 8                           46.70094
## comp 9                           50.12997
## comp 10                          53.06430
## comp 11                          55.76257
## comp 12                          58.29264
## comp 13                          60.65428
## comp 14                          62.82279
## comp 15                          64.86832
## comp 16                          66.86075
## comp 17                          68.77100
## comp 18                          70.63561
## comp 19                          72.43471
## comp 20                          74.20554
## comp 21                          75.92309
## comp 22                          77.57221
## comp 23                          79.10597
## comp 24                          80.60078
## comp 25                          82.06648
## comp 26                          83.42452
## comp 27                          84.74007
## comp 28                          85.90234
## comp 29                          87.03567
## comp 30                          88.15155
## comp 31                          89.19047
## comp 32                          90.20217
## comp 33                          91.18208
## comp 34                          92.10585
## comp 35                          93.01493
## comp 36                          93.86473
## comp 37                          94.63402
## comp 38                          95.39122
## comp 39                          96.08908
## comp 40                          96.72712
## comp 41                          97.33968
## comp 42                          97.89177
## comp 43                          98.43427
## comp 44                          98.83614
## comp 45                          99.18534
## comp 46                          99.44712
## comp 47                          99.70017
## comp 48                          99.90691
## comp 49                         100.00000
# Scree plot: first column of the eigenvalue table, one bar per component
barplot(
  monACP$eig[, 1],
  main = "Eigenvalues",
  names.arg = 1:nrow(monACP$eig)
)

# Default PCA plot
plot.PCA(monACP)

# Describe the dimensions by the variables most correlated with them, then
# show the first ten rows of the table for dimension 1 (NA rows dropped)
maDescription <- dimdesc(monACP)
head(na.omit(maDescription$Dim.1$quanti), n = 10)
##       correlation      p.value
## et      0.8094060 1.121794e-12
## au      0.7619851 1.298950e-10
## sont    0.7566972 2.061230e-10
## dont    0.7560158 2.185737e-10
## non     0.7325164 1.480523e-09
## moi     0.7301423 1.776392e-09
## droit   0.7223872 3.179344e-09
## uoit    0.7131954 6.183665e-09
## soit    0.7016251 1.378560e-08
## a       0.6906622 2.847610e-08
tail(na.omit(maDescription$Dim.1$quanti), n=10)
##      correlation      p.value
## co    -0.8062675 1.598253e-12
## sun   -0.8109536 9.398613e-13
## seit  -0.8226655 2.332285e-13
## sunt  -0.8349796 4.805490e-14
## e     -0.8463918 9.858935e-15
## mei   -0.8485535 7.198703e-15
## tut   -0.8515295 4.631495e-15
## al    -0.8520937 4.255404e-15
## sur   -0.8571800 1.951346e-15
## pur   -0.8571929 1.947407e-15
head(na.omit(maDescription$Dim.2$quanti), n=10)
##        correlation      p.value
## mais     0.6675259 1.192037e-07
## car      0.6545848 2.517084e-07
## ains     0.6526491 2.806225e-07
## faire    0.6500554 3.242488e-07
## sans     0.6166221 1.859942e-06
## uenus    0.5942362 5.372931e-06
## sains    0.5907636 6.289294e-06
## tous     0.5895411 6.644928e-06
## dedens   0.5733475 1.348806e-05
## no       0.5678215 1.702859e-05
tail(na.omit(maDescription$Dim.2$quanti), n=10)
##         correlation      p.value
## es       -0.5531734 3.096695e-05
## marchis  -0.5573724 2.616361e-05
## seignor  -0.5624656 2.126035e-05
## uile     -0.5642515 1.975219e-05
## ainz     -0.5645785 1.948695e-05
## ge       -0.5652377 1.896221e-05
## cite     -0.5901032 6.479156e-06
## granz    -0.6126725 2.256150e-06
## toz      -0.6586748 1.995241e-07
## terre    -0.6612321 1.722357e-07

Metric MDS (multidimensional scaling; "PMD" in the plot titles)

# Manhattan distance between texts (rows after transposition)
distance <- dist(t(monCorpusSelect), method = "manhattan")

2D fit

# Classical (metric) scaling of the distance matrix into two dimensions
fit <- cmdscale(distance, eig = TRUE, k = 2)
x <- fit$points[, 1]
y <- fit$points[, 2]
plot(
  x, y,
  xlab = "Coordinate 1",
  ylab = "Coordinate 2",
  main = "Metric PMD"
)
# Label each point with its text name (the column names of the corpus)
text(x, y, labels = colnames(monCorpus2), cex = 0.7)

Non-metric MDS (multidimensional scaling; "PMD" in the plot titles)

Using isoMDS() from the MASS package:

library(MASS)
# Recompute the Manhattan distance between texts, then fit a
# two-dimensional non-metric scaling with isoMDS()
distance <- dist(t(monCorpusSelect), method = "manhattan")
fit <- isoMDS(distance, k = 2)
## initial  value 25.144180 
## iter   5 value 12.300577
## iter  10 value 10.795732
## iter  15 value 10.583081
## iter  20 value 10.498248
## iter  20 value 10.491333
## iter  20 value 10.486975
## final  value 10.486975 
## converged
fit
## $points
##                                                         [,1]         [,2]
## agn_1137pm13_Nord.Ouest_1100caRolS              -0.717163253 -0.132077063
## agn_1200pm20_agn_1180pm10_Asprem.P4             -0.889593533 -0.167713855
## agn_1200pm20_nil_nil_Otin_M                     -0.801173685 -0.155049893
## agn_1213pm13_frc_1125pm25GormB                  -0.704042608  0.013544142
## agn_1225pm25_agn_1170ca_HornP.C                 -0.826477551  0.069149707
## agn_1250pm10_agn_1150pm16_ChGuillM              -0.613041381  0.016542198
## agn_1250pm10_agn_1170ca_HornP.O                 -0.952700263  0.002378254
## agn_1250pm16_agn_1180pm10_Asprem.C              -2.030447378  0.323354388
## agn_1250pm50_nil_1250pm50_MacaireAl2B           -0.428783003  0.905236508
## agn_1275pm25_nil_nil_Otin_B                     -0.579819885 -0.051937045
## agn_1290pm10_agn_1175pm25_PelCharlB             -0.604188822 -0.143024411
## agn_1290pm10_agn_1250pm10_DestrRomeF2            0.004224903  0.614021871
## agn_1335ca_agn_1190pm10_AmAmOctF                -0.651959596  0.606195389
## art_1295_nil_1150pm17CharroiSch_C.               0.276792114 -0.050592195
## art_1295_picmérid_1180ca_MonGuill1C2             0.217413661  0.057993529
## bourg_1270ca_champmérid_1210pm10GirVianeE        0.202830787 -0.121547712
## bourg_1270ca_frc_1210pm10GuibAndrM               0.267157782 -0.271364724
## bourg_1270ca_nil_1210pm10_AimeriD                0.207651479 -0.097138660
## bourg_1270ca_nil_1213pm13_MortAymC               0.181084547 -0.135849209
## bourg_1325pm25_Sud.Est_1190pm10_FloovG           0.333955174 -0.189370472
## Est_1300ca_pic_1213pm13FlorenceW                 0.154165886 -0.182222316
## frc_1262pm13_nil_1150pm16_CourLouisLe            0.163361490 -0.061951169
## frc_1263pm13_nil_1150pm17CharroiSch_A1.          0.241831664 -0.132303387
## frc_1263pm13_nil_1150pm17CharroiSch_A2.          0.243530244 -0.109813574
## frc_1283pm17_nil_1150pm17CharroiSch_A4.          0.275778571 -0.117480171
## lorr_1290pm10_picmérid_1225caOrsonP              0.450154207 -0.002051281
## lorr_1325pm25_nil_1150pm17CharroiSch_A3.         0.245895134 -0.114880572
## lorr_1387pm13_pic_1365ca_BaudSebC                0.336042051  0.178384976
## lorrmérid_1275pm25_nil_1150pm20CharroiSch_D.     0.414494174 -0.182833306
## lorrsept_1275pm25_nil_1200ca_AmAmD               0.239391515 -0.001920170
## Meuse_1262pm13_lorr_1200caPriseCordD             0.429828289 -0.159017219
## nil_1250pm50_nil_1150pm17CharroiSch_fragm.       0.567486577 -0.448378244
## nil_1250pm50_nil_1230ca_GuiBourgG                0.259275707 -0.018138285
## nil_1300ca_norm_1200caAyeB                       0.012295607  0.127302749
## nil_1450pm10_pic_1300ca_EnfGarB.                 0.505854066  0.375177155
## Nord_1275pm25_Nord.Est_1190pm10RCambr2M          0.351826350  0.037832872
## Nord.Est_1262pm13_Nord.Est_1190pm10_PriseOrabR1  0.169026131 -0.085947909
## Paris_1290pm10_flandr_1275_AdenBuevH             0.267719062  0.141956131
## Paris_1335ca_nil_1150pm17CharroiSch_B1.          0.290015337 -0.029222630
## Paris_1335ca_nil_1150pm17CharroiSch_B2.          0.277154316  0.005534127
## pic_1213pm13_pic_1190pm10_AliscW                 0.158642898  0.001597015
## pic_1213pm13_pic_1190pm10_MonRaincB              0.196106268  0.049105058
## pic_1213pm13_picmérid_1150pm16_MonGuill1C1       0.223333378  0.085734106
## pic_1225pm25_Nord.Est_1190pm10RCambr1M           0.343512131 -0.022699646
## pic_1275pm25_pic_1160caAiol1N                    0.186744109  0.032048993
## pic_1275pm25_pic_1190pm10ElieB.                  0.219088689  0.032931241
## pic_1275pm25_pic_1210pm10_Aiol2N                 0.189949632  0.021175611
## pic_1290pm10_pic_1275pm25FlorOctOctV             0.388957773  0.139304754
## StBrieuc_1317_nil_1190ca_Fier.V                  0.169470221 -0.595434176
## StBrieuc_1317_nil_nil_Otin_A                     0.137349034 -0.056541479
## 
## $stress
## [1] 10.48698
# Coordinates of the texts on the two fitted dimensions
x <- fit$points[, 1]
y <- fit$points[, 2]
# Empty frame (type = "n"): only the text labels are drawn.
# The y axis shows the second coordinate; the label used to repeat
# "Coordonnée 1" for both axes.
plot(x, y, xlab = "Coordonnée 1", ylab = "Coordonnée 2",
  main = "Non metric PMD", type = "n")
# Label each position with its text name (the column names of the corpus)
text(x, y, labels = row.names(t(monCorpus2)), cex = .7)

CAH (hierarchical agglomerative clustering)

Selection at 600 MFW

# Keep the first 600 rows (600 most frequent words, assuming rows are sorted
# by decreasing frequency -- TODO confirm). The selection used to be 1:1000,
# which duplicated the following "1000 MFW" section instead of matching
# this section's heading.
monCorpusSelect <- monCorpus2[1:600, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts (rows after transposition) with
# Manhattan distances and Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Selection at 1000 MFW

# Keep the first 1000 rows (1000 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus2[1:1000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Selection at 1200 MFW

# Keep the first 1200 rows (1200 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus2[1:1200, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Selection at 2000 MFW

# Keep the first 2000 rows (2000 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus2[1:2000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Selection at 3000 MFW

# Keep the first 3000 rows (3000 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus2[1:3000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Selection at 4000 MFW

# Keep the first 4000 rows (4000 most frequent words, assuming rows are
# sorted by decreasing frequency -- TODO confirm); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus2[1:4000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Removing too short texts

Based on the distortion they seem to cause in the previous CAH, let’s remove the texts with fewer than 2000 words:

[49] “StBrieuc_1317_nil_1190ca_Fier.V”: 1682 words; [9] “agn_1250pm50_nil_1250pm50_MacaireAl2B”: 1179 words; [32] “nil_1250pm50_nil_1150pm17CharroiSch_fragm.”: 879 words; [8] “agn_1250pm16_agn_1180pm10_Asprem.C”: 387 words.

# Drop four texts by column position (columns 8, 9, 32 and 49).
# NOTE(review): positions are hard-coded; they only stay valid while the
# column order of monCorpus2 is unchanged.
monCorpus3 <- monCorpus2[, -c(8, 9, 32, 49)]
# NOTE(review): nomCorpus is not defined in the visible part of this file -- confirm.
boxplot(
  colSums(monCorpus3),
  main = "Number of words per text",
  ylab = "Number of words",
  sub = nomCorpus
)

# Re-sort the rows (words) by decreasing total frequency in the reduced corpus
monCorpus3 <- monCorpus3[order(rowSums(monCorpus3), decreasing = TRUE), ]

CAH Selection at 1000 MFW

# Keep the 1000 most frequent words of the reduced corpus (its rows were
# re-sorted by decreasing frequency just above); drop = FALSE keeps a
# matrix even with a single text.
monCorpusSelect <- monCorpus3[1:1000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Description of classes

It is then possible to separate the witnesses into different classes and compute their specificities. The number of classes can be chosen with the help of a height plot.

# Convert the agnes result to an hclust object and draw its merge heights
# as vertical bars, to help choose the number of classes
CAH2 <- as.hclust(CAH)
plot(CAH2$height, type = "h", ylab = "hauteurs")

We can then describe the classes

# Cut the tree into three classes (k is given as a number, not a string)
classes <- cutree(CAH, k = 3)
# Adding classes to the table: one row per text, one column per word,
# plus a final factor column holding the class of each text
monCorpusAvecClasses <- t(monCorpusSelect)
monCorpusAvecClasses <- cbind(as.data.frame(monCorpusAvecClasses), as.factor(classes))
# Rename the last column. The index must go on the names vector:
# colnames(df[n]) <- "x" renames a temporary one-column copy and leaves the
# column of df named "as.factor(classes)".
colnames(monCorpusAvecClasses)[ncol(monCorpusAvecClasses)] <- "Classes"
# And describing: the class column (the last one) is the variable to explain
library(FactoMineR)
mesClasses <- catdes(monCorpusAvecClasses, num.var = ncol(monCorpusAvecClasses))
# Looking in more detail at some descriptions: first ten rows for class 1
head(mesClasses$quanti$`1`, n = 10)
##        v.test Mean in category Overall mean sd in category  Overall sd
## pur  6.150242      0.007304998 0.0017474436    0.002774797 0.003398287
## sunt 6.030072      0.006376563 0.0016132262    0.002586779 0.002970690
## ad   5.911688      0.013054791 0.0031299427    0.006101124 0.006313656
## mei  5.829337      0.002082077 0.0004978879    0.001034190 0.001022013
## sur  5.794786      0.004868630 0.0012296727    0.002363936 0.002361610
## tut  5.755748      0.004923835 0.0012118677    0.002532150 0.002425330
## lur  5.746153      0.004366352 0.0010441277    0.002294333 0.002174305
## al   5.622060      0.007859435 0.0022036718    0.003694655 0.003783244
## e    5.602756      0.039127887 0.0108415916    0.014023607 0.018986414
## sun  5.560295      0.007618652 0.0018262512    0.004481422 0.003917683
##           p.value
## pur  7.736496e-10
## sunt 1.638864e-09
## ad   3.386185e-09
## mei  5.564801e-09
## sur  6.840829e-09
## tut  8.625893e-09
## lur  9.129686e-09
## al   1.886935e-08
## e    2.109702e-08
## sun  2.693184e-08
tail(mesClasses$quanti$`1`, n=10)
##          v.test Mean in category Overall mean sd in category   Overall sd
## droit -4.873678     6.648641e-05 0.0009769109   0.0002102485 0.0007025151
## uoit  -4.896590     3.324321e-05 0.0012831772   0.0001051243 0.0009599793
## dont  -4.969360     1.181674e-04 0.0020457297   0.0001951748 0.0014587356
## au    -4.973441     8.223470e-04 0.0045650893   0.0021521208 0.0028300984
## non   -4.998135     5.452789e-05 0.0009564187   0.0001406521 0.0006786010
## a     -5.011458     2.695149e-02 0.0361933395   0.0058019338 0.0069352730
## moi   -5.061471     2.776890e-04 0.0019990380   0.0004756086 0.0012789710
## mon   -5.147234     3.433947e-04 0.0025590944   0.0007120460 0.0016188461
## et    -5.161155     1.012365e-02 0.0391548996   0.0212841472 0.0211537543
## sont  -5.220152     3.213510e-04 0.0031190234   0.0010162011 0.0020154977
##            p.value
## droit 1.095392e-06
## uoit  9.751378e-07
## dont  6.717436e-07
## au    6.577480e-07
## non   5.788735e-07
## a     5.401913e-07
## moi   4.160331e-07
## mon   2.643553e-07
## et    2.454308e-07
## sont  1.787765e-07
head(mesClasses$quanti$`2`, n=10)
##             v.test Mean in category Overall mean sd in category
## toz       4.474021     0.0017023915 0.0008434564   0.0012403362
## tot       4.382810     0.0033660192 0.0019674502   0.0017741771
## rois      4.310599     0.0044414237 0.0028822382   0.0017626344
## cheualier 4.261726     0.0024184910 0.0015919849   0.0007987976
## roi       4.255214     0.0028503275 0.0018160613   0.0012184779
## uos       4.178890     0.0122224852 0.0079625916   0.0053207325
## desus     4.164201     0.0005132772 0.0002884785   0.0003352833
## moi       4.144333     0.0028611614 0.0019990380   0.0008756075
## auoir     3.993397     0.0014021988 0.0009233315   0.0006385095
## auferrant 3.969725     0.0002854385 0.0001533879   0.0002217018
##             Overall sd      p.value
## toz       0.0011803430 7.676223e-06
## tot       0.0019619011 1.171581e-05
## rois      0.0022238529 1.628129e-05
## cheualier 0.0011923573 2.028539e-05
## roi       0.0014943654 2.088489e-05
## uos       0.0062673464 2.929352e-05
## desus     0.0003319006 3.124449e-05
## moi       0.0012789710 3.408040e-05
## auoir     0.0007372564 6.513332e-05
## auferrant 0.0002045153 7.195560e-05
tail(mesClasses$quanti$`2`, n=10)
##             v.test Mean in category Overall mean sd in category
## sur      -3.053893     5.662408e-05 0.0012296727   2.099547e-04
## nel      -3.059150     3.204821e-04 0.0008494206   5.990498e-04
## del      -3.077019     1.659918e-03 0.0027140111   1.587501e-03
## u        -3.091304     2.637333e-04 0.0012274229   4.139493e-04
## pur      -3.161469     0.000000e+00 0.0017474436   0.000000e+00
## ensamble -3.248262     2.075725e-05 0.0001591343   5.277235e-05
## mais     -3.477778     1.240853e-03 0.0026230608   1.948339e-03
## al       -3.495605     5.266973e-05 0.0022036718   1.028980e-04
## faire    -3.581903     3.022582e-04 0.0005825763   3.780505e-04
## fait     -3.880163     1.325204e-03 0.0027574043   1.621045e-03
##            Overall sd      p.value
## sur      0.0023616097 0.0022589279
## nel      0.0010630417 0.0022196617
## del      0.0021061754 0.0020908180
## u        0.0019166433 0.0019927940
## pur      0.0033982874 0.0015697542
## ensamble 0.0002619142 0.0011611237
## mais     0.0024435279 0.0005055891
## al       0.0037832437 0.0004729875
## faire    0.0004811530 0.0003411006
## fait     0.0022693406 0.0001043866
head(mesClasses$quanti$`3`, n=10)
##            v.test Mean in category Overall mean sd in category
## ains     5.408400     0.0015253768 5.612932e-04   0.0006529045
## sains    5.082391     0.0004500108 1.809083e-04   0.0002331815
## tous     4.955749     0.0018226451 6.847528e-04   0.0009751956
## dedens   4.906695     0.0009463731 3.917381e-04   0.0003556854
## sans     4.779205     0.0015055952 6.757197e-04   0.0007416895
## trestous 4.653482     0.0002954394 1.152374e-04   0.0001866660
## tout     4.650212     0.0038803997 1.672939e-03   0.0016585782
## toutes   4.603120     0.0003199332 1.311994e-04   0.0001806836
## dolans   4.536534     0.0002494829 8.494561e-05   0.0002004730
## maistre  4.533244     0.0004088393 1.725775e-04   0.0001854301
##            Overall sd      p.value
## ains     0.0007909356 6.359024e-08
## sains    0.0002349336 3.727140e-07
## tous     0.0010187958 7.205207e-07
## dedens   0.0005015491 9.262377e-07
## sans     0.0007704645 1.759900e-06
## trestous 0.0001718212 3.263754e-06
## tout     0.0021062773 3.315937e-06
## toutes   0.0001819252 4.162078e-06
## dolans   0.0001609294 5.718618e-06
## maistre  0.0002312490 5.808472e-06
tail(mesClasses$quanti$`3`, n=10)
##          v.test Mean in category Overall mean sd in category   Overall sd
## escuz -3.146018     1.119065e-05 0.0002518119   3.211569e-05 0.0003393658
## fet   -3.159369     5.709486e-05 0.0014979408   1.398930e-04 0.0020235434
## tens  -3.183511     9.193638e-06 0.0002409550   2.556520e-05 0.0003230205
## anz   -3.194625     1.059614e-05 0.0002285938   3.820492e-05 0.0003027801
## auez  -3.230122     4.120227e-04 0.0011750134   6.159440e-04 0.0010480829
## piez  -3.305240     1.010120e-05 0.0003784395   2.108560e-05 0.0004944692
## sanz  -3.354954     1.376751e-06 0.0005358283   4.963945e-06 0.0007068335
## bone  -3.688227     1.798580e-04 0.0005903337   2.481718e-04 0.0004938160
## mes   -3.740337     2.230568e-03 0.0043820954   7.226719e-04 0.0025522979
## granz -3.747034     6.883753e-06 0.0007540766   2.481972e-05 0.0008847902
##            p.value
## escuz 0.0016550987
## fet   0.0015811133
## tens  0.0014550083
## anz   0.0014001265
## auez  0.0012373760
## piez  0.0009489526
## sanz  0.0007937827
## bone  0.0002258223
## mes   0.0001837736
## granz 0.0001789378
plot(mesClasses)

CAH Selection at 2000 MFW

# Keep the first 2000 rows of the reduced corpus (2000 most frequent words,
# assuming monCorpus3 is sorted by decreasing frequency -- TODO confirm);
# drop = FALSE keeps a matrix even with a single text.
monCorpusSelect <- monCorpus3[1:2000, , drop = FALSE]
# Relative frequencies: divide every column (text) by its own total.
# Vectorised replacement for the former 1:ncol() loop.
monCorpusSelect <- sweep(monCorpusSelect, 2, colSums(monCorpusSelect), "/")
library(cluster)
# Agglomerative clustering of the texts with Manhattan distances and
# Ward's method
CAH <- agnes(t(monCorpusSelect), metric = "manhattan", method = "ward")
# which.plots = 2 selects the dendrogram (see cluster::plot.agnes)
plot(CAH, which.plots = 2, main = "CAH", xlab = paste(nrow(monCorpusSelect), " MFW -- Manhattan dist."))

Description of classes

It is then possible to separate the witnesses into different classes and compute their specificities. The number of classes can be chosen with the help of a height plot.

# Convert the agnes result to an hclust object and draw its merge heights
# as vertical bars, to help choose the number of classes
CAH2 <- as.hclust(CAH)
plot(CAH2$height, type = "h", ylab = "hauteurs")

We can then describe the classes

# Cut the tree into three classes (k is given as a number, not a string)
classes <- cutree(CAH, k = 3)
# Adding classes to the table: one row per text, one column per word,
# plus a final factor column holding the class of each text
monCorpusAvecClasses <- t(monCorpusSelect)
monCorpusAvecClasses <- cbind(as.data.frame(monCorpusAvecClasses), as.factor(classes))
# Rename the last column. The index must go on the names vector:
# colnames(df[n]) <- "x" renames a temporary one-column copy and leaves the
# column of df named "as.factor(classes)".
colnames(monCorpusAvecClasses)[ncol(monCorpusAvecClasses)] <- "Classes"
# And describing: the class column (the last one) is the variable to explain
library(FactoMineR)
mesClasses <- catdes(monCorpusAvecClasses, num.var = ncol(monCorpusAvecClasses))
# Looking in more detail at some descriptions: first 25 rows for class 1,
# rounded to four decimals
head(round(mesClasses$quanti$`1`, digits = 4), n = 25)
##          v.test Mean in category Overall mean sd in category Overall sd
## pur      6.1376           0.0067       0.0016         0.0026     0.0031
## sunt     6.0154           0.0058       0.0015         0.0024     0.0027
## ad       5.9070           0.0120       0.0029         0.0056     0.0058
## mei      5.8200           0.0019       0.0005         0.0010     0.0009
## sur      5.7982           0.0044       0.0011         0.0021     0.0022
## lur      5.7501           0.0040       0.0010         0.0021     0.0020
## tut      5.7371           0.0045       0.0011         0.0023     0.0022
## al       5.6221           0.0072       0.0020         0.0034     0.0035
## e        5.6007           0.0357       0.0099         0.0127     0.0173
## sun      5.5464           0.0070       0.0017         0.0041     0.0036
## seit     5.4748           0.0020       0.0005         0.0012     0.0010
## dunt     5.4731           0.0018       0.0004         0.0011     0.0010
## od       5.4573           0.0033       0.0008         0.0019     0.0017
## mun      5.3219           0.0018       0.0004         0.0012     0.0010
## si       5.3008           0.0186       0.0135         0.0030     0.0037
## funt     5.2749           0.0008       0.0002         0.0006     0.0004
## reis     5.1917           0.0046       0.0011         0.0033     0.0025
## seignurs 5.1741           0.0009       0.0002         0.0006     0.0005
## rei      5.1565           0.0038       0.0009         0.0027     0.0021
## uus      5.0396           0.0066       0.0016         0.0050     0.0037
## unt      4.9906           0.0023       0.0006         0.0018     0.0013
## co       4.9686           0.0036       0.0009         0.0029     0.0021
## nun      4.9614           0.0008       0.0002         0.0006     0.0005
## u        4.9366           0.0034       0.0011         0.0020     0.0018
## io       4.9246           0.0038       0.0009         0.0030     0.0022
##          p.value
## pur            0
## sunt           0
## ad             0
## mei            0
## sur            0
## lur            0
## tut            0
## al             0
## e              0
## sun            0
## seit           0
## dunt           0
## od             0
## mun            0
## si             0
## funt           0
## reis           0
## seignurs       0
## rei            0
## uus            0
## unt            0
## co             0
## nun            0
## u              0
## io             0
#Last 10 rows of the quantitative description of class 1, rounded to 4 digits
tail(round(mesClasses[["quanti"]][["1"]], digits = 4), n = 10)
##        v.test Mean in category Overall mean sd in category Overall sd
## droit -4.8741           0.0001       0.0009         0.0002     0.0006
## uoit  -4.8998           0.0000       0.0012         0.0001     0.0009
## dont  -4.9669           0.0001       0.0019         0.0002     0.0013
## au    -4.9835           0.0008       0.0042         0.0020     0.0026
## non   -5.0134           0.0000       0.0009         0.0001     0.0006
## moi   -5.0723           0.0003       0.0018         0.0004     0.0012
## a     -5.0781           0.0246       0.0332         0.0050     0.0064
## mon   -5.1568           0.0003       0.0023         0.0006     0.0015
## et    -5.1677           0.0093       0.0359         0.0195     0.0194
## sont  -5.2137           0.0003       0.0029         0.0009     0.0019
##       p.value
## droit       0
## uoit        0
## dont        0
## au          0
## non         0
## moi         0
## a           0
## mon         0
## et          0
## sont        0
#First 10 rows of the quantitative description of class 2 (not rounded)
head(mesClasses[["quanti"]][["2"]], n = 10)
##             v.test Mean in category Overall mean sd in category
## toz       4.478658     0.0015561647 0.0007710472   0.0011309974
## tot       4.375692     0.0030835690 0.0018029717   0.0016315537
## cuit      4.372575     0.0002490371 0.0001278660   0.0001834762
## rois      4.291429     0.0040649230 0.0026419057   0.0016192577
## cheualier 4.238178     0.0022120757 0.0014589207   0.0007325382
## roi       4.233783     0.0026088945 0.0016647435   0.0011215995
## uos       4.169151     0.0111887216 0.0072947110   0.0048826947
## desus     4.144129     0.0004691662 0.0002643345   0.0003066657
## moi       4.122455     0.0026166624 0.0018318164   0.0007996889
## auoir     3.965095     0.0012840977 0.0008470702   0.0005895703
##             Overall sd      p.value
## toz       0.0010777864 7.511382e-06
## tot       0.0017993337 1.210478e-05
## cuit      0.0001703758 1.227895e-05
## rois      0.0020387040 1.775273e-05
## cheualier 0.0010925746 2.253412e-05
## roi       0.0013710670 2.297922e-05
## uos       0.0057424260 3.057365e-05
## desus     0.0003038854 3.411071e-05
## moi       0.0011705082 3.748557e-05
## auoir     0.0006776432 7.336679e-05
#Last 10 rows of the quantitative description of class 2 (not rounded)
tail(mesClasses[["quanti"]][["2"]], n = 10)
##             v.test Mean in category Overall mean sd in category
## nel      -3.071761     2.921670e-04 7.762399e-04   5.459934e-04
## u        -3.086083     2.408151e-04 1.125502e-03   3.778036e-04
## del      -3.094420     1.515588e-03 2.484454e-03   1.446934e-03
## pur      -3.154992     0.000000e+00 1.599425e-03   0.000000e+00
## ensamble -3.249854     1.912161e-05 1.463373e-04   4.869693e-05
## mais     -3.481179     1.136516e-03 2.409430e-03   1.780200e-03
## al       -3.499477     4.820102e-05 2.016114e-03   9.416327e-05
## faire    -3.585149     2.769231e-04 5.351283e-04   3.459125e-04
## fuissent -3.608004     2.100995e-06 5.431677e-05   9.395934e-06
## fait     -3.890525     1.216125e-03 2.525689e-03   1.495133e-03
##            Overall sd      p.value
## nel      9.688784e-04 0.0021280009
## u        1.762494e-03 0.0020281193
## del      1.924999e-03 0.0019719842
## pur      3.116819e-03 0.0016050234
## ensamble 2.406704e-04 0.0011546411
## mais     2.248115e-03 0.0004992124
## al       3.457391e-03 0.0004661715
## faire    4.427961e-04 0.0003368857
## fuissent 8.897759e-05 0.0003085616
## fait     2.069495e-03 0.0001000275
#First 40 rows of the quantitative description of class 3 (not rounded)
head(mesClasses[["quanti"]][["3"]], n = 40)
##             v.test Mean in category Overall mean sd in category
## ains      5.409832     1.406578e-03 5.172708e-04   6.031454e-04
## sains     5.089362     4.139562e-04 1.663530e-04   2.136303e-04
## passes    5.028743     1.565728e-04 5.026232e-05   1.080122e-04
## tous      4.968268     1.680063e-03 6.301646e-04   8.981395e-04
## dedens    4.919486     8.718890e-04 3.603755e-04   3.276282e-04
## laissa    4.858387     1.100498e-04 3.626568e-05   8.078015e-05
## commanda  4.784853     1.045703e-04 3.182573e-05   8.570037e-05
## sans      4.779803     1.387637e-03 6.215808e-04   6.861405e-04
## tout      4.670654     3.574118e-03 1.536767e-03   1.528719e-03
## trestous  4.661928     2.724132e-04 1.060404e-04   1.723743e-04
## toutes    4.613347     2.945913e-04 1.206043e-04   1.664115e-04
## maistre   4.550576     3.766488e-04 1.586368e-04   1.709456e-04
## mais      4.538221     4.708800e-03 2.409430e-03   1.496869e-03
## dolans    4.534993     2.297414e-04 7.820176e-05   1.848393e-04
## toute     4.513328     8.154328e-04 3.837880e-04   3.660893e-04
## sarrasins 4.482699     3.546568e-04 1.170169e-04   2.975713e-04
## ensamble  4.468342     3.887045e-04 1.463373e-04   2.216853e-04
## soies     4.404860     3.501856e-04 1.394567e-04   2.109537e-04
## chou      4.370183     6.520479e-04 2.009220e-04   6.291789e-04
## maris     4.317473     1.186399e-04 4.012836e-05   1.008126e-04
## mieus     4.294786     2.845108e-04 8.966726e-05   2.778141e-04
## doiuent   4.278769     8.463444e-05 3.253332e-05   5.642585e-05
## no        4.266644     4.270914e-04 1.575664e-04   3.657351e-04
## solaus    4.247424     9.440792e-05 3.040047e-05   9.161727e-05
## lieu      4.233871     1.673524e-04 5.344567e-05   1.650636e-04
## lies      4.222888     2.678242e-04 1.043338e-04   1.740069e-04
## haus      4.222000     8.447641e-05 2.737784e-05   8.208264e-05
## entres    4.220293     2.098408e-04 7.636474e-05   1.549809e-04
## cousin    4.200350     1.110112e-04 3.847578e-05   9.720733e-05
## menes     4.192784     1.139253e-04 4.203144e-05   8.525681e-05
## dieu      4.124579     2.674281e-03 1.345647e-03   1.223182e-03
## ainc      4.121052     6.299958e-04 2.343479e-04   5.384821e-04
## faire     4.112571     9.455421e-04 5.351283e-04   2.863789e-04
## desous    4.110031     2.646247e-04 9.598003e-05   2.469042e-04
## cha       4.103084     4.126646e-04 1.280662e-04   4.403300e-04
## biaus     4.096747     8.714449e-04 4.087061e-04   4.963774e-04
## espiel    4.096126     2.880943e-04 8.996775e-05   3.068360e-04
## mors      4.093211     7.086827e-04 3.406439e-04   2.702706e-04
## montes    4.081574     2.929712e-04 1.109115e-04   2.301039e-04
## tenrement 4.076297     6.651975e-05 2.024514e-05   7.251305e-05
##             Overall sd      p.value
## ains      7.293959e-04 6.308394e-08
## sains     2.158680e-04 3.592696e-07
## passes    9.380193e-05 4.937058e-07
## tous      9.376430e-04 6.755358e-07
## dedens    4.613522e-04 8.677165e-07
## laissa    6.738547e-05 1.183459e-06
## commanda  6.745700e-05 1.711125e-06
## sans      7.111248e-04 1.754672e-06
## tout      1.935456e-03 3.002426e-06
## trestous  1.583478e-04 3.132606e-06
## toutes    1.673385e-04 3.962365e-06
## maistre   2.125736e-04 5.349926e-06
## mais      2.248115e-03 5.673074e-06
## dolans    1.482672e-04 5.760531e-06
## toute     4.243507e-04 6.381831e-06
## sarrasins 2.352205e-04 7.370488e-06
## ensamble  2.406704e-04 7.882819e-06
## soies     2.122693e-04 1.058522e-05
## chou      4.580295e-04 1.241425e-05
## maris     8.068621e-05 1.578258e-05
## mieus     2.012982e-04 1.748622e-05
## doiuent   5.402857e-05 1.879298e-05
## no        2.802902e-04 1.984355e-05
## solaus    6.686521e-05 2.162424e-05
## lieu      1.193733e-04 2.297027e-05
## lies      1.717821e-04 2.411913e-05
## haus      6.000706e-05 2.421439e-05
## entres    1.403318e-04 2.439849e-05
## cousin    7.662311e-05 2.665022e-05
## menes     7.608247e-05 2.755520e-05
## dieu      1.429292e-03 3.714136e-05
## ainc      4.259867e-04 3.771465e-05
## faire     4.427961e-04 3.912775e-05
## desous    1.820634e-04 3.956053e-05
## cha       3.077637e-04 4.076786e-05
## biaus     5.011781e-04 4.189956e-05
## espiel    2.146173e-04 4.201219e-05
## mors      3.989557e-04 4.254394e-05
## montes    1.979163e-04 4.473177e-05
## tenrement 5.037003e-05 4.575857e-05
#Last 10 rows of the quantitative description of class 3 (not rounded)
tail(mesClasses[["quanti"]][["3"]], n = 10)
##          v.test Mean in category Overall mean sd in category   Overall sd
## escuz -3.152697     1.032201e-05 0.0002292556   2.958106e-05 0.0003081239
## fet   -3.154568     5.279559e-05 0.0013693657   1.294578e-04 0.0018518226
## tens  -3.181822     8.496953e-06 0.0002204283   2.365666e-05 0.0002955389
## anz   -3.190269     9.782726e-06 0.0002090418   3.527212e-05 0.0002771317
## auez  -3.217091     3.788943e-04 0.0010758081   5.647197e-04 0.0009611938
## piez  -3.317069     9.323632e-06 0.0003448047   1.946336e-05 0.0004487545
## sanz  -3.354431     1.274714e-06 0.0004892993   4.596045e-06 0.0006455326
## bone  -3.687228     1.660404e-04 0.0005397281   2.291591e-04 0.0004496805
## mes   -3.726697     2.054225e-03 0.0040114798   6.648747e-04 0.0023303363
## granz -3.742516     6.373568e-06 0.0006888167   2.298022e-05 0.0008090922
##            p.value
## escuz 0.0016176971
## fet   0.0016073588
## tens  0.0014635196
## anz   0.0014214030
## auez  0.0012949740
## piez  0.0009096720
## sanz  0.0007952850
## bone  0.0002267101
## mes   0.0001940058
## granz 0.0001821867
#Graphical display of the class description held in mesClasses
plot(x = mesClasses)

To describe the Picard group without the artifact of the Northern Lotharingian subgroup, which did not appear in the original results (see Camps, 2016):

Group 4, without [28] “lorrsept_1275pm25_nil_1200ca_AmAmD” [30] “nil_1250pm50_nil_1230ca_GuiBourgG” [33] “Nord_1275pm25_Nord.Est_1190pm10RCambr2M” [41] “pic_1225pm25_Nord.Est_1190pm10RCambr1M”

#Same description, computed without the witnesses at rows 28, 30, 33 and 41
monCorpusAvecClasses2 = monCorpusAvecClasses[-c(28, 30, 33, 41), , drop = FALSE]
#Only rows were removed, so the class column is still the last one
mesClasses = catdes(
  monCorpusAvecClasses2,
  num.var = ncol(monCorpusAvecClasses2)
)
#Looking in more detail at some descriptions
#First 25 rows of the quantitative description of class 3, rounded to 4 digits
head(round(mesClasses[["quanti"]][["3"]], digits = 4), n = 25)
##           v.test Mean in category Overall mean sd in category Overall sd
## ains      5.6322           0.0016       0.0005         0.0003     0.0007
## tous      5.4891           0.0021       0.0006         0.0006     0.0010
## passes    5.2743           0.0002       0.0000         0.0001     0.0001
## chou      5.2216           0.0009       0.0002         0.0006     0.0005
## trestous  5.0875           0.0003       0.0001         0.0001     0.0002
## tout      5.0120           0.0043       0.0015         0.0010     0.0020
## sarrasins 4.9654           0.0004       0.0001         0.0003     0.0002
## sains     4.9536           0.0004       0.0001         0.0002     0.0002
## toutes    4.9496           0.0004       0.0001         0.0001     0.0002
## commanda  4.9074           0.0001       0.0000         0.0001     0.0001
## cha       4.9023           0.0006       0.0001         0.0004     0.0003
## mieus     4.8405           0.0004       0.0001         0.0003     0.0002
## ochis     4.7118           0.0002       0.0000         0.0002     0.0001
## no        4.6579           0.0005       0.0002         0.0004     0.0003
## lieu      4.6264           0.0002       0.0001         0.0002     0.0001
## uausist   4.6239           0.0002       0.0000         0.0001     0.0001
## espiel    4.6180           0.0004       0.0001         0.0003     0.0002
## laissa    4.6063           0.0001       0.0000         0.0001     0.0001
## dolans    4.5675           0.0003       0.0001         0.0002     0.0002
## chi       4.5667           0.0009       0.0003         0.0006     0.0005
## toute     4.5588           0.0009       0.0004         0.0002     0.0004
## cief      4.4868           0.0007       0.0002         0.0005     0.0004
## ainc      4.4662           0.0008       0.0002         0.0005     0.0004
## mais      4.4656           0.0052       0.0023         0.0014     0.0023
## ceual     4.4543           0.0006       0.0002         0.0005     0.0003
##           p.value
## ains            0
## tous            0
## passes          0
## chou            0
## trestous        0
## tout            0
## sarrasins       0
## sains           0
## toutes          0
## commanda        0
## cha             0
## mieus           0
## ochis           0
## no              0
## lieu            0
## uausist         0
## espiel          0
## laissa          0
## dolans          0
## chi             0
## toute           0
## cief            0
## ainc            0
## mais            0
## ceual           0
#Last 10 rows of the quantitative description of class 3, rounded to 4 digits
tail(round(mesClasses[["quanti"]][["3"]], digits = 4), n = 10)
##        v.test Mean in category Overall mean sd in category Overall sd
## donez -2.9366            0e+00       0.0002          0e+00     0.0002
## ainz  -2.9825            0e+00       0.0009          1e-04     0.0010
## ci    -3.0037            4e-04       0.0011          4e-04     0.0009
## tuit  -3.0366            1e-04       0.0006          2e-04     0.0006
## deu   -3.0514            1e-04       0.0016          2e-04     0.0017
## armez -3.0935            0e+00       0.0002          0e+00     0.0002
## auez  -3.2057            2e-04       0.0011          4e-04     0.0010
## granz -3.2662            0e+00       0.0008          0e+00     0.0008
## mes   -3.2776            2e-03       0.0042          8e-04     0.0024
## bone  -3.3023            2e-04       0.0006          2e-04     0.0005
##       p.value
## donez  0.0033
## ainz   0.0029
## ci     0.0027
## tuit   0.0024
## deu    0.0023
## armez  0.0020
## auez   0.0013
## granz  0.0011
## mes    0.0010
## bone   0.0010