{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "# Data and computations for paper \n", "\n", "Lars G. Johnsen\n", "National Library of Norway" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "Updated file `D:\\documents\\GitHub\\newspapers_coll_conc\\collocations.py`" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import dhlab.nbtext as nb\n", "import dhlab.module_update as mu\n", "mu.update('collocations')\n", "from collocations import dist_coll_urn, urn_coll, calculate_midpoint, dist\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def df_jaccard(df, col1, col2, number=100, asc=False):\n", " s1 = df.sort_values(by=col1, ascending=asc)[:number].index\n", " s2 = df.sort_values(by=col2, ascending=asc)[:number].index\n", " \n", " #print(len(set(s1)&set(s2)))\n", " \n", " return jaccard(s1, s2)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "movealpha = lambda x: list(tuple([x[-1]]) + tuple(x[:-1]))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def jaccards(s1, s2):\n", " return len(set(s1)&set(s2)),len(set(s1)|set(s2))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def jaccard(s1, s2):\n", " return len(set(s1)&set(s2))/len(set(s1)|set(s2))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def make_dcoll_df(dists, norm, tot, exp=0):\n", " \n", " coll = nb.frame(\n", " {\n", " 'freq':dists['freq'].astype(int),\n", " 'score':(dists.freq/dists.freq.sum())**exp*abs(norm/dists['2']), \n", " 'dist':dists['1'],\n", " 'dist_' : dists['2'],\n", " 
'reference':(dists.freq/dists.freq.sum())/tot.tot\n", " }\n", " ).transpose()\n", " coll = coll.fillna(0)\n", " return coll.astype({'freq':'int32'})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reference corpus\n", "\n", "The reference corpus consists of the total word counts from approximately 450 000 books at nb.no." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "tot = nb.frame(nb.totals(50000), 'tot')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tot
.0.058921
,0.051453
og0.025297
i0.021425
det0.012728
\n", "
" ], "text/plain": [ " tot\n", ". 0.058921\n", ", 0.051453\n", "og 0.025297\n", "i 0.021425\n", "det 0.012728" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nb.normalize_corpus_dataframe(tot)\n", "tot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Corpus\n", "\n", "The corpus used for computing collocations is a sample of 800 books of fiction, Dewey Decimal code 813." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "smd = 5\n", "lmd = 10\n", "collword = 'kaffe'" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def filenames(smd = smd, lmd = lmd, collword = collword, corpus_size= 1000, period = (1980, 2000)):\n", " return {\n", " 'small_right': '_'.join([collword, str(smd), str(corpus_size), '-'.join([str(x) for x in period]), str(0), str(int(2*smd)), '.csv']),\n", " 'large_right': '_'.join([collword, str(smd), str(corpus_size), '-'.join([str(x) for x in period]), str(0), str(int(2*lmd)), '.csv']),\n", " 'small_left' : '_'.join([collword, str(lmd), str(corpus_size), '-'.join([str(x) for x in period]), str(int(2*smd)), str(0), '.csv']),\n", " 'large_left' : '_'.join([collword, str(lmd), str(corpus_size), '-'.join([str(x) for x in period]), str(int(2*lmd)), str(0), '.csv'])\n", " }" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Store the data for subsequent use" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'small_right': 'kaffe_5_1000_1980-2000_0_10_.csv',\n", " 'large_right': 'kaffe_5_1000_1980-2000_0_20_.csv',\n", " 'small_left': 'kaffe_10_1000_1980-2000_10_0_.csv',\n", " 'large_left': 'kaffe_10_1000_1980-2000_20_0_.csv'}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "names = filenames()\n", "names" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": 
[], "source": [ "a1 = pd.read_csv(names['small_right'], index_col=0)\n", "a2 = pd.read_csv(names['large_right'], index_col=0)\n", "b1 = pd.read_csv(names['small_left'], index_col=0)\n", "b2 = pd.read_csv(names['large_left'], index_col=0)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create collocation dataframe \n", "\n", "Based on data from a1 and a2, and b1 and b2. The name for the collocations after is `coll` while `collb` is for the collocates coming before." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "coll = make_dcoll_df(a1, lmd, tot)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "collb = make_dcoll_df(b1, lmd, tot)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sorting \n", "\n", "Sorting on the reference is by column 'nb'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freq score dist dist_ reference
te2373.521132.830732.8431.707
vafler133.424662.708332.92216.32
kaker923.3672.940982.97255.776
termos113.278692.805563.050
kakao633.205133.086623.12441.183
senga133.205132.916673.1215.2036
røkte123.1152633.2158.61
olje173.105593.077273.229.69598
smørbrød263.076923.157413.25147.56
eller2153.012053.313263.321.44512
rundstykker143.0033.166673.33248.224
først182.967363.246033.371.05339
kjelen222.941183.301593.476.456
pr.192.801123.466673.572.3908
mineralvann132.762433.458333.62170.416
kjeks162.754823.504763.6385.0343
bananer332.739733.59633.65299.242
wienerbrød182.680973.628573.730
kake192.64553.688893.7839.1793
prate112.638523.616673.7919.4529
" ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coll[coll.freq > 10].sort_values(by='score', ascending=False)[:20][:20].fillna(0).style.background_gradient()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freq score dist dist_ reference
kakao633.205133.086623.12441.183
drikkes221.602566.277786.24301.237
bananer332.739733.59633.65299.242
koppene192.02844.898814.93262.334
kaker923.3672.940982.97255.776
rundstykker143.0033.166673.33248.224
vafler133.424662.708332.92216.32
konjakk372.427184.085194.12204.488
kannen112.469143.94.05198.144
Kaffen102.227174.3754.49180.986
bomull502.267574.383384.41173.405
mineralvann132.762433.458333.62170.416
koppen411.80185.546395.55150.341
smørbrød263.076923.157413.25147.56
tobakk532.304154.318784.34132.894
sirup161.919395.188895.21128.614
kanna102.380954.055564.2113.938
fløte372.604173.79633.84109.894
sukker1692.202644.533854.54108.422
Viktigste101.980255.05102.048
" ], "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coll.sort_values(by='reference', ascending=False)[:20].fillna(0).style.background_gradient()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freqscoredistdist_reference
,32152.2271714.4936064.491.153050
.39642.0533884.8659044.871.241492
og27532.5510203.9196263.922.008213
eller2153.0120483.3132583.321.445120
i9801.8867925.3044785.300.844082
8471.7889095.5851985.591.629991
ned581.6313216.1439956.131.674834
\n", "
" ], "text/plain": [ " freq score dist dist_ reference\n", ", 3215 2.227171 4.493606 4.49 1.153050\n", ". 3964 2.053388 4.865904 4.87 1.241492\n", "og 2753 2.551020 3.919626 3.92 2.008213\n", "eller 215 3.012048 3.313258 3.32 1.445120\n", "i 980 1.886792 5.304478 5.30 0.844082\n", "på 847 1.788909 5.585198 5.59 1.629991\n", "ned 58 1.631321 6.143995 6.13 1.674834" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coll.loc[[',','.', 'og','eller', 'i', 'på','ned']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# collb" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freq score dist dist_ reference
kopp8507.5188-1.32574-1.331119.41
kanne585.61798-1.70979-1.78581.517
skjenket1633.64964-2.71856-2.74351.469
mineralvann253.40136-2.83333-2.94327.723
rykende374.46429-2.14993-2.24257.746
Viktigste251.66667-6.01587-6255.119
kakao352.6738-3.68642-3.74245.102
serverte434.42478-2.18148-2.26240.712
kopper874.4843-2.18757-2.23220.739
vafler133.20513-2.91667-3.12216.32
drakk3265.68182-1.74805-1.76216.31
kop147.57576-1-1.32212.987
rundstykker122.20264-4.45556-4.54212.763
bananer221.96078-5.07778-5.1199.495
skjenker213.663-2.58889-2.73186.168
krus303.84615-2.4963-2.6185.665
drikker1504.80769-2.05629-2.08168.802
bestilte562.7933-3.5459-3.58164.847
spanderte94.2735-1.94444-2.34160.89
Eksport131.98413-5-5.04152.476
" ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "collb.sort_values(by='reference', ascending=False)[:20].fillna(0).style.background_gradient()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "collb = collb.drop('kop')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freq score dist dist_ reference
kopp8507.5188-1.32574-1.331119.41
slurk496.99301-1.34286-1.43118.642
svart1076.80272-1.42884-1.4732.0186
drakk3265.68182-1.74805-1.76216.31
koke675.64972-1.71567-1.77120.784
kanne585.61798-1.70979-1.78581.517
kokt215.40541-1.66667-1.8536.7442
kokte645.37634-1.80331-1.86123.787
sterk855.37634-1.81906-1.8615.8174
drikke2154.90196-2.02677-2.04102.011
drikker1504.80769-2.05629-2.08168.802
drukket714.71698-2.0707-2.12121.499
kopper874.4843-2.18757-2.23220.739
rykende374.46429-2.14993-2.24257.746
serverte434.42478-2.18148-2.26240.712
lage724.2735-2.29248-2.3416.3779
ny423.92157-2.4803-2.552.8695
krus303.84615-2.4963-2.6185.665
varm913.83142-2.58141-2.6139.3232
skjenker213.663-2.58889-2.73186.168
skjenket1633.64964-2.71856-2.74351.469
kilo243.59712-2.66667-2.7831.99
koker253.58423-2.67857-2.7993.5027
mer1683.50877-2.8337-2.853.67989
mineralvann253.40136-2.83333-2.94327.723
lager343.38983-2.86941-2.9515.7347
usøtet213.20513-3-3.120
hente433.10559-3.16389-3.2217.8665
helte353.10559-3.15224-3.22118.335
laget763.09598-3.19971-3.2313.7915
kjøpte233.08642-3.13333-3.248.9624
te1203.08642-3.22429-3.2416.0542
ha2523.003-3.31855-3.334.21966
mye542.98507-3.30804-3.352.57749
servert382.89855-3.39676-3.4597.8189
litt1802.86533-3.47969-3.496.84285
bomull222.85714-3.4-3.576.2981
eksportvarer252.84091-3.43452-3.520
bestilte562.7933-3.5459-3.58164.847
god532.78552-3.55011-3.593.14228
" ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "collb[collb.freq > 20].sort_values(by='score', ascending=False)[:40].fillna(0).style.background_gradient()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freqscoredistdist_reference
kakao352.673797-3.686420-3.74245.101930
kaffe1571.497006-6.683458-6.6887.791997
te1203.086420-3.224290-3.2416.054192
sjokolade192.267574-4.351852-4.4154.396609
buljong11.818182-6.000000-5.509.346958
\n", "
" ], "text/plain": [ " freq score dist dist_ reference\n", "kakao 35 2.673797 -3.686420 -3.74 245.101930\n", "kaffe 157 1.497006 -6.683458 -6.68 87.791997\n", "te 120 3.086420 -3.224290 -3.24 16.054192\n", "sjokolade 19 2.267574 -4.351852 -4.41 54.396609\n", "buljong 1 1.818182 -6.000000 -5.50 9.346958" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "collb.loc[[x for x in ['kakao', 'kaffe', 'te', 'sjokolade', 'latte', 'buljong'] if x in collb.index]]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "common = coll.loc[[',','.', 'og','eller', 'i', 'på']]\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "common['translate'] = [',', '.', 'and', 'or', 'in', 'on']\n", "\n", "common = common[movealpha(common.columns)]\n", "\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
translatefreqscoredistdist_reference
,,32152.2271714.4936064.491.153050
..39642.0533884.8659044.871.241492
ogand27532.5510203.9196263.922.008213
elleror2153.0120483.3132583.321.445120
iin9801.8867925.3044785.300.844082
on8471.7889095.5851985.591.629991
\n", "
" ], "text/plain": [ " translate freq score dist dist_ reference\n", ", , 3215 2.227171 4.493606 4.49 1.153050\n", ". . 3964 2.053388 4.865904 4.87 1.241492\n", "og and 2753 2.551020 3.919626 3.92 2.008213\n", "eller or 215 3.012048 3.313258 3.32 1.445120\n", "i in 980 1.886792 5.304478 5.30 0.844082\n", "på on 847 1.788909 5.585198 5.59 1.629991" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "common" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "top10ratio = collb.sort_values(by='score', ascending=False)[:10]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['nytraktet', 'nytrukket', 'kopp', 'Svart', 'nykokt', 'slurk', 'pund',\n", " 'svart', 'Mer', 'nylaget'],\n", " dtype='object')" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top10ratio.index" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "top10ratio['translate'] = ['freshly drawn', 'freshly drawn', 'cup', 'black', 'freshly boiled', 'sip', 'pound',\n", " 'black', 'more', 'freshly made']" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "top10ratio = top10ratio[movealpha(top10ratio.columns)]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
translatefreqscoredistdist_reference
nytraktetfreshly drawn157.692308-1.000000-1.300.000000
nytrukketfreshly drawn147.575758-1.000000-1.320.000000
koppcup8507.518797-1.325735-1.331119.407652
Svartblack127.246377-1.000000-1.3859.552151
nykoktfreshly boiled127.246377-1.000000-1.380.000000
slurksip496.993007-1.342857-1.43118.641838
pundpound136.944444-1.100000-1.4417.839419
svartblack1076.802721-1.428837-1.4732.018644
Mermore96.666667-1.000000-1.506.771161
nylagetfreshly made96.666667-1.000000-1.500.000000
\n", "
" ], "text/plain": [ " translate freq score dist dist_ reference\n", "nytraktet freshly drawn 15 7.692308 -1.000000 -1.30 0.000000\n", "nytrukket freshly drawn 14 7.575758 -1.000000 -1.32 0.000000\n", "kopp cup 850 7.518797 -1.325735 -1.33 1119.407652\n", "Svart black 12 7.246377 -1.000000 -1.38 59.552151\n", "nykokt freshly boiled 12 7.246377 -1.000000 -1.38 0.000000\n", "slurk sip 49 6.993007 -1.342857 -1.43 118.641838\n", "pund pound 13 6.944444 -1.100000 -1.44 17.839419\n", "svart black 107 6.802721 -1.428837 -1.47 32.018644\n", "Mer more 9 6.666667 -1.000000 -1.50 6.771161\n", "nylaget freshly made 9 6.666667 -1.000000 -1.50 0.000000" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top10ratio" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "top10ref = collb.sort_values(by='reference', ascending=False)[:10]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['kopp', 'kanne', 'skjenket', 'mineralvann', 'rykende', 'Viktigste',\n", " 'kakao', 'serverte', 'kopper', 'vafler'],\n", " dtype='object')" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top10ref.index" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "top10ref['translate'] = ['cup', 'jug', 'poured', 'mineral water', 'smoking', 'main',\n", " 'cocoa', 'served', 'cups', 'waffles']" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "top10ref = top10ref[movealpha(top10ref.columns)]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
translatefreqscoredistdist_reference
koppcup8507.518797-1.325735-1.331119.407652
kannejug585.617978-1.709788-1.78581.516609
skjenketpoured1633.649635-2.718562-2.74351.468955
mineralvannmineral water253.401361-2.833333-2.94327.723096
rykendesmoking374.464286-2.149929-2.24257.746331
Viktigstemain251.666667-6.015873-6.00255.118839
kakaococoa352.673797-3.686420-3.74245.101930
serverteserved434.424779-2.181481-2.26240.712230
koppercups874.484305-2.187566-2.23220.738775
vaflerwaffles133.205128-2.916667-3.12216.319529
\n", "
" ], "text/plain": [ " translate freq score dist dist_ reference\n", "kopp cup 850 7.518797 -1.325735 -1.33 1119.407652\n", "kanne jug 58 5.617978 -1.709788 -1.78 581.516609\n", "skjenket poured 163 3.649635 -2.718562 -2.74 351.468955\n", "mineralvann mineral water 25 3.401361 -2.833333 -2.94 327.723096\n", "rykende smoking 37 4.464286 -2.149929 -2.24 257.746331\n", "Viktigste main 25 1.666667 -6.015873 -6.00 255.118839\n", "kakao cocoa 35 2.673797 -3.686420 -3.74 245.101930\n", "serverte served 43 4.424779 -2.181481 -2.26 240.712230\n", "kopper cups 87 4.484305 -2.187566 -2.23 220.738775\n", "vafler waffles 13 3.205128 -2.916667 -3.12 216.319529" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top10ref" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# coll" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# jaccard similarity\n", "\n", "compare over a range" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "rng = range(2,40,2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# for collb" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "jaccard_scoresb = nb.frame({'ratio': {x:df_jaccard(collb[collb.freq > 20], 'reference', 'score', x) for x in rng}\n", " }).transpose()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Jaccard score')" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXxU5fX48c/JRiAkQCCQQMK+JewQwa2KOxYF97pVsYu1Fde2X/XnUou1X1tr22+rbdWK1lqL1BVX6i6IAgHZt4Q9JCEJSzbIfn5/3Bsc4iSZLJOZSc779cqLufuZGzJn7vPcex5RVYwxxpj6wgIdgDHGmOBkCcIYY4xXliCMMcZ4ZQnCGGOMV5YgjDHGeGUJwhhjjFeWIEyHJyLPicivAh1HHRG5WET2ikipiEwKdDzGNMQShOnURGSOiCxt58P+Dpirqt1V9at2PrYxPrMEYUKKiEQEOobG+BjfIGCjH/cfEjrSe+moLEF0ciKSIiKvikiBiBwQkcfd+WEicp+I7BaRfBF5XkR6uMsGi4iKyA1uU8khEblJRE4QkXUicrhuP+76c0TkcxH5s4gUicgWETnLY/kNIrJZREpEZIeI/Mhj2XQRyRaRu0QkD3jWnX+BiKxxj7VMRMZ7bDNJRFa7+3sJiG7gvacCfwNOcpt7Drvze7jvt8B9//eJiNe/FRF5UEReFpEXRKQYmOOeu7tFZLt7TheKSLyIdBGRUiAcWCsi29199BeRV9zj7RSRW1uy/3q/m+tFZI+IFIrIvR77CxeR/+duWyIiq0QkxV02WkTeF5GDIrJVRK5o5P9NvIg8KyI57u//dY9lPxSRLHc/i0Skv8cyFZGbRSQTyPSYd6v7uy8UkUfrzrf7/l/w2L7u/UW403Pc7Urcc3dNQzGbFlBV++mkP7gfVMAfgBicD9JT3WXfA7KAoUB34FXgn+6ywYDifLhGA+cC5cDrQF9gAJAPnO6uPweoBu4AIoHvAEVAvLt8JjAMEOB04Agw2V023d32N0AXoCsw2d3/NPc9XA/scpdHAbs9jnUZUAX8qoFzMAdYWm/e88AbQKz7XrcB329g+wfd/V+E84WrK3A78CWQ7Mb0JPBvj20UGO6+DgNWAQ+4sQ8FdgDntWT/Hr+bp911JwAVQKq7/OfAemCUe74nAL3d3/9e4AYgwj3HhcCYBt7328BLQC/3PNf9rs90t5vsxvZn4LN67/19IB7o6jHvY3feQPd8/8Dj/b/gsX3d+4twYy4GRrnLkhqK135a+BkR6ADsJ4C/fDgJKAAivCz7EPiJx/Qo94MqwuOPdIDH8gPAdzymXwFud1/PAXIA8Vi+AvhuA3G9Dtzmvp4OVALRHsv/CjxUb5utOMnlNC/HWoaPCQIn4VQAaR7zfgR80sD2D3p+ALrzNgNneUwn1Z07d9ozQUwD9tTb/h7g2Zbs3+N3k1zvXF/pcZ5me3kf3wGW1Jv3JPALL+smAbVALy/LngF+6zHd3Y1tsMd7P7PeNgrM8Jj+CfChx/tvLEEcBi7FTTb207Y/1gbYuaUAu1W12suy/jjfxOvsxvmj7Ocxb7/H66Neprt7TO9T9y/cY3/9AUTkfOAXwEicb8ndcL7l1ilQ1XKP6UHA9SJyi8e8KHd/2sCxfNWHr69CPLcf0Mg2e+tNDwJeE5Faj3k1OOdun5d1+9c1b7nCgSUt3H+dPI/XR/j6d5ECbPfyHgYB0+rFEQH808u6KcBBVT3kZVl/YHXdhKqWisgBnPO3q4H3U3/esf8bjVHVMhH5DvAz4BkR+Rz4qapuaWpb4xvrg+jc9gIDxXtnYQ7Oh0adgThNPfu9rOuLASIi9faXIyJdcK42fgf0U9WewDs4zR916pcc3gs8rKo9PX66qeq/gdwGjtWQ+vsuxPnGW/+91/9gb2wfe4Hz68UXrare9rEX2Flv3VhV/XYb7d/b8YY1MP/Tevvsrqo/bmDdeBHp6WXZcf9vRCQ
GpwnLMzZvJaRTPF4PdPcDUIbzhaFOoudGqrpYVc/BuarZgtO0ZtqIJYjObQXOB+ojIhIjItEicoq77N/AHSIyRES6A78GXmrgasMXfYFbRSRSRC4HUnESQRROW3UBUO1eTZzbxL6eBm4SkWniiBGRmSISC3yBk8huFZEIEbkEmNrIvvYDySISBaCqNcBC4GERiRWRQcCdwAuN7KO+v7nbDwIQkQQRmd3AuiuAYnE64bu6nchjReSENtp/fX8HHhKREe65Gy8ivYG3gJEi8l33dxQpzk0HqfV3oKq5wLvAX0Skl7vuae7iF4EbRGSim/x/DSxX1V1NxPVzd18pwG04/RsAa4DTRGSgODdJ3FO3gYj0E5FZbhKqAEpxrqRMG7EE0Ym5H4YXAsOBPUA2Tls0wHyc5oXPgJ04ndC3eNmNr5YDI3C+oT8MXKaqB1S1BLgV50P5EHA1sKiJuDOAHwKPu9tk4fQloKqVwCXu9CH3/bzayO4+wrnlNE9ECt15t+B8c90BLMX50JvfjPf6f+57+K+IlOB0KE9r4L3U/Q4m4pznQpwP8R5tsX8vfo9zrv+L08H7DE77fQlOYr4S59t7Hl/fGODNd3GutLbg3DBwu/t+PgTux7kqzMW5WrnSh7jewOmsX4PTAf6Mu7/3cZLFOnf5Wx7bhAE/deM9iNMH9RMfjmV8JMc31RrT9kRkDs5dKacGOhYTfEREgRGqmhXoWMzx7ArCGGOMV5YgjDHGeGVNTMYYY7yyKwhjjDFedZgH5fr06aODBw8OdBjGGBNSVq1aVaiqCd6WdZgEMXjwYDIyMgIdhjHGhBQRabDSgDUxGWOM8coShDHGGK8sQRhjjPHKEoQxxhivLEEYY4zxyhKEMcYYryxBGGOM8coShDEhouhIFc9+vpPcoqOBDsV0Eh3mQTljOrKiI1Vc+8xy1u8r4n/f2cLl6cn8ePowknt1a3pjY1rIEoQxQa64vIrr5i9nS14xv710PGuyD7MwYy8vrdzLJZMHcPMZwxnUOybQYZoOqMNUc01PT1crtWE6mpLyKq6bv4L12UX89dopnJPWD4Ccw0d58tPt/HvlXmpqldkT+vOTM4YzvG/3AEdsQo2IrFLVdK/LLEEYE5xKK6q5fv4K1u49zONXT2bG2MRvrJNfXM5Tn+3gheW7qaiuZea4JG45cwSjEmMDELEJRZYgjAkxRyqrmTN/Jav2HOLPV03i2+OSGl2/sLSCvy/ZyT+/2EVZZQ0zxiQy98zhjB3Q2NDWxliCMCakHK2s4YbnVrBi50H+78pJXDihv8/bHiqr5NnPd/Lssl2UlFdz1ui+3HLWCCam9PRjxCaUNZYg/Hqbq4jMEJGtIpIlInd7WT5HRApEZI378wOPZdeLSKb7c70/4zQmWBytrOH7/1jJip0H+cN3JjYrOQD0ioniznNHsfSuM7nznJFk7D7ERU98znXzV5Cx66CfojYdld+uIEQkHNgGnANkAyuBq1R1k8c6c4B0VZ1bb9t4IANIBxRYBUxR1UMNHc+uIEyoK6+q4YfPZ7A0q5DfXTaBS6ckt3qfpRXV/POL3Ty9ZAcHyyo5eVhvbjlzBCcOjUdE2iBqE+oCdQUxFchS1R2qWgksAGb7uO15wPuqetBNCu8DM/wUpzEBV1Fdw4/+uYqlWYX85tLxbZIcALp3ieDH04ex9K4zuG9mKpn5pVz19Jdc8eQXfLwln9rajtHEbPzDnwliALDXYzrbnVffpSKyTkReFpGU5mwrIjeKSIaIZBQUFLRV3Ma0q4rqGn78wmo+3VbA/148jivSU5reqJm6RUXwg28NZcn/nMGDF6ax9+BRbnhuJac9+jFPfJxFQUlFmx/ThD5/Jghv16/1v668CQxW1fHAB8A/mrEtqvqUqqaranpCgtchVY0JapXVtcx98Ss+2pLPwxeP5cqpA/16vOjIcOacMoTP/ucMHr96Eim9uvHo4q2c/MiH3PziapZtL6Sj3LhiWs+fT1JnA55
fhZKBHM8VVPWAx+TTwG88tp1eb9tP2jxCYwKoqqaWW/69mvc37Wfe7DFcM21Qux07KiKMC8b354Lx/cnKL+XfK/bw8qps3l6Xy9A+MVw9bSCXTk6mV0xUu8Vkgo8/O6kjcDqpzwL24XRSX62qGz3WSVLVXPf1xcBdqnqi20m9Cpjsrroap5O6wdswrJPahJLqmlpuXfAV76zP44EL0vjeqUMCHRLlVTW8vS6Xfy3fzeo9h50kMi6Ja04cyOSBvaxTu4NqrJPab1cQqlotInOBxUA4MF9VN4rIPCBDVRcBt4rILKAaOAjMcbc9KCIP4SQVgHmNJQdjQkl1TS13LFzLO+vzuG9malAkB3Cany6dksylU5LZnFvMi8v38NpX+3j1q32MTozlmmkDmT1pAHHRkYEO1bQTe1DOmHZUU6v8dOEaXl+Twz3nj+ZHpw8LdEiNKquoZtHaHF74cjcbc4rpGhnO7In9uWbaIMYl21PaHYE9SW1MEKipVX7+8lpeXb2Pn583ipvPGB7okHymqqzLLuJfy3ezaG0O5VW1jE/uwTXTBnLhhP50i7LC0KHKEoQxAVZbq9z1yjr+syqbO88Zya1njQh0SC1WdLSK17/ax7+W72bb/lJiu0Tw+DWTOX2k3UkYigJWasMY4ySH//faev6zKptbzxoR0skBoEfXSK4/eTCLbz+N/9x0Ekk9o7nzpTX2LEUHZAnCGD/71dubWbByLzefMYw7zg7t5OBJRDhhcDyPXz2Zkopq/ufltfYMRQdjCcIYP9pfXM5zy3Zy1dQUfnbuqA55q+jIfrHcc/5oPt5awAtf7g50OKYNWYIwxo/eWLOPWoUffmtoh0wOdeacPJjTRibwq7c3k5VfEuhwTBuxBGGMH726eh8TUnoyNKFjDwUqIvzusvHEdIngtgVrqKyuDXRIpg1YgjDGTzbnFrMlr4RLJnmrUdnx9I2L5pFLxrExp5jH3t8a6HBMG7AEYYyfvPbVPiLCpNmD/oSyc8ckctXUgTz12Q6+2H6g6Q1MULMEYYwf1NQqb6zZx/RRCcR3soJ391+QypDeMdy5cA1FR6oCHY5pBUsQxvjBsu2F7C+u4OJJbTPwTyjpFhXBH6+cSEFJBfe+vt5ufQ1hliCM8YPXVu8jNjqCs1L7BjqUgBif3JM7zhnJW+tyee2rfYEOx7SQJQhj2lhZRTXvbsjjgvFJREeGBzqcgLnp9GFMHRzPA29sZO/BI4EOx7SAJQhj2tjijXkcrarplM1LnsLDhN9/ZwIC3PHSGqpr7NbXYNPUmOSWIIxpY699tY/kXl1JH9Qr0KEEXHKvbjx00Vgydh/ir59sD3Q4xsOWvGKuePKLRtexBGFMG9pfXM7nWYVcPGkAYWEd98np5rho0gBmTejPHz/MZM3ew4EOp9Mrq6jm4bc3MfNPS9leUNroupYgjGlDdaU1Lu4kD8f56qGLxpIYF83tC76irKI60OF0SqrKu+tzOfv3n/L0kp1ckZ7MRz+d3ug2liCMaUOdpbRGc/XoGsljV0xg98EjPPTWpkCH0+nsPlDGDc+t5Mf/Wk3PblG88uOT+d9LxtOriWd0/JogRGSGiGwVkSwRubuR9S4TERWRdHd6sIgcFZE17s/f/BmnMW2hs5XWaK4Th/bmptOHsWDlXt7bkBfocDqFiuoa/vRhJuf+4TNW7jzI/Rek8ebcU5jiY/+Y38YJFJFw4AngHCAbWCkii1R1U731YoFbgeX1drFdVSf6Kz5j2lpnLK3RXHecPZIlmQXc8+o6Jg3sSb+46ECH1GEtzSzk/jc2sLOwjJnjk7h/ZhqJPZp3vv15BTEVyFLVHapaCSwAZntZ7yHgt0C5H2Mxxq++Lq3Rt9OV1miOqIgw/vidSRytquFn/1nb5G2Wpvn2F5cz98XVXPvMclSV5783lSeuntzs5AD+TRADgL0e09nuvGNEZBKQoqpvedl+iIh8JSKfisi3vB1ARG4UkQwRySgoKGizwI1prrr
SGpdMtualpgzv2537ZqaxJLOQ55btCnQ4HUZ1TS3zl+7krMc+5b+b9nP72SN47/bTOK0VY4X7rYkJ8HaP37GvCyISBvwBmONlvVxgoKoeEJEpwOsiMkZVi4/bmepTwFMA6enp9lXEBExdaY0zR3fO0hrNdc20gXyyNZ9H3tvCycN7MzoxLtAhhbTVew5x32sb2JRbzGkjE5g3awyD+8S0er/+vILIBlI8ppOBHI/pWGAs8ImI7AJOBBaJSLqqVqjqAQBVXQVsB0b6MVZjWsxKazSfiPDIpeOJi47g9gVrKK+qCXRIIenwkUrueXU9l/51GQfLKvnLNZP5xw0ntElyAP8miJXACBEZIiJRwJXAorqFqlqkqn1UdbCqDga+BGapaoaIJLid3IjIUGAEsMOPsRrTYlZao2X6dO/Co5dNYEteCY8utgGGmkNV+U/GXs587FMWZuzl+6cM4YOfns63xyW16dC2fmtiUtVqEZkLLAbCgfmqulFE5gEZqrqokc1PA+aJSDVQA9ykqgf9FasxrWGlNVrujNF9ue6kQTyzdCfTRyXwrREtby/vLLYXlHLPK+tZsesgkwf25FcXjSOtv3+a6KSj1GpPT0/XjIyMQIdhOpn9xeWc9L8fcvMZw/npuaMCHU5IKq+q4YI/L6WkvIr3bjutyYe3OquaWuWZpTt47L/b6BIRxr0zU7l8SkqrS7qIyCpVTfe2zJ6kNqYVrLRG60VHhvN/V07kYFklc/+9mvc25JFXZHe9e8rKL+HSvy7j1+9s4VsjEvjgztP5zgkD/V7vy593MRnT4b26eh8TrbRGq43p34MHLhzDvDc38nmWM5Z139gujE/uyYTkHoxPcf7t2a1zXV1U19Ty9JKd/OGDbXSLchLprAn927SfoTGWIIxpobrSGvNmjwl0KB3Cd08cxOVTktmUW8y6vYdZl13E2uzDfLB5/7F1BvXu9nXSSO7J2AFxdIvqmB9jW/NK+J+X17I2u4gZYxJ56KKxJMR2adcYOuaZNaYd1JXWuGC8ldZoK9GR4Uwe2IvJA7/u8C8ur2JDdhFrs4tYl32YVbsO8uZa5475MIGR/WIZ7yaMiSk9GZUYS2R46LaeV9XU8uSn2/nTh1l0j47g8asnMbON707ylSUIY1rASmu0n7joSE4e3oeTh/c5Nq+gpIJ12YdZm13E2r2HeX/TfhZmZANOOY+0pDjGDohjdGIcqUmxjEqMo3uX4P+425xbzM9fXsuGfcXMHJ/EvFlj6N29fa8aPAX/GTMmCNWV1vjFhdY5HQgJsV04K7UfZ6X2A5znArIPHWVt9mHW7nUSxxtrcnihfM+xbVLiu5KaGMfopDhSE2MZnRTHwPhuhAfBwE5VNbX85ePtPP5xJnHRkfzlmsl8e1xSoMOyBGFMS1hpjeAiIqTEdyMlvtuxJj9VJaeonM05xWzJK2ZzXglbcov5YPN+6moEdo0MZ2RirJMw3KSRmhhHj26R7Rb7xpwifvafdWzOLWbWhP48OGtM0FyVWoIwppnKKqp5b2Mesyf2t9IaQUxEGNCzKwN6duXstH7H5pdX1ZC5v5TNecXOjQa5Jby3MY8FK7+uLdq/RzSjk+IYnRjLqMRYRvSNZWhCTJv+viura3n84yz+8nEWPbtF8eR3p3DemMQ2239bsARhTDMt3pjHkUorrRGqoiPDGZfcg3HJPY7NU1XySyqO3Zm2JbeYzbklfLatgGr3ckMEknt1ZVhCd4YldGd43+7u65hm9xOszy7i5y+vZUteCRdPGsAvLkwLylt4LUEY00xWWqPjERH6xUXTLy6a6aO+bjasqK5hR0EZ2wtKycovZXtBGdvzS/lyxwHKq2qPrderW+TxSaNvDMMTYhnQq+txfRx1I7z97dMd9I6J4u/XpR93dRNsLEEY0wz7i8v5PKuQuWcM9/tTrCbwukSEk5oUR2rS8bWOamuVfYePHp84Ckp5f9N+FpR93VQVFRHG0D4xDOvbnaF9YnhvQx6
Z+aVcNiWZ+2emtWtfR0tYgjCmGY6V1phszUudWVjY153inlccAIfKKtlR+HXiyMovZcO+It5dn0vf2GienXMCZ4TIzQ2WIIxphrrSGkPaqN6+6Xh6xUQxJSaeKYPij5tfXlVDRJgQEUIP8YVOpMYEWF0Hpg0raloiOjI8pJIDWIIwxmdWWsN0NpYgjPGBldYwnZElCGN8UFdaw5qXTGfi1wQhIjNEZKuIZInI3Y2sd5mIqIike8y7x91uq4ic5884jWmKldYwnZHf7mISkXDgCeAcIBtYKSKLVHVTvfVigVuB5R7z0oArgTFAf+ADERmpqjX+iteYhlhpDdNZ+fMKYiqQpao7VLUSWADM9rLeQ8BvAc8xBmcDC1S1QlV3Alnu/oxpd3WlNS6xZx9MJ+PPBDEA2Osxne3OO0ZEJgEpqvpWc7d1t79RRDJEJKOgoKBtojamnte+2kdKvJXWMJ2PPxOEtzoEemyhSBjwB+Cnzd322AzVp1Q1XVXTExISWhyoMQ2pK61x8cQBARnRy5hA8ueT1NlAisd0MpDjMR0LjAU+cf/wEoFFIjLLh22NaRdWWsN0Zv68glgJjBCRISIShdPpvKhuoaoWqWofVR2sqoOBL4FZqprhrneliHQRkSHACGCFH2M1xisrrWE6M78lCFWtBuYCi4HNwEJV3Sgi89yrhMa23QgsBDYB7wE32x1Mpr1ZaQ3T2fnUxCQiXYGBqrq1OTtX1XeAd+rNe6CBdafXm34YeLg5xzOmLVlpDdPZNXkFISIXAmtwvskjIhNFZFHjWxkT2qy0hjG+NTE9iPMMwmEAVV0DDPZfSMYEnpXWMMa3BFGtqkV+j8SYIPLGmhwrrWE6PV/6IDaIyNVAuIiMwCmLscy/YRkTOJXVtSzemMe5aYlWWsN0ar5cQdyCUxOpAngRKAJu92dQxgTS0qwCSsqruWB8UqBDMSagGr2CcAvu/VJVfw7c2z4hGRNYb63LpUfXSE4Z3ifQoRgTUI1eQbjPHkxpp1iMCbiK6hre37if88b0IyrChksxnZsvfRBfube1/gcoq5upqq/6LSpjAuSzbYWUVFQz0559MManBBEPHADO9JingCUI0+G8vS6Hnt0iOXlY70CHYkzANZkgVPWG9gjEmEArr6rh/U37uXBCfyLDrXnJGF+epE4WkddEJF9E9ovIKyJipS1Nh/PptgLKKmustIYxLl++Jj2LU121P86gPW+684zpUN5al0t8TBQnDo0PdCjGBAVfEkSCqj6rqtXuz3OAjc5jOpSjlTV8uHk/M8YmEmHNS8YAviWIQhG5VkTC3Z9rcTqtjekwPtmaz5HKGi4YZw/HGVPHlwTxPeAKIA/IBS5z5xnTYby1Ppc+3aOYOsSal4yp48tdTHuARgf4MSaUHams5qPN+Vw2Jdmal4zx4MtdTP8QkZ4e071EZL5/wzKm/Xy0JZ+jVTXMtNpLxhzHl69L41X1cN2Eqh4CJvmycxGZISJbRSRLRO72svwmEVkvImtEZKmIpLnzB4vIUXf+GhH5m69vyJjmentdLgmxXThhsDUvGePJlyepw0Skl5sYEJF4X7ZzC/09AZwDZAMrRWSRqm7yWO1FVf2bu/4s4PfADHfZdlWd6PtbMab5yiqq+WhLPleekEJ4mAQ6HGOCii8J4jFgmYi87E5fjm9jRU8FslR1B4CILABmA8cShKoWe6wfg1PCw5h28+GWfCqqa632kjFe+NJJ/byIZODUYhLgknpXAQ0ZAOz1mM4GptVfSURuBu4Eoji+3tMQEfkKKAbuU9UlPhzTmGZ5e10O/eK6kD6oV6BDMSbo+NJJPQynuedxYD1wtmendWObepn3jSsEVX1CVYcBdwH3ubNzgYGqOgknebwoInFeYrtRRDJEJKOgoMCHkIz5Wkl5FR9vLeDb45IIs+YlY77Bl07qV4AaERkO/B0YgjOyXFOygRSP6WQgp5H1FwAXAahqhaoecF+vArYDI+tvoKpPqWq6qqY
nJNjD3aZ5PtycT2V1rY0cZ0wDfEkQtapaDVwC/J+q3gH48he1EhghIkNEJAq4Eqem0zHuGNd1ZgKZ7vwEt5MbERkKjAB2+HBMY3z21rpcknpEMynFmpeM8caXTuoqEbkKuA640J0X2dRGqlotInOBxUA4MF9VN4rIPCBDVRcBc0XkbKAKOARc725+GjBPRKqBGuAmVT3YnDdmTGOKy6v4bFsB1500yJqXjGmALwniBuAm4GFV3SkiQ4AXfNm5qr4DvFNv3gMer29rYLtXcJq2jPGLDzbtp7Km1h6OM6YRvtzFtAm41WN6J/CIP4Myxt/eWpfLgJ5dmZjiy/0WxnROVnjGdDpFR6pYklnAzPFJiFjzkjEN6TAJ4kBpBSXlVYEOw4SA/27Ko6pGmWmlvY1pVIdJEDlF5WzOLQl0GCYEvL0+l5T4roxP7hHoUIwJag32QYjImzRS+kJVg64E+KacIqvnbxp1+EglSzML+cG3hlrzkjFNaKyT+nfuv5cAiXx959JVwC4/xtQi4WFiVxCmSYs35lFdq/ZwnDE+aDBBqOqnACLykKqe5rHoTRH5zO+RNVPXyHA25RY3vaLp1N5al8ug3t0Y0/8blVuMMfX40geR4D7NDID7HETQ1bWIjgxn6/4SqmtqAx2KCVIHyypZtv0AM8fZ3UvG+MKXB+XuAD4RkbpSF4OBG/0WUQt1jQyjsrqWnYVljOgXG+hwTBBavDGPmlrlAivtbYxPGk0QIhKGU257BDDanb1FVSv8HVhzRUeGUwpsyi22BGG8entdLkP7xJCaZP8/jPFFo01MqloLPOZWV13r/gRdcgDoEhlOVHiY9UMYrwpLK1i2vdAejjOmGXzpg/iviFwqQf5XJcDwvt3tTibj1Xsb8qhVrPaSMc3gSx/EnTjDgVaLSDnOZ7GqatDdBpLWP45PttrAQeab3l6Xy7CEGEZZ86MxPmvyCkJVY1U1TFWjVDXOnQ665ACQmhRHYWkFBSVB2QpmAiS/pJzlOw9wwfj+1rxkTDP4cgWBiPTC6aiOrpunqkH3LERd5+Pm3GISYoPuTlwTIIutecmYFvFlTOofAJ/hDPzzS/ffB/0bVsukJTkXNtZRbTy9uS6XkUHtE3cAAB3rSURBVP26M9Kal4xpFl86qW8DTgB2q+oZwCQgKBv6e3aLon+PaDZbgjCu/cXlrNx1kJnj7NkHY5rLlwRRrqrlACLSRVW3AKP8G1bLpSbFWYIwx7y7Phe15iVjWsSXBJEtIj2B14H3ReQNIMeXnYvIDBHZKiJZInK3l+U3ich6EVkjIktFJM1j2T3udltF5Dxf31Ba/zi2F5RRXlXj6yamA3t7fS6jE2MZ3rd7oEMxJuT4chfTxap6WFUfBO4HngEuamo7EQkHngDOB9KAqzwTgOtFVR2nqhOB3wK/d7dNA64ExgAzgL+4+2tSalIcNbVK5v5SX1Y3HVheUTkrdx2yyq3GtJAvndQnikgsHKvw+jFOP0RTpgJZqrpDVSuBBcBszxVU1bMtKIavx5+YDSxwn+DeCWS5+2tSqttRbc1M5u31uQB820aOM6ZFfGli+ivg+XW8zJ3XlAHAXo/pbHfecUTkZhHZjnMFcWszt71RRDJEJKOgwOk3HxTfjW5RVvrbwNvrckhLimNogjUvGdMSviQIUdVjI8u59Zl8eX7C2xNJ3xihTlWfUNVhwF3Afc3c9ilVTVfV9IQE57mHsDBhdGKsJYhObt/ho6zec9g6p41pBV8SxA4RuVVEIt2f24AdTW7lfOtP8ZhOpvHO7QV83bfR3G2PU3cnk0deM53Mu27zkvU/GNNyviSIm4CTgX04H9zT8G08iJXACBEZIiJROJ3OizxXEJERHpMzgUz39SLgShHp4g5QNAJY4cMxAedOppLyarIPHfV1E9PBvLUul3EDejCod0ygQzEmZDXZVKSq+Tgf7s2iqtUiMhfnyetwYL6qbhSReUCGqi4C5orI2UAVcAi43t12o4gsBDY
B1cDNqurzfaueHdUp8d2aG7oJcXsPHmHN3sPcff7oplc2xjSoyQQhIv8AblPVw+50L5wxIr7X1Laq+g7wTr15D3i8vq2RbR8GHm7qGN6MToxFBDbnlnDumMSW7MKEsHfc5qWZdveSMa3iSxPT+LrkAKCqh/DtNteA6RYVwZDeMWzKLQp0KCYA3l6fy4TkHnb1aEwr+ZIgwtyrBgBEJB4fq8AGktNRbYMHdTZ7DhxhXXaRjTttTBvw5YP+MWCZiLzsTl9OC5t+2lNqUixvr8+lpLyK2OjIQIdj2kndw3Hnj7OmRWNay5dSG88DlwH7gXzgElX9p78Da620/k5H9ZY8u4roLI5W1vDm2hwmDexJci9rXjKmtXxqKnLvKirAHTBIRAaq6h6/RtZKnncynTA4PsDRmLZWWV3L1rwS1mYfZl32YdZlF7Ftfwm1CvNmjwl0eMZ0CL7cxTQLp5mpP84VxCBgM04hvaCVGBdNz26RbMqxJ6pDXU2tsr2glLV7nUSwLvswm3NLqKypBaBXt0jGJ/fk3LR+TBrYi+mjbDRBY9qCL1cQDwEnAh+o6iQROQO4yr9htZ6IkGZjQ4QcVWXPwSOszS5inZsQNuQUcaTSeQyme5cIxg6I44ZTBjM+uSfjk3uQ3KurjTVtjB/4kiCqVPWAiISJSJiqfiwiv/F7ZG0gNSmOF77cTXVNLRHhvtywZQIhK7+E177a514dFFF0tAqAqIgw0pLiuHxKMuOTezIhpQdD+3QnLMySgTHtwZcEcVhEugNLgH+JSD7O081BLy0pjorqWnYdKGN4XxuPOFjd9cp61uw9zMh+sZw/NvHYlcGoxFgiLbEbEzC+JIhZQDnO2NTXAnHAL/0ZVFup66jelFtiCSJIFR2t4qs9h7j5jOH89NygHcnWmE6pwa9nIlIiIsVAHnAYp1bS48Cvga0i8qWInNU+YbbM8L7diQwX64cIYl9sP0CtwqnD+wQ6FGNMPQ1eQahqg1+53eE/xwL/cv8NSlERYQzvG2t3MgWxJZkFxESFM2lgr6ZXNsa0qxY18KpqjaquBf7cxvG0udSkWLuCCGJLswo5cWhvoiKsr8GYYNOqv0pVfbKtAvGXtKQ48ksqKCytCHQopp49B46w+8ARTh1hzUvGBKMO/7UtzeOJahNclmQ544h/a4Q92GZMMOrwCSLVEkTQWppZSFKPaIYl2KhvxgSjDp8gesVEkRgXbaW/g0xNrfJ5ViHfGtHHnoI2Jkj5NUGIyAwR2SoiWSJyt5fld4rIJhFZJyIfisggj2U1IrLG/VlUf9vmSOsfZ3cyBZl12YcpLq/mVGteMiZo+S1BuLfCPgGcD6QBV4lIWr3VvgLSVXU88DLwW49lR1V1ovszqzWxpCbFsr2glIpqn4e1Nn62JLMQEXv+wZhg5s8riKlAlqruUNVKYAEw23MFVf1YVY+4k18Cyf4IJDUpjupaJXN/qT92b1pgaWYhY/rHER8TFehQjDEN8GeCGADs9ZjOduc15PvAux7T0SKS4T6xfZG3DUTkRnedjIKCggZ3nHas5IY1MwWD0opqVu85xKnDrXnJmGDmz7GlvfU8qtcVRa4F0oHTPWYPVNUcERkKfCQi61V1+3E7U30KeAogPT3d674BBvWOoWtkuN3JFCS+3H6A6lrlNHv+wZig5s8riGwgxWM6Gcipv5KInA3cC8xS1WNPs6lqjvvvDuATYFJLAwkPE0Yl2hPVwWJpViHRkWFMGWzlNYwJZv5MECuBESIyRESigCuB4+5GEpFJwJM4ySHfY34vEenivu4DnAJsak0wdXcyqTZ4oWHayWeZBUwb0psuEeGBDsUY0wi/JQhVrQbmAotxhihd6I5tPc8dxhTgUaA78J96t7OmAhkishb4GHhEVVuVIFKT4iguryanqLw1uzGttO/wUXYUlPEta14yJuj5sw8CVX0HeKfevAc8Xp/dwHbLgHFtGUtaklOcdnNOMQN6dm3LXZtmWJpp5TWMCRU
d/knqOqMS4xCxO5kCbUlmIX1juzCyX/dAh2KMaUKnSRDdu0QwKL6bdVQHUK1bXuNUK69hTEjoNAkCnH4ISxCBszGnmENHqqz/wZgQ0akSRFpSHLsOHKG0ojrQoXRKdeW9T7HyGsaEhE6VIOpKf2/Ns6uIQFiyrZDRibH0jY0OdCjGGB90rgTR3y25YZVd292RympW7T5kzUvGhJBOlSD694imR9dINtnYEO1u+c6DVNbU2u2txoSQTpUgRITUJCu5EQhLMwuJighj6pD4QIdijPFRp0oQ4PRDbMkrpqbWSm60pyWZBUwdHE90pJXXMCZUdLoEkZYUR3lVLbsOlAU6lE5jf3E52/aXcqr1PxgTUjpdgqi7k8mamdrP0sxCwEaPMybUdLoEMaJfdyLCxO5kakdLMgvoHRN1bOAmY0xo6HQJoktEOMP7drcriHZSW6sszTrAKcP7EBZm5TWMCSWdLkFAXckNu9W1PWzJK6GwtMKefzAmBHXKBJGWFEdecTkHyyoDHUqHtzTLynsbE6o6ZYKwjur2sySzkBF9u5PYw8prGBNqOmmCcAcPsgThV+VVNazYedBubzUmRHXKBNG7exf6xXWxO5n8bOWug1RU11r/gzEhyq8JQkRmiMhWEckSkbu9LL9TRDaJyDoR+VBEBnksu15EMt2f69s6ttSkOBtdzs+WZhYSGS5MG9I70KEYY1rAbwlCRMKBJ4DzgTTgKhFJq7faV0C6qo4HXgZ+624bD/wCmAZMBX4hIr3aMr7UpDi2F5RSWV3blrs9jmrnLuexJLOQyQN7EdPFr0OfG2P8xJ9XEFOBLFXdoaqVwAJgtucKqvqxqh5xJ78Ekt3X5wHvq+pBVT0EvA/MaMvg0pLiqKpRMvP9c7vr3oNHOON3n/Dm2hy/7D/YFZRUsCm3mNNG2t1LxoQqfyaIAcBej+lsd15Dvg+825xtReRGEckQkYyCgoJmBff1nUxtnyBUlV8s2siuA0d44I0NnfJ22mXbrbyGMaHOnwnC22OzXttcRORaIB14tDnbqupTqpququkJCc37pjqkTwzRkWF+uZNp8cb9fLQln6umplBSXs0j725u82MEu8+2FdKzWyRjB/QIdCjGmBbyZ4LIBlI8ppOBb7S3iMjZwL3ALFWtaM62rREeJoxKjGvzO5nKKqr55ZsbGZ0Yy0Ozx/L9U4ewMCOblbsOtulxgpmqsjSrgFOG9SHcymsYE7L8mSBWAiNEZIiIRAFXAos8VxCRScCTOMkh32PRYuBcEenldk6f685rU2lJsWzOK27TzuQ/frCN3KJyHr54HBHhYdx29ggG9OzKva+tp6rGfx3iwSQzv5T9xRX2/IMxIc5vCUJVq4G5OB/sm4GFqrpRROaJyCx3tUeB7sB/RGSNiCxytz0IPISTZFYC89x5bSo1KY7DR6rIKy5vk/1tyilm/ue7uGrqQKYMcm666hYVwYOzxrBtfynPLN3ZJscJdkusvLcxHYJf7z9U1XeAd+rNe8Dj9dmNbDsfmO+/6DhWfnpTTjFJPbq2al+1tcq9r6+nZ9dI7pox6rhl56T145y0fvzxg23MHJdESny3Vh0r2C3NLGBIn5gO/z6N6eg65ZPUdUa3YU2mBSv38tWew9w7M5We3aK+sfzBWWMQhAcXbezQz0dUVNfw5Y6D9vS0MR1Ap04Q3btEMDC+W6ufqC4sreA3723hxKHxXDzJ+528A3p25Y5zRvDhlnz+u2l/q44XzFbvPszRqhprXjKmA+jUCQKcZqbWPgvx63c2c6Syml9dNA6Rhu/aueGUIYxOjOWXizZSVlHdqmMGqyWZBYSHCScNs/IaxoS6Tp8gUpPi2HWgrMUf2Mu2F/Lq6n386LRhDO/bvdF1I8PDePjiseQUlfN/H2a26HjBbmlWIZNSehIbHRnoUIwxrWQJIikWVWfks+aqqK7hvtc3MDC+G3PPHO7TNlMGxXPlCSk8s3Rnhys3fqiskvX7iuz2VmM6iE6fINL
6t7yj+unPdrCjoIx5s8cQHRnu83Z3zRhNj66R3Pvaempr26/DWlV54cvdrNjpn4f2Pt9eiKqNHmdMR9HpE8SAnl2Ji45odoLYc+AIf/4oi5njkpg+qm+ztu0VE8X/+3Yqq/cc5qWMvU1v0AZqa5UH3tjIfa9v4Nq/L2fxxrw2P8aSbYXERkcwIdnKaxjTEXT6BCEijG7m2BCqyv1vbCAyPIz7L6hfwdw3l04ewLQh8Tzy7hYOlFY0vUErVNfU8vOX1/HPL3cz5+TBpPWP4yf/Ws0ba/a12TGc8hqFnDysNxHhnf6/lTEdgv0l49zJtDWvxOfmnnc35PHptgLuPGdki8daFhEevngsRyqr+fU7W1q0D19UVtdy24I1vLI6mzvOHskvLkzjhR9M44TBvbj9pTUsWLGnTY6zs7CMfYePcqo1LxnTYViCwEkQRypr2H3wSJPrlpRX8cs3NzKmfxzXnTSoyfUbM7xvLDeeNpRXVmfzxfYDrdqXN+VVNdz0wireXp/Lvd9O5bazRyAidO8SwXM3TOX0kQnc/ep65rdBCZC68hqnWQe1MR2GJQi+HhvCl8quv39/G/klFceK8bXW3DNGkBLflfteX9+mo9uVVVTzvedW8tGWfH510Vh+eNrQ45ZHR4bz5HenMGNMIvPe2sQTH2e16nhLMgtJie/KoN4xrdqPMSZ4WIIARvTrTniYNNlRvWFfEf9Ytotrpw1iYkrPNjl216hw5s0ay/aCMp5esqNN9ll0tIrr5q/gyx0HeOzyCVx7ovcrnS4R4Tx+9SQunjSARxdv5bfvbWlRGZCqmlq+3HGAU4db85IxHYkNFozzbXpYQkyjCaKmVrn3tfXEx3ThZ+eNanC9ljhjdF/OH5vInz7M5MLx/RnYu+VF7g6WVXLd/OVszSvhiasnc/64pEbXjwgP47HLJ9A1Kpy/fLKdI5U1PHBBGmHNGMdhzd7DlFZUW/OSMR2MXUG4Upu4k+nFFXtYm13E/Rek0qNr2z8l/MCFaUSECQ8s2tDiYn75xeVc+dQXbNtfylPfTW8yOdQJCxMevsgZ3Oi5Zbu459X11DTj+Ywl2woIEzh5mCUIYzoSSxCutKQ4covKOXzkm+NH55eU89v3tnDK8N7MmtDfL8dP6tGVO88dxSdbC3hvQ/OfUcg+dIQrnvyC7ENHee6GEzhjdPOezRAR7puZyq1njeCljL3c8dIanwc4WpJVyPjknvToZuU1jOlILEG4jnVUe7mKePjtzVRU1fLQ7LGNFuNrretPGkRaUhwPvrmR0mbUhtpZWMYVf/uCA2WV/PP701r8TV5EuPOckdx9/mgWrc3hJ/9aTXlVTaPbFB2tYu3ew1be25gOyBKEq6E7mZZmFvLGmhx+PH0YQxMaL8bXWhFuMb/8kgp+/99tPm2zbX8JVzz5BeXVtfz7hyceG8muNW46fRjzZo/h/U37+eHzGRytbDhJfLG9kForr2FMh+TXBCEiM0Rkq4hkicjdXpafJiKrRaRaRC6rt6zGHYb02FCk/pQQ24WE2C7Hlf4ur6rh/jc2MLh3N348fZi/QwBg0sBeXDNtIM8t28mGfUWNrrthXxHfefILBHjpxhMZO6DtSlxcd9JgHr1sPJ9nFXL9/BWUlFd5XW9JZiExUeFMGtg2d3UZY4KH3xKEiIQDTwDnA2nAVSJSvy7FHmAO8KKXXRxV1Ynuzywvy9tcalLccXcyPfnpDnYWlvHQRWObVYyvtX5+3mjiY6K49/UNDXYWr9p9kKue+pJuURH856aTGNEvts3juDw9hT9dNYnVew5x7d+Xe+2fWZJZyIlDexNp5TWM6XD8+Vc9FchS1R2qWgksAGZ7rqCqu1R1HdB2T4i1QlpSHJn5JVRW17KzsIwnPsniwgn92735pEfXSO6bmcbavYf5t5dSGJ9nFXLt31fQJ7YLC286ya8Pp10wvj9PfncKm/NKuPKpLyko+bpu1O4DZew5eMT6H4zpoPyZIAYAnqVKs91
5vooWkQwR+VJELvK2gojc6K6TUVBQ0JpYAWdsiKoaZXtBKQ+8sYEu4WHcPzO11fttidkT+3PysN785r0tx30of7RlPzc8t5KB8d146UcnMqBnV7/HclZqP56dcwK7DxzhO09+QW7RUeDr8hpWf8mYjsmfCcLb7T7NucF/oKqmA1cDfxSRb3QCqOpTqpququkJCa3/kEpzO6p/t3grSzIL+fmMUfSNa1kxvtYSER66aCwVVbU8/PYmAN5el8uNz69iVL9YFtx4In1j2y+2U4b34Z/fn0pBSQWX/+0L9hw4wtLMQvr3iGZYgpXXMKYj8ueT1NlAisd0MpDj68aqmuP+u0NEPgEmAdvbMsD6hvSJoUtEGB9uyWd8cg+umda6YnytNSyhOzdNH8afPsykZ7conv9iF1MG9eKZOScQF4AhPdMHx/PiD0/kuvnLufzJZRypqOH8cYl+vfXXGBM4/ryCWAmMEJEhIhIFXAn4dDeSiPQSkS7u6z7AKcAmv0XqiggPY1RiLGECD180jvBmlJvwl59MH8ag3t14btkuTh7Wh398b2pAkkOdcck9WHDjSdQqlFRU2+2txnRgfruCUNVqEZkLLAbCgfmqulFE5gEZqrpIRE4AXgN6AReKyC9VdQyQCjwpIrU4SewRVfV7ggCYe8ZwDpZVMi5IRkWLjgzn8asm897GXG45c0S73k3VkFGJsSz80UkszNjL2an9Ah2OMcZPpKV1f4JNenq6ZmRkBDoMY4wJKSKyyu3v/Qa7ed0YY4xXliCMMcZ4ZQnCGGOMV5YgjDHGeGUJwhhjjFeWIIwxxnhlCcIYY4xXliCMMcZ41WEelBORAmB3Ox6yD1DYjsdrS6Eae6jGDaEbe6jGDaEbe3vHPUhVvdbM6TAJor2JSEZDTx8Gu1CNPVTjhtCNPVTjhtCNPZjitiYmY4wxXlmCMMYY45UliJZ7KtABtEKoxh6qcUPoxh6qcUPoxh40cVsfhDHGGK/sCsIYY4xXliCMMcZ4ZQmiBURkl4isF5E1IhLUoxSJyHwRyReRDR7z4kXkfRHJdP/tFcgYvWkg7gdFZJ973teIyLcDGaM3IpIiIh+LyGYR2Sgit7nzQ+GcNxR7UJ93EYkWkRUistaN+5fu/CEistw95y+5Qx8HlUZif05Ednqc84kBic/6IJpPRHYB6aoa9A/hiMhpQCnwvKqOdef9Fjioqo+IyN1AL1W9K5Bx1tdA3A8Cpar6u0DG1hgRSQKSVHW1iMQCq4CLgDkE/zlvKPYrCOLzLiICxKhqqYhEAkuB24A7gVdVdYGI/A1Yq6p/DWSs9TUS+03AW6r6ciDjsyuIDk5VPwMO1ps9G/iH+/ofOB8CQaWBuIOequaq6mr3dQmwGRhAaJzzhmIPauoodScj3R8FzgTqPmCD9Zw3FHtQsATRMgr8V0RWiciNgQ6mBfqpai44HwpA3wDH0xxzRWSd2wQVdM00nkRkMDAJWE6InfN6sUOQn3cRCReRNUA+8D6wHTisqtXuKtkEabKrH7uq1p3zh91z/gcR6RKI2CxBtMwpqjoZOB+42W0OMf73V2AYMBHIBR4LbDgNE5HuwCvA7apaHOh4msNL7EF/3lW1RlUnAsnAVCDV22rtG5Vv6scuImOBe4DRwAlAPBCQ5khLEC2gqjnuv/nAazj/IUPJfre9ua7dOT/A8fhEVfe7f0y1wNME6Xl325JfAf6lqq+6s0PinHuLPVTOO4CqHgY+AU4EeopIhLsoGcgJVFy+8Ih9htvcp6paATxLgM65JYhmEpEYtwMPEYkBzgU2NL5V0FkEXO++vh54I4Cx+KzuA9Z1MUF43t1Ox2eAzar6e49FQX/OG4o92M+7iCSISE/3dVfgbJz+k4+By9zVgvWce4t9i8eXCcHpOwnIObe7mJpJRIbiXDUARAAvqurDAQypUSLyb2A6Tgnh/cAvgNeBhcBAYA9wuaoGVYdwA3FPx2nmUGAX8KO6dv1gISKnAkuA9UCtO/v
/4bTlB/s5byj2qwji8y4i43E6ocNxvvQuVNV57t/qApwmmq+Aa91v5EGjkdg/AhIAAdYAN3l0ZrdffJYgjDHGeGNNTMYYY7yyBGGMMcYrSxDGGGO8sgRhjDHGK0sQxhhjvLIEYUKaiKiIPOYx/TO3qF9b7Ps5Ebms6TVbfZzL3QqqH/v7WO7x5ojI4+1xLBPaLEGYUFcBXCIifQIdiCcRCW/G6t8HfqKqZ/ghDhER+zs3LWL/cUyoq8YZw/eO+gvqXwGISKn773QR+VREForINhF5RESucevyrxeRYR67OVtElrjrXeBuHy4ij4rISreY2o889vuxiLyI87BZ/Xiucve/QUR+4857ADgV+JuIPFpv/b+IyCz39WsiMt99/X0R+ZX7+k53fxtE5HZ33mD3iuQvwGogRURucN/Dp8ApHse43N12rYh81sxzbzq4iKZXMSboPQGsE2ecC19NwCnodhDYAfxdVaeKM0jOLcDt7nqDgdNxitV9LCLDgeuAIlU9wa2y+bmI/NddfyowVlV3eh5MRPoDvwGmAIdwqgFf5D41eybwM1WtP/jUZ8C3cMp0DADqSl6cCiwQkSnADcA0nCdul7sJ4BAwCrhBVX/ilm34pXvsIpwSFF+5+3oAOE9V99WVfDCmjl1BmJDnVhx9Hri1GZutdAuiVeCUhq77gF+PkxTqLFTVWlXNxEkko3Hqb13nlmheDvQGRrjrr6ifHFwnAJ+oaoFbgvpfQFNVgJcA3xKRNGATXxf8OwlYhpMoXlPVMrcMw6s4CQVgt6p+6b6e5nHsSuAlj2N8DjwnIj/EKfdgzDF2BWE6ij/iNKc86zGvGvdLkFv0zHPISc+aPLUe07Uc/3dRvxaN4nxbv0VVF3suEJHpQFkD8UmT76D+gZxv9b2AGThXE/F8PbpbifueGlI/Dq81dVT1JhGZBswE1ojIRFU90NxYTcdkVxCmQ3AL3y3E6fCtswunWQWcEd0iW7Dry0UkzO2XGApsBRYDP3ZLYyMiI93Kvo1ZDpwuIn3cDuyrgE99OP4XOM1dn+FcUfzM/Rd33kUi0s09/sUey+ofe7qI9HZjvrxugYgMU9XlqvoAUAik+BCT6STsCsJ0JI8Bcz2mnwbeEJEVwIc0/O2+MVtxPsj74VTULBeRv+M0Q612v8UX0MRwlqqaKyL34LT/C/COqvpSfnoJcK6qZonIbpyriCXuPleLyHPACnfdv6vqV+KMBlf/2A/iJJtcnCutuuakR0VkhBvTh8BaH2IynYRVczXGGOOVNTEZY4zxyhKEMcYYryxBGGOM8coShDHGGK8sQRhjjPHKEoQxxhivLEEYY4zx6v8DQijcKBh8oJgAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "ax = jaccard_scoresb['ratio'].plot(title='compared to reference corpus');\n", "ax.set_xlabel(\"Number of words\")\n", "ax.set_ylabel(\"Jaccard score\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# for coll" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "jaccard_scores = nb.frame({'ratio': {x:df_jaccard(coll, 'score', 'reference', x) for x in rng} }).transpose()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Jaccard score')" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dd3gVVfrA8e+bRiD03gldqoAUG0ixgA0LNiyArnUVrKvu/ta2ltWV1VXsK2AFseNasCFVgdAEpCUQILQEEkqCgZT398dM8Bpvkptwa/J+nicPd2bOzLx3Qu5755wz54iqYowxxhQXFeoAjDHGhCdLEMYYY7yyBGGMMcYrSxDGGGO8sgRhjDHGK0sQxhhjvLIEYSo1EZkqIo+GOo4iInKhiGwTkWwR6R3qeIwpjSUIU2WJyFgRmR/k0z4N3KqqNVV1eZDPbUy5WIIwEUNEYkIdQ2l8jK8NsCaAx48Ilem9VGaWIKowEWklIh+JSIaI7BWRSe76KBH5PxHZIiLpIvKmiNRxtyWKiIrIOLeqJEtEbhKRfiLys4jsKzqOW36siCwQkedFZL+IrBORYR7bx4nIWhE5KCKbRORGj22DRSRNRO4VkV3AFHf9uSKywj3XQhHp6bFPbxFZ5h7vPSC+hPfeBXgZOMmt7tnnrq/jvt8M9/3/n4h4/TsRkYdE5AMReVtEDgBj3Wt3n4ikuNd0hojUF5FqIpINRAMrRSTFPUZzEfnQPd9mERlfkeMX+92MEZGtIrJHRP7mcbxoEfmru+9BEVkqIq3cbceJyDcikiki60Xk0lL+39QXkSkissP9/X/ise16EUl2jzNTRJp7bFMR+bOIbAQ2eqwb7/7u94jIv4qut/v+3/bYv+j9xbjLY939DrrX7sqSYjYVpKr2UwV/cD+ogGeABJwP0lPdbdcCyUA7oCbwEfCWuy0RUJwP13jgTCAX+ARoDLQA0oHT3PJjgXzgDiAWuAzYD9R3t58DtAcEOA04BPRxtw12930SqAZUB/q4xx/gvocxQKq7PQ7Y4nGuUUAe8GgJ12AsML/YujeBT4Fa7nvdAFxXwv4Puce/AOfLVnXgduAnoKUb0yvANI99FOjgvo4ClgIPuLG3AzYBZ1Xk+B6/m9fcsscDh4Eu7vZ7gFVAZ/d6Hw80cH//24BxQIx7jfcA3Up4358D7wH13Otc9Lse6u7Xx43teWB
usff+DVAfqO6xbra7rrV7vf/k8f7f9ti/6P3FuDEfADq725qVFK/9HMPnRKgDsJ8Q/eLhJCADiPGy7TvgFo/lzu4HVYzHH2kLj+17gcs8lj8EbndfjwV2AOKxfTFwdQlxfQJMcF8PBo4A8R7bXwL+UWyf9TjJZZCXcy3ExwSBk3AOA1091t0I/FDC/g95fgC669YCwzyWmxVdO3fZM0EMALYW2/9+YEpFju/xu2lZ7Fpf7nGdRnp5H5cB84qtewV40EvZZkAhUM/LtteBpzyWa7qxJXq896HF9lFguMfyLcB3Hu+/tASxD7gYN9nYj/9/rB6w6moFbFHVfC/bmuN8Ey+yBeePsonHut0er3/1slzTY3m7un/hHsdrDiAiI4AHgU4435Jr4HzLLZKhqrkey22AMSJym8e6OPd4WsK5fNWQ3+5CPPdvUco+24ottwE+FpFCj3UFONduu5eyzYuqt1zRwLwKHr/ILo/Xh/jtd9EKSPHyHtoAA4rFEQO85aVsKyBTVbO8bGsOLCtaUNVsEdmLc/1SS3g/xdcd/b9RGlXNEZHLgLuB10VkAXCXqq4ra1/jO2uDqLq2Aa3Fe2PhDpwPjSKtcap6dnsp64sWIiLFjrdDRKrh3G08DTRR1brAFzjVH0WKDze8DXhMVet6/NRQ1WnAzhLOVZLix96D8423+Hsv/sFe2jG2ASOKxRevqt6OsQ3YXKxsLVU920/H93a+9iWsn1PsmDVV9eYSytYXkbpetv3u/42IJOBUYXnG5m346FYer1u7xwHIwfnCUKSp506qOktVz8C5q1mHU7Vm/MgSRNW1GOcD9Z8ikiAi8SJyirttGnCHiLQVkZrA48B7Jdxt+KIxMF5EYkXkEqALTiKIw6mrzgDy3buJM8s41mvATSIyQBwJInKOiNQCfsRJZONFJEZELgL6l3Ks3UBLEYkDUNUCYAbwmIjUEpE2wJ3A26Uco7iX3f3bAIhIIxEZWULZxcABcRrhq7uNyN1FpJ+fjl/cf4F/iEhH99r1FJEGwP+ATiJytfs7ihWn00GX4gdQ1Z3Al8CLIlLPLTvI3fwuME5EernJ/3FgkaqmlhHXPe6xWgETcNo3AFYAg0SktTidJO4v2kFEmojI+W4SOgxk49xJGT+yBFFFuR+G5wEdgK1AGk5dNMBknOqFucBmnEbo27wcxleLgI4439AfA0ap6l5VPQiMx/lQzgJGAzPLiDsJuB6Y5O6TjNOWgKoeAS5yl7Pc9/NRKYf7HqfL6S4R2eOuuw3nm+smYD7Oh97kcrzX/7jv4WsROYjToDyghPdS9DvohXOd9+B8iNfxx/G9+DfOtf4ap4H3dZz6+4M4iflynG/vu/itY4A3V+Pcaa3D6TBwu/t+vgP+jnNXuBPnbuVyH+L6FKexfgVOA/jr7vG+wUkWP7vb/+exTxRwlxtvJk4b1C0+nMuUg/y+utYY/xKRsTi9Uk4NdSwm/IiIAh1VNTnUsZg/sjsIY4wxXlmCMMYY45VVMRljjPHK7iCMMcZ4VWkelGvYsKEmJiaGOgxjjIkoS5cu3aOqjbxtqzQJIjExkaSkpFCHYYwxEUVEShxtwKqYjDHGeGUJwhhjjFeWIIwxxnhlCcIYY4xXliCMMcZ4ZQnCGGOMV5YgjDHGeGUJoor5dMV2du7/NdRhGGMigCWIKmTX/lwmTF/B/328OtShGGMigCWIKmRxaiYA361LZ9lWb1MKG2PMbyxBVCFJqZnUiIumYc04Jn69PtThGGPCnCWIKmRJahZ9Wtfj5sEdWJC8l4Upe8reyRhTZVmCqCIO5OaxbtcB+ibW48oBrWlaO56JX2/A5gMxxpTEEkQVsXRLFqrQP7E+8bHR3DasA0u3ZPHD+oxQh2aMCVOWIKqIpNRMoqOEXq3rAnBp31a0rl+Dp79eb3cRxhivLEFUEUtSs+jevDY14pwpQGKjo5gwrCNrdhzgq9W7QhydMSYcWYKoAg7nF7By2z76Jtb/3foLere
gfaME/v3NBgoK7S7CGPN7liCqgNXb93M4v5B+xRJEdJRw5xmd2ZiezcyV20MUnTEmXFmCqAKWpDoPxfVNrPeHbSO6N6Vrs9o8++1G8goKgx2aMSaMWYKoApJSM2nXMIGGNav9YVtUlHDXmZ3YsvcQHyxNC0F0xphwZQmikissVJK2ZHm9eygy9LjG9G5dl+e+20huXkEQozPGhDNLEJVcSkY2+w7l/aGB2pOIcPeZndm5P5dpi7cGMTpjTDizBFHJFQ3Q17+UBAFwSoeGnNSuAS/MTuHQkfxghGaMCXOWICq5pNQsGtasRpsGNcose/dZndiTfZg3Fm4JQmTGmHBnCaKSW5KaSb/EeohImWVPaFOfIZ0b8fKcFA7k5gUhOmNMOAtoghCR4SKyXkSSReQ+L9sHicgyEckXkVHFto0RkY3uz5hAxllZ7dz/K2lZv5ba/lDcXWd2Zv+veUyevzmAkRljIkHAEoSIRAMvACOArsAVItK1WLGtwFjg3WL71gceBAYA/YEHRaTkbjjGq6LnH8pqf/DUvUUdRnRvyn/nbSYr50igQjPGRIBA3kH0B5JVdZOqHgGmAyM9C6hqqqr+DBR/Quss4BtVzVTVLOAbYHgAY62UiiYI6tKsVrn2u+OMTuQcyeeVuZsCFJkxJhIEMkG0ALZ5LKe56/y2r4jcICJJIpKUkWHDVhdXNEFQTHT5fs2dmtTigl4tmLpwM+kHcwMUnTEm3AUyQXhrFfV1RDif9lXVV1W1r6r2bdSoUbmCq+w8JwiqiAnDOpJXoLw4O8XPkRljIkUgE0Qa0MpjuSWwIwj7Gn6bIKj4AH2+SmyYwKV9W/Luoq1s3/ern6MzxkSCQCaIJUBHEWkrInHA5cBMH/edBZwpIvXcxukz3XXGR0UTBPV2JwiqiFuHdgRg0vcb/RWWMSaCBCxBqGo+cCvOB/taYIaqrhGRR0TkfAAR6SciacAlwCsissbdNxP4B06SWQI84q4zPio+QVBFtKhbndEDWjMjKY3UPTl+jM4YEwkC+hyEqn6hqp1Utb2qPuaue0BVZ7qvl6hqS1VNUNUGqtrNY9/JqtrB/ZkSyDgrm5ImCKqIW4a0JzZa+M93dhdhTFVjT1JXQqu3H3AnCDr2R0ca14pn7Mlt+WTFdjbsPuiH6IwxkcISRCW0xB2g74Q2x34HAXDjoHbUjIvhmW82+OV4xpjIYAmiEiqaIKhRrT9OEFQR9RLiuG5gW75cvYvV2/f75ZjGmPBnCaKS8WWCoIq47tS21K0Ry8Sv1/v1uMaY8GUJopLxZYKgiqgVH8tNp7Vn9voMlm6xDmXGVAWWICqZogmCKvqAXGmuOakNDWtW4+lZ1hZhTFVgCaKSKZogKNGHCYLKq0ZcDLcOac+Pm/ayIHmP349vjAkvliAqmfJMEFQRVwxoTfM68Tz99XpUfR1ayxgTiSxBVCIVmSCovKrFRDN+WEeWb93H7PXpATuPMSb0LEFUIkUTBPnjAbnSXHxCS9o0qMHTszZQUFg57yJWpe3no2VplfYu6avVu/h+3e5Qh2Eq4EBuHs9/t5G1Ow8E/FyWICqRogmCujarHdDzxEZHcc9Znfll5wGer4QD+akqd72/gjtnrOTRz9dWuiSRnH6QW99dxrVTk7j/o5/59UhBqEMyPsrNK+D6N5KY+M0GRvxnHje9tZRfdgQuUViCqEQqOkFQRZzToxkX9W7Bf77byMKUytVgvTBlLxt2Z3N8q7q8Pn8z9374c6W5U1JVHv7sF6rHRXPdqW2ZvmQb502aH9APGeMfBYXK7dNXsGhzJo9d2J3xQzuwIHkPZz83jxvfSmLNDv8/xGoJopI41gmCyktE+McF3WnXMIEJ01eQcfBwUM4bDFMWpNIgIY73bjiR8cM6MiMpjfHTlnMkv/jMuJFn1prdzNu4hzvP6MTfz+3K29cN4MCveVzw4gKmLthc6e6WKgtV5YFPV/PVml38/dyuXDmgDXee2Zn59w5lwrCOLEz
ZyznPzef6N5P8OtqBJYhKYtkxThBUEQnVYnjhyj4c+DWPO95bUSm+ZW/Zm8N363YzekBr4mOjufOMTvzfOV34fNVOrn8zKaKrY3LzCnj081/o3KQWV5/YBoBTOjTkywkDGdihIQ999gt/eiOJvdmVJ9lXFs99l8w7i7Zy02ntue7UtkfX16kRyx1ndGL+vUO5/fSOLNq0l3Ofn8+f3khiVdqxJwpLEJXEEneCoF6tKj5BUEUc17Q2D5/fjfnJe3hxdnJQzx0Ib/64hWgRrnI/QAH+NLAd/7yoB3M3ZjBm8mIO5OaFMMKKe2XOJtKyfuWh87v9rhqyQc1q/HdMXx4+vxvzkvcw4j/z7DmXMPLOoi088+0GLu7TknuHd/Zapk71WG4/vRPz7xvKnWd0YvHmvZw3aT7XTV3Cz2n7KnxuSxCVRNEEQQnVKj5BUEVd1q8VF/RqzjPfbuCnTXuDfn5/yTmcz4wl2zi7RzOa1I7/3bbL+7fmuct7s2xrFqNf+4nMnCMhirJi0rIO8eIPyZzTsxkntW/wh+0iwpiTE/nkllOoFR/DVa8v4smv1pFXEPnVapHsq9W7+PsnqxnSuRH/vLhHmc831Y6PZfywjsy/byh3ndGJpC1ZnD9pAeOmLGbFtvInCksQlYA/JwiqCBHh0Qt7kNgggfHTlrMnQqsoPlyWxsHD+Yw7JdHr9vOOb85r1/Rl4+5sLn3lR3btzw1ugMfgsc/XIgJ/O7tLqeW6Nq/N/24byOX9WvPSDymMevlHtuy12QRDYdGmvYyfvpzjW9XlhSv7EFuOzie142O5bVhH5t87hHvO6szybfu44IUFjJ2ymOVbs3w+jiWISsCfEwRVVM1qMUwa3Yd9bntEYYS1RxQWKlMXpnJ8q7r0bl3ydRxyXGPeuLY/u/bnMurlhRHx4bkgeQ9frt7Fnwd3oHnd6mWWrx4XzRMX9eClK/uwOSObc56bzyfLtwchUlNk7c4D/OnNJFrVq87kMf0qPHVwrfhY/jykA/PvHco9Z3Vm5bZ9XPjiQq6ZvJilW8pOFJYgKgF/TxBUUV2b1+ah87oxb+MeXpqTEtJYymvuxgw2ZeQw7uTEMsue2K4B714/gOzD+Vzy8o+s3xW+M+3lFRTy4Mw1tK5fg+sHtSvXviN6NOPL2wfRpVktbn9vBXfOWEH24fwARWqKbMs8xJjJi0mIi+HN6wZQLyHumI9Zs1oMfx7SgXn3DuXe4cexevt+Ln5pIVe/vqjU/SxBVAJJqZm09eMEQcfiiv6tOO/45kz8ej2LN0fOsOBTF6bSqFY1zu7RzKfyPVvWZcaNJwFw2as/Vqh+NxjeWJhKcno2fz+3K/Gx0eXev0Xd6ky7/kRuP70jnyzfzrnPzTumRk9Tur3ZhxkzeTG5eQW8eV1/Wvhwx1ceNavFcPPg9sz7yxDuH3Fcmc+/WIKIcEUTBIWyesmTiPD4hd1p0yCB26Yti4gukykZ2fywPoOrBrQhLsb3P4lOTWrxwU0nUys+hitf+4kfU8KrgT7j4GH+8+1GTuvUiNO7NK7wcWKio7j99E68d+NJHMkv5KIXF/LKnJSIq0asiM17cpj49XrSsg4F/Fw5h/O5duoStu/7lclj+9GpSa2AnSuhWgw3ntaeefcOKbWcJYgIF6gJgo5FrfhYJo3uTdahPO6csTLsP0jeXJhKXHQUowe0Lve+rRvU4P0bT6Z53eqMmbKY79aGz/hGT361jtz8Ah48r6tfRvftl1ifLycM4oyuTXjiy3WMmbKY9IOR01BfXgdy87h26hKe/z6ZIU//wAOfrmb3gcC837yCQm5+Zxmrtu9n0ug+Qft7LqttwxJEhAvkBEHHolvzOvz93K7M2ZDBK3M3hTqcEh3IzeODpWmce3yzClfRNa0Tz3s3nsRxTWtx41tL+XRF6Bt0l2/N4oOlaVx7alvaNarpt+PWqRHLi1f24YmLerAkNZMRz85j9rrKN6pvYaFy14yVbMs8xAuj+3BJ31a
8u2grg56azWOf/+LXO+PCQuUvH/zM3A0ZPHFRD87o2sRvxz5WliAinDNBUFxAJgg6VlcNaM05PZvx9Nfrjzakh5v3k9LIOVLAuJPbll24FPUT4njnTwPo06Yet7+3gncXbfVThOVXWKg8NHMNjWtV47ahHf1+fBHhiv6t+ezWU2lUqxrjpi7h7vdXRkR1oq9empPCN7/s5m/ndOGcns14/MIefH/XYM7t2ZzX529m4FOz+desdew/dOwPTT7x5Vo+Xr6de87qzGX9yn8XG0iWICKcM0FQ/YBNEHQsRIR/XtSDlvWqM37a8rB7uKygUHljYSp929SjR8s6x3y8WvGxvHltfwZ3asRfP17FKyHqyfX+0m2sTNvPX8/uQs0APjjZsUktPvnzKdw8uD2fLN/O0IlzmLZ4a9hXKZZl7oYMnv56PSN7NWesR6+21g1qMPHS4/n6jtMY1qUJL8xO4dSnvue57zZysIJP1786N4XX5m1m7MmJ3DK4vZ/egf9YgohgwZgg6FjVio/lhdF92Jt9hLtmhNfzEbPXpbM18xBjS3gwriLiY6N55eq+nNOzGU98uY6nZwV35r39v+bx1Ffr6dumHiN7NQ/4+eJjo7l3+HF8OWEgxzWtxf0frWLUywsjdnTYbZmHGD99OZ2b1OKJi7w/udyhcU2ev6I3X04YyEntGvDvbzYw6KnZvDInpVxjdX20LI3Hv1jHOT2b8cC5/mkn8reAJggRGS4i60UkWUTu87K9moi8525fJCKJ7vpYEXlDRFaJyFoRuT+QcUaqpCBNEHSsureow/+d24XZ6zN4bV74tEdMXZhKszrxnNWtqV+PGxcTxXOX9+byfq2YNDuZh2auCVpifOabDWQeOsJD53cL6gdOxya1mH7DiUy85Hi27D3EeZPm84///RJRz03k5hVw8ztLKShUXr7qhDIbcLs0q82r1/Rl5q2ncHyrujzx5ToGPjWbKQs2k5tXeqKYvT6dv3zwM6d0aMC/Lz2eqKjwSw4QwAQhItHAC8AIoCtwhYh0LVbsOiBLVTsAzwBPuusvAaqpag/gBODGouRhfrMkSBME+cPVJ7bh7B5NeWrWepZuCX17xMbdB5mfvIerTmxTriEMfBUdJTxxUQ+uH9iWN37cwo1vL/VLfXVp1u86yFs/bWF0/9Z0b3HsVWblJSJcfEJLvr9rMJf1a8XkBZsZNvEHvli1M+yHEVdV/v7JalZvP8Czl/UisWGCz/v2bFmXqeP688FNJ9GxcU0e/uwXhjz9A+8s2uJ1iPjlW7O45e1ldG5ai5evOoFqMeV/PiVYAnkH0R9IVtVNqnoEmA6MLFZmJPCG+/oDYJg4X3sUSBCRGKA6cASIzHvWAFqSmkXv1nWDMkHQsRIR/nlxT1rUrc5t7y4nK8TtEVMWplItJoor+geuUVBE+OvZXXjg3K7MXpfOuZMC95CZqvLgzNXUio/h7jO9j/gZLHVqxPL4hT346OaTaZBQjVveWcbYKUvCeliSdxdv5f2laYwf2oFhXSrWi6hvYn2m3XAi7/5pAM3qxPO3j1cz7N8/8MHSNPLdQQ+T07O5duoSGteuxtRx/akVH+vPt+F3gfxkaQFs81hOc9d5LaOq+cB+oAFOssgBdgJbgadVNfRfO8NI0QRB4da9tTS13ecjMrIPc/f7K0P2rXL/oTw+WpbGBb1aUN8PwxiURkS49tS2zLjpJAoKlFEv/cibP6b6/b1/vmonP23K5K4zO/tlaAZ/6N26HjNvPYUHz+vK0i1ZnPHMXP7z7UYO54fXnBrLt2bx0Mw1nNapERNO73TMxzu5Q0M+vPlkpozrR53qsdz9/krOfHYu0xdvZczkxURHCW9e2z8sRj4oSyAThLdKteJ/FSWV6Q8UAM2BtsBdIvKHgWRE5AYRSRKRpIyMjGONN6KEYoIgf+jZsi5/PbsL361L57/zNockhulLtpKbV+jXxumy9Gldj8/HD+SUDg144NM13DpteYV7vhR36Eg
+j3++lq7NajM6gHdEFRETHcW4U9ry3V2ncWbXJjzz7QaGPzuP+RvDY76JPdmHufntZTStE89/Lu9FtJ/aAkSEIZ0b89mtp/LK1ScQGxXFfR+tYv+veUwd1582DXyvwgqlQCaINKCVx3JLYEdJZdzqpDpAJjAa+EpV81Q1HVgA9C1+AlV9VVX7qmrfRo0aBeAthK9QTRDkD2NPTuSsbk148qt1LCvH0MP+kF9QyJs/buHEdvXpEuS2m3oJcbw+ph/3Dj+Or1bv4vxJC/zS2+elH1LYsT+Xh0d289sHnL81qR3PpNF9ePPa/qgqV72+iNumLSc9QE8m+yK/oJBb311G1qEjvHTlCdSt4f87LxHhrG5N+XLCQF65+gSm33BiSNqHKiqQCWIJ0FFE2opIHHA5MLNYmZnAGPf1KOB7de69twJDxZEAnAisC2CsEWdJahbdQjRB0LESEZ4adTxN68Rz27vL2XcoeO0R365NZ/u+Xxl7jA/GVVRUlHDz4PZMu/5EDh3J58IXFzB98dYKVzlt3XuIV+Zu4oJezSPibnJQp0Z8dfsgbj+9I7PW7GLYxDlMXbA5JNPVPjVrPT9tyuTxC3sE/EM7KspJFJGUHCCACcJtU7gVmAWsBWao6hoReUREzneLvQ40EJFk4E6gqCvsC0BNYDVOopmiqj8HKtZIUzRBUCR8IJSkTvVYJo3uQ/rBXO5+/+egtUdMWbCZFnWrh3w4g/5t6/P5+IH0S6zPfR+t4q4ZKzl0pPxdQh/53y/ERAn3jSh9IqBwEh8bze2nd2LW7YPo1bouD332CyNfmM/KII6I+/nPO3l17iauPrENF5/QMmjnjTQB7f6iql+oaidVba+qj7nrHlDVme7rXFW9RFU7qGp/Vd3krs9213dT1a6q+q9AxhlpwmGCIH/o1aou943owrdrd/PynMA/H/HLjgMs2pzJmJPbhEVVTMOa1Xjj2v7ccXonPl6xnfMnLWDjbt/nlvhhfTrfrt3NbUM70rROfNk7hJm2DRN489r+TBrdm/QDh7ngxQXc/9GqgA2IV2Tj7oPc88FK+rSuy9/PLd7z3ngK//6R5g+SwmSCIH+49pREzunZjCe/Wsd/A/wQ3dSFm6keG81lfcOnITc6Sphwekfevm4A+w4d4fxJC/hoWVqZ+x3JL+SRz36hbcMErj01MfCBBoiIcG7P5nx312mMPTmR95O2cdq/ZvPEl2sDUvV4MDePG99aSo24aF688oRyDe9eFdnViUBLwmiCoGMlIjx7WS9GdG/Ko5+v5eUAjV+UmXOET1bs4KI+LahTI/z6np/SoSFfjB9Iz5Z1uHPGSu778OdSn8advGAzm/bk8MB5XcP6QStf1YqP5cHzuvH9XYMZ0b0Zr87dxMAnZ/P8dxvJ8dPT2KrK3e+vZEvmISaN7hORd13BZgkiwhRNENS3TWRXL3mKjY7iuSt6c27PZvzzy3VM+n6j388xbfFWjuQX/m7wtXDTuHY87/xpAH8e0p7pS7ZxwQsL2JSR/Ydyuw/k8vx3Gzm9S2OGdK74REDhqHWDGjxzWS++mjCIE9s3YKI7ztHk+ZuP+fmJl+akMGvNbu4fcRwntmvgp4grN0sQEaZogqB+bSO/eslTbHQUz17Wiwt6Nefprzfw7Lcb/NZwnVdQyFs/buHUDg3pGMBZuvwhJjqKe846jinj+rH7QC7nPT+fz1b+vnf4E1+sJa9QK3X9eeemtXjtmr58fMvJdG5ai0f+9wtDn57DjCXbjj6VXB7zN+7h6VnrObdnM647NTQ92CKRJYgIs+ToAH2VK0GA8+E48dJejDqhJc9+u5GJX/snScxas4tdB3IZF8QH447VkM6N+Xz8QDo3rcVt05bzwKerOZxfQFJqJp+s2MENA9tFzB8PvbAAABs2SURBVMNWx6J363q8e/2JvH3dABrWjOMvH/7MWc/O5YtVO30eADEt6xC3TVtGh8Y1efLinmE5amq4irxO9FXcktTMsJ0gyB+
io4SnLu5JTJQwaXYyeYWF3Df8uGP6o56yIJU2DWpEXHVM87rVee/Gk3jqq3W8Nm8zy7fuI6+gkGZ14rllSPjNHRBIp3ZsyCkdTmHWmt1M/Ho9t7yzjO4tanPPWccxqGPDEv9/5OYVcPPby8gvUF65um9EPjcUSj7dQYhIdREJ7QhgBnASRN824TlBkL9ERQmPX9iDq05szStzNvHo52srfCfxc9o+lm7J4pqTEsN2SOXSxEZH8bdzuvLq1SewZW8O63Yd5K9ndylzKOrKSEQY3r0pX90+iImXHM++Q3mMmbyYy1796WjPvuIe/HQNq7bvZ+Klx9O2HCO0GkeZ/8tE5DzgaSAOaCsivYBHVPX80vc0/lY0QdC4Uyp/HWpUlPCPkd2JiYri9fnOk7YPnlf+SVWmLkglIS6aS/pG9sNQZ3ZryufNarNsaxbn9mwW6nBCKjrKGVb8vOOb896SrTz3fTKjXv6Rocc15u4zO9O1uTOEyrTFW3kvaRt/HtKeM/0850dV4cvXkIdwBs/7AUBVV9jcDKERKRME+YuI8OB5XYmJEv47fzN5BYX8Y2R3n+8E0g/m8tnPOxjdvzW1w3xYZV+0ql+DVvUrZ9ViRcTFRHH1SYlcfEJLpi5M5eUfUjj7uXmcd3xzhndryoOfrmFgx4bceYZVflSULwkiX1X3V+YqjUgRSRME+YuI8LdzuhATHcXLc1IoKFQev7CHT0li2qJt5BUoY8K4a6s5djXiYrhlcAeuHNCGV+emMHl+Kp+t3EGLutV57vLeYfHUfKTyJUGsFpHRQLSIdATGAwsDG5bxJpImCPInEeHe4Z2JjRae/z6ZvALlqVE9S/3DP5JfyNuLtjC4cyPaNaoZxGhNqNSpHss9Zx3H2JPb8u6irZzdo2nYzI0RqXz5pLkN6AYcBt7FmdTn9kAGZf6oaIKgvpVgeI2KEBHuOrMzd5zeiQ+XpXHXjBWl9of/YtVOMg4eDusH40xgNKpVjQmndwz7Z14iQal3EO680g+r6j3A34ITkvGmaIKg/pXsAbnymnB6R2KihX/NWk9+ofLMZb3+MKe0qjJlwWbaNUpgUMeqNU+IMf5UaoJQ1QIROSFYwZiSJaVmRewEQf725yEdiIkSnvhyHQWFyn8u7/27QdeWb9vHyrT9PDKyW0R2bTUmXPjSBrFcRGYC7+PMEw2Aqn4UsKjMHyxOzYzYCYIC4cbT2hMdJTz6+Vry313GpNG9jw5aN3VBKrWqxXBxn8ju2mpMqPnSBlEf2AsMBc5zf84NZFDm94omCKqq7Q8l+dPAdjx8fje++WU3N7+9jNy8Anbtz+WLVTu5tF8rS6bGHKMy/4JUdVwwAjElK5ogqH/bqvH8Q3mMOTmRmGjhbx+v5sa3ltKpSU0KVBlzUmKoQzMm4vnyJHVL4HngFECB+cAEVS17VhPjF5VpgqBAuHJAG2fazY9WMWdDBqd3aULrSjpWlTHB5EsV0xRgJtAcaAF85q4zQbIybR+t6levFBMEBcpl/Vrzr1HHUzs+hptOaxfqcIypFHyppG2kqp4JYaqI2HMQQZSSnkOnxtanuyyjTmjJRb1bWM8lY/zElzuIPSJylYhEuz9X4TRamyAoKFQ278mhfWN7GtgXlhyM8R9fEsS1wKXALmAnMMpdZ4IgLesQRwoK6WDDRRhjgsyXXkxbARvaO0RS3DmJ2ze2seyNMcFV5h2EiLwhInU9luuJyOTAhmWKJKc7CaJdQ7uDMMYEly9VTD1VdV/RgqpmAb0DF5LxlJKeQ4OEOBuV0hgTdL4kiCgROfqElojUx+ayDpqUjGzaW/uDMSYEfPmgnwgsFJEP3OVLgMcCF5LxlJKRzfDuNl2iMSb4yryDUNU3gYuB3UA6cJGqvuXLwUVkuIisF5FkEbnPy/ZqIvKeu32R51SmItJTRH4UkTUiskpE4n19U5VFZs4Rsg7l2R2EMSYkfGmkbg+kqOokYBVwumejdSn7RQMvACOArsAVItK1WLH
rgCxV7QA8Azzp7hsDvA3cpKrdgMFAnq9vqrL4rQeTJQhjTPD50gbxIVAgIh2A/wJtcWaWK0t/IFlVN6nqEWA6MLJYmZHAG+7rD4Bh4kx+fSbws6quBFDVvapa4MM5K5UUtweTPQNhjAkFXxJEoarmAxcB/1HVO4BmPuzXAtjmsZzmrvNaxj3HfqAB0AlQEZklIstE5C/eTiAiN4hIkogkZWRk+BBSZElOz6ZaTBTN61YPdSjGmCrIlwSRJyJXANcA/3PXxfqwn7cxD9THMjHAqcCV7r8XisiwPxRUfVVV+6pq30aNKt/UkikZ2bRtmEC0DR9hjAkBXxLEOOAk4DFV3SwibXHaB8qSBrTyWG4J7CipjNvuUAfIdNfPUdU9qnoI+ALo48M5K5WUDBuDyRgTOr70YvpFVcer6jR3ebOq/tOHYy8BOopIWxGJAy7HGTbc00xgjPt6FPC9qiowC+gpIjXcxHEa8Itvb6lyyM0rYFvWIevBZIwJmYA98Kaq+SJyK86HfTQwWVXXiMgjQJKqzgReB94SkWScO4fL3X2zROTfOElGgS9U9fNAxRqOUvfmoAod7A7CGBMiAX0iWlW/wKke8lz3gMfrXJwH77zt+za+VWVVSinpOQC0b2SD9BljQsOXNggTAkXPQNggfcaYUCnxDkJEPuOPvY6OUlUbAjyAktOzaVG3OtXjokMdijGmiiqtiulp99+LgKb8Vt1zBZAawJgM7iB91v5gjAmhEhOEqs4BEJF/qOogj02ficjcgEdWhRUWKpsycujftn6oQzHGVGG+tEE0EpF2RQvucxCV76m0MLLzQC6/5hVYF1djTEj50ovpDuAHEdnkLicCNwQsIvPbGExWxWSMCaFSE4SIRAEHgI7Ace7qdap6ONCBVWVHR3G1OwhjTAiVmiBUtVBEJqrqScDKIMVU5SWnZ1M7PoaGNW2aUWNM6PjSBvG1iFzsDsNtgqCoB5NdcmNMKPnSBnEnkADki0guzgisqqq1AxpZFZaSkcNpnawfgDEmtMpMEKpaKxiBGMf+X/PIOHjY2h+MMSHn01hMIlIPp6H66LzQqmrPQgTApgzrwWSMCQ9lJggR+RMwAWc+hxXAicCPwNDAhlY1pWTYIH3GmPDgSyP1BKAfsEVVhwC9gco3v2eYSMnIJjZaaFW/RqhDMcZUcb4kiFx3WG5EpJqqrgM6Bzasqis5PZs2DRKIjbaBdo0xoeVLG0SaiNQFPgG+EZEs/jh1qPGTlIxsOlr7gzEmDPjSi+lC9+VDIjIbZ97orwIaVRWVV1DI1r2HGN6taahDMcaYsquYROREEakFR0d4nY3TDmH8bMveQ+QXqnVxNcaEBV8qul8Csj2Wc9x1xs9SrIurMSaM+JIgRFWPziynqoUEeC7rquroNKPWxdUYEwZ8SRCbRGS8iMS6PxOATWXuZcotJT2HJrWrUSs+NtShGGOMTwniJuBkYDuQBgzA5oMIiOSMbGt/MMaEDV96MaUDlwchlipNVdmUns0FvVuEOhRjjAF868X0hvscRNFyPRGZHNiwqp6Mg4c5eDjfhtgwxoQNX6qYeqrqvqIFVc3Curn6XfLRHkw2eK4xJjz4kiCi3NFcARCR+lgvJr87OkhfY7uDMMaEB18+6CcCC0XkA3f5EuCxwIVUNaWkZ1MjLpqmtePLLmyMMUFQ5h2Eqr4JjAJ2A+nARar6li8HF5HhIrJeRJJF5D4v26uJyHvu9kUiklhse2sRyRaRu305XyRLcXsw2TSjxphw4dOQoaq6BpgBfApki0jrsvYRkWjgBWAE0BW4QkS6Fit2HZClqh2AZ4Ani21/BvjSlxgjXUp6tjVQG2PCii+9mM4XkY3AZmAOkIpvH9r9gWRV3aSqR4DpwMhiZUYCb7ivPwCGifsVWkQuwHkgb40P54poOYfz2bE/156BMMaEFV/uIP6BM4vcBlVtCwwDFviwXwtgm8dymrvOaxlVzQf2Aw1EJAG4F3i4tBOIyA0ikiQ
iSRkZkTuH0eY9RQ3UliCMMeHDlwSRp6p7cXozRanqbKCXD/t5q0xXH8s8DDyjqtletv9WUPVVVe2rqn0bNWrkQ0jhyQbpM8aEI196Me0TkZrAPOAdEUkH8n3YLw1o5bHckj9ONFRUJk1EYnDmmsjEGc5jlIg8BdQFCkUkV1Un+XDeiJOSnk2UQJsGNs2oMSZ8+JIgzgdyceamvgqoTRlVP64lQEcRaYszjtPlwOhiZWYCY4AfcXpKfe+OHDuwqICIPARkV9bkAM4zEK3r16BaTHSoQzHGmKNKTBAicpCSq4QeEJEU4G+q+p23/VU1X0RuBWYB0cBkVV0jIo8ASao6E3gdeEtEknHuHKrkmE8pNkifMSYMlZggVLXEMR/cLqzdgXfcf0s6xhfAF8XWPeDxOhfnwbsSqepDpW2PdAWFyqY9OQzqFLltKMaYysmn5yCKU9UCVV0JPO/neKqctKxDHMkvtGcgjDFhp0IJooiqvuKvQKoq68FkjAlXx5QgzLFLSXeegWjX0BKEMSa8WIIIsZSMbBokxFEvIS7UoRhjzO9Ygggx68FkjAlXliBCLCUjx+aAMMaEJUsQIZSZc4TMnCN2B2GMCUuWIEKoqAeTJQhjTDiyBBFCKenWxdUYE74sQYRQSkY21WKiaF63eqhDMcaYP7AEEUIpGTm0bZhAdJRNM2qMCT+WIEIoJSPbJgkyxoQtSxAhkptXwLbMQ9ZAbYwJW5YgQiR1bw6Fig3SZ4wJW5YgQqRoDCa7gzDGhCtLECFiz0AYY8KdJYgQScnIpkXd6lSPs2lGjTHhyRJEiFgPJmNMuLMEEQKFhUpKeo41UBtjwpoliBDYdSCXX/MKrP3BGBPWLEGEQHK6NVAbY8KfJYgQsHmojTGRwBJECKRkZFM7PoaGNW2aUWNM+LIEEQIp6Tm0b1wTERukzxgTvixBhIDNQ22MiQSWIILsQG4e6QcPW4IwxoS9gCYIERkuIutFJFlE7vOyvZqIvOduXyQiie76M0RkqYiscv8dGsg4g2lTRtEYTPYMhDEmvAUsQYhINPACMALoClwhIl2LFbsOyFLVDsAzwJPu+j3AearaAxgDvBWoOIPtaBdX68FkjAlzgbyD6A8kq+omVT0CTAdGFiszEnjDff0BMExERFWXq+oOd/0aIF5EqgUw1qBJycgmNlpoXb9GqEMxxphSBTJBtAC2eSynueu8llHVfGA/0KBYmYuB5ap6uPgJROQGEUkSkaSMjAy/BR5IKenZtGmQQGy0Nf8YY8JbID+lvPXh1PKUEZFuONVON3o7gaq+qqp9VbVvo0aNKhxoMDk9mKz9wRgT/gKZINKAVh7LLYEdJZURkRigDpDpLrcEPgauUdWUAMYZNHkFhWzZa9OMGmMiQyATxBKgo4i0FZE44HJgZrEyM3EaoQFGAd+rqopIXeBz4H5VXRDAGINqa+Yh8gvVEoQxJiIELEG4bQq3ArOAtcAMVV0jIo+IyPlusdeBBiKSDNwJFHWFvRXoAPxdRFa4P40DFWuwpFgPJmNMBIkJ5MFV9Qvgi2LrHvB4nQtc4mW/R4FHAxlbKCQfnWbU2iCMMeHPutIEUUp6Dk1qV6NWfGyoQzHGmDJZgggiG4PJGBNJLEEEiapagjDGRBRLEEGSkX2Yg7n51v5gjIkYliCCJCXdHaTPejAZYyKEJYggScmweaiNMZHFEkSQJKdnUyMummZ14kMdijHG+MQSRJAUNVDbNKPGmEhhCSJINmXkWAO1MSaiWIIIgkNH8tm+71drfzDGRBRLEEFwdJpR68FkjIkgliCCwHowGWMikSWIIEjJyCFKILGhTTNqjIkcliCCICU9m9b1a1AtJjrUoRhjjM8sQQSBjcFkjIlEliACrKBQ2bQnxxqojTERxxJEgG3P+pUj+YX2DIQxJuJYgggw68FkjIlUliACzBKEMSZSWYIIsJSMbBokxFEvIS7UoRhjTLlYggiw5HTrwWSMiUyWIAIsJSOH9o2tgdo
YE3ksQQRQZs4RMnOO2B2EMSYiWYIIoE3WQG2MiWCWIALIejAZYyKZJYgASsnIIS4mihb1qoc6FGOMKTdLEAGUkp5Nu4YJREfZNKPGmMgT0AQhIsNFZL2IJIvIfV62VxOR99zti0Qk0WPb/e769SJyViDjDJTkjGwbg8kYE7ECliBEJBp4ARgBdAWuEJGuxYpdB2SpagfgGeBJd9+uwOVAN2A48KJ7vIiRm1fAtsxD1v5gjIlYMQE8dn8gWVU3AYjIdGAk8ItHmZHAQ+7rD4BJIiLu+umqehjYLCLJ7vF+LOlkG3Yf5Ix/z/H7m6io/EKlULFB+owxESuQCaIFsM1jOQ0YUFIZVc0Xkf1AA3f9T8X2bVH8BCJyA3ADQO3m7ejYJLy+rfduXZdBHRuFOgxjjKmQQCYIby2z6mMZX/ZFVV8FXgXo27evvnjlCeWN0RhjTAkC2UidBrTyWG4J7CipjIjEAHWATB/3NcYYE0CBTBBLgI4i0lZE4nAanWcWKzMTGOO+HgV8r6rqrr/c7eXUFugILA5grMYYY4oJWBWT26ZwKzALiAYmq+oaEXkESFLVmcDrwFtuI3QmThLBLTcDp0E7H/izqhYEKlZjjDF/JM4X9sjXt29fTUpKCnUYxhgTUURkqar29bbNnqQ2xhjjlSUIY4wxXlmCMMYY45UlCGOMMV5VmkZqEckAtgTxlA2BPUE8nz9FauyRGjdEbuyRGjdEbuzBjruNqnod8qHSJIhgE5Gkklr+w12kxh6pcUPkxh6pcUPkxh5OcVsVkzHGGK8sQRhjjPHKEkTFvRrqAI5BpMYeqXFD5MYeqXFD5MYeNnFbG4Qxxhiv7A7CGGOMV5YgjDHGeGUJogJEJFVEVonIChEJ6xECRWSyiKSLyGqPdfVF5BsR2ej+Wy+UMXpTQtwPich297qvEJGzQxmjNyLSSkRmi8haEVkjIhPc9ZFwzUuKPayvu4jEi8hiEVnpxv2wu76tiCxyr/l77rQDYaWU2KeKyGaPa94rJPFZG0T5iUgq0FdVw/4hHBEZBGQDb6pqd3fdU0Cmqv5TRO4D6qnqvaGMs7gS4n4IyFbVp0MZW2lEpBnQTFWXiUgtYClwATCW8L/mJcV+KWF83d157BNUNVtEYoH5wATgTuAjVZ0uIi8DK1X1pVDGWlwpsd8E/E9VPwhlfHYHUcmp6lycuTY8jQTecF+/gfMhEFZKiDvsqepOVV3mvj4IrMWZTz0SrnlJsYc1dWS7i7HujwJDgaIP2HC95iXFHhYsQVSMAl+LyFIRuSHUwVRAE1XdCc6HAtA4xPGUx60i8rNbBRV21TSeRCQR6A0sIsKuebHYIcyvu4hEi8gKIB34BkgB9qlqvlskjTBNdsVjV9Wia/6Ye82fEZFqoYjNEkTFnKKqfYARwJ/d6hATeC8B7YFewE5gYmjDKZmI1AQ+BG5X1QOhjqc8vMQe9tddVQtUtRfO/PX9gS7eigU3Kt8Uj11EugP3A8cB/YD6QEiqIy1BVICq7nD/TQc+xvkPGUl2u/XNRfXO6SGOxyequtv9YyoEXiNMr7tbl/wh8I6qfuSujohr7i32SLnuAKq6D/gBOBGoKyJF0yq3BHaEKi5feMQ+3K3uU1U9DEwhRNfcEkQ5iUiC24CHiCQAZwKrS98r7MwExrivxwCfhjAWnxV9wLouJAyvu9vo+DqwVlX/7bEp7K95SbGH+3UXkUYiUtd9XR04Haf9ZDYwyi0WrtfcW+zrPL5MCE7bSUiuufViKicRaYdz1wAQA7yrqo+FMKRSicg0YDDOEMK7gQeBT4AZQGtgK3CJqoZVg3AJcQ/GqeZQIBW4saheP1yIyKnAPGAVUOiu/itOXX64X/OSYr+CML7uItITpxE6GudL7wxVfcT9W52OU0WzHLjK/UYeNkqJ/XugESDACuAmj8bs4MVnCcIYY4w3VsVkjDHGK0sQxhhjvLIEYYwxxitLEMYYY7yyBGGMMcYrSxAmoomIishEj+W
73UH9/HHsqSIyquySx3yeS9wRVGcH+lzu+caKyKRgnMtENksQJtIdBi4SkYahDsSTiESXo/h1wC2qOiQAcYiI2N+5qRD7j2MiXT7OHL53FN9Q/A5ARLLdfweLyBwRmSEiG0TknyJypTsu/yoRae9xmNNFZJ5b7lx3/2gR+ZeILHEHU7vR47izReRdnIfNisdzhXv81SLypLvuAeBU4GUR+Vex8i+KyPnu649FZLL7+joRedR9fad7vNUicru7LtG9I3kRWAa0EpFx7nuYA5zicY5L3H1Xisjccl57U8nFlF3EmLD3AvCzOPNc+Op4nAHdMoFNwH9Vtb84k+TcBtzulksETsMZrG62iHQArgH2q2o/d5TNBSLytVu+P9BdVTd7nkxEmgNPAicAWTijAV/gPjU7FLhbVYtPPjUXGIgzTEcLoGjIi1OB6SJyAjAOGIDzxO0iNwFkAZ2Bcap6iztsw8PuuffjDEGx3D3WA8BZqrq9aMgHY4rYHYSJeO6Io28C48ux2xJ3QLTDOENDF33Ar8JJCkVmqGqhqm7ESSTH4Yy/dY07RPMioAHQ0S2/uHhycPUDflDVDHcI6neAskYBngcMFJGuwC/8NuDfScBCnETxsarmuMMwfISTUAC2qOpP7usBHuc+ArzncY4FwFQRuR5nuAdjjrI7CFNZPItTnTLFY10+7pcgd9AzzyknPcfkKfRYLuT3fxfFx6JRnG/rt6nqLM8NIjIYyCkhPinzHRQ/kfOtvh4wHOduoj6/ze520H1PJSkeh9cxdVT1JhEZAJwDrBCRXqq6t7yxmsrJ7iBMpeAOfDcDp8G3SCpOtQo4M7rFVuDQl4hIlNsu0Q5YD8wCbnaHxkZEOrkj+5ZmEXCaiDR0G7CvAOb4cP4fcaq75uLcUdzt/ou77gIRqeGe/0KPbcXPPVhEGrgxX1K0QUTaq+oiVX0A2AO08iEmU0XYHYSpTCYCt3osvwZ8KiKLge8o+dt9adbjfJA3wRlRM1dE/otTDbXM/RafQRnTWarqThG5H6f+X4AvVNWX4afnAWeqarKIbMG5i5jnHnOZiEwFFrtl/6uqy8WZDa74uR/CSTY7ce60iqqT/iUiHd2YvgNW+hCTqSJsNFdjjDFeWRWTMcYYryxBGGOM8coShDHGGK8sQRhjjPHKEoQxxhivLEEYY4zxyhKEMcYYr/4fEy4Zih/A4bUAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "ax = jaccard_scores['ratio'].plot(title='compared to reference corpus');\n", "ax.set_xlabel(\"Number of words\")\n", "ax.set_ylabel(\"Jaccard score\")" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freqscoredistdist_reference
avec96.251.1111111.613.973378
\n", "
" ], "text/plain": [ " freq score dist dist_ reference\n", "avec 9 6.25 1.111111 1.6 13.973378" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coll.loc[coll[coll.score > 4].index].sort_values(by='score', ascending=False)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
freqscoredistdist_reference
kakao633.2051283.0866163.12441.183475
drikkes221.6025646.2777786.24301.236739
bananer332.7397263.5962963.65299.241863
koppene192.0283984.8988104.93262.333714
kaker923.3670032.9409842.97255.775986
..................
sigarer22.2222223.5000004.5030.292974
bryderi21.3333339.5000007.5030.238671
mmol41.2903238.5000007.7530.183413
Middag52.0703934.6666674.8330.127670
servering72.2779044.2000004.3930.082467
\n", "

137 rows × 5 columns

\n", "
" ], "text/plain": [ " freq score dist dist_ reference\n", "kakao 63 3.205128 3.086616 3.12 441.183475\n", "drikkes 22 1.602564 6.277778 6.24 301.236739\n", "bananer 33 2.739726 3.596296 3.65 299.241863\n", "koppene 19 2.028398 4.898810 4.93 262.333714\n", "kaker 92 3.367003 2.940984 2.97 255.775986\n", "... ... ... ... ... ...\n", "sigarer 2 2.222222 3.500000 4.50 30.292974\n", "bryderi 2 1.333333 9.500000 7.50 30.238671\n", "mmol 4 1.290323 8.500000 7.75 30.183413\n", "Middag 5 2.070393 4.666667 4.83 30.127670\n", "servering 7 2.277904 4.200000 4.39 30.082467\n", "\n", "[137 rows x 5 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coll.loc[coll[coll.reference> 30].index].sort_values(by='reference', ascending=False)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'DataFrame' object has no attribute 'nb'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoll\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcoll\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnb\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m30\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m&\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoll\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcoll\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscore\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m3.5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 
"\u001b[1;32mD:\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 5177\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5178\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5179\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5180\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5181\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'nb'" ] } ], "source": [ "set(coll[coll.nb > 30].index) & set(coll[coll.score > 3.5].index)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }