import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from cycler import cycler

# Figure dimensions (width, height) in inches, passed to plt.figure below.
figsize = (6, 3)

# Parallel lists: entry i of each list comes from the same CSV row.
errata_counts = []
areas_counts = []

# Load the per-draft counts. Column index 4 is read as the area count and
# column index 3 as the errata count; rows whose area count is not positive
# are dropped entirely.
# NOTE(review): the file is assumed to have no header row -- a non-numeric
# value in column 3 or 4 raises ValueError.
with open("data/draft-area-discussion-counts.csv") as counts_file:
    for raw_line in counts_file:
        raw_line = raw_line.strip()
        if not raw_line:
            # A blank line holds no record; skip it instead of failing with
            # an IndexError when indexing the missing columns.
            continue
        fields = raw_line.split(",")
        area_count = int(fields[4])
        if area_count > 0:
            errata_counts.append(int(fields[3]))
            areas_counts.append(area_count)

# Maps each observed area count to the list of errata counts recorded for it.
data = {}

for area_total, errata_total in zip(areas_counts, errata_counts):
    # Create the entry before filtering, so an area count whose errata values
    # are all rejected below still has a (possibly empty) list.
    bucket = data.setdefault(area_total, [])
    # Keep only errata counts strictly between 0 and 50.
    if 0 < errata_total < 50:
        bucket.append(errata_total)

# Global matplotlib styling for the figure.
# NOTE(review): Helvetica is registered as the font for the "serif" family
# here; that selects Helvetica for all text -- confirm this is intended.
plt.rc('font',**{'family':'serif','serif':['Helvetica']})
plt.rc('axes', axisbelow=True)
# Font type 42 embeds TrueType fonts in the PDF output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['font.size'] = 14

# One box per observed area count, ordered by ascending area count.
area_keys = sorted(data)

plt.figure(figsize=figsize)
plt.boxplot([data[key] for key in area_keys])
# boxplot places the boxes at positions 1..N and labels them with those
# positions by default. Label each box with its actual area count so the axis
# stays correct when the observed counts are not exactly 1..N.
plt.xticks(range(1, len(area_keys) + 1), [str(key) for key in area_keys])
plt.xlabel("Number of areas represented in discussion")
plt.ylabel("Number of errata filings")
plt.savefig('figures/rfcs-areas-discussion-filings-hist.pdf', bbox_inches='tight')