%% Figure: Distribution of citation numbers

% Load the citation table. readtable's default variable names are used;
% Var2 is treated below as the number of citations of each paper.
data = readtable('../Data/citations_years.txt');

% total citations (no semicolon: the value is echoed to the command window)
sum(data.Var2)


% Histogram with one bin per integer citation count, 0..max. It is computed
% once and reused for the zero-citation count and for the distribution.
[counts, bins] = hist(data.Var2, 0:max(data.Var2));
counts(1) % number of papers without citations

% Complementary cumulative distribution: h(k+1) is the fraction of papers
% with at least k citations (reverse cumulative sum, normalised by the
% total number of papers, which is the first element of that sum).
h = fliplr(cumsum(fliplr(counts)));
h = h / h(1);

h(1+1) % fraction of papers cited at least once
h(1+10) % fraction of papers cited at least 10 times
h(1+20) % fraction of papers cited at least 20 times
h(1+100) % fraction of papers cited at least 100 times


% Fit a power law to the cumulative distribution, i.e. a straight line in
% log-log space. The first bin (zero citations) is skipped because
% log10(0) is -Inf. The upper index is clamped to the number of bins so
% that a data set whose maximum citation count is below 999 does not cause
% an out-of-range indexing error.
fitIdx = 2:min(1000, numel(bins));
p = polyfit(log10(bins(fitIdx)), log10(h(fitIdx)), 1)

% plot citation distribution
clf
set(gcf, 'Position', [731 100 528 276])

% Evaluate the fitted line starting at x = 1: x = 0 has no position on a
% logarithmic axis and log10(0) would only produce a non-finite value.
xFit = 1:3000;
loglog(xFit, 10.^polyval(p, log10(xFit)), 'Color', [0.89 0.45 0.13], 'LineWidth', 2)
hold on
loglog(bins, h, '.', 'Color', [0. 0.62 0.85], 'MarkerSize', 15)

xlabel('Number citations'), ylabel('Cumulative distribution function')
grid on
axis([0 5000 0.0001 1])
box off


% % distribution function
% [h bins] = hist(data.Var2, 0:max(data.Var2));
% 
% % fit powerlaw
% p = polyfit(log10(bins(1:100)), log10(h(1:100)+1), 1)
% 
% % plot citation distribution
% clf
% set(gcf, 'Pos', [731 100 528 276])
% loglog(bins(1:100), 10.^polyval(p, log10(bins(1:100))), 'Color', [0.89 0.45 0.13], 'LineWidth', 2)
% hold on
% loglog(bins, h, '.', 'Color', [0. 0.62 0.85], 'MarkerSize', 15)
% xlabel('Number citations'), ylabel('Frequency')
% grid on
% axis([0 4000 0 10000])



%% Figure: Publication year of most cited papers

% Load the citation table. As used below: Var1 holds the paper label,
% Var2 the citation count and Var3 the publication year.
data = readtable('../Data/citations_years.txt');

% publication years covered by the figure
years = 1987:2024;

% For every year, record the highest citation count among the papers
% published in that year together with the label of that paper.
numCitations = zeros(length(years), 1);
label = {};

for i = 1:length(years)
   publishedThisYear = (data.Var3 == years(i));
   if ~any(publishedThisYear)
       continue % no paper from this year: the count stays 0 and no label is assigned
   end

   candidateLabels = data.Var1(publishedThisYear);
   candidateCounts = data.Var2(publishedThisYear);
   % (an earlier variant used data.Var4 instead of data.Var2 as the metric here)
   [numCitations(i), winner] = max(candidateCounts);

   label(i) = candidateLabels(winner);
end

% Bar chart of the per-year maximum citation count
clf
set(gcf, 'Pos', [731 100 528 276])
yearAxis = categorical(years);
bar(yearAxis, numCitations, .5, 'EdgeColor', 'none', 'FaceColor', [0. 0.62 0.85])

% Write the paper label vertically above every bar taller than 10 citations
for i = find(numCitations(:)' > 10)
   text(categorical(years(i)), 15+numCitations(i), label(i), 'Rotation', 90)
end

% NOTE(review): the x-range ends at 2023 although 'years' runs to 2024 --
% confirm that hiding the last year is intended.
xlim([categorical(1987) categorical(2023)])
box off
xlabel('Publication year')
ylabel('Number of citations')




%% Figure: Time between publication and citation

% read citation time data (semicolon-delimited; the columns 'duration' and
% 'label' are used in this section)
data = readtable('../Data/citations_citationTime.txt', 'delimiter', ';');

% This section reuses 'years' from the publication-year section. Fall back
% to the same interval so the section also runs on its own in a fresh
% workspace instead of failing with an undefined variable.
if ~exist('years', 'var')
    years = 1987:2024;
end

time = sort(2024 - years); % list of time of citation after publication (ascending)
numCitations = -ones(height(data),length(time)); % rows: papers, columns: delays; every row is overwritten below
immediateCitations = zeros(height(data),1);
allCitations = zeros(height(data),1);

for i = 1:length(data.duration)
    % get time of citation after publication of current paper
    % NOTE(review): str2num evaluates its argument as an expression; this is
    % only acceptable because the input is a trusted local data file.
    entries = str2num(data.duration{i});
    numCitations(i,:) = hist(entries, time); % citations per delay, bins centred on 'time'
    immediateCitations(i) = sum(numCitations(i,1:2)); % citations in the two smallest delay bins (first 2 years)
    allCitations(i) = sum(numCitations(i,:)); % count all citations of the paper
end


% plot citation year after publication year
clf
set(gcf, 'Position', [731 100 528 276])

% Citations per delay, summed over all papers. The dimension is given
% explicitly: if the table holds a single paper, numCitations is a row
% vector and a plain sum() would collapse it to one scalar.
totalPerDelay = sum(numCitations, 1);
bar(time, totalPerDelay, 'EdgeColor', 'none', 'FaceColor', [0. 0.62 0.85])

% exponential model a*exp(b*x) (fittype/fit are Curve Fitting Toolbox functions)
expModel = fittype('a * exp(b * x)', 'independent', 'x', 'coefficients', {'a', 'b'});
initialGuesses = [1, -0.1]; % start values for [a, b]

% fit the model to the data; fit expects column vectors
fitResult = fit(time(:), totalPerDelay(:), expModel, 'StartPoint', initialGuesses);

% overlay the fitted curve on the bar chart
hold on
h = plot(fitResult);
set(h, 'Color', [0.89 0.45 0.13], 'LineWidth', 3)
legend off
xlabel('Years after publication'), ylabel('Number of citations')
box off




% Print the papers with the most citations within the first two years,
% highest first. At most 10 entries are listed; the loop bound is clamped
% so that a table with fewer than 10 papers does not cause an index error.
[m1, idx] = sort(immediateCitations, 'descend');
for i = 1:min(10, numel(idx))
    fprintf('%s: %i\n', data.label{idx(i)}, m1(i));
end


% Print LaTeX table rows for the papers ordered by their total number of
% citations. Papers with equal totals share a rank and the rank increases
% by one whenever the total changes. A row is printed only for papers with
% at least 200 citations in total or at least 10 in the first two years.
% Row layout: rank & label & total & first-two-years & \cite{label} \\
[m1, idx] = sort(allCitations, 'descend'); % m1(i) equals allCitations(idx(i))
paperRank = 1; % not named 'rank', which would shadow the built-in function
for i = 1:length(idx)
    if i > 1 && m1(i) ~= m1(i-1)
        paperRank = paperRank + 1;
    end
    if m1(i) >= 200 || immediateCitations(idx(i)) >= 10
        fprintf('%i\t &%s\t&%i\t&%i\t&\\cite{%s}\\\\\n', paperRank, data.label{idx(i)}, m1(i), immediateCitations(idx(i)), data.label{idx(i)});
    end
end

