function [dictionary,frequency] = GenerateDictionary(allwords)
%GENERATEDICTIONARY Generates a dictionary of words from a word list
%
%   [dictionary,frequency] = GenerateDictionary(allwords)
%
%   Each entry of allwords is converted to lowercase and passed through
%   the local function stripword. Entries that are empty afterwards are
%   ignored. The remaining words are collected into a list of unique words
%   together with a count of how often each one occurred.
%
% INPUTS: 
%   allwords: A cell array of individual words (any shape; every element
%             is visited)
%
% OUTPUTS:
%   dictionary: A column cell array of unique words, ordered by ascending
%               frequency (least frequent first)
%   frequency:  A column vector holding the number of occurrences of each
%               word, in the same order as dictionary
%
%   Both outputs are empty (0-by-0) when no usable words are found.
%
% Jon Francombe, 28/01/2015
% j.francombe@surrey.ac.uk
%
%

% Processing options. The struct is deliberately not called "do", because
% that is a reserved keyword in GNU Octave.
opts.lowercase = true;      % Convert everything to lowercase
opts.sort = true;           % Sort the result by frequency

% Start with empty outputs; both grow as columns while new words are found
dictionary = cell(0);
frequency = [];

% numel (not length) so that every element of a 2-D cell array is visited
for n = 1:numel(allwords)
    
    testword = allwords{n};
    
    if opts.lowercase, testword = lower(testword); end
    
    % Remove unwanted leading and trailing characters (see stripword)
    testword = stripword(testword);
    
    % Nothing left after stripping: ignore this entry
    if isempty(testword), continue, end
    
    % state says whether the word is already known; number is its position
    % in the dictionary, or the next free position for a new word
    [state,number] = isindic(testword,dictionary);
    
    if state == 0
        % New word: append it with a count of one
        dictionary{number,1} = testword;
        frequency(number,1) = 1;
    else
        % Known word: increment its count
        frequency(number) = frequency(number) + 1;
    end
    
end

% Sort by frequency (ascending), reordering the words to match
if opts.sort
    [frequency,order] = sort(frequency);
    dictionary = dictionary(order);
end

end






%%%% LOCAL FUNCTIONS
function [state,number] = isindic(query,dictionary)
% ISINDIC Check if the specified word is in a list of words
%
% INPUTS:
%   query:      The word to look for
%   dictionary: A cell array of words (may be empty)
%
% OUTPUTS:
%   state:  1 if query exactly matches an entry of dictionary, 0 otherwise
%   number: Index of the first matching entry when state is 1; otherwise
%           one past the number of entries in dictionary, i.e. the
%           position at which a new word would be appended

% strcmp compares query against every cell in a single call. The
% comparison is exact and case-sensitive. An empty dictionary simply
% yields no match, so it needs no special case.
match = find(strcmp(query,dictionary),1,'first');

if isempty(match)
    % It's not in the dictionary
    state = 0; number = numel(dictionary) + 1;
else
    state = 1; number = match;
end

end



function word = stripword(word)
% STRIPWORD Strip unwanted characters from both ends of a word
%
%   Leading characters are removed while they are char(0) to char(32)
%   (control characters and space), an opening bracket ( [ { or a double
%   quote. After that, trailing characters are removed while they are
%   char(0) to char(32), one of ! . , - / : ; ? = , a closing bracket
%   ) ] } or a double quote. Characters inside the word are left alone.

% Nothing to strip from an empty input
if isempty(word), return, end

% Characters that are dropped from the start and from the end
leadingjunk  = char([0:32 40 91 123 34]);
trailingjunk = char([0:32 33 46 44 45 47 58 59 125 93 41 34 63 61]);

% Position of the first character that has to be kept
keepfrom = find(~ismember(word,leadingjunk),1,'first');

if isempty(keepfrom)
    % Every character is strippable: nothing remains
    word = word(1:0);
    return
end

word = word(keepfrom:end);

% Position of the last character that has to be kept
keepto = find(~ismember(word,trailingjunk),1,'last');

if isempty(keepto)
    word = word(1:0);
else
    word = word(1:keepto);
end

end