% 2020.02.12, Yaohua Xie
% This is the source code of paper "Identifying patients from a large number of people by a small number of tests (2020)"
% Author: Yaohua Xie
% Email: Yaohua.Xie@hotmail.com
% ORCID: 0000-0001-6780-3156 or 0000-0002-7345-4316
% This study is performed mainly for epidemic prevention, but the proposed approach is versatile for various diseases. 
%
% This program was originally used to verify the proposed approach.
% But it can also be used as a tool to find the optimal value of M for a certain pair of P and Q, where:
% P is the number of people to be screened, which is known before screening.
% Q is the number of patients among the above people, which can only be estimated before screening.
% M is the "division parameter", and each large group is divided into M smaller groups recursively.
% Simply modify the values of P and Q in the following lines:
%     for P = 10 : 100 % The number of people.
%         for Q = 1 : P % The number of patients.
% Then, the optimal M value can be found in vector TotalChk after running
% (TotalChk holds the results of the last (P, Q) pair that was processed).
% It is the index that corresponds to the smallest value of TotalChk.

close all;
clear; clc;

tic;

% Screening state shared with HandleThisGroup (defined elsewhere). This
% script resets these variables before each call and reads them afterwards;
% presumably HandleThisGroup updates them while screening.
% They are declared once, before first use, rather than inside the loops.
%
% chkResult:
%   Column 1: each element represents a person's serial number.
%   Column 2: each element represents a person's checked result of illness.
%   (NaN: unknown, 0: has no illness, 1: has illness)
% nGroupChecked / nPeopleChecked: counters of group tests and single-person tests.
global chkResult;
global nGroupChecked;
global nPeopleChecked;

% Simulation range for the number of people P (Q always runs over 1 : P):
minPeople = 10;
maxPeople = 100;

% Weights for the synthesized evaluation: assume that checking a group
% requires w1 resources, and checking a person requires w2 resources.
w1 = 0.6;   w2 = 0.4;

%% Simulate the information of people, including a small percentage of patients:

% Sometimes we do not need to identify each patient, but just want to know
% whether there are any patients among a large group of people.
% In that case, we only need to perform a "merged test" on the whole group,
% and do not need to further divide it into smaller groups.
%
% The following code is used for identifying each patient.

% Row index = P, column index = Q. Entries that are never simulated stay NaN.
OptCheckingAll = nan(maxPeople, maxPeople);
OptResourceAll = nan(maxPeople, maxPeople);
OptMAll = nan(maxPeople, maxPeople); % Optimal M (by TotalChk) for each (P, Q).

for P = minPeople : maxPeople % The number of people.
    for Q = 1 : P % The number of patients.

        % Column 1: each element represents a person's serial number.
        % Column 2: each element represents a person's true status of illness.
        % (0: has no illness, 1: has illness):
        pplInfo = zeros(P, 2);
        pplInfo(:, 1) = 1 : P;

        % Simulate Q patients: pick Q distinct people uniformly at random.
        % randperm(P, Q) returns Q unique integers from 1..P, so no
        % retry loop is needed even when Q is close to P.
        pplInfo(randperm(P, Q), 2) = 1;
        % pplInfo(3, 2) = 1;
        % pplInfo(17, 2) = 1;
        % pplInfo(36, 2) = 1;
        % pplInfo(65, 2) = 1;
        % pplInfo(89, 2) = 1;

        % Test and compare different M values. Index M of each vector holds
        % the result for division parameter M; index 1 is never filled.
        TotalGrpChk = nan(P, 1);
        TotalPplChk = nan(P, 1);
        TotalChk = nan(P, 1);
        SynEvaluation = nan(P, 1);

        for M = 2 : P % The number of sub-groups in each time of division (should be between 2 and P).
            % Reset the shared screening state, then screen the people recursively:
            chkResult = nan(P, 2);
            chkResult(:, 1) = 1 : P;
            nGroupChecked = 0;
            nPeopleChecked = 0;

            HandleThisGroup(pplInfo, M);

            % After screening, chkResult should be the same as pplInfo.
            % isequal gives a single scalar verdict; a leftover NaN or a
            % size mismatch both count as "not equal".
            if ~isequal(chkResult, pplInfo)
                disp('Incorrect results!');
                continue; % Leave the entries for this M as NaN.
            end

            % Analysis of performance (only recorded for correct screenings):

            % The number of checked groups:
            TotalGrpChk(M) = nGroupChecked;

            % The number of checked people:
            TotalPplChk(M) = nPeopleChecked;

            % The total number of both types of checking:
            TotalChk(M) = nGroupChecked + nPeopleChecked;

            % Synthesized evaluation in case the two types require different resources:
            SynEvaluation(M) = w1 * nGroupChecked + w2 * nPeopleChecked;
        end

        % Compare all the results of different M values:

        disp('Total people number is:'); P
        disp('Ill people number is:'); Q

        % min skips NaN entries; on ties the smallest M is returned.
        [TotalChk_min, OptM] = min(TotalChk(1:P)); % Regular checking (one by one) is also included.
        SynEval_min = min(SynEvaluation(1:P)); % Regular checking (one by one) is also included.
%         TotalChk_min = min(TotalChk(1:P-1)); % Regular checking (one by one) is not included.
%         SynEval_min = min(SynEvaluation(1:P-1)); % Regular checking (one by one) is not included.
        if isnan(TotalChk_min)
            OptM = NaN; % No M produced a correct screening, so there is no optimum.
        end

        % In optimal situation, the required checking is OptChecking times of the usual one:
        OptChecking = TotalChk_min / TotalChk(P)

        % In optimal situation, the required resource is OptResource times of the usual one:
        OptResource = SynEval_min / SynEvaluation(P)

        % % Display the results (may look differently each time because pplInfo is random):
        % figure; stem(TotalGrpChk); title('TotalGrpChk');
        % figure; stem(TotalPplChk); title('TotalPplChk');
        % figure; stem(TotalChk); title('TotalChk');
        % figure; stem(SynEvaluation); title('SynEvaluation');

        OptCheckingAll(P, Q) = OptChecking; % Record all the OptChecking values in an array for comparison.
        OptResourceAll(P, Q) = OptResource; % Record all the OptResource values in an array for comparison.
        OptMAll(P, Q) = OptM;               % Record the M value that minimizes TotalChk.
    end
end

toc;
