%% Effects of the COVID-19 Lockdown on Water Consumptions: a Northern Italy Case Study
% Stefano Alvisi, Marco Franchini, Chiara Luciani, Irene Marzola, Filippo Mazzoni
% Note. This code has been developed and tested by using MATLABŪ version R2019b and version R2020b.

%{
This study analyses the effects of the lockdown due to COVID-19 pandemic on
water consumption with reference to a DMA in the city of Rovigo (Italy).
Hourly water consumption are available for all the users thanks to
smart-meter monitoring. The analyses were conducted at different levels of
temporal aggregation, respectively from daily values to hourly pattern,
considering the whole set of users or each individual user.

The code was developed with two macro section, one regarding the import
and cleaning of data, one regarding the analyses and plotting of figures.

Regarding the first section, "IMPORT AND CLEANING", the steps are
- import of water cumulative volume data;
- extrapolation of data in the period 1st FEBRAURY - 3th MAY OF 2020, when the first measures to contain COVID-19 were implemented;
- extrapolation of data in the period of lockdown (4th April - 3rd May 2020) and the same period the year before (4th April - 3rd May 2019);
- evaluation of hourly water consumption;
- cleaning and selection of users;
- import of type of day (weekdays, weekends/holidays).


Regarding the second section, "ANALYSES AND RESULTS", the steps are
 analyses at daily temporale scale:
        - daily water consumption of the totality of users, in the period between 1 February and 3 May 2020 (Figure 1);
        - daily average water consumption of the totality of users, of all residential users and all commercial users
          in the period of lockdown (4th April - 3rd May 2020) and the same period in the year before (4th April - 3rd May 2019) (Table 1);
        - relative frequency distribution histogram of the daily average water consumption variation among each residential users (Figure 2);
 analyses at hourly temporal scale:
        - comparison between 2019 and 2020 hourly consumption coefficients for the whole residential subset (Figure 3);
        - box plot of the differences between the 2019 and 2020 hourly average water consumption during weekdays for each residential user (Figure 4);
        - comparison between the 2020 and 2019 hourly average water consumption during weekdays of four commercial users (Figure 6).

Then at command window the user will be asked if he wants to start the
K-means analyses to search recurrent water use profile, as it takes
usually more than 15 minutes. If the analyses is carried out, the script
returns:
- analysis of clusters for weekday water use profile of each residential user (Figure 5);
- percentage of residential users assigned to each partition class identified (Table 2).
%}

close all
clear all
clc

%% IMPORT AND CLEANING

%DATA IMPORT
[num19,txt19,~]=xlsread('dataset2019.xlsx');
[num20,txt20,~]=xlsread('dataset2020.xlsx');

for i = 2:size(txt19,2)
    timevol19(i-1)=datetime(txt19(1,i),'InputFormat','dd-MMM-yyyy HH:mm:ss');
end

for i = 2:size(txt20,2)
    timevol20(i-1)=datetime(txt20(1,i),'InputFormat','dd-MMM-yyyy HH:mm:ss');
end

vol19=num19(:,2:end);          %cubic meters
vol20=num20(:,2:end)/1000;     %cubic meters

matricole19=num19(:,1);
matricole20=num20(:,1);

%There are two more users in 2020 than in 2019 that must be eliminated for
%consistence
rig=find(matricole20==10120713210);
matricole20(rig)=[];
vol20(rig,:)=[];
rig=find(matricole20==10120713215);
matricole20(rig)=[];
vol20(rig,:)=[];

clear num19 txt19
clear num20 txt20

if matricole19==matricole20
    matricole=matricole19;
    clear matricole19 matricole20
end


% EXTRAPOLATION OF PERIOD 1st FEBRAURY - 3th MAY OF 2020

indiniz=find(timevol20.Hour==0 & timevol20.Day==1 & timevol20.Month==2);
indfine=find(timevol20.Hour==0 & timevol20.Day==4 & timevol20.Month==5);

volmonth20=vol20(:,indiniz:indfine);
timevolmonth20=timevol20(indiniz:indfine);


% EXTRAPOLATION OF PERIOD 4th APRIL - 3th MAY 2019 and 2020

%Selection of period 4th April - 4th May
indiniz=find(timevol19.Hour==0 & timevol19.Day==4 & timevol19.Month==4);
indfine=find(timevol19.Hour==0 & timevol19.Day==4 & timevol19.Month==5);

vol19=vol19(:,indiniz:indfine);
timevol19=timevol19(indiniz:indfine);

indiniz=find(timevol20.Hour==0 & timevol20.Day==4 & timevol20.Month==4);
indfine=find(timevol20.Hour==0 & timevol20.Day==4 & timevol20.Month==5);

vol20=vol20(:,indiniz:indfine);
timevol20=timevol20(indiniz:indfine);


clear indiniz indfine

% EVALUATION OF THE HOURLY WATER CONSUMPTION
flow19=zeros(length(matricole),length(timevol19)-1);

for i=1:length(timevol19)-1
    flow19(:,i)=(vol19(:,i+1)-vol19(:,i))*1000;      % liters/hour
end

timeflow19=timevol19(1:end-1);


flow20=zeros(length(matricole),length(timevol20)-1);

for i=1:length(timevol20)-1
    flow20(:,i)=(vol20(:,i+1)-vol20(:,i))*1000;      % liters/hour
end

timeflow20=timevol20(1:end-1);


% SELECTION OF USERS

%Elimination of users with no consumption, empty data or leakages.

load closed.txt
load leakage.txt
load userwithnan.txt

for i=1:length(closed)
    indclosed(i,1)=find(matricole==closed(i));
end

for i=1:length(leakage)
    indleakage(i,1)=find(matricole==leakage(i));
end

for i=1:length(userwithnan)
    indnan(i,1)=find(matricole==userwithnan(i));
end

%Separation of residential and commercial users

load commercial.txt

for i=1:length(commercial)
    indcommercial(i,1)=find(matricole==commercial(i));
end

indresid=removerows([1:length(matricole)]',[indcommercial; indclosed; indleakage; indnan]);
matrresid=matricole(indresid);
matrcomm=matricole(indcommercial);

volresid=vol19(indresid,:);
flowresid=flow19(indresid,:);
vol20resid=vol20(indresid,:);
flow20resid=flow20(indresid,:);
volcomm=vol19(indcommercial,:);
flowcomm=flow19(indcommercial,:);
vol20comm=vol20(indcommercial,:);
flow20comm=flow20(indcommercial,:);


flowresid(flowresid<0)=0;
flow20resid(flow20resid<0)=0;

volmonth20=volmonth20([indresid; indcommercial],:);


% DAY DIVISION

%Division of the days based on:
%0 weekdays
%2 weekends/holidays

data19=importdata('day2019.xlsx');
data20=importdata('day2020.xlsx');

daytime=datetime(data19.textdata);
daytype=data19.data;
daytime20=datetime(data20.textdata);
daytype20=data20.data;
type=[0 2];

clear data19 data20

%% ANALYSES AND RESULT AT DAILY TEMPORALE SCALE

% TOTAL DAILY WATER CONSUMPTION (FIGURE 1)

%Period 1th February - 3th May 2020

%Daily water consumption of each user for each day of the monitored period
ind=find(timevolmonth20.Hour==0);
for i=1:length(ind)-1
    volmesi20day(:,i)=(volmonth20(:,ind(i+1))-volmonth20(:,ind(i)))*1000;   %liter
end
timevolmonth20=timevolmonth20(ind(1:end-1));
timevolmonth20.Format='dd-MMM-yyyy';
%Total daily water consumption for each day of the monitored period
volmesi20daytot=sum(volmesi20day,1)/1000;  %cubic meter


figure(1)
set(gcf,'Position',[400 400 860 555])
movegui('center')
plot(timevolmonth20,volmesi20daytot,'-k','Linewidth',1.5)
set(gca,'FontSize',15)
set(gca, 'FontName', 'Times New Roman')
ylim([50 100])
ylabel('Total daily consumption V^j (m^3/d)')
grid on
hold on
plot(timevolmonth20([1,23]),mean(volmesi20daytot(1:22))*ones(2,1),'--k','Linewidth',1.5)
plot(timevolmonth20([23,40]),mean(volmesi20daytot(23:39))*ones(2,1),'--k','Linewidth',1.5)
plot(timevolmonth20([40,end]),mean(volmesi20daytot(40:end))*ones(2,1),'--k','Linewidth',1.5)

xlim([timevolmonth20(1),timevolmonth20(end)])

in=find(timevolmonth20.Day==11 & timevolmonth20.Month==3);
plot([timevolmonth20(in), timevolmonth20(in)],[50,100],'k-')
in=find(timevolmonth20.Day==23 & timevolmonth20.Month==2);
plot([timevolmonth20(in), timevolmonth20(in)],[50,100],'k-')

str={'Suspension of','school activities'};
text(5,95,str,'FontName', 'Times New Roman','Fontsize',14)
str={'Lockdown','start'};
text(40,95,str,'FontName', 'Times New Roman','Fontsize',14)



% DAILY AVERAGE WATER CONSUMPTION (TABLE 1)

%Period 4th April - 3th May 2019

%Daily water consumption of each user and each day of the monitored period
ind=find(timevol19.Hour==0);
for i=1:length(ind)-1
    voldayresid(:,i)=(volresid(:,ind(i+1))-volresid(:,ind(i)))*1000;   %liter
    voldaycomm(:,i)=(volcomm(:,ind(i+1))-volcomm(:,ind(i)))*1000;   %liter
end

%Average daily water consumption of the commercial and residential sets
volmeandaytotresid=mean(sum(voldayresid,1))/1000;  %cubic meter
volmeandaytotcomm=mean(sum(voldaycomm,1))/1000;  %cubic meter

%Period 4th April - 3th May 2020

%Daily water consumption of each user and each day of the monitored period
ind=find(timevol20.Hour==0);
for i=1:length(ind)-1
    vol20dayresid(:,i)=(vol20resid(:,ind(i+1))-vol20resid(:,ind(i)))*1000;   %liter
    vol20daycomm(:,i)=(vol20comm(:,ind(i+1))-vol20comm(:,ind(i)))*1000;   %liter
end
%Average daily water consumption of the commercial and residential sets
vol20meandaytotresid=mean(sum(vol20dayresid,1))/1000;  %cubic meter
vol20meandaytotcomm=mean(sum(vol20daycomm,1))/1000;   %cubic meter

%Variation in total average water consumption
perctot=((vol20meandaytotresid+vol20meandaytotcomm)-(volmeandaytotresid+volmeandaytotcomm))/(volmeandaytotresid+volmeandaytotcomm)*100;
percres=((vol20meandaytotresid)-(volmeandaytotresid))/(volmeandaytotresid)*100;
perccom=((vol20meandaytotcomm)-(volmeandaytotcomm))/(volmeandaytotcomm)*100;

disp('TABLE 1')
disp('')
TABLE1 = table([length([matrresid; matrcomm]); length(matrresid);length(matrcomm)],[volmeandaytotresid+volmeandaytotcomm;volmeandaytotresid;volmeandaytotcomm],...
    [vol20meandaytotresid+vol20meandaytotcomm;vol20meandaytotresid;vol20meandaytotcomm], [perctot;percres;perccom], 'VariableNames',{'Number_of_Users','Average_water_consumption_2019','Average_water_consumption_2020','Variation'},'RowName',{'Dataset_TU','Dataset_RU', 'Dataset_CU'});
disp(TABLE1)


% FREQUENCY DISTRIBUTION HISTOGRAM (FIGURE 2)

%Histogram of the variations of each user in daily average water consumption of 2019 and 2020
figure(2)
movegui('center')
histogram(mean([vol20dayresid;vol20daycomm],2)-mean([voldayresid;voldaycomm],2),'Normalization','probability','FaceColor',[0.8 0.8 0.8])
set(gca,'FontSize',15)
set(gca, 'FontName', 'Times New Roman')
ylab={'Relative frequency distribution f (-)'};
xlabel('Daily average variations \Deltav_{n} (L/d)')
ylabel(ylab)
xlim([-500,500])
grid on


%% ANALYSES AND RESULT AT HOURLY TEMPORAL SCALE

% HOURLY CONSUMPTION COEFFICIENT FOR THE WHOLE RESIDENTIAL SUBSET (FIGURE 3)

%Profiles of the hourly consumption of the set of residential users for
%weekdays and weekends/holidays

flowtot=sum(flowresid,1);
patsinglehourtotDMA=cell(length(type),1);
for j=1:24
    for i=1:length(type)
        indhour=find(timeflow19.Hour==j-1);
        indhour=indhour(find(daytype==type(i)));
        patsinglehourtotDMA{i}(:,j)=mean(flowtot(:,indhour),2);
    end
end

flow20tot=sum(flow20resid,1);
pat20singlehourtotDMA=cell(length(type),1);
for j=1:24
    for i=1:length(type)
        indhour=find(timeflow20.Hour==j-1);
        indhour=indhour(find(daytype20==type(i)));
        pat20singlehourtotDMA{i}(:,j)=mean(flow20tot(:,indhour),2);
    end
end

%Mean values of the profiles of the hourly consumption of the set of
%residential users for weekdays and weekends/holidays
for i=1:length(type)
    day=daytime(find(daytype==type(i)));
    k=[];
    for j=1:length(day)
        k=[k, find(timeflow19.Day==day(j).Day & timeflow19.Month==day(j).Month)];
    end
    patmeanhourtotDMA(:,i)=mean(flowtot(:,k),2);
end

for i=1:length(type)
    day=daytime20(find(daytype20==type(i)));
    k=[];
    for j=1:length(day)
        k=[k, find(timeflow20.Day==day(j).Day & timeflow20.Month==day(j).Month)];
    end
    pat20meanhourtotDMA(:,i)=mean(flow20tot(:,k),2);
end

%Profile of the hourly consumption coefficients of the set of residential
%users for weekdays and weekends/holidays

patadimhourtotDMA=cell(length(type),1);
for i=1:length(type)
    patadimhourtotDMA{i}=patsinglehourtotDMA{i}(:,:)./patmeanhourtotDMA(:,i);
end

pat20adimhourtotDMA=cell(length(type),1);
for i=1:length(type)
    pat20adimhourtotDMA{i}=pat20singlehourtotDMA{i}(:,:)./pat20meanhourtotDMA(:,i);
end


%Evaluation of peak consumption coefficients and hours
for i=1:length(type)
    [peakcoefftotDMA(i),hourpeakcoefftotDMA(i)]=max(patadimhourtotDMA{i});
end
hourpeakcoefftotDMA=hourpeakcoefftotDMA-1;

%Comparison of the residential hourly consumption coefficients in 2019 with the one in 2020

tt=[0:23;1:24];
tt=tt(:);

figure(3)
set(gcf,'Position',[100 100 1077 420])
movegui('center')
titlech=cell(3,1);
titlech{1}='Weekdays';
titlech{2}='Weekends and holidays';

for i=1:length(type)
    subplot(1,length(type),i)
    
    
    Patdimincrem=[patadimhourtotDMA{i};patadimhourtotDMA{i}];
    Patdimincrem=Patdimincrem(:);
    
    Patdimincrem20=[pat20adimhourtotDMA{i};pat20adimhourtotDMA{i}];
    Patdimincrem20=Patdimincrem20(:);
    
    plot(tt,Patdimincrem,'k-','linewidth',2)
    hold on
    plot(tt,Patdimincrem20,'k--','linewidth',2)
    grid on
    set(gca,'FontSize',15)
    set(gca, 'FontName', 'Times New Roman')
    grid minor
    xlabel('Hour of the day')
    xlim([0 24])
    legend('2019','2020')
    ylim([0 2.5])
    ylabel('Hourly consumption coefficients C_{h,i} (-)')
    title(titlech{i})
end


%BOX PLOT OF HOURLY AVERAGE WATER CONSUMPTION VARIATIONS DURING WEEKDAYS FOR EACH RESIDENTIAL USER (FIGURE 4)

%Profile of the hourly water consumption of each user
patsinglehourresid=cell(length(type),1);
patsinglehourcomm=cell(length(type),1);

for j=1:24
    
    for i=1:length(type)
        indhour=find(timeflow19.Hour==j-1);
        indhour=indhour(find(daytype==type(i)));
        
        patsinglehourcomm{i}(:,j)=mean(flowcomm(:,indhour),2);
        patsinglehourresid{i}(:,j)=mean(flowresid(:,indhour),2);
    end
end

pat20singlehourresid=cell(length(type),1);
pat20singlehourcomm=cell(length(type),1);

for j=1:24
    
    for i=1:length(type)
        indhour=find(timeflow20.Hour==j-1);
        indhour=indhour(find(daytype20==type(i)));
        pat20singlehourcomm{i}(:,j)=mean(flow20comm(:,indhour),2);
        pat20singlehourresid{i}(:,j)=mean(flow20resid(:,indhour),2);
    end
end

%Differences between the hourly average water consumption for weekdays for each residential user in 2020 and 2019

assex=zeros(24*length(matrresid),1);
cont=1;
for i=1:length(matrresid):length(assex)
    assex(i:i+length(matrresid)-1)=cont;
    cont=cont+1;
end

figure(4)
set(gcf,'Position',[117,342,1740,550])
movegui('center')
assey=pat20singlehourresid{1}(:)-patsinglehourresid{1}(:);
boxplot(assey,assex,'labels',{'00-01','01-02','02-03','03-04','04-05','05-06','06-07','07-08','08-09','09-10','10-11','11-12','12-13','13-14','14-15','15-16','16-17','17-18','18-19','19-20','20-21','21-22','22-23','23-24'})
ylim([-50 50])
xlabel('Hour of the day')
set(gca,'FontSize',15)
set(gca, 'FontName', 'Times New Roman')
grid on
ylab={'Hourly average variations \Deltav_{r,i} (L/h)'};
ylabel(ylab)
a = get(get(gca,'children'),'children');
set(a, 'Color', 'k');
h = findobj(gcf,'tag','Outliers');
set(h,'MarkerEdgeColor','k')


% HOURLY AVERAGE WATER CONSUMPTION OF FOUR COMMERCIAL USERS DURING WEEKDAY (FIGURE 6)

%Weekday hourly average water consumption of 4 commercial users, namely, a pharmacy, a
%hardware store, a homeopatic and wellness centre and a hairdresser

tt=[0:23;1:24];
tt=tt(:);

ind=[5, 3, 7, 6];
titlecomm=cell(4,1);
titlecomm{1}='Pharmacy';
titlecomm{2}='Hardware store';
titlecomm{3}='Homeopatic and wellness center';
titlecomm{4}='Hairdresser';

figure(6);
set(gcf,'Position',[350 160 1020 790])
movegui('center')
annotation('textbox', [0.059 0.91 0.056 0.072],'String','a)','FontSize',15,'FontName','Times New Roman','FitBoxToText','off','EdgeColor','none');
annotation('textbox',  [0.49 0.91 0.056 0.072],'String','b)','FontSize',15,'FontName','Times New Roman','FitBoxToText','off','EdgeColor','none');
annotation('textbox', [0.059 0.44 0.056 0.072],'String','c)','FontSize',15,'FontName','Times New Roman','FitBoxToText','off','EdgeColor','none');
annotation('textbox',  [0.49 0.44 0.056 0.072],'String','d)','FontSize',15,'FontName','Times New Roman','FitBoxToText','off','EdgeColor','none');

for i=1:4
    subplot(2,2,i)
    Patdimincrem=[patsinglehourcomm{1}(ind(i),:);patsinglehourcomm{1}(ind(i),:)];
    Patdimincrem=Patdimincrem(:);
    
    Patdimincrem20=[pat20singlehourcomm{1}(ind(i),:);pat20singlehourcomm{1}(ind(i),:)];
    Patdimincrem20=Patdimincrem20(:);
    
    plot(tt,Patdimincrem,'k-','linewidth',2)
    hold on
    plot(tt,Patdimincrem20,'k--','linewidth',2)
    grid on
    grid minor
    set(gca,'FontSize',15)
    set(gca, 'FontName', 'Times New Roman')
    
    xlabel('Hour of the day (hour)')
    xlim([0 24])
    legend('2019','2020','Location','NorthWest')
    ylim([0 40])
    ylab={'Hourly average consumption (L/h)'};
    ylabel(ylab)
    title(titlecomm{i})
end


% WATER USE PROFILES CLUSTERING (FIGURE 5 AND TABLE 2)

a=input('Do you want to search recurrent water use profiles by using the K-means algorithm? Y/N ','s');
while a~='N'&a~='Y'
    a=input('Please answer with Y or N ','s');
end
if a=='Y'
      
    % The hourly average water consumption for each user (1,2,...,208) with
    % respect to the average weekday.
    demwd19 = patsinglehourresid{1};
    demwd20 = pat20singlehourresid{1};
    
    % Data for years 2019 and 2020 are grouped in variable 'demwd'. The hourly
    % average water consumption for each user and year is normalised through
    % the average daily water consumption.
    demwd = [demwd19; demwd20];
    for i = 1:size(demwd,1)
        if sum(demwd(i,:))==0
            demwdnorm(i,:) = zeros(1,size(demwd,2));
        else
            demwdnorm(i,:)=demwd(i,:)/mean(demwd(i,:));
        end
    end
    
    % The K-means clustering algorithm is applied. The number of clusters is
    % described by parameter 'k', while values for 'MaxIter' and 'Replicates'
    % are chosen in order to have convergence of the solution. The centroid of
    % each cluster (i.e. the 24 hourly consumption coefficient associated with
    % each cluster) is expressed as a row of matrix 'centroids', while their
    % corresponding cluster indexes (i.e. 1,2,...,k) are inserted in variable
    
    % Silhouette curve analysis
    
    % The number of partition classes (i.e. K-value) is selected through a
    % silhouette curve analysis. The selected K-value is the one whose
    % average silhouette value is the highest.
    
    % Mean silhouette calculation for a range of K-values
    
    figure(199)
    set(gca,'FontSize',15)
    set(gca, 'FontName', 'Times New Roman')
    K_range = [2:1:20];
    for k = 1:length(K_range)
        [idx, ~] = kmeans(demwdnorm,K_range(k),'Distance','sqeuclidean','MaxIter',1000000,'Replicates',10000,'Options',statset('UseParallel',1));
        [silh,~] = silhouette(demwdnorm,idx,'sqeuclidean','MaxIter',1000000,'Replicates',10000);
        silhouette_mean(k) = mean(silh);
        clear idx centroids silh h
    end
    close Figure 199
    
    % Optimal K-value selection
    [val,pos] = max(silhouette_mean);
    k = K_range(pos);
    
    % Silhouette curve analysis plot
    figure(200)
    set(gcf,'Position',[100 100 600 400])
    set(gca,'FontSize',15)
    set(gca, 'FontName', 'Times New Roman')
    plot(K_range,silhouette_mean,'b','LineWidth',2)
    grid on
    hold on
    xlabel('Partition class number (K)')
    ylabel('Mean silhouette value')
    title('Silhouette analysis')
    xlim([K_range(1) K_range(end)])
    ylim([0 0.3])
    xticks(0:1:100)
    yticks(0:0.025:0.3)
    plot(k,val,'or','LineWidth',4,'MarkerSize',4)
    
    % K-means algorithm application
    
    [idx, centroids] = kmeans(demwdnorm,k,'Distance','sqeuclidean','MaxIter',1000000,'Replicates',10000,'Options',statset('UseParallel',1));
    
    
    % Vectors idx19 and idx20 include the cluster index associated with each of
    % the 208 users for years 2019 and 2020.
    idx19 = idx(1:size(demwdnorm,1)/2);
    idx20 = idx(size(demwdnorm,1)/2+1:size(demwdnorm,1));
    
    % The k-centroids and their corresponding k-indexes are sorted based on the
    % increasing time of peak consumption. Matrix 'peaktime_class' includes peak
    % demand times of each cluster on the first row and their corresponding
    % cluster indexes on the second row.
    for i = 1:k
        [~,pos] = max(centroids(i,:));
        peaktime_class(:,i) = [pos;i];
    end
    [temp, order] = sort(peaktime_class(1,:));
    peaktime_class = peaktime_class(:,order);
    
    % Vectors 'idxmod19' and 'idxmod20' include the cluster index (sorted based
    % on the increasing time of peak demand) associated with each of the 208
    % users for years 2019 and 2020.
    for i = 1:k
        idxmod19(idx19==i)=find(peaktime_class(2,:)==i);
        idxmod20(idx20==i)=find(peaktime_class(2,:)==i);
    end
    
    % Correlation matrix. A square matrix of order k (i.e. 'corr') is
    % determined, where the cell of position (i,j) (i,j=1,2,...,k) includes the
    % number of residential users whose water consumption profile for weekdays
    % of year 2019 was clustered to i-class and whose water consumption
    % profile for weekdays of year 2020 was clustered to j-class. As a
    % consequence, the trace of 'corr' includes the number of users whose
    % cluster category has not changed from 2019 to 2020.
    corr = zeros(k,k);
    for i = 1:length(idx19)
        corr(idxmod19(i),idxmod20(i)) = corr(idxmod19(i),idxmod20(i))+1;
    end
    corr=round(corr/length(matrresid)*100,1);
    
    % A figure is generated, including the obtained k-centroids and the weekday
    % water consumption profiles of all the users for years 2019 and 2020.
    figure(5)
    set(gcf,'Position',[100 100 260*k 560])
    set(gca, 'FontName', 'Times New Roman')
    
    % Year 2019 subplots
    for i = 1:length(idx19)
        class = idx19(i);
        subplot(2,k,find(peaktime_class(2,:)==class))
        set(gca, 'FontName', 'Times New Roman')
        plot(demwdnorm(i,:),'linewidth',1,'color',[.65 .65 .65]);
        hold on
        grid on
        axis([1 24 0 15])
        xticks(linspace(1,24,5))
        xticklabels({'0','6','12','18','24'})
        xlabel('\fontsize{10} Time of the day (hour)')
        a = get(gca,'XTickLabel');
        set(gca,'XTickLabel',a,'fontsize',10)
        yticks(0:3:15)
        b = get(gca,'YTickLabel');
        set(gca,'YTickLabel',b,'fontsize',10)
        ylabel('\fontsize{10} Water use profiles C_{h,i} (-)')
    end
    for i = 1:k
        subplot(2,k,i)
        plot(centroids(peaktime_class(2,i),:),'k','linewidth',3)
        set(gca, 'FontName', 'Times New Roman')
        hold on
        clsyr = {append('K = ',num2str(i), ' (2019)')};
        usersprc = (['\fontsize{10}\color[rgb]{.0 .0 .0} ',...
            append(num2str(round(100*length(find(idx19==peaktime_class(2,i)))/length(idx19)))),'% of users']);
        title([clsyr; usersprc])
    end
    
    % Year 2020 subplots
    for i = 1:length(idx20)
        class = idx20(i);
        subplot(2,k,k+find(peaktime_class(2,:)==class))
        plot(demwdnorm(length(idx19)+i,:),'linewidth',1,'color',[.65 .65 .65])
        set(gca, 'FontName', 'Times New Roman')
        hold on
        grid on
        axis([1 24 0 16])
        xticks(linspace(1,24,5))
        xticklabels({'0','6','12','18','24'})
        xlabel('\fontsize{10} Hour of the day')
        a = get(gca,'XTickLabel');
        set(gca,'XTickLabel',a,'fontsize',10)
        yticks(0:3:15)
        b = get(gca,'YTickLabel');
        set(gca,'YTickLabel',b,'fontsize',10)
        ylabel('\fontsize{10} Water use profiles C_{h,i} (-)')
    end
    for i = 1:k
        subplot(2,k,i+k)
        plot(centroids(peaktime_class(2,i),:),'k','linewidth',3)
        set(gca, 'FontName', 'Times New Roman')
        hold on
        clsyr = {append('K = ',num2str(i), ' (2020)')};
        usersprc = (['\fontsize{10}\color[rgb]{.0 .0 .0}',...
            append(num2str(round(100*length(find(idx20==peaktime_class(2,i)))/length(idx20)))),'% of users']);
        title([clsyr; usersprc])
    end
    
    disp('TABLE 2')
    disp('')
    TABLE2=table(corr(:,1), corr(:,2), corr(:,3), corr(:,4) ,'VariableNames',{'K1_2020','K2_2020','K3_2020','K4_2020'},'RowName',{'K1_2019','K2_2019','K3_2019','K4_2019'});
    disp(TABLE2)
    
end