/*
SAS-Macro to estimate the parametric additive hazard model. 

The program is written using SAS 9.4.

User-specific input values are:
		- data: Input data set for which the parametric additive hazard model should be estimated
		- distribution: Distribution used for estimation.
						Values to pass (spelled as shown): Exponential, Weibull, Gamma, Gompertz,
						Logn (Log-normal distribution), LogLogistic (Log-logistic distribution)
		- surv: Variable that indicates the observed survival time (possibly censored) of each study participant,
				In the example data set denoted by "surv"
		- event: Variable that indicates whether a study participant experienced an event or was censored,
				 In the example data set denoted by "event"
		- covar: Covariable to be included in the analysis
		- start_b_covar: Starting value for the regression coefficient that should be estimated
		- start_log_h0_a: Starting value (on the log-scale) for the first parameter of the used distribution that should be estimated
						  (not used for the Exponential distribution, which has only one parameter)
		- start_log_h0_b: Starting value (on the log-scale) for the second parameter of the used distribution that should be estimated
		
*/

%macro ParamAddHaz(data=, distribution=, surv=surv, event=event, covar=, start_b_covar=, start_log_h0_a=, start_log_h0_b=);
	/*
	Estimate a parametric additive hazard model with PROC NLMIXED.

	With linp = b_<covar> * <covar>, baseline density f0 and baseline survival
	function S0, the log-likelihood contribution of one observation is
		event = 1 :  log( f0(surv) + linp * S0(surv) ) - surv * linp
		event = 0 :  log( S0(surv) )                   - surv * linp

	Parameters
		data            Input data set.
		distribution    Baseline distribution. Compared case-insensitively, ignoring
		                blanks, hyphens and underscores. Accepted: Exponential, Weibull,
		                Gamma, Gompertz, Logn or Log-normal, LogLogistic or Log-logistic.
		surv            Variable holding the observed (possibly censored) time.
		event           Variable coded 1 = event observed, 0 = censored.
		covar           Single covariable entering the linear predictor.
		start_b_covar   Starting value of the regression coefficient b_<covar>.
		start_log_h0_a  Starting value (log scale) of the first distribution parameter;
		                not used for the Exponential distribution.
		start_log_h0_b  Starting value (log scale) of the second distribution parameter.

	Side effects
		Creates or replaces WORK._DATA (input data plus the constant DUMMY = 1) and
		sets the global TITLE to the value passed as DISTRIBUTION.

	If a required argument is missing or the distribution is not recognised, an
	ERROR line is written to the log and the macro stops before any step runs.
	*/

	%local dist nparm;

	/* Required inputs: stop early instead of generating an invalid step */
	%if %length(%superq(data)) = 0 or %length(%superq(covar)) = 0 or %length(%superq(distribution)) = 0 %then %do;
		%put ERROR: ParamAddHaz needs values for DATA, COVAR and DISTRIBUTION.;
		%return;
	%end;

	/* Normalise the distribution name so that the documented spellings
	   (for example Log-normal) and any letter case are understood */
	%let dist = %upcase(%sysfunc(compress(&distribution, %str( -_))));
	%if &dist = LOGN %then %let dist = LOGNORMAL;

	/* Number of baseline parameters; unknown names are rejected here */
	%if &dist = EXPONENTIAL %then %let nparm = 1;
	%else %if &dist = WEIBULL or &dist = GAMMA or &dist = LOGNORMAL or &dist = GOMPERTZ or &dist = LOGLOGISTIC %then %let nparm = 2;
	%else %do;
		%put ERROR: ParamAddHaz does not know distribution "&distribution". Use Exponential, Weibull, Gamma, Gompertz, Logn or LogLogistic.;
		%return;
	%end;

	/* Starting values needed by the PARMS statement */
	%if %length(%superq(start_b_covar)) = 0 or %length(%superq(start_log_h0_b)) = 0
		or (&nparm = 2 and %length(%superq(start_log_h0_a)) = 0) %then %do;
		%put ERROR: ParamAddHaz needs START_B_COVAR and START_LOG_H0_B, plus START_LOG_H0_A for two-parameter distributions.;
		%return;
	%end;

	/* Copy the input data and add the constant response used in the MODEL statement */
	data _data;
		set &data;
		dummy = 1;
	run;

	proc nlmixed data=_data tech=nrridg df=10000 maxiter=5000 maxfunc=20000;

		/* Starting values; the distribution parameters are optimised on the
		   log scale and transformed back to the original scale */
		%if &nparm = 1 %then %do;
			parms b_&covar=&start_b_covar log_h0_b=&start_log_h0_b;
		%end;
		%else %do;
			parms b_&covar=&start_b_covar log_h0_a=&start_log_h0_a log_h0_b=&start_log_h0_b;
			h0_a = exp(log_h0_a);
		%end;
		h0_b = exp(log_h0_b);

		/* Baseline density (h0_density) and baseline survival function (h0_survival) */
		%if &dist = EXPONENTIAL %then %do;
			h0_density  = pdf("EXPONENTIAL", &surv, h0_b);
			h0_survival = sdf("EXPONENTIAL", &surv, h0_b);
		%end;
		%else %if &dist = GAMMA or &dist = WEIBULL %then %do;
			h0_density  = pdf("&dist", &surv, h0_a, h0_b);
			h0_survival = sdf("&dist", &surv, h0_a, h0_b);
		%end;
		%else %if &dist = LOGNORMAL %then %do;
			/* Argument order differs from Gamma and Weibull: h0_b first, then h0_a.
			   NOTE(review): h0_b = exp(log_h0_b) is always positive here - confirm
			   that restricting this parameter to positive values is intended */
			h0_density  = pdf("LOGNORMAL", &surv, h0_b, h0_a);
			h0_survival = sdf("LOGNORMAL", &surv, h0_b, h0_a);
		%end;
		%else %if &dist = GOMPERTZ %then %do;
			/* Written out by hand; the original author notes that SAS offers
			   no built-in function for this distribution */
			h0_density  = h0_a*h0_b*exp(h0_a*&surv)*exp(h0_b)*exp(-h0_b*exp(h0_a*&surv));
			h0_survival = exp(-h0_b*(exp(h0_a*&surv)-1));
		%end;
		%else %do;
			/* Log-logistic distribution, also written out by hand */
			h0_density  = ((h0_b/h0_a)*(&surv/h0_a)**(h0_b-1))/( (1+(&surv/h0_a)**h0_b )**2);
			h0_survival = 1-(1/(1+(&surv/h0_a)**(-h0_b)));
		%end;

		/* Linear predictor */
		linp = b_&covar*&covar;

		/* Log-likelihood: observed event, then censored observation */
		if &event = 1 then ll = log(h0_density + linp*h0_survival) - &surv*linp;
		if &event = 0 then ll = log(h0_survival) - &surv*linp;

		model dummy ~ general(ll);

		/* Additional estimates: baseline mean and baseline median (where available) */
		%if &dist = EXPONENTIAL %then %do;
			estimate "Baseline Mean" exp(log_h0_b);
			estimate "Baseline Median" exp(log_h0_b)*log(2);
		%end;
		%else %if &dist = GAMMA %then %do;
			estimate "Baseline Mean" exp(log_h0_b)*exp(log_h0_a);
		%end;
		%else %if &dist = WEIBULL %then %do;
			estimate "Baseline Mean" exp(log_h0_b)*GAMMA( 1 + (1/exp(log_h0_a)));
			estimate "Baseline Median" exp(log_h0_b) *  log(2)**(1/exp(log_h0_a));
		%end;
		%else %if &dist = GOMPERTZ %then %do;
			estimate "Baseline Median" (1/exp(log_h0_a))*log((-1/exp(log_h0_b))*log(0.5)+1);
		%end;
		%else %if &dist = LOGNORMAL %then %do;
			estimate "Baseline Median" exp(exp(log_h0_b));
			estimate "Baseline Mean" exp(exp(log_h0_b) + (exp(log_h0_a))**2/2);
		%end;
		%else %do;
			/* NOTE(review): this closed form is the log-logistic mean only
			   when h0_b is greater than 1 - confirm before reporting it */
			estimate "Baseline Median" exp(log_h0_a);
			estimate "Baseline Mean" (h0_a*constant('pi')/h0_b)/(sin(constant('pi')/h0_b));
		%end;

		title "&distribution";
	run;

%mend ParamAddHaz;

/* Application to the example data set: read the raw file into WORK.HALLUCA. */
/* Replace PATH with the directory in which the data set is stored.          */
/* NOTE(review): dbms=tab is used although the file extension is .csv -      */
/* confirm which delimiter the file really uses.                             */
proc import
	datafile="PATH\halluca_short.csv"
	dbms=tab
	out=halluca
	replace;
	getnames=yes;
run;

* Variables included in the HALLUCA data set:
		- t: Observed survival time
		- TNMII, TNMIIIa, TNMIIIb, TNMIV: Documented tumor classification
		- TNMmis: Variable indicating whether the tumor classification is missing
		- id_count: Id indicating the different study participants
		- d: Censoring indicator
;
/* Prepare the input data set for the macro */
data halluca;
	set halluca;

	/* Drop study participants with missing tumor classification */
	if TNMmis = 1 then delete;

	/* A survival time of exactly zero is replaced by 0.01 */
	if t = 0 then t = 0.01;

	/* Survival time in years */
	surv = t / 12;

	/* event = 1: event observed, event = 0: censored observation; */
	/* cens is set to the opposite value of event                  */
	select (d);
		when (0) do; event = 0; cens = 1; end;
		when (1) do; event = 1; cens = 0; end;
		otherwise;
	end;
run;

/* Apply the SAS macro to the prepared HALLUCA data, once per distribution */

/* Exponential distribution (one distribution parameter) */
%ParamAddHaz(
	data=halluca,
	distribution=Exponential,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_b=0.5
);

/* Weibull distribution */
%ParamAddHaz(
	data=halluca,
	distribution=Weibull,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_a=-0.2,
	start_log_h0_b=3
);

/* Gamma distribution */
%ParamAddHaz(
	data=halluca,
	distribution=Gamma,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_a=0.5,
	start_log_h0_b=2
);

/* Gompertz distribution */
%ParamAddHaz(
	data=halluca,
	distribution=Gompertz,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_a=-4,
	start_log_h0_b=4
);

/* Log-normal distribution */
%ParamAddHaz(
	data=halluca,
	distribution=Logn,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_a=1.3,
	start_log_h0_b=1.3
);

/* Log-logistic distribution */
%ParamAddHaz(
	data=halluca,
	distribution=LogLogistic,
	covar=TNMIV,
	start_b_covar=0.7,
	start_log_h0_a=1.3,
	start_log_h0_b=0.3
);

