* cr_RPa_d_newmodels_matchIHS_dominion

* Session setup: close any log that is still open (ignoring the error raised
* when none is) and turn off the -more- pause so the do-file runs unattended.
capture log close
set more off

********************************************************************************
* Convert the output of the model-year matching algorithm from CSV to a Stata
* dataset so that it can later serve as the using file of a merge.
import delimited using "${doR}/algorithm_dominion_IHS_model_year_output.csv", clear
save "${datbuild}/algorithm_dominion_IHS_model_year_output.dta", replace

********************************************************************************
* Build the list of distinct (make, model, model_year) combinations observed in
* the master_newprice data.
use "${datbuild}/master_newprice.dta", clear

* Sample restrictions: drop observations whose sprice is missing or zero, and
* observations flagged lease==1.
drop if sprice==. | sprice==0
drop if lease==1

rename year model_year

* An observation only counts if make, model, model year and body are all filled in.
drop if make=="" | model=="" | model_year==. | body==""

* One transaction is enough to determine whether a model is available in a
* given year, so reduce the data to the distinct key combinations.
* All other variables (zip, pumps, city, fleet, lease, countynum, ...) are
* discarded by this keep, so they need no cleaning or explicit drop beforehand.
keep make model model_year
duplicates drop

* Attach the algorithm output to each (make, model, model_year) combination and
* keep only the combinations present in both datasets.
merge 1:1 make model model_year ///
    using "${datbuild}/algorithm_dominion_IHS_model_year_output.dta", ///
    keep(match) nogenerate

* The merged-in variables arrive with lower-case names; give them the
* mixed-case names used in the rest of this script.
rename (make_ihs model_ihs generationmodelyear) ///
       (make_IHS model_IHS GenerationModelYear)

* Where the IHS make or model is empty, fall back on make / model_d2.
replace make_IHS  = make     if make_IHS == ""
replace model_IHS = model_d2 if model_IHS == ""

* A model is identified by its IHS make, IHS model and generation model year.
* NOTE(review): egen group() returns a missing id whenever any of its
* variables is missing (no -missing- option is given). If GenerationModelYear
* can be missing after the merge, those rows all share a missing model_id, are
* pooled by the bysort steps below and are not counted by count(model_id).
* Confirm that this is intended.
egen model_id = group(make_IHS model_IHS GenerationModelYear)

* Introduction year: the earliest of the first model year observed for the
* model id and the generation model year.
bysort model_id: egen first_year_dom = min(model_year)
bysort model_id: egen first_year_ihs = min(GenerationModelYear)
egen model_intro_year = rowmin(first_year_dom first_year_ihs)
drop first_year_dom first_year_ihs

* Keep a model id only in the introduction year (one row per model id).
duplicates drop model_id model_intro_year, force

* Number of model ids introduced in each introduction year.
bysort model_intro_year: egen nb_newmodels = count(model_id)

keep make_IHS model_IHS GenerationModelYear model_id model_intro_year nb_newmodels

* Save the dataset
save "${datbuild}/d_newmodels_matchIHS_dominion.dta", replace

* Erase the intermediate copy of the algorithm output
erase "${datbuild}/algorithm_dominion_IHS_model_year_output.dta"


