* cr_d_IHS_charact

* This do-file generates a dataset with the monthly flow of new car
* transactions by car model, together with the characteristics of each model

* Session setup: switch off output paging and close any log that is still open
set more off
capture log close

********************************************************************************
* Load the raw workbook (the first row supplies the variable names)

import excel "${IHS_char}/USA_data_2003_2019.xlsx", firstrow clear

* Columns AH through AX: rename each one to T_<its variable label>.
* NOTE(review): presumably the labels are the period identifiers taken from the
* spreadsheet header; the rename will fail if a label is not a valid name suffix.
foreach col of varlist AH-AX {
	rename `col' T_`: variable label `col''
}

* Numeric characteristics: the codes -1 and 0 are recoded to missing
local numchars GenerationYear NoofDoors Engineltr Engineccm EnginekW EngineHP ///
	NoofCylinders NoofGears GrossVehicleWeight Height Length Width Wheelbase ///
	Fuelconscombined Fuelconsurban Fuelconsextraurban

foreach c of local numchars {
	replace `c' = . if inlist(`c', -1, 0)
}

* NoofGears: the code 99 is also recoded to missing
replace NoofGears = . if NoofGears == 99

* Categorical characteristics: encode each one into a labelled numeric variable
* that takes over the original variable's name
local catchars NoofSeats FuelType Turbo Transmission AxleConfiguration

foreach c of local catchars {
	encode `c', generate(n_`c')
	drop `c'
	rename n_`c' `c'
}

* First aggregation. Records that agree on every variable in `keys' and differ
* only in fuel consumption (any measure), GrossVehicleWeight, Height, Length,
* Width or Wheelbase are merged into one record: those measures are averaged
* and the T_* columns are summed.
local keys CountryName RegistrationType MakeGroup Make ///
	ModelGroup Model SubModel Version Trim BodyGroup BodyType ///
	GenerationYear NoofDoors NoofSeats FuelType Engineltr ///
	Engineccm EnginekW EngineHP Turbo NoofCylinders ///
	Transmission NoofGears AxleConfiguration DrivenWheels

local avgvars Fuelconscombined Fuelconsurban Fuelconsextraurban ///
	GrossVehicleWeight Height Length Width Wheelbase

collapse (mean) `avgvars' (sum) T_*, by(`keys')

* Recode any zero in the averaged measures to missing.
* NOTE(review): the original comment attributes zeros in all-missing groups to
* (mean); in Stata it is (sum) that returns 0 for an all-missing group, while
* (mean) returns missing. This loop is kept as a safeguard - confirm it is needed.
foreach a of local avgvars {
	replace `a' = . if `a' == 0
}

* Second aggregation. Two models are duplicated on every variable up to
* "DrivenWheels", i.e. they are identical in all features except DrivenWheels.
* The "Rear" record of each pair looks like a recording error: sales appear in
* one year only and most of its other features are missing.
*
* The collapse below groups on all keys except DrivenWheels, averages the fuel
* consumption and dimension measures, sums the T_* columns and keeps the first
* non-missing DrivenWheels value within each group.
* NOTE(review): the intent stated above is to keep the "Front" record, but the
* T_* columns of both records are summed and their measures averaged; only
* DrivenWheels is taken from a single record, and which one depends on the row
* order within the group. Confirm this is the intended treatment.
local keys CountryName RegistrationType MakeGroup Make ///
	ModelGroup Model SubModel Version Trim BodyGroup BodyType ///
	GenerationYear NoofDoors NoofSeats FuelType Engineltr ///
	Engineccm EnginekW EngineHP Turbo NoofCylinders ///
	Transmission NoofGears AxleConfiguration

local avgvars Fuelconscombined Fuelconsurban Fuelconsextraurban ///
	GrossVehicleWeight Height Length Width Wheelbase

collapse (mean) `avgvars' (sum) T_* (firstnm) DrivenWheels, by(`keys')

* These two identifiers are not kept in the output dataset
drop CountryName RegistrationType

* Write the final model-level dataset to disk
save "${datbuild}/d_IHS_charact.dta", replace
