# share-cv_r.R cleaning 
# Reset the session: collect garbage, clear the global workspace and move to
# the project root so that the relative source() paths below resolve.
gc()
rm(list = ls())
setwd('c:/SHARE/R')

# Session options, set in one call:
#   warn = 2   - every warning is turned into an error
#                (negative = ignored, 0 = deferred until the top-level call
#                 returns, 1 = printed as they occur)
#   error      - enter utils::recover when an error is raised
#   max.print  - raise the limit on printed entries
#   descr.plot - NOTE(review): presumably read by a package loaded from
#                share-libraries.R - confirm
options(
  warn = 2,
  error = utils::recover,
  max.print = 99999,
  descr.plot = FALSE
)
closeAllConnections()

# Project-local scripts; the helpers used below (f_cn, f_NA, h, ...) are not
# defined in this file and are expected to come from these.
source('share-libraries.R')
source('share-functions.R')

##################################################################
setwd('c:/SHARE/R/data')
# Read the raw cv_r extract.
d <- fread(file = 'data-raw-cv_r.csv')

# The de-duplicated table is only printed here, not assigned; d itself is
# de-duplicated later, just before the final save.
unique(d, by = "mergeid")
# setorder() sorts d by reference, so no re-assignment is needed.
setorder(d, mergeid)
head(d)

# Column-name listings for three patterns (f_cn is a project helper).
f_cn(d, '')
f_cn(d, '\\.x')
f_cn(d, '\\.y')

# Intermediate copy that the next section reads back in.
fwrite(d, file = 'data-cv_r-temp.csv', na = NA)
setwd('c:/SHARE/R')
##################################################################

# Restart from the intermediate file written above.
setwd('c:/SHARE/R/data')
d <- fread(file = 'data-cv_r-temp.csv')
setwd('c:/SHARE/R')

# COUNTRY
# f_country is a project helper; it is given the table and the mergeid column.
d[, country := f_country(d, mergeid)]
table(d$country)

# YEAR and MONTH OF BIRTH
f_cn(d, '')
table(d$yrbirth)
table(d$mobirth)

# Cleaned birth year (b_y), birth month (b_m) and their combination (b_ym).
# f_NA / f_month / f_ym are project helpers.
# NOTE(review): -1 and 2050 look like validity bounds for f_NA - confirm.
d[, b_y := f_NA(d, yrbirth, -1, 2050)]
d[, b_m := f_month(d, mobirth)]
d[, b_ym := f_ym(d, b_y, b_m)]

# Derived terms: square of b_ym, b_ym measured from 1900, and its square.
d[, `:=`(
  b_ym2    = b_ym^2,
  b_ym1900 = b_ym - 1900
)]
d[, b_ym19002 := b_ym1900^2]
h(d, , 40, "mergeid|w_int_ym|b_ym")
table(d$b_y)

# Cohort in decades
# Open-ended 0/1 indicators at both ends of the birth-date range.
# NOTE(review): bc_1900 uses the cutoff 1910 while bc_1905 uses 1905 (not
# 1915) - confirm that this asymmetry is intended.
d[, `:=`(
  bc_1900 = as.integer(b_ym < 1910),
  bc_1905 = as.integer(b_ym < 1905),
  bc_2000 = as.integer(b_ym >= 2000),
  bc_2005 = as.integer(b_ym >= 2005)
)]

# Ten-year windows [yis, yie) starting every five years from 1910 to 1995;
# the column name for each window is built by fp0('bc_19', i).
for (i in seq(10, 95, by = 5)) {
  yis <- 1900 + i
  yie <- yis + 10
  cat('Cohort from ', yis, ' to ', yie, '\n')
  d[, fp0('bc_19', i) := as.integer(b_ym >= yis & b_ym < yie)]
}
h(d, , 30, 'mergeid|^b_ym$|^bc_')


# Death year, month and combined date, built with the same helpers as the
# birth variables, plus the reported age at death.
d[, d_y := f_NA(d, deceased_year, -1, 2050)]
d[, d_m := f_month(d, deceased_month)]
d[, d_ym := f_ym(d, d_y, d_m)]
d[, d_age := f_NA(d, deceased_age, -1, 2050)]
# Age at death computed as death date minus birth date.
d[, d_age_ym := f_NA(d, d_ym - b_ym, -1, 2050)]
h(d, , 40, "mergeid|deceased|death")

# gender
f_cn(d, 'gender')
table(d$gender)

# female comes from the project helper f_gender; male is 1 where female is 0.
d[, female := f_gender(d, gender)]
d[, male := as.integer(female == 0)]

# Cross-check both indicators against the raw column.
table(d$gender, d$female)
table(d$gender, d$male)

# WAVE PARTICIPATION
f_cn(d, '')
table(d$interview_w1)

# Copy the raw interview-status column of each wave, interview_w<k>, into
# w_int.<k> for waves 1 to 7.
for (i in 1:7) {
  d[, (paste0("w_int.", i)) := get(paste0("interview_w", i))]
}

# 1 if main interview
# wave.<k> is the result of the project helper f_01 applied to the raw status
# column with the pattern '^Main interview$'.
table(d$interview_w7)
d[, wave.1 := f_01(d, interview_w1, '^Main interview$')]
d[, wave.2 := f_01(d, interview_w2, '^Main interview$')]
d[, wave.3 := f_01(d, interview_w3, '^Main interview$')]
d[, wave.4 := f_01(d, interview_w4, '^Main interview$')]
d[, wave.5 := f_01(d, interview_w5, '^Main interview$')]
d[, wave.6 := f_01(d, interview_w6, '^Main interview$')]
d[, wave.7 := f_01(d, interview_w7, '^Main interview$')]
# Preview the wave.<k> columns created above.
h(d, , 40, "mergeid|^wave\\.")

# 1 if End-of-Life interview
table(d$interview_w7)
d[, wave_eof.1 := f_01(d, interview_w1, '^End-of-Life interview$')]
d[, wave_eof.2 := f_01(d, interview_w2, '^End-of-Life interview$')]
d[, wave_eof.3 := f_01(d, interview_w3, '^End-of-Life interview$')]
d[, wave_eof.4 := f_01(d, interview_w4, '^End-of-Life interview$')]
d[, wave_eof.5 := f_01(d, interview_w5, '^End-of-Life interview$')]
d[, wave_eof.6 := f_01(d, interview_w6, '^End-of-Life interview$')]
d[, wave_eof.7 := f_01(d, interview_w7, '^End-of-Life interview$')]
# Preview the wave_eof.<k> columns created above.
h(d, , 40, "mergeid|^wave_eof\\.")

# wave_i.<k> is the main-interview flag multiplied by its wave number.
d[, wave_i.1 := 1 * wave.1]
d[, wave_i.2 := 2 * wave.2]
d[, wave_i.3 := 3 * wave.3]
d[, wave_i.4 := 4 * wave.4]
d[, wave_i.5 := 5 * wave.5]
d[, wave_i.6 := 6 * wave.6]
d[, wave_i.7 := 7 * wave.7]
# Preview the wave_i.<k> columns created above.
h(d, , 40, "mergeid|^wave_i\\.")

# YEAR OF INTERVIEW
# w_int_y.<k>: interview year of wave k, passed through the helper f_NA.
f_cn(d, '')
table(d$int_year_w1)
d[, w_int_y.1 := f_NA(d, int_year_w1, -1, 2050)]
d[, w_int_y.2 := f_NA(d, int_year_w2, -1, 2050)]
d[, w_int_y.3 := f_NA(d, int_year_w3, -1, 2050)]
d[, w_int_y.4 := f_NA(d, int_year_w4, -1, 2050)]
d[, w_int_y.5 := f_NA(d, int_year_w5, -1, 2050)]
d[, w_int_y.6 := f_NA(d, int_year_w6, -1, 2050)]
d[, w_int_y.7 := f_NA(d, int_year_w7, -1, 2050)]
h(d, , 40, "mergeid|w_int_y.")

# MONTH OF INTERVIEW
# w_int_m.<k>: interview month of wave k, passed through the helper f_month.
d[, w_int_m.1 := f_month(d, int_month_w1)]
d[, w_int_m.2 := f_month(d, int_month_w2)]
d[, w_int_m.3 := f_month(d, int_month_w3)]
d[, w_int_m.4 := f_month(d, int_month_w4)]
d[, w_int_m.5 := f_month(d, int_month_w5)]
d[, w_int_m.6 := f_month(d, int_month_w6)]
d[, w_int_m.7 := f_month(d, int_month_w7)]
h(d, , 40, "mergeid|w_int_m.")

# birth and interview time as real number
# w_int_ym.<k>: year and month of wave k combined by the helper f_ym, the
# same helper that builds b_ym.
d[, w_int_ym.1 := f_ym(d, w_int_y.1, w_int_m.1)]
d[, w_int_ym.2 := f_ym(d, w_int_y.2, w_int_m.2)]
d[, w_int_ym.3 := f_ym(d, w_int_y.3, w_int_m.3)]
d[, w_int_ym.4 := f_ym(d, w_int_y.4, w_int_m.4)]
d[, w_int_ym.5 := f_ym(d, w_int_y.5, w_int_m.5)]
d[, w_int_ym.6 := f_ym(d, w_int_y.6, w_int_m.6)]
d[, w_int_ym.7 := f_ym(d, w_int_y.7, w_int_m.7)]

# AGE OF INTERVIEW
f_cn(d, '')
# Tabulate the raw age column that this section recodes.
table(d$age_int_w1)

# w_age.<k>: reported age at the wave-k interview, passed through f_NA.
d[, w_age.1 := f_NA(d, age_int_w1, -1, 2050)]
d[, w_age.2 := f_NA(d, age_int_w2, -1, 2050)]
d[, w_age.3 := f_NA(d, age_int_w3, -1, 2050)]
d[, w_age.4 := f_NA(d, age_int_w4, -1, 2050)]
d[, w_age.5 := f_NA(d, age_int_w5, -1, 2050)]
d[, w_age.6 := f_NA(d, age_int_w6, -1, 2050)]
d[, w_age.7 := f_NA(d, age_int_w7, -1, 2050)]

# w_age_ym.<k>: age computed as interview date minus birth date.
d[, w_age_ym.1 := f_NA(d, w_int_ym.1 - b_ym, -1, 2050)]
d[, w_age_ym.2 := f_NA(d, w_int_ym.2 - b_ym, -1, 2050)]
d[, w_age_ym.3 := f_NA(d, w_int_ym.3 - b_ym, -1, 2050)]
d[, w_age_ym.4 := f_NA(d, w_int_ym.4 - b_ym, -1, 2050)]
d[, w_age_ym.5 := f_NA(d, w_int_ym.5 - b_ym, -1, 2050)]
d[, w_age_ym.6 := f_NA(d, w_int_ym.6 - b_ym, -1, 2050)]
d[, w_age_ym.7 := f_NA(d, w_int_ym.7 - b_ym, -1, 2050)]
h(d, , 40, "mergeid|w_age|age_int")

h(d, , 30, 'mergeid|wave\\.[1-7]$|sep|w_int_ym|w_age_ym')


# Squared versions of both age measures for every wave. Target names are
# built by fp0(); the source values are fetched by the project helper fvs()
# from a 'd$<column>' string prefix plus the wave number.
for (i in 1:7) {
  d[, fp0('w_age2.', i) := fvs('d$w_age.', i)^2]
  d[, fp0('w_age_ym2.', i) := fvs('d$w_age_ym.', i)^2]
}
h(d, , , 'b_ym|w_age_ym')

# dead or alive in each wave
f_cn(d, '_w')
table(d$deadoralive_w6)

# w_dora.<k>: unchanged copy of the raw status column deadoralive_w<k>.
for (i in 1:7) {
  d[, (paste0("w_dora.", i)) := get(paste0("deadoralive_w", i))]
}

# w_alive.<k>: result of the project helper f_01 applied to the raw status
# column with the pattern 'alive'.
d[, w_alive.1 := f_01(d, deadoralive_w1, 'alive')]
d[, w_alive.2 := f_01(d, deadoralive_w2, 'alive')]
d[, w_alive.3 := f_01(d, deadoralive_w3, 'alive')]
d[, w_alive.4 := f_01(d, deadoralive_w4, 'alive')]
d[, w_alive.5 := f_01(d, deadoralive_w5, 'alive')]
d[, w_alive.6 := f_01(d, deadoralive_w6, 'alive')]
d[, w_alive.7 := f_01(d, deadoralive_w7, 'alive')]


######################################################################
# LIFE HISTORY WAVE
######################################################################
# Rows with a main interview in wave 3: w_lh is 3 and w_lh_ym is the wave-3
# interview date.
j <- which(d$wave.3 == 1)
d[j, w_lh := 3]
d[j, w_lh_ym := w_int_ym.3]
h(d, j, 20, 'mergeid|wave.3|wave.7|w_alive.3|w_alive.7|w_int_ym|w_lh')

# Rows with a main interview in wave 7 whose wave.3 flag is 0 or missing:
# w_lh is 7 and w_lh_ym is the wave-7 interview date.
j <- which(d$wave.7 == 1 & (is.na(d$wave.3) | d$wave.3 == 0))
d[j, w_lh := 7]
d[j, w_lh_ym := w_int_ym.7]

# Inspect rows with wave.7 == 1 and wave.3 == 0.
# (Alternative selection: d$wave.7 == 1 & is.na(d$wave.3).)
j <- which(d$wave.7 == 1 & d$wave.3 == 0)
h(d, j, 20, 'mergeid|wave.3|wave.7|w_alive.3|w_alive.7|w_int_ym|w_lh')

# Inspect rows where wave.3 is missing but wave.7 is not.
# (Alternative selections: both flags missing, or both flags non-missing.)
j <- which(is.na(d$wave.3) & !is.na(d$wave.7))
h(d, j, 20, 'mergeid|wave.3|wave.7|w_alive.3|w_alive.7|w_int_ym|w_lh')

# life history age
# Life-history interview date minus birth date, and its square. The preview
# below reuses the row selection j from the previous inspection.
d[, w_lha_ym := f_NA(d, w_lh_ym - b_ym, -1, 2050)]
d[, w_lha_ym2 := w_lha_ym^2]
h(d, j, 20, 'mergeid|wave.3|wave.7|w_alive.3|w_alive.7|w_int_ym|w_lh')

# Reported age at the life-history interview, combined from the wave-3 and
# wave-7 ages by the project helper f_waves, and its square.
d[, w_age_lh := f_waves(d, w_age.3, w_age.7)]
d[, w_age_lh2 := w_age_lh^2]


####################################
# HH SIZE FOR EACH WAVE
f_cn(d, '')
# Tabulate the raw household-size columns; they are named hhsize_w<k>, the
# same names the recodes below read.
table(d$hhsize_w6)
table(d$hhsize_w7)

# w_hhs.<k>: household size of wave k passed through the helper f_NA.
# NOTE(review): -1 and 97 look like validity bounds - confirm.
d[, w_hhs.1 := f_NA(d, hhsize_w1, -1, 97)]
d[, w_hhs.2 := f_NA(d, hhsize_w2, -1, 97)]
d[, w_hhs.3 := f_NA(d, hhsize_w3, -1, 97)]
d[, w_hhs.4 := f_NA(d, hhsize_w4, -1, 97)]
d[, w_hhs.5 := f_NA(d, hhsize_w5, -1, 97)]
d[, w_hhs.6 := f_NA(d, hhsize_w6, -1, 97)]
d[, w_hhs.7 := f_NA(d, hhsize_w7, -1, 97)]
h(d, , 40, "mergeid|w_hhs")
f_cn(d, 'hh')

# age of first and last interview
# Both columns are computed over the columns selected by f_toc() with the
# pattern '^w_age_ym.[1-7]$'. NOTE(review): going by their names, the
# f_rowmins_notNA / f_rowmaxs_notNA helpers take the row-wise minimum and
# maximum over non-missing values - confirm in share-functions.R.
d[, w_age_f := f_rowmins_notNA(d, f_toc(d, '^w_age_ym.[1-7]$'))]
d[, w_age_l := f_rowmaxs_notNA(d, f_toc(d, '^w_age_ym.[1-7]$'))]

#####################################################################
# SAVE
#####################################################################
# NOTE(review): f_dt_NULL is a project helper whose result is not assigned;
# presumably it removes the matching columns from d by reference - confirm.
f_dt_NULL(d, 'temp|deadoralive|deceased')

# Column-name listings for several patterns before the selection.
f_cn(d, '')
f_cn(d, '^w_')
f_cn(d, '^w')

f_cn(d, '_w')
f_cn(d, 'mergeid$|^w|^b|^d|gender|female')

# Keep only columns whose name matches the pattern; 'male' matches both
# `male` and `female`.
# NOTE(review): the `country` column created earlier in this script matches
# none of these alternatives and is therefore not kept - confirm intended.
d <- d[, grep("mergeid$|^w|^b|^d|gender|male", colnames(d)), with = FALSE]

# One row per mergeid, sorted by mergeid (setorder works by reference).
d <- unique(d, by = "mergeid")
setorder(d, mergeid)
head(d)
colnames(d)


setwd('c:/SHARE/R/data')
# Full cleaned table.
fwrite(d, file = "data-cv_r.csv", na = NA)
# From here on d holds only the rows whose mergeid contains 'CZ'.
d <- d[grepl('CZ', mergeid), ]
fwrite(d, file = "data-cv_r-CZ.csv", na = NA)
setwd('c:/SHARE/R')
cat("Data saved", "\n")

# Runs on the CZ subset, because d was overwritten above.
hf(d, 'bc_')