# share-dn.R cleaning 
source('share-initialize.R')

#################################################################
# setwd('c:/SHARE/R/data')
	# d_isco       		<- fread(file = 'data-raw-isco.csv')
	# d_rp						<- fread(file = 'data-raw-rp.csv')
	# d 							<- fread(file = 'data-raw-dn.csv')

	# d <- merge(d, d_isco, 	by='mergeid', all = TRUE, sort = TRUE)
	# d <- merge(d, d_rp, 		by='mergeid', all = TRUE, sort = TRUE)

	
	# unique(d, by="mergeid")
	# d <- setorder(d, mergeid)
	# head(d)

	# f_cn(d,'')
	# f_cn(d,'\\.x')
	# f_cn(d,'\\.y')

	# fwrite(d, file = 'data-dn-temp.csv', na=NA)
# setwd('c:/SHARE/R')
#################################################################

# Read the merged DN working file (the one the commented-out merge step above
# writes). The full path is built with file.path() instead of switching into
# the data directory with setwd(): if fread() fails, the session is no longer
# left stranded in the data folder.
d <- fread(file = file.path('c:/SHARE/R/data', 'data-dn-temp.csv'))
# Kept from the original flow so that, after loading, the working directory is
# the project root exactly as before.
setwd('c:/SHARE/R')


# First look at the data and its column names. f_cn() is a helper defined
# outside this file; presumably it lists the column names matching the regex
# (all columns, then leftover merge suffixes .x / .y) -- TODO confirm.
head(d)
f_cn(d,'')
f_cn(d,'\\.x')
f_cn(d,'\\.y')

# COUNTRY
# f_country() is defined outside this file; presumably it derives the country
# code from mergeid -- TODO confirm.
d[, country := f_country(d, mergeid)]
table(d$country)
# d <- d[ country=='CZ' ]

##################################################################
# Column-name checks: wave-1 dn items, wave 2, birth variables, item 003.
f_cn(d,'w1_dn')
f_cn(d,'w2')
f_cn(d,'birth')
f_cn(d,'003')

# YEAR OF BIRTH dn003
# cv_r:  d[, birth_ym := f_ym(d, birth_year, birth_month)], birth_year, birth_month
# dn004, dn005, COUNTRY OF BIRTH IN SHARE-LOC
  
# DN014_ MARITAL STATUS -> EVER MARRIED
f_cn(d,'dn014')
table(d$w3_rp002_)
table(d$w7_dn014_)
table(d$w7old_dn014_)  # useless

# One "never married" indicator per wave in temp1..temp7. Waves 1, 2, 4-7 use
# dn014_; wave 3 has no dn014_ here and uses w3_rp002_ == 'no' instead.
# f_nevermarried() and f_01() are defined outside this file.
d[, temp1 			:= f_nevermarried(d, w1_dn014_)]
d[, temp2 			:= f_nevermarried(d, w2_dn014_)]
d[, temp3 			:= f_01(d, w3_rp002_,'no')]
d[, temp4 			:= f_nevermarried(d, w4_dn014_)]
d[, temp5 			:= f_nevermarried(d, w5_dn014_)]
d[, temp6 			:= f_nevermarried(d, w6_dn014_)]
d[, temp7 			:= f_nevermarried(d, w7_dn014_)]
# Collapse the per-wave indicators (every column matching ^temp[0-9]) into one
# value per respondent; presumably the row-wise max ignoring NA -- TODO confirm.
d[, mar_never		:= f_rowmaxs_notNA(d, f_toc(d,'^temp[0-9]')) ]
# married is derived from mar_never via f_01(); presumably 1 where mar_never
# equals 0 -- TODO confirm the meaning of the -1 / 2 arguments.
d[, married 		:= f_01(d, mar_never, 0, -1, 2)]
h(d,,40,'mergeid|mar|temp')
# The previous `j <- which(!is.na(d$temp7))` was dropped: it was never read,
# because the preview below selects the never-married rows directly.
h(d,which(d$mar_never==1) ,40,'mergeid|mar|temp')
h(d,,40,'mergeid|mar|temp')

# dn041_ YEARS OF EDUCATION
f_cn(d,'dn041')
table(d$w2_dn041_)
table(d$w4_dn041_)
table(d$w7_dn041_)
# Start from a clean slate: drop the temp* helper columns of earlier sections.
f_dt_NULL(d,'temp')  
# Rounded years of education per wave (2, 4, 5, 6, 7), cleaned by f_NA() with
# the bounds -1 and 31 (f_NA() is defined outside this file). All five helper
# columns are added in one functional `:=` call.
d[, `:=`(
  temp2 = f_NA(d, round(w2_dn041_), -1, 31),
  temp4 = f_NA(d, round(w4_dn041_), -1, 31),
  temp5 = f_NA(d, round(w5_dn041_), -1, 31),
  temp6 = f_NA(d, round(w6_dn041_), -1, 31),
  temp7 = f_NA(d, round(w7_dn041_), -1, 31)
)]
# One value per respondent across the wave-specific helper columns.
d[, ed_y := f_rowmaxs_notNA(d, f_toc(d, '^temp[0-9]'))]
table(d$ed_y)
h(d,,40,'ed_y|temp')
# Preview restricted to respondents with a wave-7 value.
j <- which(!is.na(d[['temp7']]))
h(d,j,40,'ed_y|temp')

# Three indicators over ed_y for the bands 0-10, 10-14 and 14-30;
# f_int_01() is defined outside this file, so the handling of the band edges
# is not visible here.
d[, `:=`(
  ed_y_p = f_int_01(d, ed_y, 0, 10, -1, 30),
  ed_y_s = f_int_01(d, ed_y, 10, 14, -1, 30),
  ed_y_t = f_int_01(d, ed_y, 14, 30, -1, 30)
)]
h(d,,40,'ed_y')

# FEMALE=1 DN042_ MALE OR FEMALE (1=male, 2=female) in cv_r: gender

######################################################################
# dn026_1         byte    %29.0g     yesno      Is natural parent still alive: mother
f_cn(d,'dn026')
table(d$w1_dn026_1)
table(d$w3_dn026_1)
table(d$w7_dn026_1)
# Wave-specific "mother alive" indicators (answer 'yes' via f_01()).
# No wave-3 item is used, so w_m_alive.3 is an all-NA placeholder that keeps
# the wave suffixes 1..7 complete.
d[, `:=`(
  w_m_alive.1 = f_01(d, w1_dn026_1, 'yes'),
  w_m_alive.2 = f_01(d, w2_dn026_1, 'yes'),
  w_m_alive.3 = NA,
  w_m_alive.4 = f_01(d, w4_dn026_1, 'yes'),
  w_m_alive.5 = f_01(d, w5_dn026_1, 'yes'),
  w_m_alive.6 = f_01(d, w6_dn026_1, 'yes'),
  w_m_alive.7 = f_01(d, w7_dn026_1, 'yes')
)]

# dn026_2         byte    %29.0g     yesno      Is natural parent still alive: father
# Same construction for the father, again with an all-NA wave-3 placeholder.
d[, `:=`(
  w_f_alive.1 = f_01(d, w1_dn026_2, 'yes'),
  w_f_alive.2 = f_01(d, w2_dn026_2, 'yes'),
  w_f_alive.3 = NA,
  w_f_alive.4 = f_01(d, w4_dn026_2, 'yes'),
  w_f_alive.5 = f_01(d, w5_dn026_2, 'yes'),
  w_f_alive.6 = f_01(d, w6_dn026_2, 'yes'),
  w_f_alive.7 = f_01(d, w7_dn026_2, 'yes')
)]
# Two previews of the mother columns, called with 40 and with -90
# (h() is defined outside this file).
h(d,,40,'mergeid|m_alive|dn026_1')
h(d,,-90,'mergeid|m_alive|dn026_1')

# dn027_1         int     %10.0g     dkrf       Age of death of parent: mother
table(d$w1_dn027_1)
table(d$w3_dn027_1)
table(d$w7_dn027_1)
# Clear the temp* helper columns, then store the cleaned age at death per wave
# (waves 1, 2, 4-7; f_NA() is called with the bounds -1 and 120).
f_dt_NULL(d,'temp')  
d[, `:=`(
  temp1 = f_NA(d, w1_dn027_1, -1, 120),
  temp2 = f_NA(d, w2_dn027_1, -1, 120),
  temp4 = f_NA(d, w4_dn027_1, -1, 120),
  temp5 = f_NA(d, w5_dn027_1, -1, 120),
  temp6 = f_NA(d, w6_dn027_1, -1, 120),
  temp7 = f_NA(d, w7_dn027_1, -1, 120)
)]
# Combine the wave-specific values into a single mother's age at death;
# f_waves() is defined outside this file.
d[, m_d_a := f_waves(d, temp1, temp2, temp4, temp5, temp6, temp7)]
# Preview the respondents that have a wave-7 value.
j <- which(!is.na(d[['temp7']]))
h(d,j,40,'mergeid|m_|temp')

# dn027_2         int     %10.0g     dkrf       Age of death of parent: father
# Identical procedure for the father.
f_dt_NULL(d,'temp')  
d[, `:=`(
  temp1 = f_NA(d, w1_dn027_2, -1, 120),
  temp2 = f_NA(d, w2_dn027_2, -1, 120),
  temp4 = f_NA(d, w4_dn027_2, -1, 120),
  temp5 = f_NA(d, w5_dn027_2, -1, 120),
  temp6 = f_NA(d, w6_dn027_2, -1, 120),
  temp7 = f_NA(d, w7_dn027_2, -1, 120)
)]
d[, f_d_a := f_waves(d, temp1, temp2, temp4, temp5, temp6, temp7)]
j <- which(!is.na(d[['temp7']]))
h(d,j,40,'mergeid|f_|temp')

# dn028_1         int     %10.0g     dkrf       Age of natural parent: mother
table(d$w1_dn028_1)
table(d$w3_dn028_1)
table(d$w7_dn028_1)
# Wave-specific mother's age (waves 1, 2, 4-7), cleaned by f_NA() with the
# bounds -1 and 120 (f_NA() is defined outside this file).
d[, w_m_age.1 := f_NA(d, w1_dn028_1, -1, 120)]
d[, w_m_age.2 := f_NA(d, w2_dn028_1, -1, 120)]
d[, w_m_age.4 := f_NA(d, w4_dn028_1, -1, 120)]
d[, w_m_age.5 := f_NA(d, w5_dn028_1, -1, 120)]
d[, w_m_age.6 := f_NA(d, w6_dn028_1, -1, 120)]
d[, w_m_age.7 := f_NA(d, w7_dn028_1, -1, 120)]
# Fixed: the preview pattern used to contain 'w_age', which cannot match the
# w_m_age.* columns created above; it now mirrors the father preview below.
h(d,,40,'mergeid|w_m_age|028_1')

# dn028_2         byte    %10.0g     dkrf       Age of natural parent: father
# Same construction for the father's age.
d[, w_f_age.1 := f_NA(d, w1_dn028_2, -1, 120)]
d[, w_f_age.2 := f_NA(d, w2_dn028_2, -1, 120)]
d[, w_f_age.4 := f_NA(d, w4_dn028_2, -1, 120)]
d[, w_f_age.5 := f_NA(d, w5_dn028_2, -1, 120)]
d[, w_f_age.6 := f_NA(d, w6_dn028_2, -1, 120)]
d[, w_f_age.7 := f_NA(d, w7_dn028_2, -1, 120)]
h(d,,40,'mergeid|w_f_age|028_2')

# dn030_1         byte    %48.0g     distance   Where does parent live: mother (distance)
# f_sn_distance_km() receives the source column name as a string; it is
# defined outside this file (presumably it maps the distance category to
# kilometres -- TODO confirm). Waves 1, 2, 4-7.
d[, `:=`(
  w_sn_m_km.1 = f_sn_distance_km(d, 'w1_dn030_1'),
  w_sn_m_km.2 = f_sn_distance_km(d, 'w2_dn030_1'),
  w_sn_m_km.4 = f_sn_distance_km(d, 'w4_dn030_1'),
  w_sn_m_km.5 = f_sn_distance_km(d, 'w5_dn030_1'),
  w_sn_m_km.6 = f_sn_distance_km(d, 'w6_dn030_1'),
  w_sn_m_km.7 = f_sn_distance_km(d, 'w7_dn030_1')
)]

# dn030_2         byte    %48.0g     distance   Where does parent live: father (distance)
d[, `:=`(
  w_sn_f_km.1 = f_sn_distance_km(d, 'w1_dn030_2'),
  w_sn_f_km.2 = f_sn_distance_km(d, 'w2_dn030_2'),
  w_sn_f_km.4 = f_sn_distance_km(d, 'w4_dn030_2'),
  w_sn_f_km.5 = f_sn_distance_km(d, 'w5_dn030_2'),
  w_sn_f_km.6 = f_sn_distance_km(d, 'w6_dn030_2'),
  w_sn_f_km.7 = f_sn_distance_km(d, 'w7_dn030_2')
)]

# dn032_1         byte    %22.0g     contact    Personal contact with parent during past 12 months: mother
# f_sn_freq() also takes the source column name as a string and is defined
# outside this file.
d[, `:=`(
  w_sn_m_time.1 = f_sn_freq(d, 'w1_dn032_1'),
  w_sn_m_time.2 = f_sn_freq(d, 'w2_dn032_1'),
  w_sn_m_time.4 = f_sn_freq(d, 'w4_dn032_1'),
  w_sn_m_time.5 = f_sn_freq(d, 'w5_dn032_1'),
  w_sn_m_time.6 = f_sn_freq(d, 'w6_dn032_1'),
  w_sn_m_time.7 = f_sn_freq(d, 'w7_dn032_1')
)]

# dn032_2         byte    %22.0g     contact    Personal contact with parent during past 12 months: father
d[, `:=`(
  w_sn_f_time.1 = f_sn_freq(d, 'w1_dn032_2'),
  w_sn_f_time.2 = f_sn_freq(d, 'w2_dn032_2'),
  w_sn_f_time.4 = f_sn_freq(d, 'w4_dn032_2'),
  w_sn_f_time.5 = f_sn_freq(d, 'w5_dn032_2'),
  w_sn_f_time.6 = f_sn_freq(d, 'w6_dn032_2'),
  w_sn_f_time.7 = f_sn_freq(d, 'w7_dn032_2')
)]
# Preview of the father contact columns only.
h(d,,40,'mergeid|f_time')

# dn033_1         byte    %10.0g     rate       Health of parent: mother (scale changed)
# Wave-specific indicator built by f_01() with the pattern 'good|excellent'
# (f_01() is defined outside this file). Waves 1, 2, 4-7.
d[, `:=`(
  w_sn_m_hgood.1 = f_01(d, w1_dn033_1, 'good|excellent'),
  w_sn_m_hgood.2 = f_01(d, w2_dn033_1, 'good|excellent'),
  w_sn_m_hgood.4 = f_01(d, w4_dn033_1, 'good|excellent'),
  w_sn_m_hgood.5 = f_01(d, w5_dn033_1, 'good|excellent'),
  w_sn_m_hgood.6 = f_01(d, w6_dn033_1, 'good|excellent'),
  w_sn_m_hgood.7 = f_01(d, w7_dn033_1, 'good|excellent')
)]

# dn033_2         byte    %10.0g     rate       Health of parent: father
d[, `:=`(
  w_sn_f_hgood.1 = f_01(d, w1_dn033_2, 'good|excellent'),
  w_sn_f_hgood.2 = f_01(d, w2_dn033_2, 'good|excellent'),
  w_sn_f_hgood.4 = f_01(d, w4_dn033_2, 'good|excellent'),
  w_sn_f_hgood.5 = f_01(d, w5_dn033_2, 'good|excellent'),
  w_sn_f_hgood.6 = f_01(d, w6_dn033_2, 'good|excellent'),
  w_sn_f_hgood.7 = f_01(d, w7_dn033_2, 'good|excellent')
)]



# dn034_          byte    %29.0g     yesno      Ever had any siblings
f_cn(d, '034')
table(d$w1_dn034_)
table(d$w7_dn034_)
# Drop the temp* helper columns of the previous section, then build one
# 'yes' indicator per wave (waves 1, 2, 4-7) with f_01().
f_dt_NULL(d,'temp')  
d[, temp1 := f_01(d, w1_dn034_, 'yes')]
d[, temp2 := f_01(d, w2_dn034_, 'yes')]
d[, temp4 := f_01(d, w4_dn034_, 'yes')]
d[, temp5 := f_01(d, w5_dn034_, 'yes')]
d[, temp6 := f_01(d, w6_dn034_, 'yes')]
d[, temp7 := f_01(d, w7_dn034_, 'yes')]
# Collapse the per-wave indicators into one value per respondent.
d[, sibl	:= f_rowmaxs_notNA(d, f_toc(d,'^temp[0-9]')) ]
# Fixed: this used to tabulate d$siblings, a column that is never created
# (the derived column is `sibl`), so the check always showed an empty table.
table(d$sibl)
h(d,,40,'mergeid|sibl|temp')

# dn035_          byte    %10.0g     oldyoung   Oldest or youngest child
f_cn(d, '035')
table(d$w1_dn035_)
table(d$w6_dn035_)  
table(d$w7_dn035_)  # not asked
# Three indicators are built the same way (oldest / youngest / between):
# clear temp*, flag the answer per wave (waves 1, 2, 4-6; wave 7 is not used),
# then collapse across waves.
f_dt_NULL(d,'temp')  
d[, temp1 := f_01(d, w1_dn035_, 'oldest')]
d[, temp2 := f_01(d, w2_dn035_, 'oldest')]
d[, temp4 := f_01(d, w4_dn035_, 'oldest')]
d[, temp5 := f_01(d, w5_dn035_, 'oldest')]
d[, temp6 := f_01(d, w6_dn035_, 'oldest')]
d[, sibl_o	:= f_rowmaxs_notNA(d, f_toc(d,'^temp[0-9]')) ]
# Fixed: the row selector `j` used to be a leftover from an earlier section
# (rows with a wave-7 value of an unrelated item). It is now recomputed from
# temp6, the last wave used for this item.
j <- which( !is.na(d$temp6)  )
h(d,j,40,'mergeid|sibl|temp')
h(d,,40,'mergeid|sibl|temp')

f_dt_NULL(d,'temp')  
d[, temp1 := f_01(d, w1_dn035_, 'youngest')]
d[, temp2 := f_01(d, w2_dn035_, 'youngest')]
d[, temp4 := f_01(d, w4_dn035_, 'youngest')]
d[, temp5 := f_01(d, w5_dn035_, 'youngest')]
d[, temp6 := f_01(d, w6_dn035_, 'youngest')]
d[, sibl_y	:= f_rowmaxs_notNA(d, f_toc(d,'^temp[0-9]')) ]
h(d,,40,'mergeid|sibl|temp')

f_dt_NULL(d,'temp')  
d[, temp1 := f_01(d, w1_dn035_, 'between')]
d[, temp2 := f_01(d, w2_dn035_, 'between')]
d[, temp4 := f_01(d, w4_dn035_, 'between')]
d[, temp5 := f_01(d, w5_dn035_, 'between')]
d[, temp6 := f_01(d, w6_dn035_, 'between')]
d[, sibl_b	:= f_rowmaxs_notNA(d, f_toc(d,'^temp[0-9]')) ]
h(d,,40,'mergeid|sibl|temp')

# dn036_          byte    %10.0g     dkrf       How many brothers alive
f_cn(d, '036')
table(d$w1_dn036_)
table(d$w7_dn036_)
# Wave-specific number of living brothers (waves 1, 2, 4-7), cleaned by f_NA()
# with the bounds -1 and 20 (f_NA() is defined outside this file).
d[, `:=`(
  w_sibl_b.1 = f_NA(d, w1_dn036_, -1, 20),
  w_sibl_b.2 = f_NA(d, w2_dn036_, -1, 20),
  w_sibl_b.4 = f_NA(d, w4_dn036_, -1, 20),
  w_sibl_b.5 = f_NA(d, w5_dn036_, -1, 20),
  w_sibl_b.6 = f_NA(d, w6_dn036_, -1, 20),
  w_sibl_b.7 = f_NA(d, w7_dn036_, -1, 20)
)]
h(d,,40,'mergeid|sibl|temp')

# dn037_          byte    %10.0g     dkrf       How many sisters alive
# Same construction for living sisters.
d[, `:=`(
  w_sibl_s.1 = f_NA(d, w1_dn037_, -1, 20),
  w_sibl_s.2 = f_NA(d, w2_dn037_, -1, 20),
  w_sibl_s.4 = f_NA(d, w4_dn037_, -1, 20),
  w_sibl_s.5 = f_NA(d, w5_dn037_, -1, 20),
  w_sibl_s.6 = f_NA(d, w6_dn037_, -1, 20),
  w_sibl_s.7 = f_NA(d, w7_dn037_, -1, 20)
)]
h(d,,40,'mergeid|sibl|temp')

# dn127_1         int     %10.0g     dkrf       Year of death of parent: mother ONLY WAVE 67
f_cn(d, '127')
table(d$w6_dn127_1)
table(d$w7_dn127_1)
# Clear temp*, clean the wave-6 and wave-7 year of death with f_NA() (called
# with 1850 and 2020), then combine the two waves with f_waves().
f_dt_NULL(d,'temp')  
d[, temp6 := f_NA(d, w6_dn127_1, 1850, 2020)]
d[, temp7 := f_NA(d, w7_dn127_1, 1850, 2020)]
d[, m_d_y := f_waves(d, temp6,temp7)]
j <- which( !is.na(d$temp7)  )
# Fixed: 'mother|father' alone does not match the derived column m_d_y, so the
# preview never showed the result; the derived name is added to the pattern.
h(d,j,40,'mergeid|m_d_y|mother|father|temp')

# dn127_2         int     %10.0g     dkrf       Year of death of parent: father ONLY WAVE 67
# Same procedure for the father.
f_dt_NULL(d,'temp')  
d[, temp6 := f_NA(d, w6_dn127_2, 1850, 2020)]
d[, temp7 := f_NA(d, w7_dn127_2, 1850, 2020)]
d[, f_d_y := f_waves(d, temp6,temp7)]
j <- which( !is.na(d$temp7)  )
# Fixed for the same reason: f_d_y is added to the preview pattern.
h(d,j,40,'mergeid|f_d_y|mother|father|temp')

# WAVE12 in gv_isco NOT RELIABLE, MANY LAB
# isco_mo         str4    %4s    isco-88 mother's job (dn029_1) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# isco_fa         str4    %4s    isco-88 father's job (dn029_2) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# text_mo         str80   %80s   label for isco_mo (mother's job) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# text_fa         str100  %100s  label for isco_fa (father's job) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)

# dn029isco_1     int     %10.0g     dkrf       ISCO code of mother when respondent was 10 WAVE 67 (WAVE 45 ASKED BUT NOT IN DN)
# dn029isco_2     int     %10.0g     dkrf       ISCO code of father	 when respondent was 10 WAVE 67 (WAVE 45 ASKED BUT NOT IN DN)

f_cn(d, 'isco')
table(d$w6_dn029isco_1)
# Occupation class of the mother when the respondent was 10, taken from the
# wave-6 ISCO code only. f_isco_collar() is defined outside this file; it is
# called once per class label ('white', 'blue', 'military').
d[, `:=`(
  age10_m_isco_w = f_isco_collar(d, w6_dn029isco_1, 'white'),
  age10_m_isco_b = f_isco_collar(d, w6_dn029isco_1, 'blue'),
  age10_m_isco_m = f_isco_collar(d, w6_dn029isco_1, 'military')
)]

# Same three classes for the father.
d[, `:=`(
  age10_f_isco_w = f_isco_collar(d, w6_dn029isco_2, 'white'),
  age10_f_isco_b = f_isco_collar(d, w6_dn029isco_2, 'blue'),
  age10_f_isco_m = f_isco_collar(d, w6_dn029isco_2, 'military')
)]
h(d,,40,'mergeid|_isco|dn029isco')



# Cross-tabulate each father indicator against the raw ISCO code as a check
# of the classification.
table(d$age10_f_isco_w,d$w6_dn029isco_2)
table(d$age10_f_isco_b,d$w6_dn029isco_2)
table(d$age10_f_isco_m,d$w6_dn029isco_2)

# dn629_1         byte    %65.0g     dn629_     Employment situation when you were 10: mother WAVE 6
  																							# 1.ret, 2.empl/self, 3.unempl, 4.dis, 5.home, 97.other
# dn629_2         byte    %65.0g     dn629_     Employment situation when you were 10: father
f_cn(d, '629')
table(d$w6_dn629_1)
# One indicator per employment situation of the mother, each built by f_01()
# from the wave-6 answer with the pattern shown (f_01() is defined outside
# this file).
d[, `:=`(
  age10_m_w = f_01(d, w6_dn629_1, '^employed'),
  age10_m_u = f_01(d, w6_dn629_1, '^unemployed'),
  age10_m_h = f_01(d, w6_dn629_1, '^home'),
  age10_m_d = f_01(d, w6_dn629_1, 'disabled'),
  age10_m_r = f_01(d, w6_dn629_1, 'retired')
)]
    
# Same five indicators for the father.
d[, `:=`(
  age10_f_w = f_01(d, w6_dn629_2, '^employed'),
  age10_f_u = f_01(d, w6_dn629_2, '^unemployed'),
  age10_f_h = f_01(d, w6_dn629_2, '^home'),
  age10_f_d = f_01(d, w6_dn629_2, 'disabled'),
  age10_f_r = f_01(d, w6_dn629_2, 'retired')
)]
h(d,,40,'mergeid|age10')

# dn051_1         byte    %164.0g    educat     Highest school certificate/degree: mother in SHARE-ISCED
# dn051_2         byte    %164.0g    educat     Highest school certificate/degree: father in SHARE-ISCED
# dn504c          int     %46.0g     country		Country of birth coded: mother in SHARE-LOC
# dn505c          int     %46.0g     country		Country of birth coded: father in SHARE-LOC

#####################################################################
# SAVE
#####################################################################
# Single definition of the columns that are kept. The preview below used to
# run a slightly different pattern ('mar' instead of '^mar', no '^educ') than
# the actual selection, so it could list columns that were then dropped.
# Columns not matched here (e.g. the temp* helpers and country) are dropped.
keep_pattern <- "mergeid$|^w_|^ed_y|^educ|^mar|female|mother|father|^m_|^f_|^sibl|^age10"
f_cn(d, '') 
f_cn(d, 'sib') 
f_cn(d, keep_pattern) 
d <- d[, grepl(keep_pattern, colnames(d)), with=FALSE]
# One row per respondent, ordered by mergeid.
d <- unique(d, by="mergeid")
d <- setorder(d, mergeid)
head(d)
colnames(d)

# Write with full paths instead of switching into the data directory with
# setwd(): a failing fwrite() no longer leaves the session in the data folder.
data_dir <- 'c:/SHARE/R/data'
fwrite(d, file = file.path(data_dir, "data-dn.csv"), na=NA)
# From here on `d` holds only rows whose mergeid contains 'CZ'.
d <- d[ grepl('CZ', mergeid) ,]
fwrite(d, file = file.path(data_dir, "data-dn-CZ.csv"), na=NA)
# Kept from the original flow so the working directory ends up in the project
# root exactly as before.
setwd('c:/SHARE/R')
cat("Data saved", "\n")

  