# share-data-dn.R
# Session setup ----
# Start from an empty workspace rooted at the project directory.
gc()
rm(list = ls())
setwd("C:/SHARE/R")

# warn = 0      : warnings are collected and reported once the top-level call
#                 returns.
# error         : on an error, offer the interactive call-frame browser.
# max.print     : raise the console print limit so wide tables are not cut off.
options(
  warn = 0,
  error = utils::recover,
  max.print = 99999
)
closeAllConnections()

# Project-local scripts, resolved relative to the working directory set above.
# NOTE(review): presumably these attach the packages (data.table, readstata13)
# and define the f_* helpers used further down -- confirm.
source("share-libraries.R")
source("share-functions.R")

########################################################################
# VARIABLES
###################################
# Regular expression selecting the columns to keep from every wave file.
# It is assembled from a vector of alternatives so that no line breaks or
# indentation end up inside the pattern (a multi-line string literal would
# embed the newline and tabs as extra, meaningless alternatives).
vars <- paste(
  c(
    # identifier, any name ending in "c" (e.g. the dn005c country codes),
    # and the dn00x / dn01x items
    "mergeid$", "c$", "^dn00", "^dn01._$",
    # dn02x items
    "^dn02._$", "^dn02._1", "^dn02._2", "^dn020", "^dn021",
    "^dn023.*_1$", "^dn023.*_2$", "^dn023.*_3$", "^dn023.*_4$", "^dn023.*_5$",
    "^dn026", "^dn027", "^dn028", "^dn029",
    # dn03x - dn05x items
    "^dn03", "^dn041", "^dn042", "^dn051", "^dn052",
    "^dn053.*_1$", "^dn053.*_2$", "^dn053.*_3$", "^dn053.*_4$", "^dn053.*_5$",
    "^dn054",
    # later-wave additions (dn12x, dn5xx, dn62x)
    "^dn12", "^dn5", "^dn62"
  ),
  collapse = "|"
)

# Regular expression for column names that are handed to f_dt_NULL() after the
# merge: flag / unfolding-bracket / raw variants, "dot" and v1-v3 suffixes, and
# names ending in 12 through 30.
varsno <- paste(
  c("flag", "ub", "raw", "dot$", "v1$", "v2$", "v3$", paste0(12:30, "$")),
  collapse = "|"
)

########################################################################
# LOAD WAVES
########################################################################

# Load the DN module of one SHARE wave.
#
# Arguments:
#   data_dir      directory that holds the wave's Stata files
#   dta_file      file name of the DN module inside data_dir
#   wave_tag      wave label handed to f_wave() (e.g. "w1")
#   keep_pattern  regular expression; only columns whose name matches it are
#                 kept (defaults to the script-level `vars`)
#
# Returns the result of f_wave() for the column-filtered table.
# f_wave() is defined outside this file; its output is passed through as is.
load_dn_wave <- function(data_dir, dta_file, wave_tag, keep_pattern = vars) {
  # Read from inside the wave directory (as the inline code did) but put the
  # previous working directory back when done, even if reading fails.
  old_wd <- setwd(data_dir)
  on.exit(setwd(old_wd), add = TRUE)

  d <- setDT(read.dta13(file = dta_file))
  d <- d[, grepl(keep_pattern, colnames(d)), with = FALSE]
  f_wave(d, wave_tag, "mergeid")
}

########################################################################
# wave 1
# dn003_          int     %12.0g     rfdk5      year of birth
# dn004_          byte    %10.0f     yesno      country of birth
# dn005c          int     %46.0g     dn005c     other country of birth-country code
# dn006_          int     %10.0f     rfdk5      year came to live in country
# dn007_          byte    %10.0f     yesno      citizenship country
# dn008c          int     %46.0g     dn008c     other citizenship-country code
# dn009_          byte    %10.0f     dn009      where lived nov-1,1989
# dn010_          byte    %60.0f     educat     highest educational degree obtained
# dn014_          byte    %44.0g     marritalst marital status
# dn015_          int     %12.0g     rfdk5      year of marriage, if living together
# dn016_          int     %10.0f     rfdk5      year of registered partnership
# dn017_          int     %12.0g     rfdk5      year of marriage, if living separated
# dn018_          int     %10.0f     rfdk5      since when divorced
# dn019_          int     %10.0f     rfdk5      since when widowed
# dn020_          int     %12.0g     rfdk5      year of birth of former partner
# dn021_          byte    %60.0f     educat     highest educational degree of former partner
d1 <- load_dn_wave("C:/SHARE/Data/sharew1", "sharew1_rel7-0-0_dn.dta", "w1")

########################################################################
# wave 2
# DN003_ YEAR OF BIRTH
# DN004_ COUNTRY OF BIRTH
# dn004_          byte    %10.0g     yesno      country of birth
# dn005c          int     %46.0g     dn005c     foreign country of birth coding
# dn006_          int     %12.0g     rfdk5      year came to live in country
# dn007_          byte    %10.0g     yesno      citizenship country
# dn008c          int     %46.0g     dn008c     citizenship country
# dn009_          byte    %10.0g     dn009      where lived nov-1,1989
# DN014_ MARITAL STATUS
# dn015_          int     %10.0g     rfdk5      year of marriage, if living together
# dn016_          int     %12.0g     rfdk5      year of registered partnership
# dn017_          int     %12.0g     rfdk5      year of marriage, if living separated
# dn018_          int     %12.0g     rfdk5      since when divorced
# dn019_          int     %10.0g     rfdk5      since when widowed
# dn041_          float   %27.0g     dn041      years education
# DN042_ MALE OR FEMALE (1=male, 2=female)
d2 <- load_dn_wave("C:/SHARE/Data/sharew2", "sharew2_rel7-0-0_dn.dta", "w2")

########################################################################
# wave 3: no DN, no CH modules, no GV isced

########################################################################
# wave 4
d4 <- load_dn_wave("C:/SHARE/Data/sharew4", "sharew4_rel7-0-0_dn.dta", "w4")

########################################################################
# wave 5
# same as in wave 2
# dn501_          byte    %47.0g     yesno501   Born a citizen of country of interview
# dn502_          int     %10.0g     dkrf       Year of becoming citizen of country of interview
# dn503_          byte    %29.0g     yesno      Born a citizen of country of interview
# dn504c          int     %46.0g     countryofbirth Country of birth coded: mother
# dn505c          int     %46.0g     countryofbirth Country of birth coded: father
d5 <- load_dn_wave("C:/SHARE/Data/sharew5", "sharew5_rel7-0-0_dn.dta", "w5")

########################################################################
# wave 6
# DN as in wave 5
# dn127_1         int     %10.0g     dkrf       Year of death of parent: mother
# dn127_2         int     %10.0g     dkrf       Year of death of parent: father
# dn629_1         byte    %65.0g     dn629_     Employment situation when you were 10: mother
# dn629_2         byte    %65.0g     dn629_     Employment situation when you were 10: father
d6 <- load_dn_wave("C:/SHARE/Data/sharew6", "sharew6_rel7-0-0_dn.dta", "w6")

########################################################################
# wave 7
d7 <- load_dn_wave("C:/SHARE/Data/sharew7", "sharew7_rel7-0-0_dn.dta", "w7")

########################################################################
# new internal data from wave 7 with country coding
d7old <- load_dn_wave(
  "C:/SHARE/Data/sharew7",
  "w7_dn_including_dn_coding_internal_release_rel0_w7_v01.dta",
  "w7old"
)

########################################################################
# MERGE WAVES
########################################################################
# Full outer join of all wave tables on the respondent identifier, folded
# left to right in this order: w1, w2, w4, w5, w6, w7 (internal), w7.
wave_tables <- list(d1, d2, d4, d5, d6, d7old, d7)
d <- Reduce(
  function(merged, next_wave) merge(merged, next_wave, by = "mergeid", all = TRUE),
  wave_tables
)

# Wave suffixes to strip from the column names; kept in one place so the
# inspection call and the rename below cannot drift apart.
# NOTE(review): a "_w7old" suffix is not covered by this pattern -- confirm
# against what f_wave() appends for the internal wave-7 file.
wave_suffix <- "_s7$|_w1$|_w2$|_w3$|_w4$|_w5$|_w6$|_w7$"

# f_cn() is a project helper defined outside this file.
f_cn(d, "")
f_cn(d, wave_suffix)
colnames(d) <- gsub(wave_suffix, "", colnames(d))

# f_dt_NULL() is a project helper defined outside this file; it receives the
# exclusion pattern `varsno`.
f_dt_NULL(d, varsno)

# Keep one row per respondent. unique() returns a new table rather than
# modifying `d`, so the result has to be assigned to take effect.
d <- unique(d, by = "mergeid")

# setorder() sorts by reference; no re-assignment needed.
setorder(d, mergeid)

# Drop rows whose identifier starts with "no int".
d <- d[!grepl("^no int", mergeid)]

# Interactive inspection of the result.
d
colnames(d)
	
########################################################################
# SAVE DATA
########################################################################
# Output files are addressed by full path instead of switching the working
# directory for the duration of the writes.
out_dir <- "C:/SHARE/R/data"

# Missing values are written as the literal string "NA".
fwrite(d, file = file.path(out_dir, "data-raw-dn.csv"), na = "NA")

# Czech extract: respondents whose identifier starts with the "CZ" prefix.
# (The previous pattern contained a TAB character in front of "CZ", which can
# only match an identifier that itself contains a tab.)
dcz <- d[grepl("^CZ", mergeid)]
fwrite(dcz, file = file.path(out_dir, "data-raw-dn-CZ.csv"), na = "NA")
cat("Data saved", "\n")

# Leave the session in the project directory.
setwd("C:/SHARE/R")

	