# share-dn.R
# Cleaning of the SHARE demographics (DN) module.
# The helpers called below (f_cn, f_country, f_01, f_NA, f_nevermarried,
# f_rowmaxs_notNA, f_toc, f_int_01, f_dt_NULL, f_waves, h) are not defined in
# this file; presumably share-initialize.R provides them -- TODO confirm.
source("share-initialize.R")

#################################################################
# Disabled one-off step, kept for reference: it merges the raw dn, isco and
# rp extracts on mergeid and writes data-dn-temp.csv, the file read below.
# setwd('c:/SHARE/R/data')
# d_isco <- fread(file = 'data-raw-isco.csv')
# d_rp <- fread(file = 'data-raw-rp.csv')
# d <- fread(file = 'data-raw-dn.csv')
# d <- merge(d, d_isco, by='mergeid', all = TRUE, sort = TRUE)
# d <- merge(d, d_rp, by='mergeid', all = TRUE, sort = TRUE)
# unique(d, by="mergeid")
# d <- setorder(d, mergeid)
# head(d)
# f_cn(d,'')
# f_cn(d,'\\.x')
# f_cn(d,'\\.y')
# fwrite(d, file = 'data-dn-temp.csv', na=NA)
# setwd('c:/SHARE/R')
#################################################################

# Load the merged working file from the data directory, then switch back to
# the script directory.
setwd("c:/SHARE/R/data")
d <- fread(file = "data-dn-temp.csv")
setwd("c:/SHARE/R")

# Inspection: first rows, then column-name look-ups (all names, and names
# carrying a ".x" / ".y" suffix).
head(d)
f_cn(d, "")
f_cn(d, "\\.x")
f_cn(d, "\\.y")

# COUNTRY
# New column `country`, derived by f_country() from mergeid.
d[, country := f_country(d, mergeid)]
table(d$country)
# d <- d[ country=='CZ' ]

##################################################################
# Inspection: column-name look-ups only.
f_cn(d, "w1_dn")
f_cn(d, "w2")
f_cn(d, "birth")
f_cn(d, "003")

# YEAR OF BIRTH dn003
# cv_r: d[, birth_ym := f_ym(d, birth_year, birth_month)], birth_year, birth_month
# dn004, dn005, COUNTRY OF BIRTH IN SHARE-LOC

# DN014_ MARITAL STATUS -> EVER MARRIED
f_cn(d, "dn014")
table(d$w3_rp002_)
table(d$w7_dn014_)
table(d$w7old_dn014_) # useless

# One temporary indicator per wave. Wave 3 is taken from rp002 (matched on
# "no") rather than from dn014.
d[, temp1 := f_nevermarried(d, w1_dn014_)]
d[, temp2 := f_nevermarried(d, w2_dn014_)]
d[, temp3 := f_01(d, w3_rp002_, "no")]
d[, temp4 := f_nevermarried(d, w4_dn014_)]
d[, temp5 := f_nevermarried(d, w5_dn014_)]
d[, temp6 := f_nevermarried(d, w6_dn014_)]
d[, temp7 := f_nevermarried(d, w7_dn014_)]

# Collapse the per-wave temp columns into `mar_never`, then derive `married`
# from it (presumably the complement of mar_never -- TODO confirm f_01's
# numeric arguments).
d[, mar_never := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]
d[, married := f_01(d, mar_never, 0, -1, 2)]

# Inspection of the result.
h(d, , 40, "mergeid|mar|temp")
j <- which(!is.na(d$temp7))
h(d, which(d$mar_never == 1), 40, "mergeid|mar|temp")
h(d, , 40, "mergeid|mar|temp")

# dn041_ YEARS OF EDUCATION
f_cn(d, "dn041")
table(d$w2_dn041_)
table(d$w4_dn041_)
table(d$w7_dn041_)

# Clear the temp columns left over from the marital-status step, then build
# one rounded, range-filtered (-1, 31) value per wave; waves 2, 4, 5, 6, 7.
f_dt_NULL(d, "temp")
d[, temp2 := f_NA(d, round(w2_dn041_), -1, 31)]
d[, temp4 := f_NA(d, round(w4_dn041_), -1, 31)]
d[, temp5 := f_NA(d, round(w5_dn041_), -1, 31)]
d[, temp6 := f_NA(d, round(w6_dn041_), -1, 31)]
d[, temp7 := f_NA(d, round(w7_dn041_), -1, 31)]
d[, ed_y := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]

table(d$ed_y)
h(d, , 40, "ed_y|temp")
j <- which(!is.na(d$temp7))
h(d, j, 40, "ed_y|temp")

# Three indicators cut from ed_y at 0-10, 10-14 and 14-30 (presumably
# primary / secondary / tertiary -- TODO confirm against f_int_01).
d[, ed_y_p := f_int_01(d, ed_y, 0, 10, -1, 30)]
d[, ed_y_s := f_int_01(d, ed_y, 10, 14, -1, 30)]
d[, ed_y_t := f_int_01(d, ed_y, 14, 30, -1, 30)]
h(d, , 40, "ed_y")

# FEMALE=1 DN042_ MALE OR FEMALE (1=male, 2=female) in cv_r: gender

######################################################################
# dn026_1 byte %29.0g yesno Is natural parent still alive: mother
f_cn(d, "dn026")
table(d$w1_dn026_1)
table(d$w3_dn026_1)
table(d$w7_dn026_1)

# Per-wave indicator matched on "yes"; the wave-3 slot is filled with NA.
d[, w_m_alive.1 := f_01(d, w1_dn026_1, "yes")]
d[, w_m_alive.2 := f_01(d, w2_dn026_1, "yes")]
d[, w_m_alive.3 := NA]
d[, w_m_alive.4 := f_01(d, w4_dn026_1, "yes")]
d[, w_m_alive.5 := f_01(d, w5_dn026_1, "yes")]
d[, w_m_alive.6 := f_01(d, w6_dn026_1, "yes")]
d[, w_m_alive.7 := f_01(d, w7_dn026_1, "yes")]

# dn026_2 byte %29.0g yesno Is natural parent still alive: father
d[, w_f_alive.1 := f_01(d, w1_dn026_2, "yes")]
d[, w_f_alive.2 := f_01(d, w2_dn026_2, "yes")]
d[, w_f_alive.3 := NA]
d[, w_f_alive.4 := f_01(d, w4_dn026_2, "yes")]
d[, w_f_alive.5 := f_01(d, w5_dn026_2, "yes")]
d[, w_f_alive.6 := f_01(d, w6_dn026_2, "yes")]
d[, w_f_alive.7 := f_01(d, w7_dn026_2, "yes")]

h(d, , 40, "mergeid|m_alive|dn026_1")
h(d, , -90, "mergeid|m_alive|dn026_1")

# dn027_1 int %10.0g dkrf Age of death of parent: mother
table(d$w1_dn027_1)
table(d$w3_dn027_1)
table(d$w7_dn027_1)

# Range-filtered (-1, 120) value per wave, combined across waves by
# f_waves() into m_d_a.
f_dt_NULL(d, "temp")
d[, temp1 := f_NA(d, w1_dn027_1, -1, 120)]
d[, temp2 := f_NA(d, w2_dn027_1, -1, 120)]
d[, temp4 := f_NA(d, w4_dn027_1, -1, 120)]
d[, temp5 := f_NA(d, w5_dn027_1, -1, 120)]
d[, temp6 := f_NA(d, w6_dn027_1, -1, 120)]
d[, temp7 := f_NA(d, w7_dn027_1, -1, 120)]
d[, m_d_a := f_waves(d, temp1, temp2, temp4, temp5, temp6, temp7)]

# Rows with a non-missing wave-7 value; used by the inspection call that
# follows.
j <- which(!is.na(d$temp7))
# Inspect the mother's age-at-death columns for the rows in `j`, which is
# computed immediately before this point in the script.
h(d, j, 40, "mergeid|m_|temp")

# dn027_2 int %10.0g dkrf Age of death of parent: father
# Range-filtered (-1, 120) value per wave, combined across waves by
# f_waves() into f_d_a.
f_dt_NULL(d, "temp")
d[, temp1 := f_NA(d, w1_dn027_2, -1, 120)]
d[, temp2 := f_NA(d, w2_dn027_2, -1, 120)]
d[, temp4 := f_NA(d, w4_dn027_2, -1, 120)]
d[, temp5 := f_NA(d, w5_dn027_2, -1, 120)]
d[, temp6 := f_NA(d, w6_dn027_2, -1, 120)]
d[, temp7 := f_NA(d, w7_dn027_2, -1, 120)]
d[, f_d_a := f_waves(d, temp1, temp2, temp4, temp5, temp6, temp7)]
j <- which(!is.na(d$temp7))
h(d, j, 40, "mergeid|f_|temp")

# dn028_1 int %10.0g dkrf Age of natural parent: mother
table(d$w1_dn028_1)
table(d$w3_dn028_1)
table(d$w7_dn028_1)
d[, w_m_age.1 := f_NA(d, w1_dn028_1, -1, 120)]
d[, w_m_age.2 := f_NA(d, w2_dn028_1, -1, 120)]
d[, w_m_age.4 := f_NA(d, w4_dn028_1, -1, 120)]
d[, w_m_age.5 := f_NA(d, w5_dn028_1, -1, 120)]
d[, w_m_age.6 := f_NA(d, w6_dn028_1, -1, 120)]
d[, w_m_age.7 := f_NA(d, w7_dn028_1, -1, 120)]
# FIX: the pattern was 'w_age', which cannot match the w_m_age.* columns
# created above; the father's call below uses 'w_f_age' the same way.
h(d, , 40, "mergeid|w_m_age|028_1")

# dn028_2 int %10.0g dkrf Age of natural parent: father
d[, w_f_age.1 := f_NA(d, w1_dn028_2, -1, 120)]
d[, w_f_age.2 := f_NA(d, w2_dn028_2, -1, 120)]
d[, w_f_age.4 := f_NA(d, w4_dn028_2, -1, 120)]
d[, w_f_age.5 := f_NA(d, w5_dn028_2, -1, 120)]
d[, w_f_age.6 := f_NA(d, w6_dn028_2, -1, 120)]
d[, w_f_age.7 := f_NA(d, w7_dn028_2, -1, 120)]
h(d, , 40, "mergeid|w_f_age|028_2")

# dn030_1 byte %48.0g distance Where does parent live: mother (distance)
# f_sn_distance_km() receives the source column by NAME (a string).
d[, w_sn_m_km.1 := f_sn_distance_km(d, "w1_dn030_1")]
d[, w_sn_m_km.2 := f_sn_distance_km(d, "w2_dn030_1")]
d[, w_sn_m_km.4 := f_sn_distance_km(d, "w4_dn030_1")]
d[, w_sn_m_km.5 := f_sn_distance_km(d, "w5_dn030_1")]
d[, w_sn_m_km.6 := f_sn_distance_km(d, "w6_dn030_1")]
d[, w_sn_m_km.7 := f_sn_distance_km(d, "w7_dn030_1")]

# dn030_2 byte %48.0g distance Where does parent live: father (distance)
d[, w_sn_f_km.1 := f_sn_distance_km(d, "w1_dn030_2")]
d[, w_sn_f_km.2 := f_sn_distance_km(d, "w2_dn030_2")]
d[, w_sn_f_km.4 := f_sn_distance_km(d, "w4_dn030_2")]
d[, w_sn_f_km.5 := f_sn_distance_km(d, "w5_dn030_2")]
d[, w_sn_f_km.6 := f_sn_distance_km(d, "w6_dn030_2")]
d[, w_sn_f_km.7 := f_sn_distance_km(d, "w7_dn030_2")]

# dn032_1 byte %22.0g contact Personal contact with parent during past 12 months: mother
# f_sn_freq() also receives the source column by NAME (a string).
d[, w_sn_m_time.1 := f_sn_freq(d, "w1_dn032_1")]
d[, w_sn_m_time.2 := f_sn_freq(d, "w2_dn032_1")]
d[, w_sn_m_time.4 := f_sn_freq(d, "w4_dn032_1")]
d[, w_sn_m_time.5 := f_sn_freq(d, "w5_dn032_1")]
d[, w_sn_m_time.6 := f_sn_freq(d, "w6_dn032_1")]
d[, w_sn_m_time.7 := f_sn_freq(d, "w7_dn032_1")]

# dn032_2 byte %22.0g contact Personal contact with parent during past 12 months: father
d[, w_sn_f_time.1 := f_sn_freq(d, "w1_dn032_2")]
d[, w_sn_f_time.2 := f_sn_freq(d, "w2_dn032_2")]
d[, w_sn_f_time.4 := f_sn_freq(d, "w4_dn032_2")]
d[, w_sn_f_time.5 := f_sn_freq(d, "w5_dn032_2")]
d[, w_sn_f_time.6 := f_sn_freq(d, "w6_dn032_2")]
d[, w_sn_f_time.7 := f_sn_freq(d, "w7_dn032_2")]
h(d, , 40, "mergeid|f_time")

# dn033_1 byte %10.0g rate Health of parent: mother (scale changed)
# Indicator matched on the labels "good" or "excellent".
d[, w_sn_m_hgood.1 := f_01(d, w1_dn033_1, "good|excellent")]
d[, w_sn_m_hgood.2 := f_01(d, w2_dn033_1, "good|excellent")]
d[, w_sn_m_hgood.4 := f_01(d, w4_dn033_1, "good|excellent")]
d[, w_sn_m_hgood.5 := f_01(d, w5_dn033_1, "good|excellent")]
d[, w_sn_m_hgood.6 := f_01(d, w6_dn033_1, "good|excellent")]
d[, w_sn_m_hgood.7 := f_01(d, w7_dn033_1, "good|excellent")]

# dn033_2 byte %10.0g rate Health of parent: father
d[, w_sn_f_hgood.1 := f_01(d, w1_dn033_2, "good|excellent")]
d[, w_sn_f_hgood.2 := f_01(d, w2_dn033_2, "good|excellent")]
d[, w_sn_f_hgood.4 := f_01(d, w4_dn033_2, "good|excellent")]
d[, w_sn_f_hgood.5 := f_01(d, w5_dn033_2, "good|excellent")]
d[, w_sn_f_hgood.6 := f_01(d, w6_dn033_2, "good|excellent")]
d[, w_sn_f_hgood.7 := f_01(d, w7_dn033_2, "good|excellent")]

# dn034_ byte %29.0g yesno Ever had any siblings
f_cn(d, "034")
table(d$w1_dn034_)
table(d$w7_dn034_)
f_dt_NULL(d, "temp")
d[, temp1 := f_01(d, w1_dn034_, "yes")]
d[, temp2 := f_01(d, w2_dn034_, "yes")]
d[, temp4 := f_01(d, w4_dn034_, "yes")]
d[, temp5 := f_01(d, w5_dn034_, "yes")]
d[, temp6 := f_01(d, w6_dn034_, "yes")]
d[, temp7 := f_01(d, w7_dn034_, "yes")]
d[, sibl := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]
# FIX: this tabulated d$siblings, a column that is never created (the new
# column is `sibl`), so the call tabulated NULL instead of the variable.
table(d$sibl)
h(d, , 40, "mergeid|sibl|temp")

# dn035_ byte %10.0g oldyoung Oldest or youngest child
f_cn(d, "035")
table(d$w1_dn035_)
table(d$w6_dn035_)
table(d$w7_dn035_) # not asked

# Birth-order indicators: the same five waves (1, 2, 4, 5, 6) are matched
# against "oldest", "youngest" and "between" in turn.
f_dt_NULL(d, "temp")
d[, temp1 := f_01(d, w1_dn035_, "oldest")]
d[, temp2 := f_01(d, w2_dn035_, "oldest")]
d[, temp4 := f_01(d, w4_dn035_, "oldest")]
d[, temp5 := f_01(d, w5_dn035_, "oldest")]
d[, temp6 := f_01(d, w6_dn035_, "oldest")]
d[, sibl_o := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]
# FIX: `j` was stale here -- it still held row indices from the father's
# age-at-death temp7, which f_dt_NULL() has since removed. No temp7 is built
# for dn035, so select on temp6, the last wave built in this section.
j <- which(!is.na(d$temp6))
h(d, j, 40, "mergeid|sibl|temp")
h(d, , 40, "mergeid|sibl|temp")

f_dt_NULL(d, "temp")
d[, temp1 := f_01(d, w1_dn035_, "youngest")]
d[, temp2 := f_01(d, w2_dn035_, "youngest")]
d[, temp4 := f_01(d, w4_dn035_, "youngest")]
d[, temp5 := f_01(d, w5_dn035_, "youngest")]
d[, temp6 := f_01(d, w6_dn035_, "youngest")]
d[, sibl_y := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]
h(d, , 40, "mergeid|sibl|temp")

f_dt_NULL(d, "temp")
d[, temp1 := f_01(d, w1_dn035_, "between")]
d[, temp2 := f_01(d, w2_dn035_, "between")]
d[, temp4 := f_01(d, w4_dn035_, "between")]
d[, temp5 := f_01(d, w5_dn035_, "between")]
d[, temp6 := f_01(d, w6_dn035_, "between")]
d[, sibl_b := f_rowmaxs_notNA(d, f_toc(d, "^temp[0-9]"))]
h(d, , 40, "mergeid|sibl|temp")

# dn036_ byte %10.0g dkrf How many brothers alive
f_cn(d, "036")
table(d$w1_dn036_)
table(d$w7_dn036_)
d[, w_sibl_b.1 := f_NA(d, w1_dn036_, -1, 20)]
d[, w_sibl_b.2 := f_NA(d, w2_dn036_, -1, 20)]
d[, w_sibl_b.4 := f_NA(d, w4_dn036_, -1, 20)]
d[, w_sibl_b.5 := f_NA(d, w5_dn036_, -1, 20)]
d[, w_sibl_b.6 := f_NA(d, w6_dn036_, -1, 20)]
d[, w_sibl_b.7 := f_NA(d, w7_dn036_, -1, 20)]
h(d, , 40, "mergeid|sibl|temp")

# dn037_ byte %10.0g dkrf How many sisters alive
d[, w_sibl_s.1 := f_NA(d, w1_dn037_, -1, 20)]
d[, w_sibl_s.2 := f_NA(d, w2_dn037_, -1, 20)]
d[, w_sibl_s.4 := f_NA(d, w4_dn037_, -1, 20)]
d[, w_sibl_s.5 := f_NA(d, w5_dn037_, -1, 20)]
d[, w_sibl_s.6 := f_NA(d, w6_dn037_, -1, 20)]
d[, w_sibl_s.7 := f_NA(d, w7_dn037_, -1, 20)]
h(d, , 40, "mergeid|sibl|temp")

# dn127_1 int %10.0g dkrf Year of death of parent: mother ONLY WAVE 67
f_cn(d, "127")
table(d$w6_dn127_1)
table(d$w7_dn127_1)
# Range-filtered (1850, 2020) year from waves 6 and 7, combined into m_d_y.
f_dt_NULL(d, "temp")
d[, temp6 := f_NA(d, w6_dn127_1, 1850, 2020)]
d[, temp7 := f_NA(d, w7_dn127_1, 1850, 2020)]
d[, m_d_y := f_waves(d, temp6, temp7)]
j <- which(!is.na(d$temp7))
h(d, j, 40, "mergeid|mother|father|temp")

# dn127_2 int %10.0g dkrf Year of death of parent: father ONLY WAVE 67
f_dt_NULL(d, "temp")
d[, temp6 := f_NA(d, w6_dn127_2, 1850, 2020)]
d[, temp7 := f_NA(d, w7_dn127_2, 1850, 2020)]
d[, f_d_y := f_waves(d, temp6, temp7)]
j <- which(!is.na(d$temp7))
h(d, j, 40, "mergeid|mother|father|temp")

# WAVE12 in gv_isco NOT RELIABLE, MANY LAB
# isco_mo str4 %4s isco-88 mother's job (dn029_1) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# isco_fa str4 %4s isco-88 father's job (dn029_2) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# text_mo str80 %80s label for isco_mo (mother's job) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# text_fa str100 %100s label for isco_fa (father's job) DN029_ LAST JOB OR OCCUPATION OF PARENT W1 (w2 ASKED NO DATA)
# dn029isco_1 int %10.0g dkrf ISCO code of mother when respondent was 10 WAVE 67 (WAVE 45 ASKED BUT NOT IN DN)
# dn029isco_2 int %10.0g dkrf ISCO code of father when respondent was 10 WAVE 67 (WAVE 45 ASKED BUT NOT IN DN)
f_cn(d, "isco")
table(d$w6_dn029isco_1)

# Collar indicators (white / blue / military) from the wave-6 ISCO code only.
d[, age10_m_isco_w := f_isco_collar(d, w6_dn029isco_1, "white")]
d[, age10_m_isco_b := f_isco_collar(d, w6_dn029isco_1, "blue")]
d[, age10_m_isco_m := f_isco_collar(d, w6_dn029isco_1, "military")]
d[, age10_f_isco_w := f_isco_collar(d, w6_dn029isco_2, "white")]
d[, age10_f_isco_b := f_isco_collar(d, w6_dn029isco_2, "blue")]
d[, age10_f_isco_m := f_isco_collar(d, w6_dn029isco_2, "military")]
h(d, , 40, "mergeid|_isco|dn029isco")

# Cross-check the father's white-collar indicator against the raw ISCO code.
table(d$age10_f_isco_w, d$w6_dn029isco_2)
# Cross-check the father's blue-collar and military indicators against the
# raw wave-6 ISCO code.
table(d$age10_f_isco_b, d$w6_dn029isco_2)
table(d$age10_f_isco_m, d$w6_dn029isco_2)

# dn629_1 byte %65.0g dn629_ Employment situation when you were 10: mother WAVE 6
# 1.ret, 2.empl/self, 3.unempl, 4.dis, 5.home, 97.other
# dn629_2 byte %65.0g dn629_ Employment situation when you were 10: father
f_cn(d, "629")
table(d$w6_dn629_1)

# One indicator per employment category, matched on the wave-6 label text.
# Suffixes: _w employed, _u unemployed, _h home, _d disabled, _r retired.
d[, age10_m_w := f_01(d, w6_dn629_1, "^employed")]
d[, age10_m_u := f_01(d, w6_dn629_1, "^unemployed")]
d[, age10_m_h := f_01(d, w6_dn629_1, "^home")]
d[, age10_m_d := f_01(d, w6_dn629_1, "disabled")]
d[, age10_m_r := f_01(d, w6_dn629_1, "retired")]

d[, age10_f_w := f_01(d, w6_dn629_2, "^employed")]
d[, age10_f_u := f_01(d, w6_dn629_2, "^unemployed")]
d[, age10_f_h := f_01(d, w6_dn629_2, "^home")]
d[, age10_f_d := f_01(d, w6_dn629_2, "disabled")]
d[, age10_f_r := f_01(d, w6_dn629_2, "retired")]
h(d, , 40, "mergeid|age10")

# dn051_1 byte %164.0g educat Highest school certificate/degree: mother in SHARE-ISCED
# dn051_2 byte %164.0g educat Highest school certificate/degree: father in SHARE-ISCED
# dn504c int %46.0g country Country of birth coded: mother in SHARE-LOC
# dn505c int %46.0g country Country of birth coded: father in SHARE-LOC

#####################################################################
# SAVE
#####################################################################
# Inspection: column-name look-ups before the selection below.
f_cn(d, "")
f_cn(d, "sib")
f_cn(d, "mergeid$|^w_|^ed_y|mar|female|mother|father|^m_|^f_|^sibl|^age10")

# Keep only the columns whose names match the pattern, drop duplicate
# mergeid rows, and sort by mergeid (setorder() reorders d by reference).
d <- d[
  ,
  grepl(
    pattern = "mergeid$|^w_|^ed_y|^educ|^mar|female|mother|father|^m_|^f_|^sibl|^age10",
    x = colnames(d)
  ),
  with = FALSE
]
d <- unique(d, by = "mergeid")
setorder(d, mergeid)
head(d)
colnames(d)

# Write the full file, then the subset of rows whose mergeid contains "CZ".
# Note that `d` itself is replaced by that subset from here on.
setwd("c:/SHARE/R/data")
fwrite(x = d, file = "data-dn.csv", na = NA)
d <- d[grepl("CZ", mergeid)]
fwrite(x = d, file = "data-dn-CZ.csv", na = NA)
setwd("c:/SHARE/R")
cat("Data saved", "\n")