# Point the session at the blog-post folder so the relative data paths below
# resolve.
# NOTE(review): this is an absolute, machine-specific path; the script will
# stop here on any machine where the folder does not exist.
setwd("F:/RuiBlog/posts/R/使用tidyverse清洗CFPS数据")
## Import data
library(tidyverse)
# Read the 2018 SPSS file, make sure it is a tibble, then print a compact
# column overview (the console output is kept below as `##` comments).
file_path_2018 <- "./data/operated_data/data_2018.sav"
df_2018 <- as_tibble(haven::read_sav(file_path_2018))
glimpse(df_2018)
## Rows: 12,231
## Columns: 15
## $ id <chr> "100051_100051502", "100160_120009102", "100286_1300051…
## $ fid16.x <dbl+lbl> 100051, 100160, 100286, 100435, 100453, 100551, 100…
## $ provcd18 <dbl+lbl> 11, 12, 13, 13, 43, 13, 13, 13, 13, 37, 13, 13, 11,…
## $ urban18 <dbl+lbl> 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, …
## $ fml18 <dbl+lbl> 3, 2, 1, 4, 6, 3, 2, 4, 1, 2, 2, 4, 3, 4, 5, 1, 3, …
## $ fincome18 <dbl+lbl> 240000, 180000, 30000, 70000, 118000, 80000, 2…
## $ expense18 <dbl+lbl> 275300, 220200, 46860, 61096, 60000, 83000, 12…
## $ family_debts18 <dbl> 0, 470000, 50000, 180000, 0, 200000, 0, 20000, 500000, …
## $ family_asset18 <dbl> 3050000.0, 2440000.0, 600000.0, 831000.0, 3165000.0, 10…
## $ fid16.y <dbl+lbl> 100051, 100160, 100286, 100435, 100453, 100551, 100…
## $ age <dbl+lbl> 52, 27, 40, 31, 68, 30, 73, 30, 26, 33, 25, 31, 28,…
## $ age2 <dbl> 2704, 729, 1600, 961, 4624, 900, 5329, 900, 676, 1089, …
## $ gender <dbl+lbl> 1, 5, 1, 1, 1, 1, 5, 5, 1, 1, 5, 5, 1, 5, 1, 1, 5, …
## $ marriage <dbl+lbl> 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, …
## $ health <dbl+lbl> -8, 6, -8, -8, 6, -8, 4, 7, -8, -8, -8, 5, -8,…
# Read the 2016 SPSS file, make sure it is a tibble, then print a compact
# column overview (the console output is kept below as `##` comments).
file_path_2016 <- "./data/operated_data/data_2016.sav"
df_2016 <- as_tibble(haven::read_sav(file_path_2016))
glimpse(df_2016)
## Rows: 12,501
## Columns: 15
## $ id <chr> "100051_100051502", "100160_120009102", "100286_1300051…
## $ fid16.x <dbl+lbl> 100051, 100160, 100286, 100376, 100531, 100569, 100…
## $ provcd16 <dbl+lbl> 11, 12, 13, 13, 13, 13, 13, 13, 37, 13, 13, 11, 13,…
## $ urban16 <dbl+lbl> 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ fml16 <dbl+lbl> 3, 1, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 4, 1, 5, 1, 2, …
## $ expense16 <dbl+lbl> 102140.0, 50691.0, 31900.0, 67000.0, 33600.0, …
## $ fincome16 <dbl+lbl> 180000, 85000, 70700, 76000, 28500, 1800, 20…
## $ family_debts16 <dbl> 0, 0, 120000, 0, 0, 0, 0, 200000, 400000, 0, 12000, 0, …
## $ family_asset16 <dbl> 3180000.0, 75000.0, 158750.0, 4800.0, 536562.5, 1300.0,…
## $ fid16.y <dbl+lbl> 100051, 100160, 100286, 100376, 100531, 100569, 100…
## $ age <dbl+lbl> 50, 25, 38, 20, 31, 71, 28, 25, 31, 23, 29, 32, 35,…
## $ age2 <dbl> 2500, 625, 1444, 400, 961, 5041, 784, 625, 961, 529, 84…
## $ gender <dbl+lbl> 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, …
## $ marriage <dbl+lbl> 2, 1, 4, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, …
## $ health <dbl+lbl> -8, -8, 7, -8, -8, 6, 5, -8, -8, -8, -8, -8, 6,…
## Rename variables
# Drop `fid16.y` (in the glimpse output it shows the same leading values as
# `fid16.x`), keep `fid16.x` as `fid`, and give the remaining unsuffixed
# columns an explicit 2018 suffix so they match `provcd18`, `urban18`, etc.
#
# The assignment is written out with `<-` and `%>%` because the compound
# assignment pipe `%<>%` lives in magrittr, which `library(tidyverse)` does
# not attach; using it here would fail with "could not find function".
df_2018 <- df_2018 %>%
  select(-fid16.y) %>%
  rename(
    fid = fid16.x,
    age18 = age,
    age2_18 = age2,
    gender18 = gender,
    marriage18 = marriage,
    health18 = health
  )
# Drop `fid16.y` (in the glimpse output it shows the same leading values as
# `fid16.x`), keep `fid16.x` as `fid`, and give the remaining unsuffixed
# columns an explicit 2016 suffix so they match `provcd16`, `urban16`, etc.
#
# The assignment is written out with `<-` and `%>%` because the compound
# assignment pipe `%<>%` lives in magrittr, which `library(tidyverse)` does
# not attach; using it here would fail with "could not find function".
df_2016 <- df_2016 %>%
  select(-fid16.y) %>%
  rename(
    fid = fid16.x,
    age16 = age,
    age2_16 = age2,
    gender16 = gender,
    marriage16 = marriage,
    health16 = health
  )