728x90
반응형
# Variable selection ----
# step() itself comes from the base 'stats' package. MASS is still attached as
# in the original script, but it is only installed when it is actually missing,
# so re-running the script does not reinstall the package every time.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)

# Regression of Credit_amount on the six predictors written out explicitly.
reg <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Age + Num_of_existing_credits + Num_of_people_liable,
  data = german1
)
# Intercept-only model.
null <- lm(Credit_amount ~ 1, data = german1)
# Model using every other column of german1 as a predictor.
full <- lm(Credit_amount ~ ., data = german1)
# Partial F-test ----
# Reduced model: the six-predictor formula with Age left out. Comparing it
# against `full` with anova() tests whether the omitted term(s) significantly
# improve the fit.
reduced_model <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Num_of_existing_credits + Num_of_people_liable,
  data = german1
)
anova(reduced_model, full)
Analysis of Variance Table
Model 1: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
Num_of_existing_credits + Num_of_people_liable
Model 2: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
Age + Num_of_existing_credits + Num_of_people_liable
Res.Df RSS Df Sum of Sq F Pr(>F)
1 994 4028028192
2 993 3991608356 1 36419836 9.0602 0.002678
# Partial F-test for Num_of_people_liable: the reduced model keeps the other
# five predictors and leaves this one out, then is compared against `full`.
# Note that this overwrites any earlier `reduced_model`.
reduced_model <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Age + Num_of_existing_credits,
  data = german1
)
anova(reduced_model, full)
Analysis of Variance Table
Model 1: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
Age + Num_of_existing_credits
Model 2: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
Age + Num_of_existing_credits + Num_of_people_liable
Res.Df RSS Df Sum of Sq F Pr(>F)
1 994 3991629352
2 993 3991608356 1 20996 0.0052 0.9424
Partial F-test 결과(F = 0.0052, p = 0.9424), Num_of_people_liable 변수는 모델에 포함하든 제외하든 모델의 설명력에 거의 영향을 주지 않는다.
따라서 후진소거법에서 초기 단계에 제거된다(아래 결과에서는 Present_residence 다음으로 두 번째에 제거된다).
# Forward selection ----
# Start from the intercept-only model and let step() add terms, with the
# search bounded below by `null` and above by `full`.
forward <- step(
  null,
  scope = list(lower = null, upper = full),
  direction = "forward"
)
Start: AIC=15891.92
Credit_amount ~ 1
Df Sum of Sq RSS AIC
+ Duration_in_month 1 3109169195 4850706433 15399
+ Installment_rate 1 585944034 7373931593 15818
<none> 7959875627 15892
+ Age 1 8519964 7951355664 15893
+ Present_residence 1 6660284 7953215343 15893
+ Num_of_existing_credits 1 3441957 7956433671 15894
+ Num_of_people_liable 1 2339037 7957536591 15894
Step: AIC=15398.63
Credit_amount ~ Duration_in_month
Df Sum of Sq RSS AIC
+ Installment_rate 1 809622830 4041083603 15218
+ Age 1 24374796 4826331637 15396
<none> 4850706433 15399
+ Num_of_people_liable 1 8175092 4842531341 15399
+ Num_of_existing_credits 1 6173147 4844533286 15399
+ Present_residence 1 464529 4850241904 15400
Step: AIC=15218.02
Credit_amount ~ Duration_in_month + Installment_rate
Df Sum of Sq RSS AIC
+ Age 1 44760941 3996322662 15209
+ Num_of_existing_credits 1 9782981 4031300622 15218
<none> 4041083603 15218
+ Present_residence 1 4075067 4037008536 15219
+ Num_of_people_liable 1 774089 4040309514 15220
Step: AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age
Df Sum of Sq RSS AIC
<none> 3996322662 15209
+ Num_of_existing_credits 1 4674843 3991647819 15210
+ Present_residence 1 61831 3996260831 15211
+ Num_of_people_liable 1 3906 3996318756 15211
# Backward elimination ----
# Start from the six-predictor model `reg` and let step() drop terms.
backward <- step(
  reg,
  direction = "backward"
)
Start: AIC=15213.7
Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
Age + Num_of_existing_credits + Num_of_people_liable
Df Sum of Sq RSS AIC
- Present_residence 1 18867 3991627223 15212
- Num_of_people_liable 1 20996 3991629352 15212
- Num_of_existing_credits 1 4649024 3996257379 15213
<none> 3991608356 15214
- Age 1 36419836 4028028192 15221
- Installment_rate 1 826415919 4818024275 15400
- Duration_in_month 1 3352586147 7344194503 15821
Step: AIC=15211.71
Credit_amount ~ Duration_in_month + Installment_rate + Age +
Num_of_existing_credits + Num_of_people_liable
Df Sum of Sq RSS AIC
- Num_of_people_liable 1 20595 3991647819 15210
- Num_of_existing_credits 1 4691533 3996318756 15211
<none> 3991627223 15212
- Age 1 39387168 4031014391 15220
- Installment_rate 1 826999930 4818627153 15398
- Duration_in_month 1 3359594009 7351221232 15820
Step: AIC=15209.71
Credit_amount ~ Duration_in_month + Installment_rate + Age +
Num_of_existing_credits
Df Sum of Sq RSS AIC
- Num_of_existing_credits 1 4674843 3996322662 15209
<none> 3991647819 15210
- Age 1 39652804 4031300622 15218
- Installment_rate 1 831554727 4823202545 15397
- Duration_in_month 1 3360391088 7352038906 15818
Step: AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age
Df Sum of Sq RSS AIC
<none> 3996322662 15209
- Age 1 44760941 4041083603 15218
- Installment_rate 1 830008975 4826331637 15396
- Duration_in_month 1 3358802305 7355124967 15817
# Stepwise selection (both directions) ----
# Start from the intercept-only model; at each step step() may add or drop a
# term, with the search bounded below by `null` and above by `full`.
stepwise <- step(
  null,
  scope = list(lower = null, upper = full),
  direction = "both"
)
Start: AIC=15891.92
Credit_amount ~ 1
Df Sum of Sq RSS AIC
+ Duration_in_month 1 3109169195 4850706433 15399
+ Installment_rate 1 585944034 7373931593 15818
<none> 7959875627 15892
+ Age 1 8519964 7951355664 15893
+ Present_residence 1 6660284 7953215343 15893
+ Num_of_existing_credits 1 3441957 7956433671 15894
+ Num_of_people_liable 1 2339037 7957536591 15894
Step: AIC=15398.63
Credit_amount ~ Duration_in_month
Df Sum of Sq RSS AIC
+ Installment_rate 1 809622830 4041083603 15218
+ Age 1 24374796 4826331637 15396
<none> 4850706433 15399
+ Num_of_people_liable 1 8175092 4842531341 15399
+ Num_of_existing_credits 1 6173147 4844533286 15399
+ Present_residence 1 464529 4850241904 15400
- Duration_in_month 1 3109169195 7959875627 15892
Step: AIC=15218.02
Credit_amount ~ Duration_in_month + Installment_rate
Df Sum of Sq RSS AIC
+ Age 1 44760941 3996322662 15209
+ Num_of_existing_credits 1 9782981 4031300622 15218
<none> 4041083603 15218
+ Present_residence 1 4075067 4037008536 15219
+ Num_of_people_liable 1 774089 4040309514 15220
- Installment_rate 1 809622830 4850706433 15399
- Duration_in_month 1 3332847990 7373931593 15818
Step: AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age
Df Sum of Sq RSS AIC
<none> 3996322662 15209
+ Num_of_existing_credits 1 4674843 3991647819 15210
+ Present_residence 1 61831 3996260831 15211
+ Num_of_people_liable 1 3906 3996318756 15211
- Age 1 44760941 4041083603 15218
- Installment_rate 1 830008975 4826331637 15396
- Duration_in_month 1 3358802305 7355124967 15817
반응형
'데이터 다루기 > Base of R' 카테고리의 다른 글
[R] Ridge, Lasso, ElasticNet Regression (1) | 2020.03.09 |
---|---|
[R] Data Partition (데이터 분할) (0) | 2020.03.09 |
[R] 회귀 분석 (0) | 2020.03.06 |
[Data] LendingClub (P2P Default 예측 데이터) (1) | 2020.01.17 |
[R] dplyr 패키지로 데이터 전처리하기 (0) | 2019.08.27 |