본문 바로가기

데이터 다루기/Base of R

[R] 회귀 분석 (변수선택)

728x90
반응형
# Variable selection for a linear model of Credit_amount.
# NOTE(review): `german1` is not defined in this snippet; it is assumed to be
# a data frame already loaded in the session -- confirm before running.

# Install MASS only when it is missing, so re-running the script does not
# re-download and reinstall the package every time.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)

# Candidate model with six explicitly listed predictors.
reg <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Age + Num_of_existing_credits + Num_of_people_liable,
  data = german1
)
# Intercept-only model.
null <- lm(Credit_amount ~ 1, data = german1)
# Model regressing Credit_amount on every other column of german1.
full <- lm(Credit_amount ~ ., data = german1)

# Partial F-test: compare `full` against a nested model that omits Age.
# anova() on two nested lm fits reports the F statistic for the omitted term.
reduced_model <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Num_of_existing_credits + Num_of_people_liable,
  data = german1
)
anova(reduced_model, full)


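(참고: 아래 출력의 Model 1은 Age를 포함하고 Num_of_people_liable을 제외한 모형이다. 즉 바로 위 코드(Age를 제외한 reduced_model)의 결과가 아니라, 아래 두 번째 검정과 동일한 출력이다.)

Analysis of Variance Table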

Model 1: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence + 
    Age + Num_of_existing_credits
Model 2: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence + 
    Age + Num_of_existing_credits + Num_of_people_liable
  Res.Df        RSS Df Sum of Sq      F Pr(>F)
1    994 3991629352                           
2    993 3991608356  1     20996 0.0052 0.9424

# Partial F-test: compare `full` against a nested model that omits
# Num_of_people_liable.
reduced_model <- lm(
  Credit_amount ~ Duration_in_month + Installment_rate + Present_residence +
    Age + Num_of_existing_credits,
  data = german1
)
anova(reduced_model, full)


Analysis of Variance Table

Model 1: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence + 
    Age + Num_of_existing_credits
Model 2: Credit_amount ~ Duration_in_month + Installment_rate + Present_residence + 
    Age + Num_of_existing_credits + Num_of_people_liable
  Res.Df        RSS Df Sum of Sq      F Pr(>F)
1    994 3991629352                           
2    993 3991608356  1     20996 0.0052 0.9424

Partial F-test 결과(F = 0.0052, p = 0.9424), Num_of_people_liable 변수는 모형에 포함하든 제외하든 모형의 설명력에 거의 차이가 없다.

따라서 후진소거법에서도 초기 단계(Present_residence 다음, 두 번째)에 제거된다.

 

# Forward selection: start from the intercept-only model `null` and search
# between `null` (lower bound) and `full` (upper bound).
forward <- step(
  null,
  scope = list(lower = null, upper = full),
  direction = "forward"
)

Start:  AIC=15891.92
Credit_amount ~ 1

                          Df  Sum of Sq        RSS   AIC
+ Duration_in_month        1 3109169195 4850706433 15399
+ Installment_rate         1  585944034 7373931593 15818
<none>                                  7959875627 15892
+ Age                      1    8519964 7951355664 15893
+ Present_residence        1    6660284 7953215343 15893
+ Num_of_existing_credits  1    3441957 7956433671 15894
+ Num_of_people_liable     1    2339037 7957536591 15894

Step:  AIC=15398.63
Credit_amount ~ Duration_in_month

                          Df Sum of Sq        RSS   AIC
+ Installment_rate         1 809622830 4041083603 15218
+ Age                      1  24374796 4826331637 15396
<none>                                 4850706433 15399
+ Num_of_people_liable     1   8175092 4842531341 15399
+ Num_of_existing_credits  1   6173147 4844533286 15399
+ Present_residence        1    464529 4850241904 15400

Step:  AIC=15218.02
Credit_amount ~ Duration_in_month + Installment_rate

                          Df Sum of Sq        RSS   AIC
+ Age                      1  44760941 3996322662 15209
+ Num_of_existing_credits  1   9782981 4031300622 15218
<none>                                 4041083603 15218
+ Present_residence        1   4075067 4037008536 15219
+ Num_of_people_liable     1    774089 4040309514 15220

Step:  AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age

                          Df Sum of Sq        RSS   AIC
<none>                                 3996322662 15209
+ Num_of_existing_credits  1   4674843 3991647819 15210
+ Present_residence        1     61831 3996260831 15211
+ Num_of_people_liable     1      3906 3996318756 15211
# Backward elimination: start from the six-predictor model `reg`.
backward <- step(object = reg, direction = "backward")

Start:  AIC=15213.7
Credit_amount ~ Duration_in_month + Installment_rate + Present_residence + 
    Age + Num_of_existing_credits + Num_of_people_liable

                          Df  Sum of Sq        RSS   AIC
- Present_residence        1      18867 3991627223 15212
- Num_of_people_liable     1      20996 3991629352 15212
- Num_of_existing_credits  1    4649024 3996257379 15213
<none>                                  3991608356 15214
- Age                      1   36419836 4028028192 15221
- Installment_rate         1  826415919 4818024275 15400
- Duration_in_month        1 3352586147 7344194503 15821

Step:  AIC=15211.71
Credit_amount ~ Duration_in_month + Installment_rate + Age + 
    Num_of_existing_credits + Num_of_people_liable

                          Df  Sum of Sq        RSS   AIC
- Num_of_people_liable     1      20595 3991647819 15210
- Num_of_existing_credits  1    4691533 3996318756 15211
<none>                                  3991627223 15212
- Age                      1   39387168 4031014391 15220
- Installment_rate         1  826999930 4818627153 15398
- Duration_in_month        1 3359594009 7351221232 15820

Step:  AIC=15209.71
Credit_amount ~ Duration_in_month + Installment_rate + Age + 
    Num_of_existing_credits

                          Df  Sum of Sq        RSS   AIC
- Num_of_existing_credits  1    4674843 3996322662 15209
<none>                                  3991647819 15210
- Age                      1   39652804 4031300622 15218
- Installment_rate         1  831554727 4823202545 15397
- Duration_in_month        1 3360391088 7352038906 15818

Step:  AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age

                    Df  Sum of Sq        RSS   AIC
<none>                            3996322662 15209
- Age                1   44760941 4041083603 15218
- Installment_rate   1  830008975 4826331637 15396
- Duration_in_month  1 3358802305 7355124967 15817
# Stepwise selection (both directions): start from `null` and search between
# `null` (lower bound) and `full` (upper bound).
stepwise <- step(
  null,
  scope = list(lower = null, upper = full),
  direction = "both"
)

Start:  AIC=15891.92
Credit_amount ~ 1

                          Df  Sum of Sq        RSS   AIC
+ Duration_in_month        1 3109169195 4850706433 15399
+ Installment_rate         1  585944034 7373931593 15818
<none>                                  7959875627 15892
+ Age                      1    8519964 7951355664 15893
+ Present_residence        1    6660284 7953215343 15893
+ Num_of_existing_credits  1    3441957 7956433671 15894
+ Num_of_people_liable     1    2339037 7957536591 15894

Step:  AIC=15398.63
Credit_amount ~ Duration_in_month

                          Df  Sum of Sq        RSS   AIC
+ Installment_rate         1  809622830 4041083603 15218
+ Age                      1   24374796 4826331637 15396
<none>                                  4850706433 15399
+ Num_of_people_liable     1    8175092 4842531341 15399
+ Num_of_existing_credits  1    6173147 4844533286 15399
+ Present_residence        1     464529 4850241904 15400
- Duration_in_month        1 3109169195 7959875627 15892

Step:  AIC=15218.02
Credit_amount ~ Duration_in_month + Installment_rate

                          Df  Sum of Sq        RSS   AIC
+ Age                      1   44760941 3996322662 15209
+ Num_of_existing_credits  1    9782981 4031300622 15218
<none>                                  4041083603 15218
+ Present_residence        1    4075067 4037008536 15219
+ Num_of_people_liable     1     774089 4040309514 15220
- Installment_rate         1  809622830 4850706433 15399
- Duration_in_month        1 3332847990 7373931593 15818

Step:  AIC=15208.89
Credit_amount ~ Duration_in_month + Installment_rate + Age

                          Df  Sum of Sq        RSS   AIC
<none>                                  3996322662 15209
+ Num_of_existing_credits  1    4674843 3991647819 15210
+ Present_residence        1      61831 3996260831 15211
+ Num_of_people_liable     1       3906 3996318756 15211
- Age                      1   44760941 4041083603 15218
- Installment_rate         1  830008975 4826331637 15396
- Duration_in_month        1 3358802305 7355124967 15817
​
반응형