# Read in the previously cleaned symptom data. `load()` restores whatever
# objects were saved in the .RData file into the current environment; the
# code below expects a data frame named `balanced_symptoms` to come from it
# (NOTE(review): confirm the object name against the cleaning script).
load(file = "../../fluanalysis/data/clean_symptoms.RData")
# Attempt #15 at ordering/de-ordering the variables so the models run well.
# balanced_symptoms$Weakness<-as.factor(balanced_symptoms$Weakness)
# balanced_symptoms$CoughIntensity<-as.factor(balanced_symptoms$CoughIntensity)
# balanced_symptoms$Myalgia<-as.factor(balanced_symptoms$Myalgia)
Machine Learning
I had the worst luck with this exercise. Something went wrong with my ordinal factors. I peeked at other students’ code and mine matched, but the code did not run. If the model coding was the same, I can only assume something went wrong with the data wrangling, but all the steps I tried and the different methods for dummy/ordered factors didn’t change the outcome. Maybe ML and I aren’t meant to be :(
Set it Up
Read in previously cleaned data
Split the data
# Fix the random seed so the train/test split is reproducible.
set.seed(123)

# Hold out 30% of the rows for testing; stratify on the outcome (BodyTemp)
# so both sets cover a similar range of body temperatures.
data_split <- initial_split(balanced_symptoms, prop = 7 / 10, strata = BodyTemp)

# Create data frames for the two sets:
train_data <- training(data_split)
test_data <- testing(data_split)
Set up the 5x5 split
# 5-fold cross-validation repeated 5 times on the training data, stratified
# on BodyTemp. These resamples are what the (commented-out) tuning calls use.
folds <- vfold_cv(train_data, v = 5, repeats = 5, strata = BodyTemp)
Create a recipe
# Recipe shared by the null, tree and LASSO workflows: predict BodyTemp from
# every other column, converting all nominal predictors to dummy variables.
model_recipe3 <-
  recipe(BodyTemp ~ ., data = train_data) %>%
  step_dummy(all_nominal_predictors())
# step_ordinalscore(Myalgia, CoughIntensity, Weakness) commenting out because it doesn't want to find the variables unless under VERY specific conditions (idk what those conditions are)
# model_recipe3<-prep(model_recipe3, training = train_data)
# An attempt I saw someone else do, it didn't work for me :(
Model Creation
Null Model
Build the model
# Null (baseline) regression model specification. It is stored as `null`
# because the workflow below adds it with `add_model(null)`.
null <- null_model() %>%
  set_engine("parsnip") %>%
  set_mode("regression") %>%
  translate()
# Bundle the null model specification with the shared recipe.
null_wf <-
  workflow() %>%
  add_model(null) %>%
  add_recipe(model_recipe3)
# Fit the null workflow on the training data.
null_fit <-
  null_wf %>%
  fit(train_data)
# Pull the underlying parsnip fit out of the workflow and print it as a tibble.
null_fit %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 1 × 1
value
<dbl>
1 98.9
View Trained RMSE
# Add the null model's predictions (.pred) to the training data, then compute
# the RMSE of those predictions against the observed BodyTemp.
null_train_aug <-
  augment(null_fit, train_data)

yardstick::rmse(null_train_aug, BodyTemp, .pred)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 1.22
Test the Null Model
# Print the null model's predictions for the held-out test rows.
null_fit %>%
  predict(test_data)
# A tibble: 223 × 1
.pred
<dbl>
1 98.9
2 98.9
3 98.9
4 98.9
5 98.9
6 98.9
7 98.9
8 98.9
9 98.9
10 98.9
# … with 213 more rows
# Add the null model's predictions (.pred) to the test data, then compute the
# test-set RMSE against the observed BodyTemp.
null_test_aug <-
  augment(null_fit, test_data)

yardstick::rmse(null_test_aug, BodyTemp, .pred)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 1.14
Decision Tree
# Regression tree specification (rpart engine). Cost-complexity and tree depth
# are marked with tune() so they can be chosen by grid search.
tune_spec <-
  decision_tree(
    cost_complexity = tune(),
    tree_depth = tune()
  ) %>%
  set_engine("rpart") %>%
  set_mode("regression")
# Regular grid over the two tuned tree parameters: 5 levels each, so 25
# candidate combinations.
tree_grid <- grid_regular(
  cost_complexity(),
  tree_depth(),
  levels = 5
)
# Bundle the tunable tree specification with the shared recipe.
tree_wf <- workflow() %>%
  add_model(tune_spec) %>%
  add_recipe(model_recipe3)
## Commenting out because otherwise it will run errors and won't knit
# tree_res <-
# tree_wf %>%
# tune_grid(
# resamples = folds, #recall this created CV from earlier
# grid = tree_grid
# )
# tree_res %>%collect_metrics()
#
# tree_res %>%
# show_best("rmse")
LASSO
# Reset the seed for the LASSO section.
set.seed(123)

# Linear regression fit with glmnet; mixture = 1 selects the pure LASSO
# penalty, and the penalty strength is left to be tuned.
lr_mod <- linear_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")
# Bundle the LASSO specification with the shared recipe.
lr_workflow <-
  workflow() %>%
  add_model(lr_mod) %>%
  add_recipe(model_recipe3)
# Candidate penalties: 30 values evenly spaced on the log10 scale between
# 1e-4 and 1e-1.
lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))
lr_reg_grid %>% top_n(-5) # lowest penalty values
Selecting by penalty
# A tibble: 5 × 1
penalty
<dbl>
1 0.0001
2 0.000127
3 0.000161
4 0.000204
5 0.000259
lr_reg_grid %>% top_n(5) # highest penalty values
Selecting by penalty
# A tibble: 5 × 1
penalty
<dbl>
1 0.0386
2 0.0489
3 0.0621
4 0.0788
5 0.1
## Commenting out because otherwise it will run errors and won't knit
# lr_res <- lr_workflow %>%
# tune_grid(resamples = folds,
# grid = lr_reg_grid) #Had more options here but other students didn't and theirs ran ok, mine did not regardless
#
# lr_res %>% show_best("rmse")
# best_lasso <- lr_res %>%
# select_best("rmse")
Random Forest
# Random forest specification (ranger engine) with 1000 trees; mtry and min_n
# are marked with tune() so they can be chosen during tuning.
rf_mod <-
  rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  set_engine("ranger") %>%
  set_mode("regression")
# Recipe for the random forest: same formula as the shared recipe, but with
# no dummy-coding step, so the predictors are passed through as they are.
ml_model2 <- recipe(BodyTemp ~ ., data = train_data)
# Bundle the random forest specification with its own recipe.
rf_workflow <-
  workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(ml_model2)
## Commenting out because otherwise it will run errors and won't knit
# rf_res <-
# rf_workflow %>%
# tune_grid(folds)