# Predictive Analytics in R
#
# This script contains the R code from every section of this slide set.

# Evaluation Metrics ----

# Classification example: true class labels and the labels a model predicted.
trueVals <- c("c1", "c1", "c2", "c1", "c3",
              "c1", "c2", "c3", "c2", "c3")
preds <- c("c1", "c2", "c1", "c3", "c3",
           "c1", "c1", "c3", "c1", "c2")

# Cross-tabulate over the union of the labels seen in either vector, so the
# confusion matrix is always square with rows and columns in the same class
# order. Without shared levels, a class missing from one vector would drop a
# row or column and diag() would no longer pick the correctly classified cells.
classLevels <- sort(union(trueVals, preds))
confMatrix <- table(trueVals = factor(trueVals, levels = classLevels),
                    preds = factor(preds, levels = classLevels))
confMatrix

# Error rate = 1 - accuracy; the diagonal holds the correct predictions.
errorRate <- 1 - sum(diag(confMatrix)) / sum(confMatrix)
errorRate

# Regression example: true target values and the corresponding predictions.
trueVals <- c(10.2, -3, 5.4, 3, -43, 21,
              32.4, 10.4, -65, 23)
preds <- c(13.1, -6, 0.4, -1.3, -30, 1.6,
           3.9, 16.2, -6, 20.4)

# Mean squared error.
mse <- mean((trueVals - preds)^2)
mse
# Root mean squared error (same units as the target).
rmse <- sqrt(mse)
rmse
# Mean absolute error.
mae <- mean(abs(trueVals - preds))
mae
# Normalised MSE: squared error relative to that of always predicting
# mean(trueVals).
nmse <- sum((trueVals - preds)^2) /
  sum((trueVals - mean(trueVals))^2)
nmse

# Normalised MAE: absolute error relative to that of always predicting
# mean(trueVals).
nmae <- sum(abs(trueVals - preds)) /
  sum(abs(trueVals - mean(trueVals)))
nmae
# Mean absolute percentage error. The absolute value has to cover the whole
# ratio: trueVals contains negative values, and dividing by them directly
# would yield negative terms that cancel the positive ones.
mape <- mean(abs((trueVals - preds) / trueVals))
mape
# Symmetric MAPE: absolute error relative to |prediction| + |true value|.
smape <- mean(abs(preds - trueVals) / (abs(preds) + abs(trueVals)))
smape
# Pearson correlation between true and predicted values.
corr <- cor(trueVals, preds)
corr

# Multiple Linear Regression ----

library(tidymodels)

# Load the algae data set that ships with the DMwR2 package.
data(algae, package = "DMwR2")

# Preparing the data: keep the first 12 columns and drop rows 62 and 199.
alg <- as_tibble(algae) %>%
  select(1:12) %>%
  slice(-c(62, 199))

# Model specification: linear regression (the type of model), implemented by
# the "lm" engine (the implementation to use).
lmSpec <- set_engine(linear_reg(), "lm")

# Fit the model to the data, predicting a1 from all remaining columns.
# Note: the variable `lm` reuses the name of stats::lm; R still finds the
# function when `lm(...)` is called.
lm <- fit(lmSpec, a1 ~ ., data = alg)

# Show the fitted model as a tidy table.
tidy(lm)

# Hold-out evaluation: 70% of the rows go to training, the rest to testing.
# No seed is set here, so the random split differs between runs.
dataSplit <- initial_split(alg, prop = 0.7)
algTr <- training(dataSplit)  # training set
algTs <- testing(dataSplit)   # test set

# Fit the same model specification on the training set only.
lmTr <- fit(lmSpec, a1 ~ ., data = algTr)

# Predict the test set and score the predictions against the true a1 values.
preds <- predict(lmTr, new_data = algTs)
metrics(bind_cols(algTs, preds), a1, .pred)

# True vs. predicted values; the reference line y = x marks perfect
# predictions.
library(ggplot2)
ggplot(bind_cols(algTs, preds), aes(x = a1, y = .pred)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  xlab("True") +
  ylab("Predicted")

# Support Vector Machines (SVMs) ----

library(tidymodels)
data(iris)

# Model specification: an RBF-kernel SVM (the type of model) for a
# classification task, implemented by the "kernlab" engine.
svmSpec <- svm_rbf(mode = "classification") %>%
  set_engine("kernlab")

# Fit the model to the data, predicting Species from all other columns.
s <- fit(svmSpec, Species ~ ., data = iris)

# Same kind of model, but with explicit values for the cost and margin
# parameters instead of the defaults.
svmSpec2 <- svm_rbf(mode = "classification", cost = 10, margin = 0.01) %>%
  set_engine("kernlab")

s2 <- fit(svmSpec2, Species ~ ., data = iris)

# Hold-out evaluation on iris: 70% of the rows for training, the rest for
# testing. No seed is set here, so the random split differs between runs.
dataSplit <- initial_split(iris, prop = 0.7)
irTr <- training(dataSplit)  # training set
irTs <- testing(dataSplit)   # test set

# Fit the default SVM specification on the training set only.
svmTr <- fit(svmSpec, Species ~ ., data = irTr)

# Put the true species next to the predicted class for every test row.
results <- bind_cols(
  select(irTs, Species),
  predict(svmTr, new_data = irTs)
)
head(results)

# Default classification metrics for the test-set predictions.
metrics(results, Species, .pred_class)

# Confusion matrix of true vs. predicted species ...
conf_mat(results, Species, .pred_class)

# ... and the same matrix drawn as a heatmap.
autoplot(
  conf_mat(results, Species, .pred_class),
  type = "heatmap"
)

# Regression with an SVM on the Boston data set from the MASS package.
data(Boston, package = "MASS")

# 70% / 30% random train/test split (no seed is set, so it varies per run).
dataSplit <- initial_split(Boston, prop = 0.7)
bTr <- training(dataSplit)  # training set
bTs <- testing(dataSplit)   # test set

# Redefine svmSpec for a regression task: an RBF-kernel SVM (the type of
# model) implemented by the "kernlab" engine.
svmSpec <- svm_rbf(mode = "regression") %>%
  set_engine("kernlab")

# Fit on the training set, predicting medv from all other columns.
sTr <- fit(svmSpec, medv ~ ., data = bTr)

# Predict the test set and score the predictions against the true medv values.
preds <- predict(sTr, new_data = bTs)
metrics(bind_cols(bTs, preds), medv, .pred)

# True vs. predicted values; the reference line y = x marks perfect
# predictions.
library(ggplot2)
ggplot(bind_cols(bTs, preds), aes(x = medv, y = .pred)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0) +
  xlab("True") +
  ylab("Predicted")

# Model Ensembles ----

# Model Ensembles and Random Forests ----

library(tidymodels)

# Regression with a random forest on the Boston data set from MASS.
data(Boston, package = "MASS")

# 70% / 30% random train/test split (no seed is set, so it varies per run).
dataSplit <- initial_split(Boston, prop = 0.7)
bTr <- training(dataSplit)  # training set
bTs <- testing(dataSplit)   # test set

# Model specification: a random forest (the type of model) for a regression
# task, implemented by the "ranger" engine.
rfSpec <- rand_forest(mode = "regression") %>%
  set_engine("ranger")

# Fit on the training set, predicting medv from all other columns.
rfTr <- fit(rfSpec, medv ~ ., data = bTr)

# Predict the test set and score the predictions against the true medv values.
preds <- predict(rfTr, new_data = bTs)
metrics(bind_cols(bTs, preds), medv, .pred)

# Classification with a random forest on iris.
data(iris)

# 70% / 30% random train/test split (no seed is set, so it varies per run).
dataSplit <- initial_split(iris, prop = 0.7)
irTr <- training(dataSplit)  # training set
irTs <- testing(dataSplit)   # test set

# Model specification: a random forest (the type of model) for a
# classification task, implemented by the "randomForest" engine.
rfSpec <- rand_forest(mode = "classification") %>%
  set_engine("randomForest")

# Fit on the training set, predicting Species from all other columns.
rfTr <- fit(rfSpec, Species ~ ., data = irTr)

# Put the true species next to the predicted class for every test row, then
# compute the default classification metrics.
results <- bind_cols(
  select(irTs, Species),
  predict(rfTr, new_data = irTs)
)
metrics(results, Species, .pred_class)