Machine Learning in Mining

Author

Naphtali Dube

Published

April 17, 2024

library(ggplot2) # Data visualization
library(plotly) # Interactive data visualizations
library(psych) # Will be used for correlation visualizations
library(rattle) # Graphing decision trees
library(caret) # Machine learning
library(tidymodels) # Modelling framework: recipes, resampling, workflows
library(tidyverse) # Data wrangling
ore_stregth  host_rock_strength  thickness           shape      dip           ore_uniformity  depth               ore_grade    mining_method
Strong       Moderate-strong     Thick               Tabular    Intermediate  Uniform         Moderate            Moderate     Sublevel stoping
Moderate     Moderate-strong     Very narrow-narrow  Tabular    Intermediate  uniform         moderate / deep     Moderate     Longwall
Moderate     Strong              Narrow              Tabular    Steep         variable        moderate/deep       Fairly high  Cut and Fill
Strong       Moderate-strong     Thick               Irregular  Intermediate  variable        moderate/deep       Fairly high  Cut and Fill
Weak         Moderate            Very narrow         Irregular  Steep         variable        moderate/deep       Fairly high  Cut and Fill
Moderate     Weak-moderate       Very narrow         Tabular    Flat          uniform         Shallow / moderate  Moderate     Breast stoping
table(out_new$mining_method)

  Apparent mining      Block Caving    Breast stoping      Cut and Fill 
               16                49                23               195 
         Longwall         Open Cast          Open Pit   Room and Pillar 
               65                32                32                83 
        Shrinkage Shrinkage Stoping   Solution Mining        Square Set 
               34                32                32                32 
    Stull Stoping   Sublevel Caving  Sublevel stoping  Sublevel Stoping 
               32                48                16               283 
table(out_new$ore_stregth)

               Any        Fair-strong           Moderate    Moderate-strong 
                64                 96                219                 66 
            Strong Strong-very strong        Very strong     Very weak-weak 
               166                 48                 17                 64 
              Weak          Weak-fair      Weak-moderate 
                83                164                 17 

Data cleaning

# Collapse duplicate / overlapping mining-method labels into single levels,
# then standardise the free-text predictor categories
out_new <- out_new |> 
  mutate(mining_method = fct_collapse(mining_method,
                                      `Room and Pillar` = c("Room and Pillar", "Open Cast"),
                                      `Shrinkage Stoping` = c("Shrinkage Stoping", "Shrinkage", "Open Pit"),
                                      `Cut and Fill` = c("Cut and Fill"),
                                      Longwall = c("Longwall"),
                                      `Sublevel stoping` = c("Sublevel stoping", "Sublevel Stoping"))) |>
  mutate(thickness = case_when(str_detect(thickness, "hick") ~ "Thick",
                               str_detect(thickness, "Narrow") ~ "Narrow",
                               .default = "Moderate"),
         ore_stregth = case_when(str_detect(ore_stregth, "eak") ~ "Weak",
                                 str_detect(ore_stregth, "oderate") ~ "Moderate",
                                 str_detect(ore_stregth, "strong") ~ "Strong",
                                 .default = "fair"),
         host_rock_strength = case_when(str_detect(host_rock_strength, "eak") ~ "Weak",
                                        str_detect(host_rock_strength, "oderate") ~ "Moderate",
                                        str_detect(host_rock_strength, "strong") ~ "Strong",
                                        .default = "fair"),
         dip = case_when(str_detect(dip, "teep") ~ "Steep",
                         str_detect(dip, "lat") ~ "Flat",
                         str_detect(str_to_lower(dip), "deep") ~ "Deep",
                         .default = "Moderate"),
         ore_grade = case_when(str_detect(ore_grade, "high") ~ "High",
                               str_detect(ore_grade, "hallow") ~ "Shallow",
                               str_detect(str_to_lower(ore_grade), "deep") ~ "Deep",
                               .default = "Moderate"),
         ore_uniformity = case_when(str_detect(str_to_lower(ore_uniformity), "variable") ~ "variable",
                                    str_detect(str_to_lower(ore_uniformity), "uniform") ~ "uniform",
                                    .default = "Moderate"),
         depth = case_when(str_detect(str_to_lower(depth), "moderate") ~ "Moderate",
                           str_detect(str_to_lower(depth), "high") ~ "high",
                           .default = depth))
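
As a quick sanity check, the collapsed labels can be re-tabulated to confirm that the duplicate spellings (for example "Sublevel stoping" and "Sublevel Stoping") were merged:

# Re-count the mining-method levels after collapsing duplicate labels
table(out_new$mining_method)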

Distribution of mining methods

plt_df <- out_new %>%
  group_by(mining_method) %>%
  mutate(x.lab=paste0(mining_method,
                      "\n",
                      "(n=",
                      n(),
                      ")")) |> 
  group_by(x.lab) |> 
  tally() |> 
  mutate(pct = n/sum(n)*100,
         lbl = sprintf("%.1f%%", pct)) |> 
  mutate(variable="variable") 
  

plt_df %>%
 ggplot() +
  aes(x = fct_reorder(x.lab,pct),y=pct,fill=variable) +
  geom_col(width=0.5) +
  geom_text(aes(label=lbl),position=position_stack(vjust=0.5),size=2.5)+
  labs(title = "", y = "percentage", x ="",fill="") +
  ggthemes::scale_fill_economist() +
  ggthemes::theme_tufte() +
  theme(legend.position = "none",
        axis.text.x = element_text(face="bold", 
                                   color="black",
                                   size=8,angle = 60))

Model building

# Fix the random numbers by setting the seed 
# This enables the analysis to be reproducible 
set.seed(123)

# Put 70% of the data into the training set 
data_split <- initial_split(out_new, 
                           prop = 0.70, 
                           strata = mining_method)

# Create dataframes for the two sets:
train_data <- training(data_split) 
test_data <- testing(data_split)
housing_rec <-
  recipe(mining_method ~ .,
         data = train_data) %>%
  step_naomit(everything(), skip = TRUE) |>
  step_dummy(all_nominal(), -all_outcomes()) 
housing_rec_prep<-housing_rec |> 
  prep()

mine_train<-bake(housing_rec_prep,NULL)
mine_test<-bake(housing_rec_prep,test_data)
summary(housing_rec)
# A tibble: 9 × 4
  variable           type      role      source  
  <chr>              <list>    <chr>     <chr>   
1 ore_stregth        <chr [3]> predictor original
2 host_rock_strength <chr [3]> predictor original
3 thickness          <chr [3]> predictor original
4 shape              <chr [3]> predictor original
5 dip                <chr [3]> predictor original
6 ore_uniformity     <chr [3]> predictor original
7 depth              <chr [3]> predictor original
8 ore_grade          <chr [3]> predictor original
9 mining_method      <chr [3]> outcome   original
prepped_data <- 
  housing_rec |>  # use the recipe object
  prep() |>       # perform the recipe on the training data
  juice()         # extract only the preprocessed dataframe 

5-fold cross-validation

set.seed(100)

cv_folds <-
 vfold_cv(train_data, 
          v = 5, 
          strata = mining_method) 

Set up the models

# Multinomial logistic regression
mtl_spec <- multinom_reg() |> 
  set_engine("nnet") |> 
  set_mode("classification")

# Random forest
rf_spec <- 
  rand_forest() |>
  set_engine("ranger", importance = "impurity") |>
  set_mode("classification")
#Boosted tree (XGBoost)
xgb_spec <- 
  boost_tree() |>
  set_engine("xgboost") |>
  set_mode("classification") 
# K-nearest neighbor
knn_spec <- 
  nearest_neighbor(neighbors = 5) |> # we can adjust the number of neighbors 
  set_engine("kknn") |>
  set_mode("classification") 
# Single-hidden-layer neural network (MLP)
nnet_spec <-
  mlp(penalty = 0.01) %>%
  set_mode("classification") |>
  set_engine("keras", verbose = 0) 
mtl_wflow <-
 workflow() %>%
 add_recipe(housing_rec) |>
 add_model(mtl_spec)

rf_wflow <-
 workflow() %>%
 add_recipe(housing_rec) |>
 add_model(rf_spec) 
xgb_wflow <-
 workflow() %>%
 add_recipe(housing_rec) |>
 add_model(xgb_spec)



knn_wflow <-
 workflow() %>%
 add_recipe(housing_rec) |>
 add_model(knn_spec)
nnet_wflow <-
 workflow() %>%
 add_recipe(housing_rec) |>
 add_model(nnet_spec)
# Helper: pull the fitted parsnip model out of a workflow and tidy its estimates
get_model <- function(x) {
  extract_fit_parsnip(x) |> tidy()
}

Multinomial regression

mtl_res <-
  mtl_wflow |>
  fit_resamples(
    resamples = cv_folds, 
    metrics = metric_set(
      f_meas, 
      yardstick::accuracy, kap,
      roc_auc, sens),
    control = control_resamples(save_pred = TRUE)
    ) 

mtl_res |> collect_metrics(summarize = TRUE)
# A tibble: 5 × 6
  .metric  .estimator  mean     n std_err .config             
  <chr>    <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy multiclass 0.963     5 0.00878 Preprocessor1_Model1
2 f_meas   macro      0.956     5 0.00907 Preprocessor1_Model1
3 kap      multiclass 0.956     5 0.00982 Preprocessor1_Model1
4 roc_auc  hand_till  0.997     5 0.00114 Preprocessor1_Model1
5 sens     macro      0.958     5 0.00888 Preprocessor1_Model1

Random forest

rf_res <-
  rf_wflow |>
  fit_resamples(
    resamples = cv_folds, 
    metrics = metric_set(
      f_meas, 
      yardstick::accuracy, kap,
      roc_auc, sens),
    control = control_resamples(save_pred = TRUE)
    ) 

rf_res |> collect_metrics(summarize = TRUE)
# A tibble: 5 × 6
  .metric  .estimator  mean     n std_err .config             
  <chr>    <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy multiclass 0.973     5 0.00471 Preprocessor1_Model1
2 f_meas   macro      0.973     5 0.00550 Preprocessor1_Model1
3 kap      multiclass 0.968     5 0.00542 Preprocessor1_Model1
4 roc_auc  hand_till  0.997     5 0.00258 Preprocessor1_Model1
5 sens     macro      0.966     5 0.00691 Preprocessor1_Model1

XGBoost

xgb_res <- 
  xgb_wflow |>
  fit_resamples(
    resamples = cv_folds, 
    metrics = metric_set(
     f_meas, 
      yardstick::accuracy, kap,
      roc_auc, sens),
    control = control_resamples(save_pred = TRUE)
    ) 

xgb_res |>collect_metrics(summarize = TRUE)
# A tibble: 5 × 6
  .metric  .estimator  mean     n  std_err .config             
  <chr>    <chr>      <dbl> <int>    <dbl> <chr>               
1 accuracy multiclass 0.963     5 0.0108   Preprocessor1_Model1
2 f_meas   macro      0.970     5 0.00851  Preprocessor1_Model1
3 kap      multiclass 0.956     5 0.0122   Preprocessor1_Model1
4 roc_auc  hand_till  1.00      5 0.000186 Preprocessor1_Model1
5 sens     macro      0.969     5 0.00900  Preprocessor1_Model1

K-nearest neighbor

knn_res <- 
  knn_wflow |>
  fit_resamples(
    resamples = cv_folds, 
    metrics = metric_set(
     f_meas, 
      yardstick::accuracy, kap,
      roc_auc, sens),
    control = control_resamples(save_pred = TRUE)
    ) 

knn_res |>collect_metrics(summarize = TRUE)
# A tibble: 5 × 6
  .metric  .estimator  mean     n std_err .config             
  <chr>    <chr>      <dbl> <int>   <dbl> <chr>               
1 accuracy multiclass 0.970     5 0.00989 Preprocessor1_Model1
2 f_meas   macro      0.969     5 0.00865 Preprocessor1_Model1
3 kap      multiclass 0.965     5 0.0113  Preprocessor1_Model1
4 roc_auc  hand_till  0.987     5 0.00705 Preprocessor1_Model1
5 sens     macro      0.969     5 0.0119  Preprocessor1_Model1

Compare models

Extract metrics from our models to compare them:

rf_metrics <- 
  rf_res |>
  collect_metrics(summarize = TRUE) %>%
  mutate(model = "Random Forest")

xgb_metrics <- 
  xgb_res |>
  collect_metrics(summarize = TRUE) %>%
  mutate(model = "XGBoost")

knn_metrics <- 
  knn_res |>
  collect_metrics(summarize = TRUE) %>%
  mutate(model = "Knn")

mtl_metrics <- 
  mtl_res |>
  collect_metrics(summarize = TRUE) %>%
  mutate(model = "Multinomial regression")
# create dataframe with all models
model_compare <- bind_rows(mtl_metrics,
                           rf_metrics,
                           xgb_metrics,
                           knn_metrics
                           ) 

# change data structure
model_comp <- 
  model_compare |>
  select(model, .metric, mean, std_err) |>
  pivot_wider(names_from = .metric, values_from = c(mean, std_err)) 


# show mean F1-Score for every model
model_comp |>
  arrange(mean_f_meas) |>
  mutate(model = fct_reorder(model, mean_f_meas)) |># order results
  ggplot(aes(model, mean_f_meas, fill=model)) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") +
   geom_text(
     size = 3,
     aes(label = round(mean_f_meas, 2), y = mean_f_meas + 0.08),
     vjust = 1
  )

# show mean area under the curve (auc) per model
model_comp |>
  arrange(mean_roc_auc) |>
  mutate(model = fct_reorder(model, mean_roc_auc)) %>%
  ggplot(aes(model, mean_roc_auc, fill=model)) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") + 
     geom_text(
     size = 3,
     aes(label = round(mean_roc_auc, 2), y = mean_roc_auc + 0.08),
     vjust = 1
  )

# show mean accuracy per model
model_comp |>
  arrange(mean_accuracy) |>
  mutate(model = fct_reorder(model, mean_accuracy)) %>%
  ggplot(aes(model, mean_accuracy, fill = model)) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") + 
  geom_text(
    size = 3,
    aes(label = round(mean_accuracy, 2), y = mean_accuracy + 0.08),
    vjust = 1
  )

Note that the results of all the models are quite similar. In this example we use accuracy as the performance measure for selecting the best model. Let’s find the maximum mean accuracy:

model_comp |>slice_max(mean_accuracy)
# A tibble: 1 × 11
  model         mean_accuracy mean_f_meas mean_kap mean_roc_auc mean_sens
  <chr>                 <dbl>       <dbl>    <dbl>        <dbl>     <dbl>
1 Random Forest         0.973       0.973    0.968        0.997     0.966
# ℹ 5 more variables: std_err_accuracy <dbl>, std_err_f_meas <dbl>,
#   std_err_kap <dbl>, std_err_roc_auc <dbl>, std_err_sens <dbl>
The random forest achieves the highest mean accuracy, so we fit it one final time on the full training set and evaluate it on the held-out test set with last_fit():

last_fit_rf <- last_fit(rf_wflow, 
                        split = data_split,
                        metrics = metric_set(
                          f_meas, 
                          yardstick::accuracy, kap,
                          roc_auc, sens)
                        )

Show performance metrics

last_fit_rf |>
  collect_metrics()
# A tibble: 5 × 4
  .metric  .estimator .estimate .config             
  <chr>    <chr>          <dbl> <chr>               
1 f_meas   macro          0.984 Preprocessor1_Model1
2 accuracy multiclass     0.964 Preprocessor1_Model1
3 kap      multiclass     0.955 Preprocessor1_Model1
4 sens     macro          0.981 Preprocessor1_Model1
5 roc_auc  hand_till      0.998 Preprocessor1_Model1
last_fit_rf %>%
  collect_predictions() |>
  conf_mat(mining_method, .pred_class) |>
  autoplot(type = "heatmap") +
  scale_fill_gradient(
    high = "#07193b",  
    low = "#4a6c75"  
  ) +
  labs(title = "Confusion matrix (Random Forest)")

Let’s create the ROC curves. Because this is a multiclass problem, roc_curve() needs the predicted probability for every mining method, so we pass it all of the .pred_* columns:

last_fit_rf |>
  collect_predictions() |>
  roc_curve(mining_method, `.pred_Apparent mining`:`.pred_Sublevel stoping`) |>
  autoplot()

Based on all of the results, the cross-validation and test set performance statistics are very close, so we can be reasonably confident that the random forest model (used here with its default hyperparameters) would perform well when predicting new data.
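
As an illustration, the finalised workflow can be pulled out of last_fit_rf and used to classify a hypothetical new ore body. This is only a sketch: new_deposit and its category values are made-up inputs chosen to match the cleaned predictor levels.

# Hypothetical example: classify a new deposit with the final fitted workflow
final_wflow <- extract_workflow(last_fit_rf)

new_deposit <- tibble(
  ore_stregth        = "Strong",   # illustrative values only
  host_rock_strength = "Moderate",
  thickness          = "Thick",
  shape              = "Tabular",
  dip                = "Steep",
  ore_uniformity     = "uniform",
  depth              = "Moderate",
  ore_grade          = "High"
)

predict(final_wflow, new_deposit)                 # predicted mining method
predict(final_wflow, new_deposit, type = "prob")  # class probabilities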

Neural net diagram

library(neuralnet)

# Fit a feed-forward network on the preprocessed (dummy-encoded) training data
model <- neuralnet(
  mining_method ~ .,
  data = mine_train,
  hidden = c(10, 12),    # two hidden layers with 10 and 12 neurons
  linear.output = FALSE  # classification, so apply the activation function at the output layer
)
plot(model, rep = "best")
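
The fitted network can also score the baked test set. As a rough sketch, predict() on a neuralnet model fitted to a factor outcome returns one probability column per level of mining_method, which can be mapped back to class labels:

# Sketch: class probabilities for the test set from the fitted network
probs <- predict(model, mine_test)

# Convert the probability matrix to predicted labels (columns follow the factor levels)
pred_class <- levels(mine_train$mining_method)[max.col(probs)]
head(pred_class)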