IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    Comparing Many Models: Crude Oil Futures Price

    Selcuk Disci发表于 2024-08-22 12:32:13
    love 0
    [This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

    Crude oil futures prices have fluctuated above the point forecast line (XGBoost) year to date.

    library(tidyverse)
    library(tidymodels)
    library(tidyquant)
    library(timetk)
    library(modeltime)
    library(workflowsets)
    
    
    #Crude Oil Futures(USD) (Index 2014 = 100)
    df_crude_oil <- 
      tq_get("CL=F") %>% 
      tq_transmute(select = close,
                   mutate_fun = to.monthly,
                   col_rename = "crude_oil") %>% 
      mutate(date = as.Date(date))
    
    
    #Federal Funds Effective Rate
    df_fed_funds <- 
      tq_get("FEDFUNDS", get = "economic.data") %>% 
      select(date, fed_funds = price)
    
    #Industrial Production: Mining: Crude Oil (NAICS = 21112) (Index 2014 = 100)
    df_crude_oil_production <- 
      tq_get("IPG21112S", get = "economic.data") %>% 
      select(date, crude_oil_production = price) 
    
    #Merging all the data sets
    df_merged <- 
      df_crude_oil %>%
      left_join(df_fed_funds) %>% 
      left_join(df_crude_oil_production) %>% 
      drop_na()
    
    
    #Splitting tha data
    df_split <- 
      df_merged %>% 
      time_series_split(assess = "1 year",
                        cumulative = TRUE)
    
    df_train <- training(df_split)
    df_test <- testing(df_split)
    
    #Bootstrapping for tuning
    set.seed(12345)
    df_folds <- bootstraps(df_train,
                           times = 100)
    
    
    #Preprocessing
    rec_all <- 
      recipe(crude_oil ~ ., data = df_train) %>% 
      step_mutate(date_num = as.numeric(date)) %>% 
      step_rm(date) %>% 
      step_normalize(all_numeric_predictors()) 
      
    
    #Models
    
    #Radial basis function support vector machines (SVMs) via kernlab
    mod_svm_rbf <- 
      svm_rbf(cost = tune(),
              rbf_sigma = tune(), 
              margin = tune()) %>%  
      set_engine("kernlab") %>% 
      set_mode("regression")
    
    
    #Multivariate adaptive regression splines (MARS) via earth
    mod_mars <- 
      mars(num_terms = tune(), 
           prune_method = tune()) %>% 
      set_engine("earth", nfold = 10) %>% 
      set_mode("regression") 
    
    
    #Boosted trees via xgboost
    mod_boost_tree <- 
      boost_tree(mtry = tune(), 
                 trees = tune(), 
                 min_n = tune(), 
                 learn_rate = tune()) %>%
      set_engine("xgboost") %>%
      set_mode("regression") 
    
    
    
    #Workflow sets
    wflow_all <- 
      workflow_set(
        preproc = list(svm_all = rec_all),
        models = list(SVM_rbf = mod_svm_rbf,
                      MARS = mod_mars,
                      XGBoost = mod_boost_tree)) %>% 
      #Making the workflow ID's a little more simple:
      mutate(wflow_id = str_remove(wflow_id, "svm_all_"))
    
    
    
    #Tuning and evaluating all the models
    grid_ctrl <-
      control_grid(
        save_pred = TRUE,
        parallel_over = "everything",
        save_workflow = TRUE
      )
    
    grid_results <-
      wflow_all %>%
      workflow_map(
        seed = 98765,
        resamples = df_folds,
        grid = 10,
        control = grid_ctrl
      )
    
    
    #Accuracy of the grid results
    grid_results %>% 
      rank_results(select_best = TRUE, 
                   rank_metric = "rsq") %>%
      select(Models = wflow_id, .metric, mean)
    
    
    #Finalizing the model with the best parameters
    best_param <- 
      grid_results %>%
      extract_workflow_set_result("XGBoost") %>% 
      select_best(metric = "rsq")
    
    
    wflw_fit <- 
      grid_results %>% 
      extract_workflow("XGBoost") %>% 
      finalize_workflow(best_param) %>% 
      fit(df_train)
    
    #Calibration data
    df_cal <- 
      wflw_fit %>% 
      modeltime_calibrate(new_data = df_test)
    
    #Predictive intervals for XGBoost
    df_cal %>%
      modeltime_forecast(actual_data = df_merged %>% 
                                       filter(date >= last(date) - months(12)),
                         new_data = df_test) %>%
      plot_modeltime_forecast(.interactive = FALSE,
                              .legend_show = FALSE,
                              .line_size = 1,
                              .color_lab = "",
                              .title = "Confidence Intervals for Crude Oil Future Prices (WTI)") +
      labs(subtitle = "Using XGBoost") + 
      theme_minimal(base_family = "Bricolage Grotesque",
                    base_size = 16)
    
    

    To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

    R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
    Continue reading: Comparing Many Models: Crude Oil Futures Price


沪ICP备19023445号-2号
友情链接