IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    Comparing Many Models: An Uptrend for Nvidia?

    Selcuk Disci发表于 2024-09-24 09:00:00
    love 0
    [This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

    Since April, Nvidia has tried to hold above the point forecasting line. As predictive intervals’ slopes are slightly up; could Nvidia continue an uptrend?

    Source code:

    library(tidyverse)
    library(timetk)
    library(tidymodels)
    library(modeltime)
    library(ggthemes)
    
    #Nvidia (monthly)
    df_nvidia <- 
      tq_get("NVDA", to = "2024-08-29") %>%
      tq_transmute(select = close,
                   mutate_fun = to.monthly,
                   col_rename = "nvidia") %>% 
      mutate(date = as.Date(date))
    
    
    
    #FEDFUNDS
    df_fedfunds <- read_csv("fedfunds.csv")
    
    #Tidying the data
    df_fedfunds_tidy <- 
      df_fedfunds %>% 
      janitor::clean_names() %>% 
      select(date = release_date, fedfunds = actual) %>% 
      #Converts string to date object
      mutate(date = case_when(
        !is.na(parse_date(date, format = "%b %d, %Y")) ~ parse_date(date, format = "%b %d, %Y"),
        !is.na(parse_date(date, format = "%d-%b-%y")) ~ parse_date(date, format = "%d-%b-%y")
      )) %>% 
      #Removes the first three blank rows
      slice_tail(n = -3) %>% 
      mutate(date = floor_date(date, "month") %m+% months(1),
             fedfunds = str_remove(fedfunds, "%") %>% as.numeric()) %>% 
      #makes regular time series by filling the time gaps
      pad_by_time(date, .by = "month") %>% 
      fill(fedfunds, .direction = "up") %>% 
      distinct(date, .keep_all = TRUE)
    
    #Mergs all the data sets
    df_merged <- 
      df_nvidia %>% 
      left_join(df_fedfunds_tidy)
    
    
    #Modeling
    
    #Splitting tha data
    df_split <- 
      df_merged %>% 
      time_series_split(assess = "1 year",
                        cumulative = TRUE)
    
    df_train <- training(df_split)
    df_test <- testing(df_split)
    
    #Bootstrapping for tuning
    set.seed(12345)
    df_folds <- bootstraps(df_train,
                           times = 100)
    
    
    #Preprocessing
    
    #Preprocessing for Boosting ARIMA
    rec_arima_boost <- 
      recipe(nvidia ~ ., data = df_train) %>% 
      step_date(date, features = c("year", "month")) %>% 
      step_dummy(date_month, one_hot = TRUE) %>% 
      step_normalize(all_numeric_predictors())
    
    #Preprocessing for XGBoost and MARS
    rec_mars_xgboost <- 
      rec_arima_boost %>% 
      step_rm(date)
    
    
    #Models
    
    #Boosted ARIMA Regression
    #(https://business-science.github.io/modeltime/reference/arima_boost.html)
    mod_arima_boost <- 
      arima_boost(
        min_n = tune(),
        learn_rate = tune(),
        trees = tune()
      ) %>%
      set_engine(engine = "auto_arima_xgboost")
    
    # Model 1: auto_arima ----
    arima_reg() %>%
      set_engine(engine = "auto_arima") %>%
      fit(nvidia ~ ., data = df_merged)
    
    
    
    #Multivariate adaptive regression splines (MARS) via earth
    #(https://parsnip.tidymodels.org/reference/details_mars_earth.html)
    mod_mars <- 
      mars(num_terms = tune(), 
           prune_method = tune()) %>% 
      set_engine("earth", nfold = 10) %>% 
      set_mode("regression") 
    
    
    #Boosted trees via xgboost
    #(https://parsnip.tidymodels.org/reference/details_boost_tree_xgboost.html)
    mod_boost_tree <- 
      boost_tree(mtry = tune(), 
                 trees = tune(), 
                 min_n = tune(), 
                 learn_rate = tune()) %>%
      set_engine("xgboost") %>%
      set_mode("regression") 
    
    
    
    #Workflow sets
    
    wflow_arima_boost <- 
      workflow_set(
        preproc = list(rec_arima_boost = rec_arima_boost),
        models = list(ARIMA_boost = mod_arima_boost)
      ) 
    
    wflow_mars_xgboost <- 
      workflow_set(
        preproc = list(rec_mars_xgboost = rec_mars_xgboost),
        models = list(MARS = mod_mars,
                      XGBoost = mod_boost_tree)
      )
    
    #Combining all the workflows
    wflow_all <- 
      bind_rows(
        wflow_arima_boost,
        wflow_mars_xgboost
      ) %>% 
      #Making the workflow ID's a little more simple:
      mutate(wflow_id = str_remove(wflow_id, "(rec_arima_boost_)|(rec_mars_xgboost_)"))
    
    
    
    #Tuning and evaluating all the models
    grid_ctrl <-
      control_grid(
        save_pred = TRUE,
        parallel_over = "everything",
        save_workflow = TRUE
      )
    
    grid_results <-
      wflow_all %>%
      workflow_map(
        seed = 98765,
        resamples = df_folds,
        grid = 10,
        control = grid_ctrl
      )
    
    
    #Accuracy of the grid results
    grid_results %>% 
      rank_results(select_best = TRUE, 
                   rank_metric = "rsq") %>%
      select(Models = wflow_id, .metric, mean)
    
    
    #Finalizing the model with the best parameters
    best_param <- 
      grid_results %>%
      extract_workflow_set_result("ARIMA_boost") %>% 
      select_best(metric = "rsq")
    
    
    wflw_fit <- 
      grid_results %>% 
      extract_workflow("ARIMA_boost") %>% 
      finalize_workflow(best_param) %>% 
      fit(df_train)
    
    #Calibration data
    df_cal <- 
      wflw_fit %>% 
      modeltime_calibrate(new_data = df_test)
    
    #Predictive intervals for Boosted ARIMA
    df_cal %>%
      modeltime_forecast(actual_data = df_merged %>% 
                           filter(date >= last(date) - months(12)),
                         new_data = df_test) %>%
      plot_modeltime_forecast(.interactive = FALSE,
                              .legend_show = FALSE,
                              .line_size = 1,
                              .color_lab = "",
                              .title = "Predictive Intervals for Nvidia") +
      labs(subtitle = "Monthly Stock Prices<br><span style = 'color:red;'>Boosted ARIMA Point Forecasting Line</span>") + 
      scale_x_date(breaks = c(make_date(2023,8,1), 
                              make_date(2024,4,1),
                              make_date(2024,8,1)),
                   labels = scales::label_date(format = "%Y %b"),
                   expand = expansion(mult = c(.1, .1))) +
      theme_wsj(base_family = "Bricolage Grotesque",
                color = "grey",
                base_size = 12) +
      theme(legend.position = "none",
            plot.subtitle = ggtext::element_markdown(size = 17, face = "bold"))
    
    To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

    R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
    Continue reading: Comparing Many Models: An Uptrend for Nvidia?


沪ICP备19023445号-2号
友情链接