IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    Time Series Machine Learning: Shanghai Composite

    Selcuk Disci发表于 2024-11-13 09:00:00
    love 0
    [This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

    Shanghai Composite does not seem to be at an ideal point for entry.

    Source code:

    library(tidyverse)
    library(tidyquant)
    library(timetk)
    library(tidymodels)
    library(modeltime)
    library(workflowsets)
    
    #Shanghai Composite Index (000001.SS)
    df_shanghai <- 
      tq_get("000001.SS", from = "2015-09-01") %>% 
      tq_transmute(select = close,
                   mutate_fun = to.monthly,
                   col_rename = "sse") %>% 
      mutate(date = as.Date(date)) 
    
    #Splitting
    split <- 
      df_shanghai %>% 
      time_series_split(assess = "1 year", 
                        cumulative = TRUE)
    
    df_train <- training(split)
    df_test <- testing(split)
    
    #Time series cross validation for tuning
    df_folds <- time_series_cv(df_train,
                               initial = 77, 
                               assess = 12)
    
    
    #Preprocessing 
    rec <- 
      recipe(sse ~ date, data = df_train) %>% 
      step_mutate(date_num = as.numeric(date)) %>% 
      step_date(date, features = "month") %>% 
      step_dummy(date_month, one_hot = TRUE) %>% 
      step_normalize(all_numeric_predictors())
    
    
    rec %>% 
      prep() %>% 
      bake(new_data = NULL) %>% view()
    
    
    #Model
    mod <- 
      arima_boost(
        min_n = tune(),
        learn_rate = tune(),
        trees = tune()
      ) %>%
      set_engine(engine = "auto_arima_xgboost")
    
    #Workflow set
    wflow_mod <- 
      workflow_set(
        preproc = list(rec = rec),
        models = list(mod = mod)
      ) 
    
    
    #Tuning and evaluating the model on all the samples
    grid_ctrl <-
      control_grid(
        save_pred = TRUE,
        parallel_over = "everything",
        save_workflow = TRUE
      )
    
    grid_results <-
      wflow_mod %>%
      workflow_map(
        seed = 98765,
        resamples = df_folds,
        grid = 10,
        control = grid_ctrl
      )
    
    
    #Accuracy of the grid results
    grid_results %>% 
      rank_results(select_best = TRUE, 
                   rank_metric = "rmse") %>%
      select(Models = wflow_id, .metric, mean)
    
    
    #Finalizing the model with the best parameters
    best_param <- 
      grid_results %>%
      extract_workflow_set_result("rec_mod") %>% 
      select_best(metric = "rmse")
    
    
    wflw_fit <- 
      grid_results %>% 
      extract_workflow("rec_mod") %>% 
      finalize_workflow(best_param) %>% 
      fit(df_train)
    
    
    #Calibrate the model to the testing set
    calibration_boost <- 
      wflw_fit %>%
      modeltime_calibrate(new_data = df_test)
    
    #Accuracy of the finalized model
    calibration_boost %>%
      modeltime_accuracy(metric_set = metric_set(mape, smape))
    
    
    
    #Predictive intervals
    calibration_boost %>%
      modeltime_forecast(actual_data = df_merged %>% 
                           filter(date >= last(date) - months(12)),
                         new_data = df_test) %>%
      plot_modeltime_forecast(.interactive = FALSE,
                              .legend_show = FALSE,
                              .line_size = 1.5,
                              .color_lab = "",
                              .title = "Shanghai Composite Index") +
      geom_point(aes(color = .key)) +
      labs(subtitle = "Monthly Data<br><span style = 'color:darkgrey;'>Predictive Intervals</span><br><span style = 'color:red;'>Point Forecast Line</span>") + 
      scale_x_date(breaks = c(make_date(2023,11,1), 
                              make_date(2024,5,1),
                              make_date(2024,10,1)),
                   labels = scales::label_date(format = "%b'%y"),
                   expand = expansion(mult = c(.1, .1))) +
      ggthemes::theme_wsj(
        base_family = "Roboto Slab",
        title_family = "Roboto Slab",
        color = "blue",
        base_size = 12) +
      theme(legend.position = "none",
            plot.background = element_rect(fill = "lightyellow", color = "lightyellow"),
            plot.title = element_text(size = 24),
            axis.text = element_text(size = 16),
            plot.subtitle = ggtext::element_markdown(size = 20, face = "bold"))
    
    

    To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

    R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
    Continue reading: Time Series Machine Learning: Shanghai Composite


沪ICP备19023445号-2号
友情链接