IT博客汇
  • 首页
  • 精华
  • 技术
  • 设计
  • 资讯
  • 扯淡
  • 权利声明
  • 登录 注册

    Impact of Budget Deficits on Treasury Yields with XGBoost

    Selcuk Disci发表于 2025-06-11 09:00:00
    love 0
    [This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

    Charles Schwab analysts said that historically, budget deficits have had minimal impact on Treasury yields, primarily due to the United States’ economic dominance and its status as the issuer of the world’s reserve currency.

    The variable importance analysis with the XGBoost machine learning model confirms the aforementioned statement.

    Source code:

    library(tidyverse)
    library(tidymodels)
    library(tidyquant)
    
    #30-year Treasury yield (^TYX) 
    df_yield_30 <- 
      tq_get("^TYX") %>% 
      tq_transmute(select = close,
                   mutate_fun = to.monthly,
                   col_rename = "yield_30") %>% 
      mutate(date = as.Date(date))
    
    #Federal Surplus or Deficit [-] (MTSDS133FMS)
    df_deficit <- 
      tq_get("MTSDS133FMS", get = "economic.data") %>% 
      select(date, deficit = price)
    
    #Merging the datasets
    df_merged <- 
      df_yield_30 %>% 
      left_join(df_deficit) %>% 
      drop_na()
    
    #Data split
    splits <- initial_time_split(df_merged, prop = 0.8)
    df_train <- training(splits)
    df_test <- testing(splits)
    
    #Bootstrapping for tuning
    set.seed(12345)
    df_folds <- bootstraps(df_train,
                           times = 100)
    #Model
    model_spec <- 
      boost_tree(trees = tune(),
                 learn_rate = tune()) %>%
      set_engine("xgboost") %>% 
      set_mode("regression")
    
    #Preprocessing
    recipe_spec <- 
      recipe(yield_30 ~ ., data = df_train) %>%
      step_date(date, features = "month", ordinal = FALSE) %>%
      step_dummy(all_nominal_predictors(), one_hot = TRUE) %>% 
      step_mutate(date_num = as.numeric(date)) %>%
      step_normalize(all_numeric_predictors()) %>%
      step_rm(date)
    
    
    #Workflow sets
    wflow_xgboost <- 
      workflow_set(
        preproc = list(recipe = recipe_spec),
        models = list(model = model_spec)
      ) 
    
    
    #Tuning and evaluating all the models
    grid_ctrl <-
      control_grid(
        save_pred = TRUE,
        parallel_over = "everything",
        save_workflow = TRUE
      )
    
    grid_results <-
      wflow_xgboost %>%
      workflow_map(
        seed = 98765,
        resamples = df_folds,
        grid = 10,
        control = grid_ctrl
      )
    
    #Accuracy of the grid results
    grid_results %>% 
      rank_results(select_best = TRUE, 
                   rank_metric = "rsq") %>%
      select(Models = wflow_id, .metric, mean)
    
    
    #Finalizing the model with the best parameters
    best_param <- 
      grid_results %>%
      extract_workflow_set_result("recipe_model") %>% 
      select_best(metric = "rsq")
    
    
    wflw_fit <- 
      grid_results %>% 
      extract_workflow("recipe_model") %>% 
      finalize_workflow(best_param) %>% 
      fit(df_train)
    
    
    #Variable importance
    library(DALEXtra)
    
    #Fitted workflow for KNN
    set.seed(98765)
    
    knn_wflow_fitted <- 
      workflow() %>% 
      add_recipe(rec_features) %>% 
      add_model(spec_knn) %>% 
      fit(df_train)
    
    #Processed data frame for variable importance calculation
    imp_data <- 
      recipe_spec %>% 
      prep() %>% 
      bake(new_data = NULL) 
    
    #Explainer object
    explainer_xgboost <- 
      explain_tidymodels(
        wflw_fit %>% extract_fit_parsnip(), 
        data = imp_data %>% select(-yield_30), 
        y = imp_data$yield_30,
        label = "",
        verbose = FALSE
      )
    
    
    #Calculating permutation-based variable importance 
    set.seed(1983)
    vip_xgboost <- model_parts(explainer_xgboost, 
                               loss_function = loss_root_mean_square,
                               type = "difference",
                               B = 100,#the number of permutations
                               label = "")
    
    #Plot VIP
    vip_xgboost %>% 
      plot() +
      labs(color = "",
           x = "",
           y = "",
           subtitle = "Higher indicates more important",
           title = "Factors Affecting 30-year Treasury Yield") +
      theme_minimal(base_family = "Roboto Slab",
                    base_size = 16) +
      theme(legend.position = "none",
            plot.title = element_text(hjust = 0.5, 
                                      size = 14,
                                      face = "bold"),
            plot.subtitle = element_text(hjust = 0.5, size = 12),
            panel.grid.minor.x = element_blank(),
            panel.grid.major.y = element_blank(),
            plot.background = element_rect(fill = "azure"))
    
    To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

    R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
    Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
    Continue reading: Impact of Budget Deficits on Treasury Yields with XGBoost


沪ICP备19023445号-2号
友情链接