diff --git a/Optimize-Me-30nov2023.xlsx b/Optimize-Me-30nov2023.xlsx
index 963ad60..f2cfff1 100644
Binary files a/Optimize-Me-30nov2023.xlsx and b/Optimize-Me-30nov2023.xlsx differ
diff --git a/model-performances.png b/model-performances.png
index 11425e7..a9b2199 100644
Binary files a/model-performances.png and b/model-performances.png differ
diff --git a/optimize_stake.R b/optimize_stake.R
index 524d987..0d2aa29 100644
--- a/optimize_stake.R
+++ b/optimize_stake.R
@@ -31,8 +31,8 @@ options(scipen = 999)
 ## Vlad Input Data and Settings
 #
-NMR <- 142.6
 model_df <- read_excel("Optimize-Me-30nov2023.xlsx") # Read in the list of model names and their start (and possibly end) round.
+NMR <- 142.6 # sum(model_df$Notes) = 142.6
 colnames(model_df) <- c("name","start","notes")
 current_round <- Rnumerai::get_current_round()
 oldest_round <- min(model_df$start)

@@ -42,7 +42,7 @@ oldest_round <- min(model_df$start)

 ## Collect daily scores, and filter for models that have at least 60 data points (you shouldn't use Vlad for models with fewer data points)
 # I have set the starting round at 339, as that is the first round of the daily tournament, but you are free to change that round back and forth.
 #
-daily_data <- build_RAW(model_df, MinfromRound = 339, corr_multiplier = 0.5, mmc_multiplier = 3)
+daily_data <- build_RAW(model_df, MinfromRound = 339, corr_multiplier = 0.5, mmc_multiplier = 2)
 daily_data <- daily_data[,colnames(daily_data) %in% colnames(daily_data)[colSums(!is.na(daily_data)) > 60]]

@@ -79,9 +79,9 @@ ggsave("model-performances.png",scale=1,width=15,height=15)
 #
 # numerai_perf <- left_join(model_df,dplyr::select(model_stats,name,mean,drawdown)) %>% na.omit()
 # sum(numerai_perf$mean * numerai_perf$notes) / sum(numerai_perf$notes) # weighted mean
-benchmark_mean <- 0.00653
+benchmark_mean <- 0.00532
 # sum(numerai_perf$drawdown * numerai_perf$notes) / sum(numerai_perf$notes) # weighted drawdown
-benchmark_drawdown <- 0.841
+benchmark_drawdown <- 0.755
 #
 # Tweak these thresholds as you feel is most appropriate
 good_models <- model_stats %>% dplyr::filter(mean > benchmark_mean * 0.8, drawdown < benchmark_drawdown / 0.8)
@@ -137,35 +137,41 @@ condensed$stake <- round(condensed$stake)

 # kable(combined,digits=3)

-# |name                |weight |stake |mean   |Cov    |CVaR   |VaR    |samplesize |starting_round |
-# |:-------------------|:------|:-----|:------|:------|:------|:------|:----------|:--------------|
-# |INTEGRATION_TEST    |0.479  |68    |0.0196 |0.0201 |0.0117 |0.0079 |279        |339            |
-# |V42_LGBM_CLAUDIA20  |0.221  |32    |       |       |       |       |           |               |
-# |V42_LGBM_TEAGER20   |0.076  |11    |       |       |       |       |           |               |
-# |V42_LGBM_TEAGER60   |0.162  |23    |       |       |       |       |           |               |
-# |V4_LGBM_VICTOR20    |0.061  |9     |       |       |       |       |           |               |
-# |                    |       |      |       |       |       |       |           |               |
-# |INTEGRATION_TEST    |0.451  |64    |0.019  |0.0197 |0.0127 |0.0089 |279        |340            |
-# |V41_LGBM_CAROLINE20 |0.049  |7     |       |       |       |       |           |               |
-# |V42_LGBM_CLAUDIA20  |0.196  |28    |       |       |       |       |           |               |
-# |V42_LGBM_TEAGER20   |0.072  |10    |       |       |       |       |           |               |
-# |V42_LGBM_TEAGER60   |0.169  |24    |       |       |       |       |           |               |
-# |V4_LGBM_VICTOR20    |0.064  |9     |       |       |       |       |           |               |
-# |                    |       |      |       |       |       |       |           |               |
+# |name               |weight |stake |mean   |Cov    |CVaR   |VaR    |samplesize |starting_round |
+# |:------------------|:------|:-----|:------|:------|:------|:------|:----------|:--------------|
+# |INTEGRATION_TEST   |0.439  |63    |0.014  |0.0157 |0.0118 |0.0074 |289        |339            |
+# |V41_LGBM_XERXES20  |0.051  |7     |       |       |       |       |           |               |
+# |V42_LGBM_CLAUDIA20 |0.182  |26    |       |       |       |       |           |               |
+# |V42_LGBM_ROWAN20   |0.064  |9     |       |       |       |       |           |               |
+# |V42_LGBM_TEAGER20  |0.1    |14    |       |       |       |       |           |               |
+# |V42_LGBM_TEAGER60  |0.117  |17    |       |       |       |       |           |               |
+# |V4_LGBM_VICTOR20   |0.046  |7     |       |       |       |       |           |               |
+# |                   |       |      |       |       |       |       |           |               |
+# |INTEGRATION_TEST   |0.427  |61    |0.0142 |0.0163 |0.0129 |0.0084 |289        |340            |
+# |V41_EXAMPLE_PREDS  |0.051  |7     |       |       |       |       |           |               |
+# |V42_LGBM_CLAUDIA20 |0.163  |23    |       |       |       |       |           |               |
+# |V42_LGBM_ROWAN20   |0.063  |9     |       |       |       |       |           |               |
+# |V42_LGBM_TEAGER20  |0.099  |14    |       |       |       |       |           |               |
+# |V42_LGBM_TEAGER60  |0.152  |22    |       |       |       |       |           |               |
+# |V4_LGBM_VICTOR20   |0.045  |6     |       |       |       |       |           |               |
+# |                   |       |      |       |       |       |       |           |               |
+

 ## Printout combined portfolio across starting rounds (equal weight for each starting point for now)
 # For Numer.ai's models, the two timepoints (starting_round 339 and 340) don't matter much, so merging them changes little.
 #
 # kable(condensed,digits=3)

-# name                 | weight| stake|mean   |Cov    |CVaR  |VaR    |samplesize |
-# |:-------------------|------:|-----:|:------|:------|:-----|:------|:----------|
-# |INTEGRATION_TEST    |  0.465|    66|0.0193 |0.0199 |0.012 |0.0089 |279        |
-# |V41_LGBM_CAROLINE20 |  0.024|     4|       |       |      |       |           |
-# |V42_LGBM_CLAUDIA20  |  0.209|    30|       |       |      |       |           |
-# |V42_LGBM_TEAGER20   |  0.074|    10|       |       |      |       |           |
-# |V42_LGBM_TEAGER60   |  0.166|    24|       |       |      |       |           |
-# |V4_LGBM_VICTOR20    |  0.062|     9|       |       |      |       |           |
+# |name               | weight| stake|mean   |Cov   |CVaR   |VaR    |samplesize |
+# |:------------------|------:|-----:|:------|:-----|:------|:------|:----------|
+# |INTEGRATION_TEST   |  0.433|    62|0.0141 |0.016 |0.0123 |0.0078 |289        |
+# |V41_EXAMPLE_PREDS  |  0.026|     4|       |      |       |       |           |
+# |V41_LGBM_XERXES20  |  0.026|     4|       |      |       |       |           |
+# |V42_LGBM_CLAUDIA20 |  0.173|    25|       |      |       |       |           |
+# |V42_LGBM_ROWAN20   |  0.064|     9|       |      |       |       |           |
+# |V42_LGBM_TEAGER20  |  0.100|    14|       |      |       |       |           |
+# |V42_LGBM_TEAGER60  |  0.135|    20|       |      |       |       |           |
+# |V4_LGBM_VICTOR20   |  0.046|     7|       |      |       |       |           |

 # This is numer.ai's current stake distribution for models with a samplesize > 100. (nov 2023)

@@ -177,8 +183,8 @@ numerai_stake$weight <- numerai_stake$stake / sum(numerai_stake$stake)

 # You can calculate its return by feeding virtual_returns the daily_data and a data frame of c(name, weight)
 kable(virtual_returns(daily_data,numerai_stake), digits=4)

-# |   mean|    Cov|   CVaR|   VaR| samplesize|
-# |------:|------:|------:|-----:|----------:|
-# | 0.0066| 0.0257| 0.0486| 0.038|        278|
+# |   mean|  Cov|  CVaR|    VaR| samplesize|
+# |------:|----:|-----:|------:|----------:|
+# | 0.0054| 0.02| 0.038| 0.0298|        288|
 # So, using Vlad suggests a higher account-wide return is possible against a lower CVaR and VaR. Time for stake management? :-).
\ No newline at end of file
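
A note on the mmc_multiplier change above: build_RAW scales each model's daily CORR and MMC scores by the multipliers it is given, so moving mmc_multiplier from 3 to 2 rescales every daily data point the optimizer sees, which is why the benchmark figures are recomputed in this diff. A minimal sketch of that weighting, assuming per-round corr and mmc vectors; combine_scores is a hypothetical helper for illustration, not part of Vlad:

# Hypothetical helper (not part of Vlad): combine raw per-round CORR and MMC
# into the single payout-weighted daily score the optimizer consumes.
combine_scores <- function(corr, mmc, corr_multiplier = 0.5, mmc_multiplier = 2) {
  corr_multiplier * corr + mmc_multiplier * mmc
}

# A round with corr = 0.02 and mmc = 0.01 now contributes
# 0.5 * 0.02 + 2 * 0.01 = 0.03, versus 0.04 under the old mmc_multiplier = 3.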
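For readers who want to sanity-check the virtual_returns output without the package, here is a rough sketch of what such a summary computes, assuming daily_data carries one column of daily payout scores per model and numerai_stake has name and weight columns. portfolio_stats is a hypothetical stand-in, not Vlad's implementation; the Cov column of the real output is omitted and the sign conventions are an assumption (VaR and CVaR reported as positive losses).

# Hypothetical stand-in for virtual_returns, for illustration only.
portfolio_stats <- function(daily_data, weights, alpha = 0.05) {
  scores <- as.matrix(daily_data[, weights$name, drop = FALSE])
  port   <- as.vector(scores %*% weights$weight)  # weighted daily portfolio score
  port   <- port[!is.na(port)]                    # keep rounds where all models have data
  q      <- quantile(port, probs = alpha, names = FALSE)
  data.frame(mean       = mean(port),
             CVaR       = -mean(port[port <= q]), # expected shortfall in the worst alpha tail
             VaR        = -q,                     # alpha-quantile loss
             samplesize = length(port))
}

# Usage mirrors the call in the diff:
# kable(portfolio_stats(daily_data, numerai_stake), digits = 4)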