-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgather_daily_shorts.R
182 lines (136 loc) · 5.74 KB
/
gather_daily_shorts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# LIBRARIES ----
library(rvest)
library(tidyverse)
library(lubridate)
library(wesanderson)
library(ggtext)
# Scrape short sales data from the ASIC website
# daily_shorts <- read_csv("https://asic.gov.au/Reports/Daily/2021/04/RR20210412-001-SSDailyAggShortPos.csv")
url <- "http://asic.gov.au/regulatory-resources/markets/short-selling/short-position-reports-table/"
pg <- read_html(url)
# OBTAIN CSVs ----
# Collect every CSV link on the page. Anchor the pattern so only hrefs that
# truly end in ".csv" match: in the original pattern ".csv" the dot is a
# regex wildcard, so it would also match names like "Xcsv" or "abc.csvx".
csv_list <- pg %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  str_subset("\\.csv$")
# Link to latest is 2nd, based on current website configuration
latest <- csv_list[2]
url_begin <- "http://asic.gov.au"
# str_c concatenates with no separator by default
latest_csv <- str_c(url_begin, latest)
# Review
latest_csv
# READ IN DATA ----
# The ASIC "CSV" is really a UTF-16, tab-delimited file, which
# readr::read_csv cannot parse directly. read.delim is base R's
# tab-separated reader (header = TRUE and sep = "\t" by default).
daily_shorts <- read.delim(latest_csv,
                           fileEncoding = "utf-16")
# DATA WRANGLING 1 ----
# Tidy the raw download: trim whitespace, keep/rename the columns of
# interest, precompute a bar-label position, and stamp the trade date.
daily_shorts_cleaned <- daily_shorts %>%
  as_tibble() %>%
  # Remove padding around product codes (and every other character column)
  mutate(across(where(is.character), str_trim)) %>%
  # Keep only the columns we need, with friendlier names
  select(company = Product,
         ticker = Product.Code,
         short_ratio = X..of.Total.Product.in.Issue.Reported.as.Short.Positions) %>%
  # Position for a text label half way up each ticker's bar
  group_by(ticker) %>%
  mutate(label_y = cumsum(short_ratio) - 0.5 * short_ratio) %>%
  ungroup() %>%
  # Trade date parsed from the file name ("...RRyyyymmdd-...");
  # may be used later to look at changes over time.
  mutate(trade_date = latest %>%
           str_split("RR", simplify = TRUE) %>%
           .[, 2] %>%
           str_sub(1, 8) %>%
           as_date())
# Review
daily_shorts_cleaned
# VISUALISATION 1 ----
# Horizontal bar chart of the 50 most-shorted tickers, labelled mid-bar.
p_top_50 <- daily_shorts_cleaned %>%
  arrange(desc(short_ratio)) %>%
  slice(1:50) %>%
  ggplot(aes(reorder(ticker, short_ratio), short_ratio, fill = short_ratio)) +
  # geom_col() is geom_bar(stat = "identity")
  geom_col() +
  geom_text(aes(y = label_y, label = str_glue("{round(short_ratio, 2)} %")),
            size = 3.25, colour = "white", vjust = 0.3) +
  # NOTE(review): breaks span the FULL data's max, not just the top 50 —
  # harmless here since the top 50 includes the maximum; confirm if reused.
  scale_y_continuous(expand = c(0, 0),
                     breaks = seq(0, max(daily_shorts_cleaned$short_ratio), 3)) +
  coord_flip() +
  theme_light() +
  # Reversed gradient: the largest short ratios get the darker colour
  scale_fill_gradient(trans = "reverse", low = "#79402E", high = "#CCBA72") +
  labs(title = "Top 50 most Shorted ASX stocks",
       x = "",
       y = "Short Positions / Shares Outstanding (%)",
       caption = "Source: @GrantChalmers | https://asic.gov.au/") +
  theme(plot.caption = element_text(size = 8, color = "gray50", face = "italic"),
        plot.background = element_rect(fill = 'antiquewhite', colour = 'antiquewhite'),
        panel.background = element_rect(fill = 'snow'),
        legend.position = "none")
print(p_top_50)
# Save ggplot
ggsave("top_50_shorted_asx_stocks.png", plot = p_top_50, path = "images",
       width = 5, height = 8)
# OPTIONAL - archive data in RDS file ------------------------------------------
# This script could be setup to run as a cron job (or Windows Task Scheduler)
# every weekday afternoon.
history_path <- "data/daily_shorts.rds"
# Read historical rds file. On the very first run the archive does not
# exist yet; fall back to a zero-row history with the same columns instead
# of erroring (removes the original's manual one-time seeding step).
daily_shorts_history <- if (file.exists(history_path)) {
  read_rds(history_path)
} else {
  daily_shorts_cleaned[0, ]
}
# Combine with latest and remove any duplicates (re-running the script on
# the same day must not double-count rows)
combined_tbl <- daily_shorts_history %>%
  bind_rows(daily_shorts_cleaned) %>%
  distinct()
# Update rds with latest information
combined_tbl %>% write_rds(history_path)
# Gather mean week-over-week change
# BUG FIX: `change` must be computed within each ticker. The original
# lagged after ungroup(), so the first week of each ticker subtracted the
# LAST week of the previous (alphabetical) ticker — a meaningless value.
# NOTE(review): week() pairs naturally with year(), while isoyear() pairs
# with isoweek(); the mixed pairing here can misbucket dates at year
# boundaries — confirm which calendar is intended.
wk_over_wk_chg <- combined_tbl %>%
  mutate(week = week(trade_date),
         year = isoyear(trade_date)) %>%
  group_by(ticker, year, week) %>%
  summarise(mean_short = mean(short_ratio), .groups = "drop") %>%
  arrange(ticker, year, week) %>%
  # Lag within ticker only, so change is NA for a ticker's first week
  group_by(ticker) %>%
  mutate(change = mean_short - lag(mean_short)) %>%
  ungroup()
# DATA WRANGLING 2 ----
# Generate a top and bottom tibble, then bind rows
# Too much data to view all
# BUG FIX: restrict to the most recent year AND week. Filtering on
# max(week) alone breaks across a year boundary — week 52 of the old year
# outranks week 1 of the new year, so January runs would silently plot
# December data.
latest_week_chg <- wk_over_wk_chg %>%
  filter(year == max(year)) %>%
  filter(week == max(week))
# 30 biggest increases in short interest
top_30 <- latest_week_chg %>%
  arrange(desc(change)) %>%
  slice(1:30)
# 30 biggest decreases
bottom_30 <- latest_week_chg %>%
  arrange(change) %>%
  slice(1:30)
top_30_bottom_30 <- bind_rows(top_30, bottom_30)
# VISUALISATION 2 ----
# Lollipop chart: mean week-over-week change for the 30 biggest risers and
# the 30 biggest fallers, coloured by sign of the change.
p_movement <- top_30_bottom_30 %>%
  filter(week == max(week)) %>%
  arrange(desc(change)) %>%
  ggplot(aes(x = fct_reorder(ticker, change), y = change)) +
  geom_point(aes(colour = (change <= 0)), size = 2) +
  # Stems from zero up/down to each ticker's change
  geom_segment(aes(x = ticker, xend = ticker,
                   y = 0, yend = change,
                   colour = (change <= 0)),
               size = 0.75) +
  scale_colour_manual(values = wes_palette("Moonrise2")) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 12)) +
  coord_flip() +
  theme_light() +
  # Title uses inline HTML colour spans, rendered by ggtext's
  # element_markdown below
  labs(title = "Movement in Shorted ASX Stocks (<span style='color:#798E87'>top</span> 30 & <span style='color:#C27D38'>bottom</span> 30)",
       y = "Short Positions / Shares Outstanding Week over Week (mean) Change (%)", x = NULL,
       caption = "Source: @GrantChalmers | https://asic.gov.au/") +
  theme(plot.title = element_markdown(face = "bold", size = 11),
        # plot.title = element_text(size = 11, face = "bold"),
        axis.text.x = element_text(size = 10, angle = 00),
        axis.title.x = element_text(size = 9),
        axis.title = element_text(size = 11), legend.position = "none",
        plot.caption = element_text(size = 8, color = "gray50", face = "italic"),
        plot.background = element_rect(fill = 'antiquewhite', colour = 'antiquewhite'),
        panel.background = element_rect(fill = 'snow'),
        legend.title = element_blank())
print(p_movement)
# Save ggplot
ggsave("top_bottom_30_shorted_asx_stocks.png", plot = p_movement, path = "images",
       width = 6, height = 10)
# REFERENCES ----
# https://rpubs.com/Cormac/313070