Skip to content

Commit

Permalink
Merge pull request #2140 from ranaroussi/dev
Browse files Browse the repository at this point in the history
dev -> main
  • Loading branch information
ValueRaider authored Nov 19, 2024
2 parents b1d9f54 + 2167fb3 commit e797a30
Show file tree
Hide file tree
Showing 9 changed files with 3,853 additions and 3,443 deletions.
1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

1,389 changes: 724 additions & 665 deletions tests/data/KWS-L-1d-bad-div.csv

Large diffs are not rendered by default.

816 changes: 428 additions & 388 deletions tests/data/NVT-L-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

546 changes: 293 additions & 253 deletions tests/data/NVT-L-1d-bad-div.csv

Large diffs are not rendered by default.

886 changes: 460 additions & 426 deletions tests/data/SCR-TO-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

480 changes: 257 additions & 223 deletions tests/data/SCR-TO-1d-bad-div.csv

Large diffs are not rendered by default.

1,109 changes: 586 additions & 523 deletions tests/data/SOLB-BR-1d-bad-div-fixed.csv

Large diffs are not rendered by default.

615 changes: 339 additions & 276 deletions tests/data/SOLB-BR-1d-bad-div.csv

Large diffs are not rendered by default.

66 changes: 42 additions & 24 deletions yfinance/scrapers/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ def _fix_bad_div_adjust(self, df, interval, currency):
typical_volatility = np.nan
else:
diffs = df2['Close'].iloc[start:end-1].to_numpy() - df2['Low'].iloc[start+1:end].to_numpy()
typical_volatility = np.median(np.abs(diffs))
typical_volatility = np.mean(np.abs(diffs))

possibilities = []
if (drops==0.0).all() and df2['Volume'].iloc[div_idx]==0:
Expand Down Expand Up @@ -1681,10 +1681,6 @@ def cluster_dividends(df, column='div', threshold=7):
div_status_df.loc[phantom_div_dt, c] = False
checks.append('phantom')

if not div_status_df[checks].any().any():
# Perfect
return df

# Remove phantoms early
if 'phantom' in div_status_df.columns:
f_phantom = div_status_df['phantom']
Expand All @@ -1709,6 +1705,29 @@ def cluster_dividends(df, column='div', threshold=7):
if 'phantom' in checks:
checks.remove('phantom')

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

if not div_status_df[checks].any().any():
# Perfect
if df_modified:
return df2
else:
return df

# Check if the present div-adjustment contradicts price action
for i in range(len(div_status_df)):
div_idx = div_status_df['idx'].iloc[i]
Expand Down Expand Up @@ -1789,7 +1808,8 @@ def cluster_dividends(df, column='div', threshold=7):
elif adjDelta_drop > 0.39*adjDiv:
# Still true that applied adjustment exceeds price action,
# just not clear what solution is (if any).
div_adj_exceeds_prices = True
if (x['Adj']<1.0).any():
div_adj_exceeds_prices = True
break

# Can prune the space:
Expand Down Expand Up @@ -1843,22 +1863,6 @@ def cluster_dividends(df, column='div', threshold=7):

checks += ['adj_exceeds_prices', 'div_date_wrong']

if not div_status_df[checks].any().any():
# Maybe failed to detect a too-small div. If div is ~0.01x of previous and next, then
# treat as a 0.01x error
if len(div_status_df) > 1:
for i in range(0, len(div_status_df)):
r_pre, r_post = None, None
if i > 0:
r_pre = div_status_df['%'].iloc[i-1] / div_status_df['%'].iloc[i]
if i < (len(div_status_df)-1):
r_post = div_status_df['%'].iloc[i+1] / div_status_df['%'].iloc[i]
r_pre = r_pre or r_post
r_post = r_post or r_pre
if abs(r_pre-currency_divide)<20 and abs(r_post-currency_divide)<20:
div_dt = div_status_df.index[i]
div_status_df.loc[div_dt, 'div_too_small'] = True

for c in checks:
if not div_status_df[c].any():
div_status_df = div_status_df.drop(c, axis=1)
Expand Down Expand Up @@ -1887,11 +1891,16 @@ def cluster_dividends(df, column='div', threshold=7):
div_pcts['avg yr yield'] = div_pcts['%'] / div_pcts['period']

for c in checks:
if not cluster[c].to_numpy().any():
cluster = cluster.drop(c, axis=1)
cluster_checks = [c for c in checks if c in cluster.columns]

for c in cluster_checks:
f_fail = cluster[c].to_numpy()
n_fail = np.sum(f_fail)
if n_fail in [0, n]:
continue
pct_fail = np.sum(f_fail) / n
pct_fail = n_fail / n
if c == 'div_too_big':
true_threshold = 1.0
fals_threshold = 0.2
Expand All @@ -1900,7 +1909,16 @@ def cluster_dividends(df, column='div', threshold=7):
continue

if 'adj_exceeds_prices' in cluster.columns and (cluster[c] == (cluster[c] & cluster['adj_exceeds_prices'])).all():
# More likely a true positive. Maybe the div never happened
# Treat div_too_big=False as false positives IFF adj_exceeds_prices=True AND
# the true ratio is above the (lowered) threshold.
true_threshold = 0.5
f_adj_exceeds_prices = cluster['adj_exceeds_prices'].to_numpy()
n = np.sum(f_adj_exceeds_prices)
n_fail = np.sum(f_fail[f_adj_exceeds_prices])
pct_fail = n_fail / n
if pct_fail > true_threshold:
f = fc & div_status_df['adj_exceeds_prices'].to_numpy()
div_status_df.loc[f, c] = True
continue

if 'div_exceeds_adj' in cluster.columns and cluster['div_exceeds_adj'].all():
Expand Down

0 comments on commit e797a30

Please sign in to comment.