Fix multiday div-adjust ; Tidy adjust args #1434

Draft
wants to merge 5 commits into base: dev

Changes from all commits
108 changes: 94 additions & 14 deletions yfinance/base.py
@@ -92,7 +92,9 @@ def stats(self, proxy=None):

def history(self, period="1mo", interval="1d",
start=None, end=None, prepost=False, actions=True,
auto_adjust=True, back_adjust=False, repair=False, keepna=False,
auto_adjust=None, back_adjust=None, # deprecated
div_adjust=False,
repair=False, keepna=False,
proxy=None, rounding=False, timeout=10,
debug=True, raise_errors=False) -> pd.DataFrame:
"""
@@ -114,10 +116,8 @@ def history(self, period="1mo", interval="1d",
prepost : bool
Include Pre and Post market data in results?
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is True
back_adjust: bool
Back-adjusted data to mimic true historical prices
div_adjust: bool
Dividend-adjust all OHLC data? Default is False
repair: bool or "silent"
Detect currency unit 100x mixups and attempt repair.
If True, fix & print summary. If "silent", just fix.
@@ -142,6 +142,25 @@ def history(self, period="1mo", interval="1d",
exceptions instead of printing to console.
"""

utils.print_once("NOTICE: yfinance.Ticker::history(): Be aware that dividend-adjustment is now default disabled, default used to be enabled")

# Handle deprecated arguments first
if auto_adjust is not None:
utils.print_once("WARNING: yfinance.Ticker::history(): 'auto_adjust' is deprecated, switch to 'div_adjust' instead")
div_adjust = auto_adjust
auto_adjust = None
elif back_adjust is not None:
utils.print_once("WARNING: yfinance.Ticker::history(): 'back_adjust' is deprecated, switch to 'div_adjust' instead")
back_adjust = None

if start is not None or end is not None:
period = None

if div_adjust and interval in ["1wk", "1mo", "3mo"]:
hist_args = locals() # function arguments
df = self._get_div_adjusted_multiday_prices(hist_args)
return df

if raise_errors:
debug = True
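
For context, a rough usage sketch of the new argument handling above (not part of the diff; the ticker symbol and periods are illustrative):

    import yfinance as yf

    msft = yf.Ticker("MSFT")  # illustrative symbol

    # New-style call: dividend-adjusted weekly bars, which now route through
    # the daily-fetch-then-aggregate helper added in this PR.
    df_weekly = msft.history(period="1y", interval="1wk", div_adjust=True)

    # Legacy call: still accepted, but prints the deprecation warning and
    # maps auto_adjust onto div_adjust.
    df_legacy = msft.history(period="1y", interval="1wk", auto_adjust=True)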

@@ -383,15 +402,11 @@ def history(self, period="1mo", interval="1d",

# Auto/back adjust
try:
if auto_adjust:
df = utils.auto_adjust(df)
elif back_adjust:
df = utils.back_adjust(df)
if div_adjust:
df = utils.adjust_with_Yahoo_adj_close(df)
except Exception as e:
if auto_adjust:
err_msg = "auto_adjust failed with %s" % e
else:
err_msg = "back_adjust failed with %s" % e
if div_adjust:
err_msg = "div_adjust failed with %s" % e
shared._DFS[self.ticker] = utils.empty_df()
shared._ERRORS[self.ticker] = err_msg
if debug:
@@ -583,7 +598,7 @@ def _reconstruct_intervals_batch(self, df, interval, prepost, tag=-1, silent=Fal
fetch_start = max(min_dt.date(), fetch_start)
logger.debug(f"Fetching {sub_interval} prepost={prepost} {fetch_start}->{fetch_end}")
r = "silent" if silent else True
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, auto_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
df_fine = self.history(start=fetch_start, end=fetch_end, interval=sub_interval, div_adjust=False, actions=False, prepost=prepost, repair=r, keepna=True)
if df_fine is None or df_fine.empty:
if not silent:
logger.warning(f"Cannot reconstruct {interval} block starting {start_d}, too old, Yahoo is rejecting request for finer-grain data")
@@ -988,6 +1003,71 @@ def _fetch_ticker_tz(self, debug_mode, proxy, timeout):
print("-------------")
return None

def _get_div_adjusted_multiday_prices(self, hist_args):
# Not possible to correctly div-adjust multi-day intervals
# using only 'Adj Close' returned by Yahoo.
# Need to fetch 1d -> div adjust -> aggregate into larger interval

interval = hist_args["interval"]
del hist_args["self"]

ohlcv = ["Open", "High", "Low", "Close", "Volume"]

df_unadj = None
if "period" in hist_args and hist_args["period"] is not None:
@paulmcq commented (Feb 19, 2023): might be more legible as:
if hist_args.get("period", None) is not None:

# Yahoo sets period start differently depending on interval, need to fetch to know.
hist_args["div_adjust"] = False # avoid this code path
df_unadj = self.history(**hist_args)
hist_args["start"] = df_unadj.index[0].date()
del hist_args["period"]
hist_args["div_adjust"] = True

hist_args["interval"] = "1d"
df_daily = self.history(**hist_args)

if interval == "1wk":
pd_period = "W"
elif interval == "1mo":
pd_period = "M"
elif interval == "3mo":
# Not quarterly. How Yahoo aggregates depends on whether 'period' was set:
# - period set => aggregate backwards => the last 3mo interval ends in the last month.
# - else start set => aggregate forwards => the first 3mo interval starts in the start month.
# So like quarterly, but offset.
if "period" not in hist_args or hist_args["period"] is not None:
offset_months = df_daily.index[-1].month - 1
else:
offset_months = df_daily.index[0].month - 1
offset_td = pd.tseries.offsets.DateOffset(months=offset_months)
pd_period = "Q"
df_daily.index = df_daily.index - offset_td

df_daily.loc[df_daily["Stock Splits"]==0,"Stock Splits"] = 1
df = df_daily.groupby(df_daily.index.tz_localize(None).to_period(pd_period)).agg(
Open=("Open", "first"),
High=("High", "max"),
Low=("Low", "min"),
Close=("Close", "last"),
Volume=("Volume", "sum"),
Dividends=("Dividends", "sum"),
StockSplits=("Stock Splits", "prod"))
df = df.rename(columns={"StockSplits":"Stock Splits"})
df.loc[df["Stock Splits"]==1,"Stock Splits"] = 0
df.index = df.index.start_time.tz_localize(df_daily.index.tz)

if interval == "3mo":
# Reverse the offset
df.index = df.index + offset_td

if df_unadj is not None:
# Copy 'Volume' from the unadjusted fetch: fetching daily then aggregating
# can differ slightly, because Yahoo returns slightly different daily volumes.
df = df.drop("Volume", axis=1).join(df_unadj[["Volume"]], validate="1:1")
df = df[["Open", "High", "Low", "Close", "Volume", "Dividends", "Stock Splits"]]

return df
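
To make the aggregation step above easier to follow, here is a standalone sketch of the same groupby/agg rules on a toy daily frame with made-up values (illustration only, not the PR's code):

    import pandas as pd

    idx = pd.date_range("2023-01-02", periods=10, freq="B", tz="America/New_York")
    daily = pd.DataFrame({
        "Open": range(10, 20), "High": range(11, 21),
        "Low": range(9, 19), "Close": range(10, 20),
        "Volume": [100] * 10, "Dividends": [0.0] * 10,
        "Stock Splits": [1.0] * 10,  # in the PR, zeros are mapped to 1 first so 'prod' works
    }, index=idx)

    weekly = daily.groupby(daily.index.tz_localize(None).to_period("W")).agg(
        Open=("Open", "first"), High=("High", "max"), Low=("Low", "min"),
        Close=("Close", "last"), Volume=("Volume", "sum"),
        Dividends=("Dividends", "sum"), StockSplits=("Stock Splits", "prod"))
    weekly = weekly.rename(columns={"StockSplits": "Stock Splits"})
    weekly.index = weekly.index.start_time.tz_localize(idx.tz)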


def get_recommendations(self, proxy=None, as_dict=False):
self._quote.proxy = proxy
data = self._quote.recommendations
36 changes: 24 additions & 12 deletions yfinance/multi.py
@@ -32,7 +32,10 @@
logger = logging.getLogger(__name__)

def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=None,
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
group_by='column',
auto_adjust=None, back_adjust=None,
div_adjust=False,
repair=False, keepna=False,
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
proxy=None, rounding=False, timeout=10):
"""Download yahoo tickers
@@ -58,8 +61,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
prepost : bool
Include Pre and Post market data in results?
Default is False
auto_adjust: bool
Adjust all OHLC automatically? Default is False
div_adjust: bool
Dividend-adjust all OHLC data? Default is False
repair: bool
Detect currency unit 100x mixups and attempt repair
Default is False
@@ -84,6 +87,15 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
seconds. (Can also be a fraction of a second e.g. 0.01)
"""

# Handle deprecated arguments first
if auto_adjust is not None:
print("WARNING: 'auto_adjust' is deprecated, switch to 'div_adjust' instead")
div_adjust = auto_adjust
auto_adjust = None
if back_adjust is not None:
print("WARNING: 'back_adjust' is deprecated, it was nonsense")
back_adjust = None

if ignore_tz is None:
# Set default value depending on interval
if interval[1:] in ['m', 'h']:
@@ -125,8 +137,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
for i, ticker in enumerate(tickers):
_download_one_threaded(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
actions=actions, div_adjust=div_adjust,
repair=repair, keepna=keepna,
progress=(progress and i > 0), proxy=proxy,
rounding=rounding, timeout=timeout)
while len(shared._DFS) < len(tickers):
@@ -137,8 +149,8 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
for i, ticker in enumerate(tickers):
data = _download_one(ticker, period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, keepna=keepna,
actions=actions, div_adjust=div_adjust,
repair=repair, keepna=keepna,
proxy=proxy,
rounding=rounding, timeout=timeout)
shared._DFS[ticker.upper()] = data
@@ -211,12 +223,12 @@ def _realign_dfs():

@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
div_adjust=False, repair=False,
actions=False, progress=True, period="max",
interval="1d", prepost=False, proxy=None,
keepna=False, rounding=False, timeout=10):
try:
data = _download_one(ticker, start, end, auto_adjust, back_adjust, repair,
data = _download_one(ticker, start, end, div_adjust, repair,
actions, period, interval, prepost, proxy, rounding,
keepna, timeout)
except Exception as e:
@@ -230,15 +242,15 @@ def _download_one_threaded(ticker, start=None, end=None,


def _download_one(ticker, start=None, end=None,
auto_adjust=False, back_adjust=False, repair=False,
div_adjust=False, repair=False,
actions=False, period="max", interval="1d",
prepost=False, proxy=None, rounding=False,
keepna=False, timeout=10):
return Ticker(ticker).history(
period=period, interval=interval,
start=start, end=end, prepost=prepost,
actions=actions, auto_adjust=auto_adjust,
back_adjust=back_adjust, repair=repair, proxy=proxy,
actions=actions,
div_adjust=div_adjust, repair=repair, proxy=proxy,
rounding=rounding, keepna=keepna, timeout=timeout,
debug=False, raise_errors=False # debug and raise_errors false to not log and raise errors in threads
)
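
For reference, the equivalent top-level calls after this change would look roughly like the following (tickers and periods are illustrative):

    import yfinance as yf

    # New-style: dividend-adjusted monthly bars for several tickers.
    data = yf.download(["MSFT", "AAPL"], period="2y", interval="1mo", div_adjust=True)

    # Legacy: still accepted, but prints the deprecation warning and
    # maps auto_adjust onto div_adjust.
    legacy = yf.download("MSFT", period="2y", auto_adjust=True)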
30 changes: 4 additions & 26 deletions yfinance/utils.py
@@ -343,29 +343,7 @@ def _interval_to_timedelta(interval):
return _pd.Timedelta(interval)


def auto_adjust(data):
col_order = data.columns
df = data.copy()
ratio = df["Close"] / df["Adj Close"]
df["Adj Open"] = df["Open"] / ratio
df["Adj High"] = df["High"] / ratio
df["Adj Low"] = df["Low"] / ratio

df.drop(
["Open", "High", "Low", "Close"],
axis=1, inplace=True)

df.rename(columns={
"Adj Open": "Open", "Adj High": "High",
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)

return df[[c for c in col_order if c in df.columns]]


def back_adjust(data):
""" back-adjusted data to mimic true historical prices """

def adjust_with_Yahoo_adj_close(data):
col_order = data.columns
df = data.copy()
ratio = df["Adj Close"] / df["Close"]
@@ -374,12 +352,12 @@ def back_adjust(data):
df["Adj Low"] = df["Low"] * ratio

df.drop(
["Open", "High", "Low", "Adj Close"],
["Open", "High", "Low", "Close"],
axis=1, inplace=True)

df.rename(columns={
"Adj Open": "Open", "Adj High": "High",
"Adj Low": "Low"
"Adj Low": "Low", "Adj Close": "Close"
}, inplace=True)

return df[[c for c in col_order if c in df.columns]]
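
The renamed helper above is a per-row rescale by the ratio Adj Close / Close; a toy numeric sketch with made-up prices:

    import pandas as pd

    df = pd.DataFrame({"Open": [100.0], "High": [110.0], "Low": [95.0],
                       "Close": [105.0], "Adj Close": [52.5], "Volume": [1000]})

    ratio = df["Adj Close"] / df["Close"]   # 0.5
    adj_open = df["Open"] * ratio           # 50.0
    adj_high = df["High"] * ratio           # 55.0
    adj_low = df["Low"] * ratio             # 47.5
    # 'Adj Close' then replaces 'Close', matching the drop/rename above.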
@@ -511,7 +489,7 @@ def fix_Yahoo_returning_live_separate(quotes, interval, tz_exchange):
elif interval == "1mo":
last_rows_same_interval = dt1.month == dt2.month
elif interval == "3mo":
last_rows_same_interval = dt1.year == dt2.year and dt1.quarter == dt2.quarter
last_rows_same_interval = (dt1 - _dateutil.relativedelta.relativedelta(months=3)) < dt2
else:
last_rows_same_interval = (dt1-dt2) < _pd.Timedelta(interval)
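
A quick illustration of why the "3mo" comparison changed: Yahoo's 3mo bars can straddle calendar quarters (they are offset, as noted in base.py above), so a year/quarter equality test can wrongly treat the appended live row as starting a new interval. Toy dates, illustrative only:

    import pandas as pd
    from dateutil.relativedelta import relativedelta

    dt2 = pd.Timestamp("2023-02-01")  # e.g. start of the last complete 3mo bar (Feb-Apr)
    dt1 = pd.Timestamp("2023-04-28")  # e.g. Yahoo's separate "live" row inside that bar

    old_check = dt1.year == dt2.year and dt1.quarter == dt2.quarter
    new_check = (dt1 - relativedelta(months=3)) < dt2

    print(old_check)  # False: calendar quarters differ (Q2 vs Q1)...
    print(new_check)  # True: ...but both timestamps fall within the same offset 3mo bar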
