Skip to content

Commit

Permalink
Added standard reduce methods and cleaned up from old code.
Browse files Browse the repository at this point in the history
  • Loading branch information
Suchismit4 committed Jan 9, 2025
1 parent d6e39c0 commit d1bbb41
Show file tree
Hide file tree
Showing 16 changed files with 241 additions and 428 deletions.
Empty file removed config/transformations.yaml
Empty file.
54 changes: 35 additions & 19 deletions example_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# hindsight/example_load.py
# ----------------------------------------
# This script demonstrates how to load financial datasets using the DataManager,
# perform basic operations on the data, and visualize the adjusted prices from
# the CRSP dataset alongside the closing prices from Yahoo Finance for Apple (AAPL).
# ----------------------------------------

from src import DataManager
import xarray as xr
Expand All @@ -11,45 +16,56 @@
import matplotlib.pyplot as plt

def main():
"""
Main function to load financial datasets, compute adjusted prices,
and plot CRSP adjusted prices vs. Yahoo Finance closing prices.
"""
# Initialize the DataManager to handle data retrieval
data_manager = DataManager()

dataset = data_manager.get_data("data_requests.yaml") # un post-proc.
# Load datasets as specified in a YAML configuration file
dataset_collection = data_manager.get_data("data_requests.yaml") # post-processing not applied here

crsp_ds = dataset["wrds/equity/crsp"]
crsp_ds["adj_prc"] = crsp_ds["prc"] / crsp_ds["cfacpr"]
# Extract the CRSP dataset and compute adjusted prices
crsp_dataset = dataset_collection["wrds/equity/crsp"]
crsp_dataset["adj_prc"] = crsp_dataset["prc"] / crsp_dataset["cfacpr"]

yfinance_ds = dataset["openbb/equity/price/historical"]
# Extract the Yahoo Finance dataset for historical equity prices
yfinance_dataset = dataset_collection["openbb/equity/price/historical"]

# Time-index them
crsp_sel = crsp_ds.sel(asset=14593).dt.to_time_indexed()
yf_sel = yfinance_ds.sel(asset="AAPL").dt.to_time_indexed()
# Select specific assets and convert their data to time-indexed form for plotting
# Select CRSP data for asset with permno=14593
crsp_time_series = crsp_dataset.sel(asset=14593).dt.to_time_indexed()
# Select Yahoo Finance data for Apple (AAPL)
yf_time_series = yfinance_dataset.sel(asset="AAPL").dt.to_time_indexed()

crsp_adj = crsp_sel["adj_prc"]
yf_close = yf_sel["close"]
# Extract the adjusted price and closing price series
crsp_adj_price = crsp_time_series["adj_prc"]
yf_close_price = yf_time_series["close"]

# Create subplots: two rows, one column
# Create subplots: two rows, one column, so the two series are stacked vertically for comparison
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))

# Plot CRSP data on the first (top) subplot
crsp_adj.plot.line(x="time", ax=ax1, label="CRSP (permno=14593)", color="blue")
# Plot CRSP adjusted prices on the first subplot
crsp_adj_price.plot.line(x="time", ax=ax1, label="CRSP (permno=14593)", color="blue")
ax1.set_title("CRSP Adjusted Price")
ax1.set_xlabel("Time")
ax1.set_ylabel("Price")
ax1.legend()

# Plot YF data on the second (bottom) subplot
yf_close.plot.line(x="time", ax=ax2, label="AAPL (YF)", color="red")
ax2.set_title("YFinance Close")
# Plot Yahoo Finance closing prices on the second subplot
yf_close_price.plot.line(x="time", ax=ax2, label="AAPL (Yahoo Finance)", color="red")
ax2.set_title("Yahoo Finance Close Price")
ax2.set_xlabel("Time")
ax2.set_ylabel("Price")
ax2.legend()

# Adjust spacing, if needed
# Adjust layout for better spacing between subplots
plt.tight_layout()

# Save the figure
# Save the figure to a file
plt.savefig("crsp_vs_yfinance_subplots.png")
plt.close()

if __name__ == "__main__":
main()
main()
87 changes: 29 additions & 58 deletions example_rolling.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# hindsight/example_rolling.py
# ----------------------------------------
# This script demonstrates how to compute and plot the rolling Exponential Moving Average (EMA)
# for Apple's (AAPL) and Tesla's (TSLA) closing stock prices using data from yfinance.
# The script uses the DataManager to fetch historical price data, applies a rolling window EMA,
# and visualizes both the original closing prices and the EMA for each asset.
# ----------------------------------------

from src import DataManager
# from src.core.operations import mean
from src.data.core.operations import mean, median, mode, ema

import xarray as xr
import xarray_jax as xj
Expand All @@ -13,10 +19,15 @@
import matplotlib.pyplot as plt

def main():

"""
Main function to fetch historical stock data, compute rolling EMA,
and plot the original closing prices alongside the EMA for AAPL and TSLA.
"""
# Initialize the DataManager to handle dataset operations
dm = DataManager()

# Pull in the yfinance data of Apple and Tesla.
# Pull in the yfinance data for Apple and Tesla.
# Data parameters: symbols, date range, and data provider configuration.
datasets = dm.get_data([{
"data_path": "openbb/equity/price/historical",
"config": {
Expand All @@ -27,76 +38,34 @@ def main():
}
}])

# Define a function to compute Exponential Moving Average (EMA)
# This function will be used with the u_roll method for efficient computation
@partial(jax.jit, static_argnames=['window_size'])
def ema(i: int, carry, block: jnp.ndarray, window_size: int):
"""
Compute the Exponential Moving Average (EMA) for a given window.
This function is designed to work with JAX's JIT compilation and
the u_roll method defined in the Tensor class. It computes the EMA
efficiently over a rolling window of data.
Args:
i (int): Current index in the time series
state (tuple): Contains current values, carry (previous EMA), and data block
window_size (int): Size of the moving window
Returns:
tuple: Updated state (new EMA value, carry, and data block)
"""

# Initialize the first value
if carry is None:
# Compute the sum of the first window
current_window_sum = block[:window_size].reshape(-1,
block.shape[1],
block.shape[2]).sum(axis=0)


return (current_window_sum * (1/window_size), current_window_sum * (1/window_size))

# Get the current price
current_price = block[i]

# Compute the new EMA
# EMA = α * current_price + (1 - α) * previous_EMA
# where α = 1 / (window_size)
alpha = 1 / window_size

new_ema = alpha * current_price + (1 - alpha) * carry

return (new_ema, new_ema)

# Original dataset
# Extract the original dataset for historical prices.
dataset = datasets["openbb/equity/price/historical"]

# Rolling-EMA of "close" over a 200-day window
# Compute the rolling Exponential Moving Average (EMA) of the "close" price over a 252-day window.
# 252 days correspond to roughly one trading year.
ema_dataset = dataset.dt.rolling(dim='time', window=252).reduce(ema)

# Convert to time-indexed form for plotting
# -- Original closing prices --
# Convert the selected per-asset data to time-indexed form for easier plotting.
# --- Original closing prices ---
apple_orig = dataset.sel(asset="AAPL").dt.to_time_indexed()
tsla_orig = dataset.sel(asset="TSLA").dt.to_time_indexed()
# -- EMA-rolled closing prices --

# --- EMA-rolled closing prices ---
apple_ema = ema_dataset.sel(asset="AAPL").dt.to_time_indexed()
tsla_ema = ema_dataset.sel(asset="TSLA").dt.to_time_indexed()

# Extract the close and the new "ema_close"
# Extract the closing prices from the original and EMA datasets.
apple_close_orig = apple_orig["close"]
tsla_close_orig = tsla_orig["close"]

apple_close_ema = apple_ema["ema_close"]
tsla_close_ema = tsla_ema["ema_close"]

print(apple_close_orig[:30])
print(apple_close_ema[:30])

# Create subplots: two rows, one column
# Create subplots with two rows and one column for Apple and Tesla plots.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))

# --- Apple subplot ---
# Plot original closing prices and EMA for AAPL on the first subplot.
apple_close_orig.plot.line(
x="time", ax=ax1, label="AAPL Close", color="blue", linestyle="-"
)
Expand All @@ -109,6 +78,7 @@ def ema(i: int, carry, block: jnp.ndarray, window_size: int):
ax1.legend()

# --- Tesla subplot ---
# Plot original closing prices and EMA for TSLA on the second subplot.
tsla_close_orig.plot.line(
x="time", ax=ax2, label="TSLA Close", color="red", linestyle="-"
)
Expand All @@ -120,11 +90,12 @@ def ema(i: int, carry, block: jnp.ndarray, window_size: int):
ax2.set_ylabel("Price (USD)")
ax2.legend()

# Adjust layout for better spacing between subplots.
plt.tight_layout()

# Save the figure
# Save the figure to a file.
plt.savefig("apple_tsla_ema.png")
plt.close()

if __name__ == "__main__":
main()
main()
92 changes: 0 additions & 92 deletions examples/deprecated/example_crspastensors.py

This file was deleted.

Loading

0 comments on commit d1bbb41

Please sign in to comment.