Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2026-02-17 09:28:04 -06:00
parent f731aa90e5
commit 246c3be86b
6 changed files with 313 additions and 35 deletions

View File

@ -224,8 +224,8 @@ Treats low-volume bars as fake:
### Hide Market-Closed Gaps (Stocks) ### Hide Market-Closed Gaps (Stocks)
Compresses non-trading time on stock charts: Compresses non-trading time on stock charts:
- `1d`: removes weekend spacing - `1d`: removes weekends and full missing days (for example exchange holidays with no bars)
- intraday (`1m`..`1h`): removes weekends and overnight closed hours - intraday (`1m`..`1h`): removes weekends, full missing days, and overnight closed hours; x-axis labels show one date per trading day
Use OFF for 24/7 markets (for example many crypto workflows) when you want continuous time. Use OFF for 24/7 markets (for example many crypto workflows) when you want continuous time.
@ -284,8 +284,9 @@ streamlit run app.py --server.port 8502
### I still see some time gaps ### I still see some time gaps
- For stocks, keep `Hide market-closed gaps (stocks)` ON. - For stocks, keep `Hide market-closed gaps (stocks)` ON.
- Daily charts remove weekends; intraday removes weekends + closed hours. - Daily charts remove weekends + full no-data days; intraday removes weekends + full no-data days + closed hours.
- Some exchange holidays/half-days can still produce spacing depending on the data feed. - Day-based periods (`1d`, `5d`) backfill to target trading-day count when provider limits allow.
- Half-days can still look visually compressed because they are partial sessions, not missing sessions.
### Exports crash with timestamp errors ### Exports crash with timestamp errors
- Pull latest project changes (export logic now handles named index columns) - Pull latest project changes (export logic now handles named index columns)

View File

@ -25,6 +25,7 @@ Provide an analysis-only charting tool that classifies OHLC bars as real/fake, t
- filter/toggle settings - filter/toggle settings
- optional advanced controls (alerts, replay, compare symbols, backtest controls, regime filter) - optional advanced controls (alerts, replay, compare symbols, backtest controls, regime filter)
2. App fetches OHLCV via Yahoo Finance (`yfinance`). 2. App fetches OHLCV via Yahoo Finance (`yfinance`).
- For day-based periods (for example `1d`, `5d`), fetch widens calendar lookback as needed, then trims to the latest N trading days that actually contain bars.
3. Optional last-bar drop (live-bar guard) for intraday intervals. 3. Optional last-bar drop (live-bar guard) for intraday intervals.
4. Bars are classified (`real_bull`, `real_bear`, `fake`, `unclassified` for first bar). 4. Bars are classified (`real_bull`, `real_bear`, `fake`, `unclassified` for first bar).
5. Trend state is derived from classification sequence. 5. Trend state is derived from classification sequence.
@ -158,7 +159,8 @@ Important:
Gap handling (`hide_market_closed_gaps`): Gap handling (`hide_market_closed_gaps`):
- Always removes weekend gaps (`sat` -> `mon`). - Always removes weekend gaps (`sat` -> `mon`).
- For intraday intervals, also removes inferred overnight hours using session bounds. - Removes full missing calendar days between first/last bar (for example market holidays with no bars).
- For intraday intervals, uses contiguous bar-order x-axis (no closed-session spacing) and day-level tick labels.
- For daily interval, weekend break removal is applied. - For daily interval, weekend break removal is applied.
## 8. Help and Onboarding Behavior ## 8. Help and Onboarding Behavior

View File

@ -0,0 +1,92 @@
from __future__ import annotations
import pandas as pd
from web_core.charting import _missing_calendar_day_values, build_figure
from web_core.constants import TREND_NEUTRAL
def _make_daily_df(days: list[str]) -> pd.DataFrame:
    """Build a minimal daily OHLCV frame with one UTC-midnight bar per day."""
    stamps = pd.DatetimeIndex([pd.Timestamp(day, tz="UTC") for day in days])
    n = len(stamps)
    offsets = range(n)
    data = {
        "Open": [100 + i for i in offsets],
        "High": [101 + i for i in offsets],
        "Low": [99 + i for i in offsets],
        "Close": [100.5 + i for i in offsets],
        "Volume": [1000 + i for i in offsets],
        # Classification/trend columns only need valid placeholder values here.
        "classification": ["fake"] * n,
        "trend_state": [TREND_NEUTRAL] * n,
    }
    return pd.DataFrame(data, index=stamps)
def _make_intraday_df(days: list[str]) -> pd.DataFrame:
    """Build a 15-minute OHLCV frame covering one New York session per day."""
    stamps: list[pd.Timestamp] = []
    for day in days:
        # Regular session bars from 09:30 through 15:45 inclusive.
        stamps += pd.date_range(
            start=f"{day} 09:30:00",
            end=f"{day} 15:45:00",
            freq="15min",
            tz="America/New_York",
        ).to_list()
    bar_index = pd.DatetimeIndex(stamps)
    n = len(bar_index)
    return pd.DataFrame(
        {
            "Open": [100 + i for i in range(n)],
            "High": [101 + i for i in range(n)],
            "Low": [99 + i for i in range(n)],
            "Close": [100.5 + i for i in range(n)],
            "Volume": [1000 + i for i in range(n)],
            "classification": ["fake"] * n,
            "trend_state": [TREND_NEUTRAL] * n,
        },
        index=bar_index,
    )
def test_missing_calendar_day_values_include_weekday_holidays_only() -> None:
    """Weekday holes (e.g. holidays) are reported; weekend gaps are not."""
    frame = _make_daily_df(["2026-02-13", "2026-02-17"])
    missing = _missing_calendar_day_values(frame)
    # 2026-02-16 (Mon) is a weekday hole; 2026-02-15 (Sun) is weekend-handled.
    assert "2026-02-16" in missing
    assert "2026-02-15" not in missing
def test_build_figure_adds_missing_day_rangebreak_values() -> None:
    """Daily figures should carry explicit rangebreak values for holiday gaps."""
    frame = _make_daily_df(["2026-02-13", "2026-02-17"])
    fig = build_figure(
        frame,
        gray_fake=False,
        interval="1d",
        hide_market_closed_gaps=True,
    )
    collected: list[str] = []
    for rangebreak in fig.layout.xaxis.rangebreaks:
        raw_values = getattr(rangebreak, "values", ()) or ()
        collected.extend(str(value) for value in raw_values)
    assert "2026-02-16" in collected
    assert "2026-02-15" not in collected
def test_build_figure_intraday_uses_category_axis_when_hiding_gaps() -> None:
    """Intraday gap hiding should use a categorical axis with per-day ticks."""
    frame = _make_intraday_df(["2026-02-13", "2026-02-17"])
    fig = build_figure(
        frame,
        gray_fake=False,
        interval="15m",
        hide_market_closed_gaps=True,
    )
    axis = fig.layout.xaxis
    assert axis.type == "category"
    # No rangebreaks on a category axis: contiguity comes from bar ordering.
    assert len(axis.rangebreaks) == 0
    assert axis.tickmode == "array"
    assert list(axis.ticktext) == ["2/13", "2/17"]
    assert len(axis.tickvals) == 2

View File

@ -0,0 +1,75 @@
from __future__ import annotations
from datetime import datetime
import pandas as pd
from web_core import data as data_module
def _make_intraday_df(days: list[str]) -> pd.DataFrame:
index_values: list[pd.Timestamp] = []
for day in days:
index_values.append(pd.Timestamp(f"{day} 14:30:00", tz="UTC"))
index_values.append(pd.Timestamp(f"{day} 15:30:00", tz="UTC"))
index = pd.DatetimeIndex(index_values)
return pd.DataFrame(
{
"Open": [100.0 + i for i in range(len(index))],
"High": [101.0 + i for i in range(len(index))],
"Low": [99.0 + i for i in range(len(index))],
"Close": [100.5 + i for i in range(len(index))],
"Volume": [1000 + i for i in range(len(index))],
},
index=index,
)
def test_fetch_ohlc_day_period_backfills_until_target_trading_days(monkeypatch) -> None:
    """A 5d request that first yields 4 trading days must widen its lookback."""
    fixed_now = datetime(2026, 2, 17, 20, 0, 0)
    short_df = _make_intraday_df(["2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"])
    full_df = _make_intraday_df(
        ["2026-02-11", "2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"]
    )
    calls: list[dict[str, object]] = []

    class FakeTicker:
        def history(self, **kwargs: object) -> pd.DataFrame:
            calls.append(kwargs)
            start = kwargs.get("start")
            if start is None:
                return short_df.copy()
            # Only a window of at least 12 calendar days exposes the fifth day.
            lookback_days = (fixed_now - pd.Timestamp(start).to_pydatetime()).days
            if lookback_days >= 12:
                return full_df.copy()
            return short_df.copy()

    monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker())
    monkeypatch.setattr(data_module, "_utc_now", lambda: fixed_now)
    data_module.fetch_ohlc.clear()
    out = data_module.fetch_ohlc(symbol="TSLA", interval="2m", period="5d")
    session_days = pd.DatetimeIndex(out.index).normalize().unique()
    assert len(session_days) == 5
    assert pd.Timestamp("2026-02-11", tz="UTC") in session_days
    # Backfill implies at least one retry, always via start/end windows.
    assert len(calls) >= 2
    assert all("start" in call and "end" in call for call in calls)
def test_fetch_ohlc_non_day_period_uses_period_request(monkeypatch) -> None:
    """Non day-based periods pass straight through as a single period=... call."""
    recorded: list[dict[str, object]] = []
    month_df = _make_intraday_df(["2026-01-05", "2026-01-06", "2026-01-07"])

    class FakeTicker:
        def history(self, **kwargs: object) -> pd.DataFrame:
            recorded.append(kwargs)
            return month_df.copy()

    monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker())
    data_module.fetch_ohlc.clear()
    out = data_module.fetch_ohlc(symbol="AAPL", interval="1h", period="1mo")
    assert len(out) == len(month_df)
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call.get("period") == "1mo"
    assert "start" not in only_call
    assert "end" not in only_call

View File

@ -15,30 +15,36 @@ def _is_daily_interval(interval: str) -> bool:
return interval == "1d" return interval == "1d"
def _infer_session_bounds(df: pd.DataFrame) -> tuple[float, float] | None: def _intraday_day_ticks(index: pd.DatetimeIndex) -> tuple[list[pd.Timestamp], list[str]]:
if len(index) == 0:
return [], []
normalized = index.normalize()
first_mask = ~normalized.duplicated()
tickvals = [index[pos] for pos, keep in enumerate(first_mask) if bool(keep)]
ticktext = [f"{ts.month}/{ts.day}" for ts in tickvals]
return tickvals, ticktext
def _missing_calendar_day_values(df: pd.DataFrame) -> list[str]:
if df.empty: if df.empty:
return None return []
index = pd.DatetimeIndex(df.index) index = pd.DatetimeIndex(df.index)
if index.tz is None: session_days = pd.DatetimeIndex(index.normalize().unique()).sort_values()
return None if len(session_days) < 2:
return []
minutes = index.hour * 60 + index.minute if session_days.tz is None:
session_df = pd.DataFrame({"date": index.date, "minute": minutes}) all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D")
day_bounds = session_df.groupby("date")["minute"].agg(["min", "max"])
if day_bounds.empty:
return None
start_minute = float(day_bounds["min"].median())
# Include the final candle width roughly by adding one median step when possible.
if len(index) > 1:
deltas = pd.Series(index[1:] - index[:-1]).dt.total_seconds().div(60.0)
step = float(deltas[deltas > 0].median()) if not deltas[deltas > 0].empty else 0.0
else: else:
step = 0.0 all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D", tz=session_days.tz)
end_minute = float(day_bounds["max"].median() + step)
return end_minute / 60.0, start_minute / 60.0 missing_days = all_days.difference(session_days)
# Weekend gaps are already handled by sat->mon bounds; keep explicit values
# for weekday closures (e.g., exchange holidays) to avoid overlap artifacts.
weekday_missing = [day for day in missing_days if day.dayofweek < 5]
return [day.strftime("%Y-%m-%d") for day in weekday_missing]
def build_figure( def build_figure(
@ -143,17 +149,26 @@ def build_figure(
height=760, height=760,
) )
if hide_market_closed_gaps: if hide_market_closed_gaps:
rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])]
if _is_intraday_interval(interval): if _is_intraday_interval(interval):
# Collapse inferred overnight closed hours from the data's timezone/session. # Intraday rangebreak combinations can produce axis rendering artifacts
inferred_bounds = _infer_session_bounds(df) # with some feeds/timezones. Categorical axis keeps chronological bars
hour_bounds = list(inferred_bounds) if inferred_bounds else [16, 9.5] # contiguous and removes closed-session gaps reliably.
rangebreaks.append(dict(pattern="hour", bounds=hour_bounds)) tickvals, ticktext = _intraday_day_ticks(pd.DatetimeIndex(df.index))
fig.update_xaxes(
type="category",
categoryorder="array",
categoryarray=list(df.index),
tickmode="array",
tickvals=tickvals,
ticktext=ticktext,
tickangle=0,
)
elif _is_daily_interval(interval): elif _is_daily_interval(interval):
# Daily charts still show weekend spacing on a continuous date axis. rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])]
# Weekend rangebreak removes these non-trading gaps. missing_days = _missing_calendar_day_values(df)
pass if missing_days:
fig.update_xaxes(rangebreaks=rangebreaks) rangebreaks.append(dict(values=missing_days))
fig.update_xaxes(rangebreaks=rangebreaks)
fig.update_yaxes(title_text="Price", row=1, col=1) fig.update_yaxes(title_text="Price", row=1, col=1)
fig.update_yaxes(title_text="Volume", row=2, col=1) fig.update_yaxes(title_text="Volume", row=2, col=1)

View File

@ -1,16 +1,104 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime from datetime import datetime, timedelta
import re
import pandas as pd import pandas as pd
import streamlit as st import streamlit as st
import yfinance as yf import yfinance as yf
# Matches day-based periods such as "1d"/"5d": a positive integer (no leading
# zero) followed by "d". Group 1 captures the trading-day count.
_DAY_PERIOD_PATTERN = re.compile(r"^([1-9]\d*)d$")
# Cap on how many progressively wider start/end fetches are attempted before
# falling back to the provider's own period handling.
_DAY_PERIOD_FETCH_ATTEMPTS = 5
def _utc_now() -> datetime:
return datetime.utcnow()
def _parse_day_period(period: str) -> int | None:
    """Return the trading-day count for periods like "5d"; None for others."""
    normalized = str(period).strip().lower()
    matched = _DAY_PERIOD_PATTERN.match(normalized)
    return int(matched.group(1)) if matched else None
def _intraday_max_lookback_days(interval: str) -> int | None:
interval_key = str(interval).strip().lower()
if interval_key == "1m":
return 7
if interval_key in {"2m", "5m", "15m", "30m", "60m", "90m", "1h"}:
return 60
return None
def _initial_calendar_lookback_days(trading_days: int) -> int:
# Expand beyond target trading days to account for weekends and holidays.
return max(trading_days + 2, int((trading_days * 7) / 5) + 3)
def _trading_day_count(df: pd.DataFrame) -> int:
if df.empty:
return 0
index = pd.DatetimeIndex(df.index)
return int(index.normalize().nunique())
def _trim_to_recent_trading_days(df: pd.DataFrame, trading_days: int) -> pd.DataFrame:
if df.empty or trading_days <= 0:
return df.copy()
index = pd.DatetimeIndex(df.index)
session_days = pd.DatetimeIndex(index.normalize().unique()).sort_values()
if len(session_days) <= trading_days:
return df.copy()
keep_days = session_days[-trading_days:]
mask = index.normalize().isin(keep_days)
return df.loc[mask].copy()
def _fetch_history_for_period(ticker: yf.Ticker, interval: str, period: str) -> pd.DataFrame:
    """Fetch history, widening calendar lookback for day-based periods.

    Non day-based periods are requested as-is via ``period=``. For "Nd"
    periods, a start/end window is fetched and widened (~1.6x per attempt,
    clamped to the interval's lookback cap) until the result covers at least
    N distinct trading days, the cap is hit, or the attempt budget runs out.
    The largest frame seen is kept as a fallback; if every attempt came back
    empty, a plain ``period=`` request is made as a last resort.
    """
    base_kwargs = {"interval": interval, "auto_adjust": False, "actions": False}
    target_days = _parse_day_period(period)
    if target_days is None:
        return ticker.history(period=period, **base_kwargs)

    cap = _intraday_max_lookback_days(interval)
    window = _initial_calendar_lookback_days(target_days)
    if cap is not None:
        window = min(window, cap)

    end = _utc_now()
    best = pd.DataFrame()
    for _ in range(_DAY_PERIOD_FETCH_ATTEMPTS):
        frame = ticker.history(start=end - timedelta(days=window), end=end, **base_kwargs)
        # Track the richest response in case no attempt reaches the target.
        if len(frame) > len(best):
            best = frame
        if _trading_day_count(frame) >= target_days:
            return frame
        if cap is not None and window >= cap:
            break
        widened = int(window * 1.6) + 1
        if cap is not None:
            widened = min(widened, cap)
        if widened <= window:
            break
        window = widened

    if not best.empty:
        return best
    # Last resort: let the provider interpret the raw period string itself.
    return ticker.history(period=period, **base_kwargs)
@st.cache_data(ttl=60, show_spinner=False) @st.cache_data(ttl=60, show_spinner=False)
def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame: def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame:
ticker = yf.Ticker(symbol) ticker = yf.Ticker(symbol)
df = ticker.history(period=period, interval=interval, auto_adjust=False, actions=False) df = _fetch_history_for_period(ticker=ticker, interval=interval, period=period)
if df.empty: if df.empty:
raise ValueError("No data returned. Check symbol/interval/period compatibility.") raise ValueError("No data returned. Check symbol/interval/period compatibility.")
@ -20,7 +108,12 @@ def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame:
if missing: if missing:
raise ValueError(f"Missing required columns: {missing}") raise ValueError(f"Missing required columns: {missing}")
return df[required].dropna().copy() out = df[required].dropna().copy()
day_period = _parse_day_period(period)
if day_period is not None:
out = _trim_to_recent_trading_days(out, day_period)
return out
def maybe_drop_live_bar(df: pd.DataFrame, interval: str, enabled: bool) -> pd.DataFrame: def maybe_drop_live_bar(df: pd.DataFrame, interval: str, enabled: bool) -> pd.DataFrame: