diff --git a/web/src/ONBOARDING.md b/web/src/ONBOARDING.md index f40608e..0708978 100644 --- a/web/src/ONBOARDING.md +++ b/web/src/ONBOARDING.md @@ -224,8 +224,8 @@ Treats low-volume bars as fake: ### Hide Market-Closed Gaps (Stocks) Compresses non-trading time on stock charts: -- `1d`: removes weekend spacing -- intraday (`1m`..`1h`): removes weekends and overnight closed hours +- `1d`: removes weekends and full missing days (for example exchange holidays with no bars) +- intraday (`1m`..`1h`): removes weekends, full missing days, and overnight closed hours; x-axis labels show one date per trading day Use OFF for 24/7 markets (for example many crypto workflows) when you want continuous time. @@ -284,8 +284,9 @@ streamlit run app.py --server.port 8502 ### I still see some time gaps - For stocks, keep `Hide market-closed gaps (stocks)` ON. -- Daily charts remove weekends; intraday removes weekends + closed hours. -- Some exchange holidays/half-days can still produce spacing depending on the data feed. +- Daily charts remove weekends + full no-data days; intraday removes weekends + full no-data days + closed hours. +- Day-based periods (`1d`, `5d`) backfill to target trading-day count when provider limits allow. +- Half-days can still look visually compressed because they are partial sessions, not missing sessions. ### Exports crash with timestamp errors - Pull latest project changes (export logic now handles named index columns) diff --git a/web/src/PRD.md b/web/src/PRD.md index 41ac151..ec4961d 100644 --- a/web/src/PRD.md +++ b/web/src/PRD.md @@ -25,6 +25,7 @@ Provide an analysis-only charting tool that classifies OHLC bars as real/fake, t - filter/toggle settings - optional advanced controls (alerts, replay, compare symbols, backtest controls, regime filter) 2. App fetches OHLCV via Yahoo Finance (`yfinance`). + - For day-based periods (for example `1d`, `5d`), fetch widens calendar lookback as needed, then trims to the latest N trading days that actually contain bars. 3. Optional last-bar drop (live-bar guard) for intraday intervals. 4. Bars are classified (`real_bull`, `real_bear`, `fake`, `unclassified` for first bar). 5. Trend state is derived from classification sequence. @@ -158,7 +159,8 @@ Important: Gap handling (`hide_market_closed_gaps`): - Always removes weekend gaps (`sat` -> `mon`). -- For intraday intervals, also removes inferred overnight hours using session bounds. +- Removes full missing calendar days between first/last bar (for example market holidays with no bars). +- For intraday intervals, uses contiguous bar-order x-axis (no closed-session spacing) and day-level tick labels. - For daily interval, weekend break removal is applied. ## 8. Help and Onboarding Behavior diff --git a/web/src/tests/test_charting.py b/web/src/tests/test_charting.py new file mode 100644 index 0000000..b56033c --- /dev/null +++ b/web/src/tests/test_charting.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +import pandas as pd + +from web_core.charting import _missing_calendar_day_values, build_figure +from web_core.constants import TREND_NEUTRAL + + +def _make_daily_df(days: list[str]) -> pd.DataFrame: + index = pd.DatetimeIndex([pd.Timestamp(day, tz="UTC") for day in days]) + count = len(index) + return pd.DataFrame( + { + "Open": [100 + i for i in range(count)], + "High": [101 + i for i in range(count)], + "Low": [99 + i for i in range(count)], + "Close": [100.5 + i for i in range(count)], + "Volume": [1000 + i for i in range(count)], + "classification": ["fake"] * count, + "trend_state": [TREND_NEUTRAL] * count, + }, + index=index, + ) + + +def _make_intraday_df(days: list[str]) -> pd.DataFrame: + index_values: list[pd.Timestamp] = [] + for day in days: + session = pd.date_range( + start=f"{day} 09:30:00", + end=f"{day} 15:45:00", + freq="15min", + tz="America/New_York", + ) + index_values.extend(session.to_list()) + + index = pd.DatetimeIndex(index_values) + count = len(index) + return pd.DataFrame( + { + "Open": [100 + i for i in range(count)], + "High": [101 + i for i in range(count)], + "Low": [99 + i for i in range(count)], + "Close": [100.5 + i for i in range(count)], + "Volume": [1000 + i for i in range(count)], + "classification": ["fake"] * count, + "trend_state": [TREND_NEUTRAL] * count, + }, + index=index, + ) + + +def test_missing_calendar_day_values_include_weekday_holidays_only() -> None: + df = _make_daily_df(["2026-02-13", "2026-02-17"]) + missing = _missing_calendar_day_values(df) + + assert "2026-02-15" not in missing + assert "2026-02-16" in missing + + +def test_build_figure_adds_missing_day_rangebreak_values() -> None: + df = _make_daily_df(["2026-02-13", "2026-02-17"]) + fig = build_figure( + df, + gray_fake=False, + interval="1d", + hide_market_closed_gaps=True, + ) + + rangebreak_values: list[str] = [] + for rb in fig.layout.xaxis.rangebreaks: + values = list(getattr(rb, "values", ()) or ()) + rangebreak_values.extend(str(v) for v in values) + + assert "2026-02-16" in rangebreak_values + assert "2026-02-15" not in rangebreak_values + + +def test_build_figure_intraday_uses_category_axis_when_hiding_gaps() -> None: + df = _make_intraday_df(["2026-02-13", "2026-02-17"]) + fig = build_figure( + df, + gray_fake=False, + interval="15m", + hide_market_closed_gaps=True, + ) + + assert fig.layout.xaxis.type == "category" + assert len(fig.layout.xaxis.rangebreaks) == 0 + assert fig.layout.xaxis.tickmode == "array" + assert list(fig.layout.xaxis.ticktext) == ["2/13", "2/17"] + assert len(fig.layout.xaxis.tickvals) == 2 diff --git a/web/src/tests/test_data.py b/web/src/tests/test_data.py new file mode 100644 index 0000000..487b1b5 --- /dev/null +++ b/web/src/tests/test_data.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from datetime import datetime + +import pandas as pd + +from web_core import data as data_module + + +def _make_intraday_df(days: list[str]) -> pd.DataFrame: + index_values: list[pd.Timestamp] = [] + for day in days: + index_values.append(pd.Timestamp(f"{day} 14:30:00", tz="UTC")) + index_values.append(pd.Timestamp(f"{day} 15:30:00", tz="UTC")) + + index = pd.DatetimeIndex(index_values) + return pd.DataFrame( + { + "Open": [100.0 + i for i in range(len(index))], + "High": [101.0 + i for i in range(len(index))], + "Low": [99.0 + i for i in range(len(index))], + "Close": [100.5 + i for i in range(len(index))], + "Volume": [1000 + i for i in range(len(index))], + }, + index=index, + ) + + +def test_fetch_ohlc_day_period_backfills_until_target_trading_days(monkeypatch) -> None: + fixed_now = datetime(2026, 2, 17, 20, 0, 0) + four_day_df = _make_intraday_df(["2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"]) + five_day_df = _make_intraday_df(["2026-02-11", "2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"]) + calls: list[dict[str, object]] = [] + + class FakeTicker: + def history(self, **kwargs: object) -> pd.DataFrame: + calls.append(kwargs) + start = kwargs.get("start") + if start is None: + return four_day_df.copy() + lookback_days = (fixed_now - pd.Timestamp(start).to_pydatetime()).days + return five_day_df.copy() if lookback_days >= 12 else four_day_df.copy() + + monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker()) + monkeypatch.setattr(data_module, "_utc_now", lambda: fixed_now) + data_module.fetch_ohlc.clear() + + out = data_module.fetch_ohlc(symbol="TSLA", interval="2m", period="5d") + session_days = pd.DatetimeIndex(out.index).normalize().unique() + + assert len(session_days) == 5 + assert pd.Timestamp("2026-02-11", tz="UTC") in session_days + assert len(calls) >= 2 + assert all("start" in call and "end" in call for call in calls) + + +def test_fetch_ohlc_non_day_period_uses_period_request(monkeypatch) -> None: + calls: list[dict[str, object]] = [] + month_df = _make_intraday_df(["2026-01-05", "2026-01-06", "2026-01-07"]) + + class FakeTicker: + def history(self, **kwargs: object) -> pd.DataFrame: + calls.append(kwargs) + return month_df.copy() + + monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker()) + data_module.fetch_ohlc.clear() + + out = data_module.fetch_ohlc(symbol="AAPL", interval="1h", period="1mo") + + assert len(out) == len(month_df) + assert len(calls) == 1 + assert calls[0].get("period") == "1mo" + assert "start" not in calls[0] + assert "end" not in calls[0] diff --git a/web/src/web_core/charting.py b/web/src/web_core/charting.py index 8dc7972..c20bf8e 100644 --- a/web/src/web_core/charting.py +++ b/web/src/web_core/charting.py @@ -15,30 +15,36 @@ def _is_daily_interval(interval: str) -> bool: return interval == "1d" -def _infer_session_bounds(df: pd.DataFrame) -> tuple[float, float] | None: +def _intraday_day_ticks(index: pd.DatetimeIndex) -> tuple[list[pd.Timestamp], list[str]]: + if len(index) == 0: + return [], [] + + normalized = index.normalize() + first_mask = ~normalized.duplicated() + tickvals = [index[pos] for pos, keep in enumerate(first_mask) if bool(keep)] + ticktext = [f"{ts.month}/{ts.day}" for ts in tickvals] + return tickvals, ticktext + + +def _missing_calendar_day_values(df: pd.DataFrame) -> list[str]: if df.empty: - return None + return [] index = pd.DatetimeIndex(df.index) - if index.tz is None: - return None + session_days = pd.DatetimeIndex(index.normalize().unique()).sort_values() + if len(session_days) < 2: + return [] - minutes = index.hour * 60 + index.minute - session_df = pd.DataFrame({"date": index.date, "minute": minutes}) - day_bounds = session_df.groupby("date")["minute"].agg(["min", "max"]) - if day_bounds.empty: - return None - - start_minute = float(day_bounds["min"].median()) - # Include the final candle width roughly by adding one median step when possible. - if len(index) > 1: - deltas = pd.Series(index[1:] - index[:-1]).dt.total_seconds().div(60.0) - step = float(deltas[deltas > 0].median()) if not deltas[deltas > 0].empty else 0.0 + if session_days.tz is None: + all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D") else: - step = 0.0 - end_minute = float(day_bounds["max"].median() + step) + all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D", tz=session_days.tz) - return end_minute / 60.0, start_minute / 60.0 + missing_days = all_days.difference(session_days) + # Weekend gaps are already handled by sat->mon bounds; keep explicit values + # for weekday closures (e.g., exchange holidays) to avoid overlap artifacts. + weekday_missing = [day for day in missing_days if day.dayofweek < 5] + return [day.strftime("%Y-%m-%d") for day in weekday_missing] def build_figure( @@ -143,17 +149,26 @@ def build_figure( height=760, ) if hide_market_closed_gaps: - rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])] if _is_intraday_interval(interval): - # Collapse inferred overnight closed hours from the data's timezone/session. - inferred_bounds = _infer_session_bounds(df) - hour_bounds = list(inferred_bounds) if inferred_bounds else [16, 9.5] - rangebreaks.append(dict(pattern="hour", bounds=hour_bounds)) + # Intraday rangebreak combinations can produce axis rendering artifacts + # with some feeds/timezones. Categorical axis keeps chronological bars + # contiguous and removes closed-session gaps reliably. + tickvals, ticktext = _intraday_day_ticks(pd.DatetimeIndex(df.index)) + fig.update_xaxes( + type="category", + categoryorder="array", + categoryarray=list(df.index), + tickmode="array", + tickvals=tickvals, + ticktext=ticktext, + tickangle=0, + ) elif _is_daily_interval(interval): - # Daily charts still show weekend spacing on a continuous date axis. - # Weekend rangebreak removes these non-trading gaps. - pass - fig.update_xaxes(rangebreaks=rangebreaks) + rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])] + missing_days = _missing_calendar_day_values(df) + if missing_days: + rangebreaks.append(dict(values=missing_days)) + fig.update_xaxes(rangebreaks=rangebreaks) fig.update_yaxes(title_text="Price", row=1, col=1) fig.update_yaxes(title_text="Volume", row=2, col=1) diff --git a/web/src/web_core/data.py b/web/src/web_core/data.py index 80213d7..e3652cf 100644 --- a/web/src/web_core/data.py +++ b/web/src/web_core/data.py @@ -1,16 +1,104 @@ from __future__ import annotations -from datetime import datetime +from datetime import datetime, timedelta +import re import pandas as pd import streamlit as st import yfinance as yf +_DAY_PERIOD_PATTERN = re.compile(r"^([1-9]\d*)d$") +_DAY_PERIOD_FETCH_ATTEMPTS = 5 + + +def _utc_now() -> datetime: + return datetime.utcnow() + + +def _parse_day_period(period: str) -> int | None: + match = _DAY_PERIOD_PATTERN.match(str(period).strip().lower()) + if not match: + return None + return int(match.group(1)) + + +def _intraday_max_lookback_days(interval: str) -> int | None: + interval_key = str(interval).strip().lower() + if interval_key == "1m": + return 7 + if interval_key in {"2m", "5m", "15m", "30m", "60m", "90m", "1h"}: + return 60 + return None + + +def _initial_calendar_lookback_days(trading_days: int) -> int: + # Expand beyond target trading days to account for weekends and holidays. + return max(trading_days + 2, int((trading_days * 7) / 5) + 3) + + +def _trading_day_count(df: pd.DataFrame) -> int: + if df.empty: + return 0 + index = pd.DatetimeIndex(df.index) + return int(index.normalize().nunique()) + + +def _trim_to_recent_trading_days(df: pd.DataFrame, trading_days: int) -> pd.DataFrame: + if df.empty or trading_days <= 0: + return df.copy() + + index = pd.DatetimeIndex(df.index) + session_days = pd.DatetimeIndex(index.normalize().unique()).sort_values() + if len(session_days) <= trading_days: + return df.copy() + + keep_days = session_days[-trading_days:] + mask = index.normalize().isin(keep_days) + return df.loc[mask].copy() + + +def _fetch_history_for_period(ticker: yf.Ticker, interval: str, period: str) -> pd.DataFrame: + history_kwargs = {"interval": interval, "auto_adjust": False, "actions": False} + day_period = _parse_day_period(period) + if day_period is None: + return ticker.history(period=period, **history_kwargs) + + lookback_days = _initial_calendar_lookback_days(day_period) + max_lookback_days = _intraday_max_lookback_days(interval) + if max_lookback_days is not None: + lookback_days = min(lookback_days, max_lookback_days) + + now = _utc_now() + best_df = pd.DataFrame() + for _ in range(_DAY_PERIOD_FETCH_ATTEMPTS): + start = now - timedelta(days=lookback_days) + df = ticker.history(start=start, end=now, **history_kwargs) + if len(df) > len(best_df): + best_df = df + + if _trading_day_count(df) >= day_period: + return df + + if max_lookback_days is not None and lookback_days >= max_lookback_days: + break + + next_lookback = int(lookback_days * 1.6) + 1 + if max_lookback_days is not None: + next_lookback = min(next_lookback, max_lookback_days) + if next_lookback <= lookback_days: + break + lookback_days = next_lookback + + if not best_df.empty: + return best_df + return ticker.history(period=period, **history_kwargs) + + @st.cache_data(ttl=60, show_spinner=False) def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame: ticker = yf.Ticker(symbol) - df = ticker.history(period=period, interval=interval, auto_adjust=False, actions=False) + df = _fetch_history_for_period(ticker=ticker, interval=interval, period=period) if df.empty: raise ValueError("No data returned. Check symbol/interval/period compatibility.") @@ -20,7 +108,12 @@ def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame: if missing: raise ValueError(f"Missing required columns: {missing}") - return df[required].dropna().copy() + out = df[required].dropna().copy() + day_period = _parse_day_period(period) + if day_period is not None: + out = _trim_to_recent_trading_days(out, day_period) + + return out def maybe_drop_live_bar(df: pd.DataFrame, interval: str, enabled: bool) -> pd.DataFrame: