Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2026-02-17 09:28:04 -06:00
parent f731aa90e5
commit 246c3be86b
6 changed files with 313 additions and 35 deletions

View File

@ -224,8 +224,8 @@ Treats low-volume bars as fake:
### Hide Market-Closed Gaps (Stocks)
Compresses non-trading time on stock charts:
- `1d`: removes weekend spacing
- intraday (`1m`..`1h`): removes weekends and overnight closed hours
- `1d`: removes weekends and full missing days (for example exchange holidays with no bars)
- intraday (`1m`..`1h`): removes weekends, full missing days, and overnight closed hours; x-axis labels show one date per trading day
Use OFF for 24/7 markets (for example many crypto workflows) when you want continuous time.
@ -284,8 +284,9 @@ streamlit run app.py --server.port 8502
### I still see some time gaps
- For stocks, keep `Hide market-closed gaps (stocks)` ON.
- Daily charts remove weekends; intraday removes weekends + closed hours.
- Some exchange holidays/half-days can still produce spacing depending on the data feed.
- Daily charts remove weekends + full no-data days; intraday removes weekends + full no-data days + closed hours.
- Day-based periods (`1d`, `5d`) backfill to target trading-day count when provider limits allow.
- Half-days can still look visually compressed because they are partial sessions, not missing sessions.
### Exports crash with timestamp errors
- Pull latest project changes (export logic now handles named index columns)

View File

@ -25,6 +25,7 @@ Provide an analysis-only charting tool that classifies OHLC bars as real/fake, t
- filter/toggle settings
- optional advanced controls (alerts, replay, compare symbols, backtest controls, regime filter)
2. App fetches OHLCV via Yahoo Finance (`yfinance`).
- For day-based periods (for example `1d`, `5d`), fetch widens calendar lookback as needed, then trims to the latest N trading days that actually contain bars.
3. Optional last-bar drop (live-bar guard) for intraday intervals.
4. Bars are classified (`real_bull`, `real_bear`, `fake`, `unclassified` for first bar).
5. Trend state is derived from classification sequence.
@ -158,7 +159,8 @@ Important:
Gap handling (`hide_market_closed_gaps`):
- Always removes weekend gaps (`sat` -> `mon`).
- For intraday intervals, also removes inferred overnight hours using session bounds.
- Removes full missing calendar days between first/last bar (for example market holidays with no bars).
- For intraday intervals, uses contiguous bar-order x-axis (no closed-session spacing) and day-level tick labels.
- For daily interval, weekend break removal is applied.
## 8. Help and Onboarding Behavior

View File

@ -0,0 +1,92 @@
from __future__ import annotations
import pandas as pd
from web_core.charting import _missing_calendar_day_values, build_figure
from web_core.constants import TREND_NEUTRAL
def _make_daily_df(days: list[str]) -> pd.DataFrame:
    """Build a synthetic classified OHLCV frame with one bar per entry in ``days``.

    Each entry is parsed as a UTC timestamp. Prices and volume increase by one
    per bar, every bar is classified ``"fake"`` and carries the neutral trend.
    """
    stamps = pd.DatetimeIndex([pd.Timestamp(day, tz="UTC") for day in days])
    bar_count = len(stamps)
    steps = range(bar_count)
    columns = {
        "Open": [100 + step for step in steps],
        "High": [101 + step for step in steps],
        "Low": [99 + step for step in steps],
        "Close": [100.5 + step for step in steps],
        "Volume": [1000 + step for step in steps],
        "classification": ["fake"] * bar_count,
        "trend_state": [TREND_NEUTRAL] * bar_count,
    }
    return pd.DataFrame(columns, index=stamps)
def _make_intraday_df(days: list[str]) -> pd.DataFrame:
    """Build a synthetic classified 15-minute frame covering each day in ``days``.

    Every day contributes bars from 09:30 through 15:45 in the
    ``America/New_York`` timezone. Prices and volume increase by one per bar,
    every bar is classified ``"fake"`` and carries the neutral trend.
    """
    bar_times: list[pd.Timestamp] = [
        stamp
        for day in days
        for stamp in pd.date_range(
            start=f"{day} 09:30:00",
            end=f"{day} 15:45:00",
            freq="15min",
            tz="America/New_York",
        )
    ]
    stamps = pd.DatetimeIndex(bar_times)
    bar_count = len(stamps)
    steps = range(bar_count)
    columns = {
        "Open": [100 + step for step in steps],
        "High": [101 + step for step in steps],
        "Low": [99 + step for step in steps],
        "Close": [100.5 + step for step in steps],
        "Volume": [1000 + step for step in steps],
        "classification": ["fake"] * bar_count,
        "trend_state": [TREND_NEUTRAL] * bar_count,
    }
    return pd.DataFrame(columns, index=stamps)
def test_missing_calendar_day_values_include_weekday_holidays_only() -> None:
    """A Friday -> Tuesday gap reports the missing Monday but not the Sunday."""
    frame = _make_daily_df(["2026-02-13", "2026-02-17"])

    gaps = _missing_calendar_day_values(frame)

    # 2026-02-16 is a weekday with no bar; 2026-02-15 is a weekend day.
    assert "2026-02-16" in gaps
    assert "2026-02-15" not in gaps
def test_build_figure_adds_missing_day_rangebreak_values() -> None:
    """Daily charts with gap hiding list the missing weekday as a rangebreak value."""
    frame = _make_daily_df(["2026-02-13", "2026-02-17"])

    figure = build_figure(
        frame,
        gray_fake=False,
        interval="1d",
        hide_market_closed_gaps=True,
    )

    # Flatten the explicit `values` of every rangebreak; breaks that only
    # define `bounds` contribute nothing.
    collected: list[str] = [
        str(value)
        for rangebreak in figure.layout.xaxis.rangebreaks
        for value in (getattr(rangebreak, "values", ()) or ())
    ]

    assert "2026-02-16" in collected
    assert "2026-02-15" not in collected
def test_build_figure_intraday_uses_category_axis_when_hiding_gaps() -> None:
    """Intraday charts with gap hiding use a category x-axis with one tick per day."""
    frame = _make_intraday_df(["2026-02-13", "2026-02-17"])

    figure = build_figure(
        frame,
        gray_fake=False,
        interval="15m",
        hide_market_closed_gaps=True,
    )

    # Category axis, no rangebreaks, and explicit per-day tick labels.
    assert figure.layout.xaxis.type == "category"
    assert len(figure.layout.xaxis.rangebreaks) == 0
    assert figure.layout.xaxis.tickmode == "array"
    assert list(figure.layout.xaxis.ticktext) == ["2/13", "2/17"]
    assert len(figure.layout.xaxis.tickvals) == 2

View File

@ -0,0 +1,75 @@
from __future__ import annotations
from datetime import datetime
import pandas as pd
from web_core import data as data_module
def _make_intraday_df(days: list[str]) -> pd.DataFrame:
    """Build a synthetic OHLCV frame with two UTC bars (14:30 and 15:30) per day.

    Prices are floats and volume is an integer; each rises by one per bar.
    """
    bar_clock_times = ("14:30:00", "15:30:00")
    stamps = pd.DatetimeIndex(
        [
            pd.Timestamp(f"{day} {clock}", tz="UTC")
            for day in days
            for clock in bar_clock_times
        ]
    )
    steps = range(len(stamps))
    columns = {
        "Open": [100.0 + step for step in steps],
        "High": [101.0 + step for step in steps],
        "Low": [99.0 + step for step in steps],
        "Close": [100.5 + step for step in steps],
        "Volume": [1000 + step for step in steps],
    }
    return pd.DataFrame(columns, index=stamps)
def test_fetch_ohlc_day_period_backfills_until_target_trading_days(monkeypatch) -> None:
    """A ``5d`` request keeps widening its start/end window until five days of bars arrive."""
    frozen_now = datetime(2026, 2, 17, 20, 0, 0)
    short_history = _make_intraday_df(["2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"])
    full_history = _make_intraday_df(["2026-02-11", "2026-02-12", "2026-02-13", "2026-02-14", "2026-02-17"])
    recorded_calls: list[dict[str, object]] = []

    class FakeTicker:
        def history(self, **kwargs: object) -> pd.DataFrame:
            # Only a request reaching back at least 12 days sees the fifth day.
            recorded_calls.append(kwargs)
            start = kwargs.get("start")
            if start is not None:
                lookback_days = (frozen_now - pd.Timestamp(start).to_pydatetime()).days
                if lookback_days >= 12:
                    return full_history.copy()
            return short_history.copy()

    monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker())
    monkeypatch.setattr(data_module, "_utc_now", lambda: frozen_now)
    # Drop any cached result so the fake ticker is actually exercised.
    data_module.fetch_ohlc.clear()

    result = data_module.fetch_ohlc(symbol="TSLA", interval="2m", period="5d")

    days_seen = pd.DatetimeIndex(result.index).normalize().unique()
    assert len(days_seen) == 5
    assert pd.Timestamp("2026-02-11", tz="UTC") in days_seen
    assert len(recorded_calls) >= 2
    assert all("start" in call and "end" in call for call in recorded_calls)
def test_fetch_ohlc_non_day_period_uses_period_request(monkeypatch) -> None:
    """A non-day period such as ``1mo`` issues one plain ``period=`` request."""
    month_history = _make_intraday_df(["2026-01-05", "2026-01-06", "2026-01-07"])
    recorded_calls: list[dict[str, object]] = []

    class FakeTicker:
        def history(self, **kwargs: object) -> pd.DataFrame:
            recorded_calls.append(kwargs)
            return month_history.copy()

    monkeypatch.setattr(data_module.yf, "Ticker", lambda symbol: FakeTicker())
    # Drop any cached result so the fake ticker is actually exercised.
    data_module.fetch_ohlc.clear()

    result = data_module.fetch_ohlc(symbol="AAPL", interval="1h", period="1mo")

    assert len(result) == len(month_history)
    assert len(recorded_calls) == 1
    only_call = recorded_calls[0]
    assert only_call.get("period") == "1mo"
    assert "start" not in only_call
    assert "end" not in only_call

View File

@ -15,30 +15,36 @@ def _is_daily_interval(interval: str) -> bool:
return interval == "1d"
def _infer_session_bounds(df: pd.DataFrame) -> tuple[float, float] | None:
def _intraday_day_ticks(index: pd.DatetimeIndex) -> tuple[list[pd.Timestamp], list[str]]:
    """Choose one x-axis tick per calendar day of an intraday index.

    Returns:
        ``(tickvals, ticktext)`` where ``tickvals`` holds the first timestamp
        encountered for each distinct normalized day (in index order) and
        ``ticktext`` holds the matching ``month/day`` labels. Both lists are
        empty for an empty index.
    """
    if not len(index):
        return [], []

    day_of_bar = index.normalize()
    # True only at the first position where each day appears.
    opens_new_day = ~day_of_bar.duplicated()
    tickvals = list(index[opens_new_day])
    ticktext = [f"{stamp.month}/{stamp.day}" for stamp in tickvals]
    return tickvals, ticktext
def _missing_calendar_day_values(df: pd.DataFrame) -> list[str]:
if df.empty:
return None
return []
index = pd.DatetimeIndex(df.index)
if index.tz is None:
return None
session_days = pd.DatetimeIndex(index.normalize().unique()).sort_values()
if len(session_days) < 2:
return []
minutes = index.hour * 60 + index.minute
session_df = pd.DataFrame({"date": index.date, "minute": minutes})
day_bounds = session_df.groupby("date")["minute"].agg(["min", "max"])
if day_bounds.empty:
return None
start_minute = float(day_bounds["min"].median())
# Include the final candle width roughly by adding one median step when possible.
if len(index) > 1:
deltas = pd.Series(index[1:] - index[:-1]).dt.total_seconds().div(60.0)
step = float(deltas[deltas > 0].median()) if not deltas[deltas > 0].empty else 0.0
if session_days.tz is None:
all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D")
else:
step = 0.0
end_minute = float(day_bounds["max"].median() + step)
all_days = pd.date_range(start=session_days[0], end=session_days[-1], freq="D", tz=session_days.tz)
return end_minute / 60.0, start_minute / 60.0
missing_days = all_days.difference(session_days)
# Weekend gaps are already handled by sat->mon bounds; keep explicit values
# for weekday closures (e.g., exchange holidays) to avoid overlap artifacts.
weekday_missing = [day for day in missing_days if day.dayofweek < 5]
return [day.strftime("%Y-%m-%d") for day in weekday_missing]
def build_figure(
@ -143,16 +149,25 @@ def build_figure(
height=760,
)
if hide_market_closed_gaps:
rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])]
if _is_intraday_interval(interval):
# Collapse inferred overnight closed hours from the data's timezone/session.
inferred_bounds = _infer_session_bounds(df)
hour_bounds = list(inferred_bounds) if inferred_bounds else [16, 9.5]
rangebreaks.append(dict(pattern="hour", bounds=hour_bounds))
# Intraday rangebreak combinations can produce axis rendering artifacts
# with some feeds/timezones. Categorical axis keeps chronological bars
# contiguous and removes closed-session gaps reliably.
tickvals, ticktext = _intraday_day_ticks(pd.DatetimeIndex(df.index))
fig.update_xaxes(
type="category",
categoryorder="array",
categoryarray=list(df.index),
tickmode="array",
tickvals=tickvals,
ticktext=ticktext,
tickangle=0,
)
elif _is_daily_interval(interval):
# Daily charts still show weekend spacing on a continuous date axis.
# Weekend rangebreak removes these non-trading gaps.
pass
rangebreaks: list[dict[str, object]] = [dict(bounds=["sat", "mon"])]
missing_days = _missing_calendar_day_values(df)
if missing_days:
rangebreaks.append(dict(values=missing_days))
fig.update_xaxes(rangebreaks=rangebreaks)
fig.update_yaxes(title_text="Price", row=1, col=1)

View File

@ -1,16 +1,104 @@
from __future__ import annotations
from datetime import datetime
from datetime import datetime, timedelta
import re
import pandas as pd
import streamlit as st
import yfinance as yf
# Matches day-count period strings such as "1d" or "5d": a positive integer
# without leading zeros followed by "d". Group 1 captures the day count.
_DAY_PERIOD_PATTERN = re.compile(r"^([1-9]\d*)d$")
# Upper bound on history requests issued while widening the calendar lookback
# for a day-based period.
_DAY_PERIOD_FETCH_ATTEMPTS = 5
def _utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (``tzinfo`` is ``None``).

    Kept as a separate function so tests can monkeypatch the clock.

    ``datetime.utcnow()`` is deprecated since Python 3.12, so the value is
    taken from an aware clock and the tzinfo is stripped afterwards. The result
    stays naive, so arithmetic against other naive datetimes keeps working.
    """
    # Local import: the module header only pulls in `datetime` and `timedelta`.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)
def _parse_day_period(period: str) -> int | None:
    """Extract the day count from a day-based period string.

    The input is stringified, stripped and lower-cased before matching, so
    ``" 5D "`` yields ``5``. Returns ``None`` when the value does not match
    ``_DAY_PERIOD_PATTERN`` (for example ``"1mo"``).
    """
    normalized = str(period).strip().lower()
    found = _DAY_PERIOD_PATTERN.match(normalized)
    return int(found.group(1)) if found else None
def _intraday_max_lookback_days(interval: str) -> int | None:
    """Return the lookback cap, in calendar days, applied to an intraday interval.

    The interval is stringified, stripped and lower-cased first. ``"1m"`` maps
    to 7 days, the other listed intraday intervals map to 60 days, and anything
    else returns ``None`` (no cap).

    NOTE(review): these caps presumably mirror the data provider's intraday
    history limits - confirm against the provider documentation.
    """
    caps_by_interval = {
        "1m": 7,
        **dict.fromkeys(("2m", "5m", "15m", "30m", "60m", "90m", "1h"), 60),
    }
    return caps_by_interval.get(str(interval).strip().lower())
def _initial_calendar_lookback_days(trading_days: int) -> int:
    """Return the first calendar-day window to request for ``trading_days`` sessions.

    The window is padded beyond the trading-day target to leave room for
    weekends and holidays: the larger of ``trading_days + 2`` and a 7/5 scaling
    of the target plus three days.
    """
    padded_window = trading_days + 2
    week_scaled_window = int((trading_days * 7) / 5) + 3
    return max(padded_window, week_scaled_window)
def _trading_day_count(df: pd.DataFrame) -> int:
    """Count the distinct calendar days present in the index of ``df``.

    Returns 0 for an empty frame.
    """
    if df.empty:
        return 0
    bar_days = pd.DatetimeIndex(df.index).normalize()
    return int(bar_days.nunique())
def _trim_to_recent_trading_days(df: pd.DataFrame, trading_days: int) -> pd.DataFrame:
    """Keep only the bars that fall on the latest ``trading_days`` distinct days.

    A copy of the whole frame is returned when it is empty, when
    ``trading_days`` is not positive, or when it holds no more than
    ``trading_days`` distinct days. The input frame is never modified.
    """
    if df.empty or trading_days <= 0:
        return df.copy()

    bar_days = pd.DatetimeIndex(df.index).normalize()
    distinct_days = pd.DatetimeIndex(bar_days.unique()).sort_values()
    if len(distinct_days) > trading_days:
        recent_days = distinct_days[-trading_days:]
        return df.loc[bar_days.isin(recent_days)].copy()
    return df.copy()
def _fetch_history_for_period(ticker: yf.Ticker, interval: str, period: str) -> pd.DataFrame:
    """Fetch price history, backfilling day-based periods to the target day count.

    Non-day periods (anything ``_parse_day_period`` rejects) are passed straight
    through as a ``period=`` request. Day-based periods are requested with an
    explicit ``start``/``end`` window that is widened, up to
    ``_DAY_PERIOD_FETCH_ATTEMPTS`` times and never beyond the intraday cap for
    ``interval``, until the response holds at least the requested number of
    distinct days.

    Returns:
        The first response that reaches the target day count; otherwise the
        response with the most rows; otherwise (every attempt came back empty)
        the result of a plain ``period=`` request.

    NOTE(review): ``start``/``end`` are naive datetimes from ``_utc_now``;
    confirm how the provider interprets naive bounds for non-UTC exchanges.
    """
    request_kwargs = {"interval": interval, "auto_adjust": False, "actions": False}

    target_days = _parse_day_period(period)
    if target_days is None:
        return ticker.history(period=period, **request_kwargs)

    window_days = _initial_calendar_lookback_days(target_days)
    cap_days = _intraday_max_lookback_days(interval)
    if cap_days is not None:
        window_days = min(window_days, cap_days)

    window_end = _utc_now()
    fullest = pd.DataFrame()
    for _attempt in range(_DAY_PERIOD_FETCH_ATTEMPTS):
        window_start = window_end - timedelta(days=window_days)
        candidate = ticker.history(start=window_start, end=window_end, **request_kwargs)
        # Remember the largest response in case the target is never reached.
        if len(candidate) > len(fullest):
            fullest = candidate
        if _trading_day_count(candidate) >= target_days:
            return candidate
        # Already at the cap: a wider window is not allowed.
        if cap_days is not None and window_days >= cap_days:
            break
        widened_days = int(window_days * 1.6) + 1
        if cap_days is not None:
            widened_days = min(widened_days, cap_days)
        if widened_days <= window_days:
            break
        window_days = widened_days

    if fullest.empty:
        return ticker.history(period=period, **request_kwargs)
    return fullest
@st.cache_data(ttl=60, show_spinner=False)
def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame:
ticker = yf.Ticker(symbol)
df = ticker.history(period=period, interval=interval, auto_adjust=False, actions=False)
df = _fetch_history_for_period(ticker=ticker, interval=interval, period=period)
if df.empty:
raise ValueError("No data returned. Check symbol/interval/period compatibility.")
@ -20,7 +108,12 @@ def fetch_ohlc(symbol: str, interval: str, period: str) -> pd.DataFrame:
if missing:
raise ValueError(f"Missing required columns: {missing}")
return df[required].dropna().copy()
out = df[required].dropna().copy()
day_period = _parse_day_period(period)
if day_period is not None:
out = _trim_to_recent_trading_days(out, day_period)
return out
def maybe_drop_live_bar(df: pd.DataFrame, interval: str, enabled: bool) -> pd.DataFrame: