"""PortfolioLens — Phase 2 proof-of-concept instrument universe.

A single, explicit definition of *what* the PoC pulls, mapping each instrument
back to the Chapter-1 transmission channels. Keeping the universe in one place
prevents scope creep and lets the chapter, the pull orchestrator, and the
quality checks all agree on the same list.

Each entry:
    symbol     — the source's native identifier (e.g. "^VIX", "BTC-USD", "DGS10")
    filekey    — filesystem-safe name used for parquet files / manifest
    name       — human-readable description
    channel    — Chapter-1 transmission channel this instrument proxies
    source     — one of: yfinance | fred | alphavantage | eia | treasury
    asset_type — equity-ETF | equity | index | crypto | commodity | commodity-price |
                 rate | rate-spread | credit-spread | macro-index | macro-rate |
                 macro-level | FX
    frequency  — expected native cadence (daily | business-day | weekly | monthly |
                 quarterly)
    needs_key  — True if the source requires an API key
    notes      — short rationale
"""

from __future__ import annotations

from dataclasses import dataclass, asdict
from typing import List

import pandas as pd


@dataclass(frozen=True)
class Instrument:
    """One immutable row of the PoC instrument universe.

    The fields are declared in the same order in which the ``UNIVERSE`` table
    passes them positionally, so reordering them here would silently shift
    every row's values into the wrong columns.
    """

    symbol: str  # the source's native identifier (e.g. "^VIX", "BTC-USD", "DGS10")
    filekey: str  # filesystem-safe name used for parquet files / manifest
    name: str  # human-readable description
    channel: str  # Chapter-1 transmission channel this instrument proxies
    source: str  # data source tag, e.g. "yfinance", "fred", "alphavantage", "eia", "treasury"
    asset_type: str  # coarse instrument category, e.g. "equity-ETF", "rate", "FX"
    frequency: str  # expected native cadence, e.g. "daily", "business-day", "monthly"
    needs_key: bool  # True if the source requires an API key
    notes: str = ""  # short rationale; the only field with a default


# ─────────────────────────────────────────────────────────────────────────────
# The universe. ~35 instruments — one+ proxy per channel, plus macro/rates,
# credit / regime-dashboard indicators, crypto, one single-name, and minimal
# Alpha Vantage / EIA / Treasury entries to demonstrate all four mandated free
# sources.
# NOTE(review): five distinct `source` values appear below (yfinance, fred,
# alphavantage, eia, treasury) — confirm which four are the "mandated" ones.
#
# Rows are positional; the argument order is
#   symbol, filekey, name, channel,
#   source, asset_type, frequency, needs_key, notes
# List order is preserved as the row order of `universe_df()`.
# ─────────────────────────────────────────────────────────────────────────────
UNIVERSE: List[Instrument] = [
    # ── yfinance (no API key) ────────────────────────────────────────────────
    Instrument("SPY", "SPY", "SPDR S&P 500 ETF Trust", "Broad-market baseline",
               "yfinance", "equity-ETF", "business-day", False, "Benchmark / market beta"),
    Instrument("XLE", "XLE", "Energy Select Sector SPDR Fund", "Energy",
               "yfinance", "equity-ETF", "business-day", False, "Oil & gas majors exposure"),
    Instrument("ITA", "ITA", "iShares U.S. Aerospace & Defense ETF", "Defense procurement",
               "yfinance", "equity-ETF", "business-day", False, "LMT/RTX/NOC/GD basket; inception 2006"),
    Instrument("DBA", "DBA", "Invesco DB Agriculture Fund", "Agricultural commodities",
               "yfinance", "equity-ETF", "business-day", False, "Grains/softs basket; inception 2007"),
    Instrument("BDRY", "BDRY", "Breakwave Dry Bulk Shipping ETF", "Shipping & insurance",
               "yfinance", "equity-ETF", "business-day", False, "Freight-rate proxy; inception 2018"),
    Instrument("GLD", "GLD", "SPDR Gold Shares", "Safe havens & FX",
               "yfinance", "equity-ETF", "business-day", False, "Flight-to-safety; inception 2004"),
    Instrument("^VIX", "VIX", "CBOE Volatility Index", "Safe havens & FX",
               "yfinance", "index", "business-day", False, "Risk-off / equity stress proxy"),
    Instrument("UUP", "UUP", "Invesco DB US Dollar Index Bullish Fund", "Safe havens & FX",
               "yfinance", "equity-ETF", "business-day", False, "USD proxy (^DXY unreliable in yfinance); inception 2007"),
    Instrument("BTC-USD", "BTC-USD", "Bitcoin (USD)", "Crypto (regime-dependent)",
               "yfinance", "crypto", "daily", False, "Trades 7 days/week; history from ~2014"),
    Instrument("ETH-USD", "ETH-USD", "Ethereum (USD)", "Crypto (regime-dependent)",
               "yfinance", "crypto", "daily", False, "Trades 7 days/week; history from ~2017"),
    Instrument("LMT", "LMT", "Lockheed Martin Corp.", "Defense procurement",
               "yfinance", "equity", "business-day", False, "Single-name vs ETF behavior check"),

    # ── FRED (API key, or keyless pandas-datareader fallback) ─────────────────
    Instrument("DGS10", "DGS10", "10-Year Treasury Constant Maturity Rate", "Safe havens & FX",
               "fred", "rate", "business-day", True, "Safe-haven rates / policy stance"),
    Instrument("DGS2", "DGS2", "2-Year Treasury Constant Maturity Rate", "Safe havens & FX",
               "fred", "rate", "business-day", True, "Short end of the curve"),
    Instrument("T10Y3M", "T10Y3M", "10Y minus 3M Treasury Spread", "Macro context",
               "fred", "rate-spread", "business-day", True, "Recession / term-structure signal"),
    Instrument("DCOILWTICO", "DCOILWTICO", "WTI Crude Oil Spot Price", "Energy",
               "fred", "commodity-price", "business-day", True, "Energy supply-shock signal (authoritative)"),
    Instrument("DHHNGSP", "DHHNGSP", "Henry Hub Natural Gas Spot Price", "Energy",
               "fred", "commodity-price", "business-day", True, "US natural-gas spot"),
    Instrument("VIXCLS", "VIXCLS", "CBOE Volatility Index (FRED)", "Safe havens & FX",
               "fred", "index", "business-day", True, "Cross-checks the yfinance ^VIX"),
    Instrument("CPILFESL", "CPILFESL", "Core CPI (All Urban, less food & energy)", "Macro context",
               "fred", "macro-index", "monthly", True, "Inflation; release-lagged (look-ahead caution)"),
    Instrument("UNRATE", "UNRATE", "Unemployment Rate", "Macro context",
               "fred", "macro-rate", "monthly", True, "Labor-market slack; release-lagged"),
    Instrument("WPU01210101", "WPU_WHEAT", "PPI by Commodity: Farm Products: Wheat", "Agricultural commodities",
               "fred", "macro-index", "monthly", True, "Conflict-sensitive grain price (IMF global-wheat series was discontinued)"),

    # ── Regime-dashboard & strategy.md indicators (FRED keyless + yfinance) ────
    # NOTE(review): the header says "keyless", yet every FRED row in this section
    # sets needs_key=True, same as the FRED section above — confirm the intent.
    Instrument("BAMLH0A0HYM2", "HY_OAS", "ICE BofA US High Yield OAS", "Credit / risk regime",
               "fred", "credit-spread", "business-day", True, "Top risk-regime gauge (bands 300/450/600/800 bps) BUT FRED/ICE licensing limits free history to ~mid-2023+ (no past recession in-sample)"),
    Instrument("BAMLC0A0CM", "IG_OAS", "ICE BofA US Corporate (IG) OAS", "Credit / risk regime",
               "fred", "credit-spread", "business-day", True, "HY-vs-IG = idiosyncratic vs systemic; same ~2023+ free-history limit as HY OAS"),
    Instrument("BAA10Y", "BAA10Y", "Moody's Baa Corporate minus 10Y Treasury (credit spread)", "Credit / risk regime",
               "fred", "credit-spread", "business-day", True, "Long-history (1990+) credit spread spanning 2008 & 2020 — free Moody's series; the recession-tested gauge the ICE OAS cannot be"),
    Instrument("AAA10Y", "AAA10Y", "Moody's Aaa Corporate minus 10Y Treasury (credit spread)", "Credit / risk regime",
               "fred", "credit-spread", "business-day", True, "With BAA10Y gives the Baa-Aaa quality spread"),
    Instrument("NFCI", "NFCI", "Chicago Fed National Financial Conditions Index", "Financial conditions",
               "fred", "index", "weekly", True, "Positive = tighter than average; leads slowdowns"),
    Instrument("ANFCI", "ANFCI", "Chicago Fed Adjusted NFCI", "Financial conditions",
               "fred", "index", "weekly", True, "Business-cycle component stripped out"),
    Instrument("T10YIE", "T10YIE", "10-Year Breakeven Inflation Rate", "Inflation expectations",
               "fred", "rate", "business-day", True, "Market-implied inflation = DGS10 - DFII10"),
    Instrument("DFII10", "DFII10", "10-Year TIPS Real Yield", "Real rates",
               "fred", "rate", "business-day", True, "Drives gold (~-0.8 corr) and long-duration equity valuation"),
    Instrument("DFF", "DFF", "Effective Federal Funds Rate", "Monetary policy",
               "fred", "rate", "business-day", True, "Policy anchor for risk assets"),
    Instrument("M2SL", "M2SL", "M2 Money Supply (seasonally adjusted)", "Liquidity",
               "fred", "macro-level", "monthly", True, "Money->inflation link loose outside extremes (2021 spike)"),
    Instrument("INDPRO", "INDPRO", "Industrial Production Index", "Growth (coincident)",
               "fred", "macro-index", "monthly", True, "CEI component; cyclical-sector sensitive"),
    Instrument("SAHMREALTIME", "SAHM", "Sahm Rule Recession Indicator (real-time)", "Recession regime",
               "fred", "macro-rate", "monthly", True, "Onset signal; >=0.50 triggers; derived from UNRATE"),
    Instrument("HG=F", "COPPER", "COMEX Copper Futures (Dr. Copper)", "Commodity / growth barometer",
               "yfinance", "commodity", "business-day", False, "Copper/gold ratio tracks 10y yield & risk appetite"),

    # ── Alpha Vantage (API key; used sparingly, <=25 calls/day) ───────────────
    Instrument("EUR/USD", "EURUSD_AV", "EUR/USD daily (Alpha Vantage FX_DAILY)", "Safe havens & FX",
               "alphavantage", "FX", "business-day", True, "Demonstrates the AV FX endpoint"),

    # ── EIA (API key) ─────────────────────────────────────────────────────────
    Instrument("PET.RWTC.D", "EIA_WTI", "WTI Crude Oil Spot (EIA)", "Energy",
               "eia", "commodity-price", "business-day", True, "Demonstrates the EIA v2 API; cross-checks FRED WTI"),

    # ── US Treasury fiscaldata (keyless) ──────────────────────────────────────
    Instrument("EURO", "TREAS_EUR", "Treasury Reporting Rate of Exchange — Euro", "Safe havens & FX",
               "treasury", "FX", "quarterly", False, "Demonstrates keyless Treasury fiscaldata API"),
]


def universe_df() -> pd.DataFrame:
    """Build a tabular view of ``UNIVERSE`` (e.g. for tables in the chapter).

    Every instrument is converted to a plain dict and becomes one row, in
    ``UNIVERSE`` order; the dict keys (the ``Instrument`` field names) become
    the columns.
    """
    records = list(map(asdict, UNIVERSE))
    return pd.DataFrame(records)


if __name__ == "__main__":
    # Script mode: print a one-line summary followed by the full universe table.
    # Lift pandas' row cap and widen the console so no row or column is elided.
    pd.set_option("display.width", 160)
    pd.set_option("display.max_rows", None)

    table = universe_df()
    n_instruments = len(table)
    n_channels = table["channel"].nunique()
    n_sources = table["source"].nunique()
    print(f"PoC universe: {n_instruments} instruments across {n_channels} channels, "
          f"{n_sources} sources.\n")

    # `filekey` and `notes` are left out to keep the printed table readable.
    shown_columns = ["symbol", "name", "channel", "source", "asset_type", "frequency", "needs_key"]
    print(table[shown_columns].to_string(index=False))
