#!/usr/bin/env python3
"""
Global Wealth Data Fetcher
Fetches and combines data from multiple sources to create an accurate,
up-to-date visualization of global wealth distribution.

Data Sources:
- Countries: World Bank API (GDP data)
- Billionaires: RTB-API via Statically CDN (Forbes Real-Time data)
- Companies: yfinance library (free, no API key needed)
"""

import json
import os
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
import requests
from pathlib import Path

class GlobalWealthDataFetcher:
    """Fetches and caches data from multiple sources for wealth visualization."""

    def __init__(self, cache_dir: str = "./cache"):
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)

        # API configurations
        self.world_bank_base = "https://api.worldbank.org/v2"
        # Use Statically CDN for reliable RTB-API access
        self.rtb_api_base = "https://cdn.statically.io/gh/komed3/rtb-api/v1/api"

    def _get_cache_path(self, data_type: str) -> Path:
        """Generate cache file path for a data type."""
        return self.cache_dir / f"{data_type}_{datetime.now().strftime('%Y%m%d')}.json"

    def _is_cache_valid(self, cache_path: Path, max_age_hours: int = 24) -> bool:
        """Check if cache file exists and is recent enough."""
        if not cache_path.exists():
            return False

        file_age = datetime.now() - datetime.fromtimestamp(cache_path.stat().st_mtime)
        return file_age < timedelta(hours=max_age_hours)

    def _fetch_with_cache(self, data_type: str, fetch_func, max_age_hours: int = 24) -> List[Dict]:
        """Fetch data with caching logic."""
        cache_path = self._get_cache_path(data_type)

        # Use cache if valid
        if self._is_cache_valid(cache_path, max_age_hours):
            print(f"Using cached {data_type} data from {cache_path}")
            with open(cache_path, 'r') as f:
                return json.load(f)

        # Fetch fresh data
        print(f"Fetching fresh {data_type} data...")
        data = fetch_func()

        # Save to cache
        with open(cache_path, 'w') as f:
            json.dump(data, f, indent=2)

        return data

    def fetch_country_gdp_data(self) -> List[Dict[str, Any]]:
        """Fetch the most recent GDP figure for each country from the World Bank API.

        Requests indicator ``NY.GDP.MKTP.CD`` for the current year and the
        five years before it, skips aggregate/region rows, and keeps the
        latest year that has a value for each country. The result goes
        through the cache with a 24-hour validity.

        Returns:
            A list of dicts with keys ``name``, ``gdp``, ``year``,
            ``country_code`` and ``type`` (always ``"country"``), sorted by
            GDP in descending order. If a request fails, pagination stops
            and whatever was collected so far is returned (possibly an
            empty list).
        """
        def _fetch():
            countries_by_code = {}  # Latest entry seen per country code

            # Some countries only report data for earlier years, so a window
            # of several years is requested instead of a single year.
            current_year = datetime.now().year
            date_range = f"{current_year-5}:{current_year}"

            print(f"Fetching GDP data for date range: {date_range}")

            # Aggregate/region codes to exclude.
            # AFE / AFW are the "Africa Eastern and Southern" and
            # "Africa Western and Central" aggregates.
            # NOTE(review): XKX is the World Bank code for Kosovo rather than
            # an aggregate; it stays excluded here to keep the existing
            # output - confirm whether that is intended.
            exclude_codes = {'WLD', 'HIC', 'OED', 'LIC', 'UMC', 'LMC', 'EAS', 'ECS',
                             'LCN', 'MEA', 'NAC', 'SAS', 'SSA', 'PST', 'PRE', 'LDC',
                             'IDX', 'IBD', 'IBT', 'IDB', 'IDT', 'IDA', 'PSS', 'SSF',
                             'EAR', 'CSS', 'EAP', 'EMU', 'ECA', 'EUU', 'FCS', 'HPC',
                             'LAC', 'LTE', 'MNA', 'MIC', 'ARB', 'TEA', 'TEC', 'TLA',
                             'TMN', 'TSA', 'TSS', 'OSS', 'INX', 'XKX', 'SST',
                             'AFE', 'AFW'}

            # Name fragments of aggregates that slip past the code list.
            # They are matched against the lower-cased name, so every
            # fragment must itself be lower case. "ida" is spelled out as
            # phrases because the bare substring also occurs in
            # "Trinidad and Tobago".
            exclude_patterns = ('income', 'oecd', 'dividend', 'ibrd',
                                'ida only', 'ida total', 'ida &', 'blend',
                                'fragile', 'developing', 'developed', 'classified',
                                'central europe', 'euro area')

            url = f"{self.world_bank_base}/country/all/indicator/NY.GDP.MKTP.CD"
            page = 1
            per_page = 1000
            total_fetched = 0

            while page <= 10:  # Safety limit of 10 pages
                params = {
                    "format": "json",
                    "per_page": per_page,
                    "page": page,
                    "date": date_range,
                    "source": 2  # World Development Indicators
                }

                try:
                    response = requests.get(url, params=params, timeout=30)
                    response.raise_for_status()
                    data = response.json()

                    # The payload is [metadata, entries]; anything else means
                    # there is no (more) data.
                    if len(data) < 2 or not isinstance(data[1], list) or len(data[1]) == 0:
                        break

                    entries = data[1]
                    total_fetched += len(entries)

                    for entry in entries:
                        gdp = entry.get("value")
                        country_code = entry.get("countryiso3code", "")
                        # Tolerate a null "country" object or a null name.
                        country_name = (entry.get("country") or {}).get("value") or ""

                        if gdp is None or not country_code or len(country_code) != 3:
                            continue
                        if country_code in exclude_codes:
                            continue
                        lowered_name = country_name.lower()
                        if any(pattern in lowered_name for pattern in exclude_patterns):
                            continue

                        year = int(entry["date"])

                        # Keep only the most recent year for each country
                        previous = countries_by_code.get(country_code)
                        if previous is None or year > previous["year"]:
                            countries_by_code[country_code] = {
                                "name": country_name,
                                "gdp": gdp,
                                "year": year,
                                "country_code": country_code,
                                "type": "country"
                            }

                    # The metadata element reports the total number of pages
                    if len(data) > 0 and isinstance(data[0], dict):
                        total_pages = data[0].get("pages", 1)
                        if page >= total_pages:
                            break
                    else:
                        break

                    page += 1

                except Exception as e:
                    # Best effort: keep whatever earlier pages delivered.
                    print(f"Error fetching page {page}: {e}")
                    break

            countries = list(countries_by_code.values())

            print(f"Total GDP entries fetched: {total_fetched}")
            print(f"Unique countries with GDP data: {len(countries)}")

            # Sort by GDP descending
            countries.sort(key=lambda x: x["gdp"], reverse=True)
            return countries

        return self._fetch_with_cache("countries", _fetch, max_age_hours=24)

    def fetch_billionaire_data(self) -> List[Dict[str, Any]]:
        """Fetch billionaire data from RTB-API via the Statically CDN.

        Reads the ``list/rtb/latest`` endpoint and converts up to the first
        100 entries of its ``list`` array. Malformed entries are skipped
        individually. If the request fails, processing raises, or no usable
        entry remains, the built-in fallback data is used instead. The
        result goes through the cache with a 24-hour validity.

        Returns:
            A list of dicts with keys ``name``, ``netWorth`` (in dollars),
            ``source``, ``country``, ``industry`` and ``type`` (always
            ``"individual"``), sorted by net worth in descending order.
        """
        def _fetch():
            billionaires = []

            try:
                url = f"{self.rtb_api_base}/list/rtb/latest"
                print(f"Fetching billionaire data from: {url}")

                response = requests.get(url, timeout=30)
                response.raise_for_status()
                data = response.json()

                # Expected response shape: {date, count, woman, total, list: [...]}
                if isinstance(data, dict) and isinstance(data.get('list'), list):
                    billionaire_list = data['list']
                else:
                    print(f"Unexpected API structure. Keys found: {list(data.keys()) if isinstance(data, dict) else 'Not a dict'}")
                    billionaire_list = []

                print(f"Found {len(billionaire_list)} billionaires in API response")

                # Only the first 100 entries are used.
                # NOTE(review): this assumes the API list is already ordered
                # by rank - confirm against the API.
                for person in billionaire_list[:100]:
                    # Validate each record on its own so that one malformed
                    # entry does not discard the whole live list.
                    if not isinstance(person, dict):
                        continue

                    name = person.get("name") or ""
                    try:
                        # 'networth' is in millions (e.g. 401205.356 = $401.2 billion)
                        net_worth = float(person.get("networth") or 0) * 1_000_000
                    except (TypeError, ValueError):
                        continue

                    # "not ... > 0" also rejects NaN
                    if not name or not net_worth > 0:
                        continue

                    # 'source' is a list of companies; join it into one string
                    source_value = person.get("source") or []
                    if isinstance(source_value, list):
                        source_str = ", ".join(str(item) for item in source_value)
                    else:
                        source_str = str(source_value)

                    industry = person.get("industry") or []

                    # 'citizenship' is a 2-letter code like 'us'
                    country_code = str(person.get("citizenship") or "").upper()

                    billionaires.append({
                        "name": name,
                        "netWorth": net_worth,
                        "source": source_str,
                        "country": country_code,
                        "industry": industry,
                        "type": "individual"
                    })

                print(f"Successfully processed {len(billionaires)} billionaires")

            except requests.exceptions.RequestException as e:
                print(f"Network error fetching billionaire data: {e}")
                print("Using fallback billionaire data...")
                billionaires = self._get_fallback_billionaire_data()
            except Exception as e:
                print(f"Error processing billionaire data: {e}")
                print("Using fallback billionaire data...")
                billionaires = self._get_fallback_billionaire_data()

            # If we got no data from the API, use fallback
            if not billionaires:
                print("No billionaire data retrieved, using fallback...")
                billionaires = self._get_fallback_billionaire_data()

            # Sort by net worth descending
            billionaires.sort(key=lambda x: x["netWorth"], reverse=True)
            return billionaires

        return self._fetch_with_cache("billionaires", _fetch, max_age_hours=24)

    def _get_fallback_billionaire_data(self) -> List[Dict[str, Any]]:
        """Fallback billionaire data - December 2025 (Bloomberg Billionaires Index)."""
        return [
            {"name": "Elon Musk", "netWorth": 419e9, "source": "Tesla, SpaceX", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Larry Ellison", "netWorth": 349e9, "source": "Oracle", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Mark Zuckerberg", "netWorth": 265e9, "source": "Meta", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Jeff Bezos", "netWorth": 250e9, "source": "Amazon", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Larry Page", "netWorth": 211e9, "source": "Google", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Sergey Brin", "netWorth": 198e9, "source": "Google", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Steve Ballmer", "netWorth": 175e9, "source": "Microsoft", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Bernard Arnault", "netWorth": 163e9, "source": "LVMH", "country": "France", "industry": ["Fashion & Retail"], "type": "individual"},
            {"name": "Jensen Huang", "netWorth": 155e9, "source": "NVIDIA", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Warren Buffett", "netWorth": 148e9, "source": "Berkshire Hathaway", "country": "United States", "industry": ["Finance"], "type": "individual"},
            {"name": "Michael Dell", "netWorth": 148e9, "source": "Dell Technologies", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Jim Walton", "netWorth": 128e9, "source": "Walmart", "country": "United States", "industry": ["Retail"], "type": "individual"},
            {"name": "Rob Walton", "netWorth": 126e9, "source": "Walmart", "country": "United States", "industry": ["Retail"], "type": "individual"},
            {"name": "Alice Walton", "netWorth": 125e9, "source": "Walmart", "country": "United States", "industry": ["Retail"], "type": "individual"},
            {"name": "Bill Gates", "netWorth": 121e9, "source": "Microsoft", "country": "United States", "industry": ["Technology"], "type": "individual"},
            {"name": "Amancio Ortega", "netWorth": 114e9, "source": "Zara", "country": "Spain", "industry": ["Fashion & Retail"], "type": "individual"},
            {"name": "Carlos Slim", "netWorth": 108e9, "source": "Telecom", "country": "Mexico", "industry": ["Telecommunications"], "type": "individual"},
            {"name": "Mukesh Ambani", "netWorth": 99e9, "source": "Reliance Industries", "country": "India", "industry": ["Energy"], "type": "individual"},
            {"name": "Francoise Bettencourt Meyers", "netWorth": 95e9, "source": "L'Oréal", "country": "France", "industry": ["Consumer Goods"], "type": "individual"},
            {"name": "Gautam Adani", "netWorth": 82e9, "source": "Adani Group", "country": "India", "industry": ["Industrial"], "type": "individual"},
            {"name": "Julia Flesher Koch", "netWorth": 80e9, "source": "Koch Industries", "country": "United States", "industry": ["Industrial"], "type": "individual"},
            {"name": "Zhong Shanshan", "netWorth": 77e9, "source": "Nongfu Spring", "country": "China", "industry": ["Beverages"], "type": "individual"},
            {"name": "Thomas Peterffy", "netWorth": 76e9, "source": "Interactive Brokers", "country": "United States", "industry": ["Finance"], "type": "individual"},
            {"name": "Charles Koch", "netWorth": 73e9, "source": "Koch Industries", "country": "United States", "industry": ["Industrial"], "type": "individual"},
            {"name": "Ma Huateng", "netWorth": 69e9, "source": "Tencent", "country": "China", "industry": ["Technology"], "type": "individual"},
        ]

    def fetch_company_data(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Fetch company market cap data using yfinance."""
        def _fetch():
            companies = []

            try:
                # Try to import yfinance
                import yfinance as yf

                print("Fetching company market cap data via yfinance...")

                # List of major companies to fetch (top companies by market cap)
                # This is a curated list of tickers for the largest companies
                # NOTE: Using USD-traded ADRs for non-US stocks to avoid currency conversion issues
                major_tickers = [
                    # Trillion-dollar companies
                    "NVDA", "AAPL", "MSFT", "GOOGL", "AMZN", "META", "AVGO", "TSLA",
                    "TSM", "BRK-B",
                    # 500B+ companies
                    "WMT", "JPM", "ORCL", "LLY", "TCEHY", "V", "NFLX", "MA",
                    "XOM", "SSNLF",  # Samsung USD ADR instead of 005930.KS (KRW)
                    # 300B-500B companies
                    "COST", "ASML", "HD", "PG", "BAC", "JNJ", "AMD", "ABBV", "SAP", "KO",
                    # 200B-300B companies
                    "CVX", "CSCO", "IBM", "UNH", "MS", "MRK", "WFC", "CRM", "NVO", "TMO",
                    "MCD", "SHOP", "DIS", "ACN", "PEP",
                    # 150B-200B companies
                    "UBER", "DHR", "NOW", "T", "ABT", "QCOM", "INTC", "AMAT", "AXP", "CAT",
                    "SONY", "VZ", "TXN", "GS", "INTU", "BKNG", "BA", "AMGN", "ISRG", "SCHW",
                    # 100B-150B companies
                    "ADBE", "SPGI", "LOW", "HON", "PFE", "SPOT", "BLK", "COP", "GILD", "PANW",
                    "MU", "LRCX", "ADI", "BMY", "MDT", "CMCSA", "NKE", "SBUX", "SNPS", "CDNS",
                    "MCO", "MRVL", "CRWD", "ABNB", "TGT", "WDAY", "FTNT", "PYPL", "ADSK", "SNOW",
                    "SQ", "DDOG",
                ]

                for ticker in major_tickers[:limit]:
                    try:
                        stock = yf.Ticker(ticker)
                        info = stock.info

                        # Extract market cap and company info
                        market_cap = info.get('marketCap')
                        company_name = info.get('longName') or info.get('shortName', ticker)

                        if market_cap and market_cap > 0:
                            companies.append({
                                "name": company_name,
                                "marketCap": float(market_cap),
                                "ticker": ticker,
                                "sector": info.get("sector", ""),
                                "country": info.get("country", ""),
                                "type": "company"
                            })
                            print(f"  ✓ {company_name}: ${market_cap/1e9:.1f}B")

                        # Rate limiting to avoid overwhelming the API
                        time.sleep(0.5)

                    except Exception as e:
                        print(f"  ✗ Error fetching {ticker}: {e}")
                        continue

                print(f"Successfully fetched {len(companies)} companies via yfinance")

            except ImportError:
                print("yfinance not installed, using fallback data")
                print("Install with: pip install yfinance")
                companies = self._get_fallback_company_data()
            except Exception as e:
                print(f"Error fetching company data via yfinance: {e}")
                print("Using fallback data...")
                companies = self._get_fallback_company_data()

            # If we got no data, use fallback
            if not companies:
                print("No company data retrieved, using fallback...")
                companies = self._get_fallback_company_data()

            # Sort by market cap descending
            companies.sort(key=lambda x: x["marketCap"], reverse=True)
            return companies[:limit]

        return self._fetch_with_cache("companies", _fetch, max_age_hours=12)

    def _get_fallback_company_data(self) -> List[Dict[str, Any]]:
        """Fallback company data when API is not available - December 2025."""
        # Top 100+ companies by market cap (December 2025 data)
        return [
            # Trillion-dollar companies
            {"name": "NVIDIA", "marketCap": 4.4e12, "ticker": "NVDA", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Apple", "marketCap": 3.9e12, "ticker": "AAPL", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Microsoft", "marketCap": 3.8e12, "ticker": "MSFT", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Alphabet", "marketCap": 3.0e12, "ticker": "GOOGL", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Amazon", "marketCap": 2.4e12, "ticker": "AMZN", "sector": "Consumer", "country": "United States", "type": "company"},
            {"name": "Meta Platforms", "marketCap": 1.8e12, "ticker": "META", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Broadcom", "marketCap": 1.6e12, "ticker": "AVGO", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Tesla", "marketCap": 1.5e12, "ticker": "TSLA", "sector": "Automotive", "country": "United States", "type": "company"},
            {"name": "Taiwan Semiconductor", "marketCap": 1.5e12, "ticker": "TSM", "sector": "Technology", "country": "Taiwan", "type": "company"},
            {"name": "Berkshire Hathaway", "marketCap": 1.1e12, "ticker": "BRK.A", "sector": "Finance", "country": "United States", "type": "company"},
            # 500B+ companies
            {"name": "Walmart", "marketCap": 847e9, "ticker": "WMT", "sector": "Retail", "country": "United States", "type": "company"},
            {"name": "JPMorgan Chase", "marketCap": 817e9, "ticker": "JPM", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Oracle", "marketCap": 784e9, "ticker": "ORCL", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Eli Lilly", "marketCap": 757e9, "ticker": "LLY", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Tencent", "marketCap": 733e9, "ticker": "TCEHY", "sector": "Technology", "country": "China", "type": "company"},
            {"name": "Visa", "marketCap": 669e9, "ticker": "V", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Netflix", "marketCap": 527e9, "ticker": "NFLX", "sector": "Media", "country": "United States", "type": "company"},
            {"name": "Mastercard", "marketCap": 517e9, "ticker": "MA", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "ExxonMobil", "marketCap": 481e9, "ticker": "XOM", "sector": "Energy", "country": "United States", "type": "company"},
            {"name": "Samsung Electronics", "marketCap": 465e9, "ticker": "SSNLF", "sector": "Technology", "country": "South Korea", "type": "company"},
            # 300B-500B companies
            {"name": "Costco", "marketCap": 415e9, "ticker": "COST", "sector": "Retail", "country": "United States", "type": "company"},
            {"name": "ASML", "marketCap": 405e9, "ticker": "ASML", "sector": "Technology", "country": "Netherlands", "type": "company"},
            {"name": "Home Depot", "marketCap": 394e9, "ticker": "HD", "sector": "Retail", "country": "United States", "type": "company"},
            {"name": "Procter & Gamble", "marketCap": 380e9, "ticker": "PG", "sector": "Consumer", "country": "United States", "type": "company"},
            {"name": "Bank of America", "marketCap": 378e9, "ticker": "BAC", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Johnson & Johnson", "marketCap": 373e9, "ticker": "JNJ", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "AMD", "marketCap": 362e9, "ticker": "AMD", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "AbbVie", "marketCap": 329e9, "ticker": "ABBV", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "SAP", "marketCap": 312e9, "ticker": "SAP", "sector": "Technology", "country": "Germany", "type": "company"},
            {"name": "Coca-Cola", "marketCap": 295e9, "ticker": "KO", "sector": "Consumer", "country": "United States", "type": "company"},
            # 200B-300B companies
            {"name": "Chevron", "marketCap": 280e9, "ticker": "CVX", "sector": "Energy", "country": "United States", "type": "company"},
            {"name": "Cisco", "marketCap": 279e9, "ticker": "CSCO", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "IBM", "marketCap": 267e9, "ticker": "IBM", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "UnitedHealth Group", "marketCap": 265e9, "ticker": "UNH", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Morgan Stanley", "marketCap": 264e9, "ticker": "MS", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Merck", "marketCap": 250e9, "ticker": "MRK", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Wells Fargo", "marketCap": 249e9, "ticker": "WFC", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Salesforce", "marketCap": 235e9, "ticker": "CRM", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Novo Nordisk", "marketCap": 234e9, "ticker": "NVO", "sector": "Healthcare", "country": "Denmark", "type": "company"},
            {"name": "Thermo Fisher", "marketCap": 220e9, "ticker": "TMO", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "McDonald's", "marketCap": 219e9, "ticker": "MCD", "sector": "Consumer", "country": "United States", "type": "company"},
            {"name": "Shopify", "marketCap": 216e9, "ticker": "SHOP", "sector": "Technology", "country": "Canada", "type": "company"},
            {"name": "Disney", "marketCap": 205e9, "ticker": "DIS", "sector": "Media", "country": "United States", "type": "company"},
            {"name": "Accenture", "marketCap": 200e9, "ticker": "ACN", "sector": "Technology", "country": "Ireland", "type": "company"},
            {"name": "PepsiCo", "marketCap": 198e9, "ticker": "PEP", "sector": "Consumer", "country": "United States", "type": "company"},
            # 150B-200B companies
            {"name": "Uber", "marketCap": 195e9, "ticker": "UBER", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Danaher", "marketCap": 190e9, "ticker": "DHR", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "ServiceNow", "marketCap": 188e9, "ticker": "NOW", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "AT&T", "marketCap": 188e9, "ticker": "T", "sector": "Telecom", "country": "United States", "type": "company"},
            {"name": "Abbott Laboratories", "marketCap": 185e9, "ticker": "ABT", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Qualcomm", "marketCap": 181e9, "ticker": "QCOM", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Intel", "marketCap": 181e9, "ticker": "INTC", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Applied Materials", "marketCap": 180e9, "ticker": "AMAT", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "American Express", "marketCap": 180e9, "ticker": "AXP", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Caterpillar", "marketCap": 175e9, "ticker": "CAT", "sector": "Industrial", "country": "United States", "type": "company"},
            {"name": "Sony", "marketCap": 174e9, "ticker": "SONY", "sector": "Technology", "country": "Japan", "type": "company"},
            {"name": "Verizon", "marketCap": 171e9, "ticker": "VZ", "sector": "Telecom", "country": "United States", "type": "company"},
            {"name": "Texas Instruments", "marketCap": 170e9, "ticker": "TXN", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Goldman Sachs", "marketCap": 170e9, "ticker": "GS", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Intuit", "marketCap": 170e9, "ticker": "INTU", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Booking Holdings", "marketCap": 165e9, "ticker": "BKNG", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Boeing", "marketCap": 163e9, "ticker": "BA", "sector": "Aerospace", "country": "United States", "type": "company"},
            {"name": "Amgen", "marketCap": 155e9, "ticker": "AMGN", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Intuitive Surgical", "marketCap": 150e9, "ticker": "ISRG", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Charles Schwab", "marketCap": 150e9, "ticker": "SCHW", "sector": "Finance", "country": "United States", "type": "company"},
            # 100B-150B companies
            {"name": "Adobe", "marketCap": 145e9, "ticker": "ADBE", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "S&P Global", "marketCap": 145e9, "ticker": "SPGI", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Lowe's", "marketCap": 140e9, "ticker": "LOW", "sector": "Retail", "country": "United States", "type": "company"},
            {"name": "Honeywell", "marketCap": 140e9, "ticker": "HON", "sector": "Industrial", "country": "United States", "type": "company"},
            {"name": "Pfizer", "marketCap": 139e9, "ticker": "PFE", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Spotify", "marketCap": 139e9, "ticker": "SPOT", "sector": "Media", "country": "Sweden", "type": "company"},
            {"name": "Blackrock", "marketCap": 135e9, "ticker": "BLK", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "ConocoPhillips", "marketCap": 130e9, "ticker": "COP", "sector": "Energy", "country": "United States", "type": "company"},
            {"name": "Gilead Sciences", "marketCap": 120e9, "ticker": "GILD", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Palo Alto Networks", "marketCap": 120e9, "ticker": "PANW", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Micron Technology", "marketCap": 120e9, "ticker": "MU", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Lam Research", "marketCap": 120e9, "ticker": "LRCX", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Analog Devices", "marketCap": 115e9, "ticker": "ADI", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Bristol-Myers Squibb", "marketCap": 110e9, "ticker": "BMY", "sector": "Healthcare", "country": "United States", "type": "company"},
            {"name": "Medtronic", "marketCap": 110e9, "ticker": "MDT", "sector": "Healthcare", "country": "Ireland", "type": "company"},
            {"name": "Comcast", "marketCap": 109e9, "ticker": "CMCSA", "sector": "Media", "country": "United States", "type": "company"},
            {"name": "Nike", "marketCap": 99e9, "ticker": "NKE", "sector": "Consumer", "country": "United States", "type": "company"},
            {"name": "Starbucks", "marketCap": 96e9, "ticker": "SBUX", "sector": "Consumer", "country": "United States", "type": "company"},
            {"name": "Synopsys", "marketCap": 95e9, "ticker": "SNPS", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Cadence Design", "marketCap": 90e9, "ticker": "CDNS", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Moody's", "marketCap": 90e9, "ticker": "MCO", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Marvell Technology", "marketCap": 80e9, "ticker": "MRVL", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Crowdstrike", "marketCap": 80e9, "ticker": "CRWD", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Airbnb", "marketCap": 78e9, "ticker": "ABNB", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Target", "marketCap": 70e9, "ticker": "TGT", "sector": "Retail", "country": "United States", "type": "company"},
            {"name": "Workday", "marketCap": 70e9, "ticker": "WDAY", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Fortinet", "marketCap": 65e9, "ticker": "FTNT", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "PayPal", "marketCap": 64e9, "ticker": "PYPL", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Autodesk", "marketCap": 60e9, "ticker": "ADSK", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Snowflake", "marketCap": 50e9, "ticker": "SNOW", "sector": "Technology", "country": "United States", "type": "company"},
            {"name": "Block", "marketCap": 46e9, "ticker": "SQ", "sector": "Finance", "country": "United States", "type": "company"},
            {"name": "Datadog", "marketCap": 45e9, "ticker": "DDOG", "sector": "Technology", "country": "United States", "type": "company"},
        ]


    def _format_currency(self, value: float) -> str:
        """Format currency value with appropriate units (trillion, billion, million)."""
        if value >= 1e12:
            return f"${value/1e12:.2f} trillion"
        elif value >= 1e9:
            return f"${value/1e9:.2f} billion"
        elif value >= 1e6:
            return f"${value/1e6:.2f} million"
        else:
            return f"${value:,.2f}"

    def combine_and_rank_all_data(self) -> Dict[str, Any]:
        """Combine all data sources and create unified ranking."""
        print("Fetching all data sources...")

        # Fetch data from all sources
        countries = self.fetch_country_gdp_data()
        billionaires = self.fetch_billionaire_data()
        companies = self.fetch_company_data()

        # Combine all entities
        all_entities = []

        # Add countries
        for country in countries:
            all_entities.append({
                "rank": 0,  # Will be assigned after sorting
                "name": country["name"],
                "value": country["gdp"],
                "value_formatted": self._format_currency(country["gdp"]),
                "type": "country",
                "metadata": {
                    "year": country.get("year"),
                    "country_code": country.get("country_code")
                }
            })

        # Add billionaires
        for billionaire in billionaires:
            all_entities.append({
                "rank": 0,
                "name": billionaire["name"],
                "value": billionaire["netWorth"],
                "value_formatted": self._format_currency(billionaire["netWorth"]),
                "type": "individual",
                "metadata": {
                    "source": billionaire.get("source"),
                    "country": billionaire.get("country"),
                    "industry": billionaire.get("industry")
                }
            })

        # Add companies
        for company in companies:
            all_entities.append({
                "rank": 0,
                "name": company["name"],
                "value": company["marketCap"],
                "value_formatted": self._format_currency(company["marketCap"]),
                "type": "company",
                "metadata": {
                    "ticker": company.get("ticker"),
                    "sector": company.get("sector"),
                    "country": company.get("country")
                }
            })

        # Sort by value descending
        all_entities.sort(key=lambda x: x["value"], reverse=True)

        # Assign ranks
        for i, entity in enumerate(all_entities, 1):
            entity["rank"] = i

        # Create summary statistics
        stats = {
            "total_entities": len(all_entities),
            "countries": len(countries),
            "individuals": len(billionaires),
            "companies": len(companies),
            "last_updated": datetime.now().isoformat(),
            "data_sources": {
                "countries": "World Bank API",
                "individuals": "Forbes Real-Time Billionaires (via RTB-API/Statically CDN)",
                "companies": "yfinance"
            }
        }

        return {
            "entities": all_entities,
            "stats": stats,
            "metadata": {
                "generated_at": datetime.now().isoformat(),
                "version": "2.1"
            }
        }

    def export_for_web(self, output_path: str = "wealth_data.js"):
        """Export combined data in JavaScript format for web visualization."""
        data = self.combine_and_rank_all_data()

        # Create JavaScript file with data
        js_content = f"""// Global Wealth Data
// Generated: {data['metadata']['generated_at']}
// Data Sources:
//   Countries: {data['stats']['data_sources']['countries']}
//   Individuals: {data['stats']['data_sources']['individuals']}
//   Companies: {data['stats']['data_sources']['companies']}

const globalWealthData = {json.dumps(data, indent=2)};

// Export for use in browser
if (typeof module !== 'undefined' && module.exports) {{
    module.exports = globalWealthData;
}}
"""

        with open(output_path, 'w') as f:
            f.write(js_content)

        print(f"\nData exported to {output_path}")
        print(f"Total entities: {data['stats']['total_entities']}")
        print(f"Countries: {data['stats']['countries']}")
        print(f"Individuals: {data['stats']['individuals']}")
        print(f"Companies: {data['stats']['companies']}")

        return data


def main():
    """Run the whole pipeline: export JS and JSON files, then print the top ten."""
    heavy_rule = "=" * 60

    print(heavy_rule)
    print("Global Wealth Data Fetcher")
    print(heavy_rule)

    # export_for_web writes the .js file and hands back the combined data,
    # which is reused below for the JSON dump and the console summary.
    data = GlobalWealthDataFetcher().export_for_web("wealth_data.js")

    with open("wealth_data.json", "w") as json_file:
        json.dump(data, json_file, indent=2)

    print("\nTop 10 Wealthiest Entities:")
    print("-" * 60)
    top_entries = data["entities"][:10]
    for entry in top_entries:
        rank = entry['rank']
        name = entry['name']
        kind = entry['type']
        amount = entry['value_formatted']
        print(f"{rank:2d}. {name:<30} ({kind:<10}): {amount}")

    print("\n" + heavy_rule)
    print("Data fetching complete!")
    print(heavy_rule)


# Run the fetch-and-export pipeline only when this file is executed as a
# script; importing the module does not trigger it.
if __name__ == "__main__":
    main()
