#!/usr/bin/env python3
"""
Fetch World Bank economic data for Central and South American countries.

Author: Luke Steuber
Date: 2025-12-15
"""

import requests
import json
import time
from typing import Dict, List, Optional

# ISO 3166-1 alpha-3 country code -> display name.
# Insertion order is the order in which countries are fetched and written out.
COUNTRIES: Dict[str, str] = {
    # Central America
    'BLZ': 'Belize',
    'CRI': 'Costa Rica',
    'SLV': 'El Salvador',
    'GTM': 'Guatemala',
    'HND': 'Honduras',
    'NIC': 'Nicaragua',
    'PAN': 'Panama',
    # South America
    'ARG': 'Argentina',
    'BOL': 'Bolivia',
    'BRA': 'Brazil',
    'CHL': 'Chile',
    'COL': 'Colombia',
    'ECU': 'Ecuador',
    'GUY': 'Guyana',
    'PRY': 'Paraguay',
    'PER': 'Peru',
    'SUR': 'Suriname',
    'URY': 'Uruguay',
    'VEN': 'Venezuela',
}

# World Bank indicator code -> human-readable label.
INDICATORS: Dict[str, str] = {
    'NY.GDP.MKTP.CD': 'GDP (current USD)',
    'NY.GDP.PCAP.CD': 'GDP per capita (current USD)',
    'SP.POP.TOTL': 'Population, total',
    'NE.TRD.GNFS.ZS': 'Trade (% of GDP)',
    'SI.POV.GINI': 'Gini coefficient',
    'SP.DYN.LE00.IN': 'Life expectancy at birth (years)',
    'BX.TRF.PWKR.DT.GD.ZS': 'Personal remittances, received (% of GDP)',
}

# Root of the World Bank v2 REST API; request paths are appended to it.
BASE_URL: str = "https://api.worldbank.org/v2"


def fetch_indicator_data(country_code: str, indicator_code: str,
                         start_year: int = 2015, end_year: int = 2024) -> Optional[Dict]:
    """
    Fetch the latest non-null observation for one country/indicator pair.

    Sends a single GET request to the World Bank API and picks the
    observation with the greatest date among those whose value is not
    null, independent of the order in which the API lists the rows.

    Args:
        country_code: Country code inserted into the request path.
        indicator_code: Indicator code inserted into the request path.
        start_year: Lower bound sent to the API as ``date=start:end``.
        end_year: Upper bound sent to the API as ``date=start:end``.

    Returns:
        A dict with the keys 'value', 'year', 'country' and
        'countryiso3code', or None when the request fails, the body is
        not valid JSON, the payload does not have the expected
        ``[metadata, rows]`` shape, or no row carries a value.
        Request and decoding errors are printed, never raised.
    """
    url = f"{BASE_URL}/country/{country_code}/indicator/{indicator_code}"
    params = {
        'format': 'json',
        'date': f'{start_year}:{end_year}',
        'per_page': 100
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions
        # whose decode error does not derive from RequestException.
        print(f"Error fetching {indicator_code} for {country_code}: {e}")
        return None

    # Expected shape is [metadata, rows]; error responses and empty
    # results arrive as something else (shorter list, null rows, ...).
    if not isinstance(payload, list) or len(payload) < 2:
        return None
    rows = payload[1]
    if not isinstance(rows, list):
        return None

    populated = [
        row for row in rows
        if isinstance(row, dict) and row.get('value') is not None
    ]
    if not populated:
        return None

    # Choose by date explicitly rather than trusting response order.
    # max() keeps the first of equal keys, so a newest-first response
    # yields exactly the row a first-match scan would have returned.
    latest = max(populated, key=lambda row: str(row.get('date') or ''))

    country = latest.get('country')
    return {
        'value': latest['value'],
        'year': latest.get('date'),
        'country': country.get('value') if isinstance(country, dict) else None,
        'countryiso3code': latest.get('countryiso3code')
    }


def fetch_all_data() -> Dict:
    """
    Collect every configured indicator for every configured country.

    Iterates over COUNTRIES and, for each one, over INDICATORS, calling
    fetch_indicator_data once per pair. Progress is printed to stdout and
    the loop sleeps 0.2 seconds after each request.

    Returns a dict with two keys: 'metadata' (source, URL, fetch date,
    year range, note) and 'countries', which maps each country code to
    {'name', 'code', 'indicators'}. Every indicator entry has 'name',
    'value' and 'year'; the latter two are None when nothing was fetched.
    """
    fetched_on = time.strftime('%Y-%m-%d')

    per_country = len(INDICATORS)
    total_requests = len(COUNTRIES) * per_country
    countries_out = {}

    for country_idx, (iso3, label) in enumerate(COUNTRIES.items()):
        print(f"\nFetching data for {label} ({iso3})...")

        indicators_out = {}
        for offset, (code, title) in enumerate(INDICATORS.items(), start=1):
            # Running request number across both loops.
            position = country_idx * per_country + offset
            print(f"  [{position}/{total_requests}] {title}...", end=' ')

            observation = fetch_indicator_data(iso3, code)

            if not observation:
                # Keep a placeholder so every country lists every indicator.
                indicators_out[code] = {
                    'name': title,
                    'value': None,
                    'year': None,
                }
                print("✗ (no data)")
            else:
                indicators_out[code] = {
                    'name': title,
                    'value': observation['value'],
                    'year': observation['year'],
                }
                print(f"✓ ({observation['year']})")

            # Pause between requests to go easy on the API.
            time.sleep(0.2)

        countries_out[iso3] = {
            'name': label,
            'code': iso3,
            'indicators': indicators_out,
        }

    return {
        'metadata': {
            'source': 'World Bank API',
            'url': 'https://api.worldbank.org/v2/',
            'fetched_date': fetched_on,
            'year_range': '2015-2024',
            'note': 'Most recent available value shown for each indicator',
        },
        'countries': countries_out,
    }


def main():
    """
    Fetch all data, write it to 'world-bank-data.json' and print a summary.

    The summary reports the number of countries, the number of indicator
    records, and how many of those records have no value. Percentages are
    reported as 0.0% when there are no indicator records at all, instead
    of failing with a division by zero.
    """
    print("=" * 60)
    print("World Bank Economic Data Fetcher")
    print("Central and South American Countries")
    print("=" * 60)

    data = fetch_all_data()

    # Save to JSON file
    output_path = 'world-bank-data.json'
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n{'=' * 60}")
    print(f"Data saved to: {output_path}")
    print(f"Total countries: {len(data['countries'])}")

    # Summary statistics
    total_indicators = 0
    missing_indicators = 0

    # Only the record contents are needed, so iterate values directly.
    for country_data in data['countries'].values():
        for indicator_data in country_data['indicators'].values():
            total_indicators += 1
            if indicator_data['value'] is None:
                missing_indicators += 1

    # Guard the ratios: with no records there is nothing to divide by.
    if total_indicators:
        missing_pct = missing_indicators / total_indicators * 100
        success_pct = (total_indicators - missing_indicators) / total_indicators * 100
    else:
        missing_pct = 0.0
        success_pct = 0.0

    print(f"Total indicators fetched: {total_indicators}")
    print(f"Missing values: {missing_indicators} ({missing_pct:.1f}%)")
    print(f"Success rate: {success_pct:.1f}%")
    print("=" * 60)


# Run the fetch-and-save workflow only when this file is executed as a
# script, so importing the module does not trigger any requests.
if __name__ == '__main__':
    main()
