#!/usr/bin/env python3
"""
Fetch hot sauce and pepper Scoville ratings from multiple sources.
"""

import requests
import json
import csv
from datetime import datetime
from pathlib import Path
from time import sleep
from io import StringIO

# Where results are written: both JSON files live next to this script.
OUTPUT_DIR = Path(__file__).parent
DATA_FILE = OUTPUT_DIR.joinpath("hot_sauces.json")
METADATA_FILE = OUTPUT_DIR.joinpath("hot_sauces_metadata.json")

# Remote dataset: raw CSV of pepper records hosted on GitHub.
GITHUB_PEPPERS_URL = "https://raw.githubusercontent.com/alemosie/hot_peppers/master/data/peppers_data.csv"

def _parse_scoville(value):
    """Convert a raw CSV cell into an int Scoville rating.

    Accepts plain integers, thousands-separated values ("1,500") and decimal
    strings ("2500.0", truncated toward zero). Returns None for missing,
    blank or non-numeric cells so that one bad cell cannot abort the whole
    import.
    """
    if value is None:
        return None
    text = str(value).replace(',', '').strip()
    if not text:
        return None
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return int(float(text))
    except (ValueError, OverflowError):
        # Non-numeric text, NaN or infinity.
        return None


def _clean_text(value):
    """Return a CSV cell stripped of surrounding whitespace ('' for None)."""
    # csv.DictReader fills cells missing from a short row with None.
    return (value or '').strip()


def fetch_github_peppers():
    """Fetch pepper data from GitHub CSV.

    Downloads GITHUB_PEPPERS_URL and converts every CSV row into a dict with
    the keys 'name', 'scoville_min', 'scoville_max', 'scoville_avg',
    'origin', 'species', 'source' and 'type'. Scoville values are ints, or
    None when the cell is missing or cannot be parsed as a number.

    Returns:
        list[dict]: One record per CSV row. An empty list is returned (after
        printing the error) if the download or the CSV parsing fails.
    """
    print("Fetching GitHub hot peppers dataset...")
    try:
        response = requests.get(GITHUB_PEPPERS_URL, timeout=30)
        response.raise_for_status()

        # Parse CSV
        reader = csv.DictReader(StringIO(response.text))

        peppers = [
            {
                'name': _clean_text(row.get('name')),
                'scoville_min': _parse_scoville(row.get('scoville_min')),
                'scoville_max': _parse_scoville(row.get('scoville_max')),
                'scoville_avg': _parse_scoville(row.get('scoville_avg')),
                'origin': _clean_text(row.get('origin')),
                'species': _clean_text(row.get('species')),
                'source': 'GitHub alemosie/hot_peppers',
                'type': 'pepper',
            }
            for row in reader
        ]

        print(f"  Found {len(peppers)} peppers")
        return peppers
    except Exception as e:
        # Deliberate best-effort: a failure of this source is reported and
        # yields no records instead of aborting the whole run.
        print(f"  Error: {e}")
        return []

def fetch_openfoodfacts_hot_sauces():
    """Fetch hot sauce products from OpenFoodFacts.

    Runs one search request per search term (up to 50 results each) and
    collects the returned products, de-duplicated by their 'code' field.
    Products without a code are skipped. Requests are spaced one second
    apart, whether or not the previous request succeeded.

    Returns:
        list[dict]: One record per unique product with the keys 'name',
        'brand', 'countries', 'categories', 'ingredients', 'labels', 'url',
        'source' and 'type'. A search term whose request fails is reported
        and skipped; the remaining terms are still queried.
    """
    print("Fetching hot sauces from OpenFoodFacts...")

    url = "https://world.openfoodfacts.org/cgi/search.pl"
    search_terms = [
        "hot sauce",
        "sriracha",
        "tabasco",
        "habanero sauce",
        "ghost pepper sauce",
        "carolina reaper sauce"
    ]

    all_products = []
    seen_codes = set()

    for index, term in enumerate(search_terms):
        if index:
            # Rate limiting: pause before every request except the first, so
            # the delay also applies after a failed request and no time is
            # wasted sleeping after the final one.
            sleep(1)

        params = {
            'search_terms': term,
            'search_simple': 1,
            'action': 'process',
            'json': 1,
            'page_size': 50
        }

        print(f"  Searching: {term}...")
        try:
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            # Treat a missing or null 'products' entry as "no results".
            products = data.get('products') or []

            for product in products:
                code = product.get('code')
                if not code or code in seen_codes:
                    continue
                seen_codes.add(code)

                all_products.append({
                    'name': product.get('product_name', ''),
                    'brand': product.get('brands', ''),
                    'countries': product.get('countries', ''),
                    'categories': product.get('categories', ''),
                    'ingredients': product.get('ingredients_text', ''),
                    'labels': product.get('labels', ''),
                    'url': f"https://world.openfoodfacts.org/product/{code}",
                    'source': 'OpenFoodFacts',
                    'type': 'hot_sauce_product'
                })

            print(f"    Found {len(products)} products")
        except Exception as e:
            # Deliberate best-effort: one failing term must not abort the
            # remaining searches.
            print(f"    Error: {e}")

    print(f"  Total unique hot sauce products: {len(all_products)}")
    return all_products

def _format_scoville(value):
    """Format a Scoville rating with thousands separators ('unknown' if None)."""
    # None cannot be formatted with the ',' format spec (raises TypeError).
    return 'unknown' if value is None else f"{value:,}"


def main():
    """Fetch all sources, write the data and metadata JSON files, print a summary.

    Combines the pepper records and the hot sauce product records into one
    list, saves it to DATA_FILE, writes a descriptive metadata document to
    METADATA_FILE, and prints the first three records of each kind.
    """
    print("=" * 60)
    print("HOT SAUCES & PEPPERS FETCHER")
    print("=" * 60)

    # Fetch from all sources
    peppers = fetch_github_peppers()
    hot_sauces = fetch_openfoodfacts_hot_sauces()

    # Combine datasets
    all_data = peppers + hot_sauces
    print(f"\nTotal records: {len(all_data)}")
    print(f"  Peppers: {len(peppers)}")
    print(f"  Hot sauce products: {len(hot_sauces)}")

    # Save data
    with open(DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(all_data, f, indent=2, ensure_ascii=False)
    print(f"\nSaved data: {DATA_FILE}")

    # Create metadata
    metadata = {
        'title': 'Hot Sauces and Pepper Scoville Ratings',
        'description': 'Pepper Scoville heat ratings and commercial hot sauce products from GitHub and OpenFoodFacts',
        'source': 'Multiple sources',
        'source_urls': [
            GITHUB_PEPPERS_URL,
            'https://world.openfoodfacts.org/cgi/search.pl'
        ],
        # Local time with its UTC offset, so the timestamp is unambiguous.
        'fetch_date': datetime.now().astimezone().isoformat(),
        'record_count': len(all_data),
        'pepper_count': len(peppers),
        'product_count': len(hot_sauces),
        'fields': {
            'peppers': ['name', 'scoville_min', 'scoville_max', 'scoville_avg', 'origin', 'species', 'source', 'type'],
            'products': ['name', 'brand', 'countries', 'categories', 'ingredients', 'labels', 'url', 'source', 'type']
        },
        'license': 'Mixed: GitHub (check repo), OpenFoodFacts (ODbL)',
        'scoville_scale': {
            'mild': '0-1,000',
            'medium': '1,000-10,000',
            'hot': '10,000-100,000',
            'very_hot': '100,000-350,000',
            'extremely_hot': '350,000+'
        }
    }

    with open(METADATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)
    print(f"Saved metadata: {METADATA_FILE}")

    # Print sample records
    print("\nSample pepper records (first 3):")
    for i, pepper in enumerate(peppers[:3], 1):
        low = _format_scoville(pepper['scoville_min'])
        high = _format_scoville(pepper['scoville_max'])
        print(f"\n{i}. {pepper['name']}")
        print(f"   Scoville: {low} - {high}")
        print(f"   Origin: {pepper['origin']}")
        print(f"   Species: {pepper['species']}")

    print("\nSample hot sauce products (first 3):")
    for i, sauce in enumerate(hot_sauces[:3], 1):
        print(f"\n{i}. {sauce['name']}")
        print(f"   Brand: {sauce['brand']}")
        print(f"   Countries: {sauce['countries']}")

    print("\n" + "=" * 60)
    print("COMPLETE")
    print("=" * 60)

if __name__ == '__main__':
    main()
