#!/usr/bin/env python3
"""
Fetch real carnivorous plant occurrence data from GBIF (Global Biodiversity Information Facility) API.
Covers sundews, pitcher plants, Venus flytraps, and other carnivorous species.
"""

import requests
import json
from datetime import datetime
from pathlib import Path

# GBIF taxon keys for carnivorous plant families and notable species.
# Each entry supplies:
#   name   - scientific name; shown in progress output and used as the
#            fallback scientificName/species for records lacking those fields
#   key    - value sent to the occurrence search as the `taxonKey` parameter
#   common - common name; shown in progress output and written to the metadata
# NOTE(review): nothing in this script validates the keys -- confirm each one
# against the GBIF species lookup before trusting the results.
# NOTE(review): Dionaea muscipula is classified in Droseraceae, so the species
# query presumably overlaps the family query -- confirm whether duplicate
# occurrences matter for consumers of the output.
CARNIVOROUS_TAXA = [
    {'name': 'Droseraceae', 'key': 6953, 'common': 'Sundews'},  # Family
    {'name': 'Nepenthaceae', 'key': 2441, 'common': 'Tropical pitcher plants'},  # Family
    {'name': 'Sarraceniaceae', 'key': 2440, 'common': 'North American pitcher plants'},  # Family
    {'name': 'Dionaea muscipula', 'key': 2870055, 'common': 'Venus flytrap'},  # Species
    {'name': 'Utricularia', 'key': 2887912, 'common': 'Bladderworts'},  # Genus
]

def fetch_gbif_occurrences(taxon_key, taxon_name, limit=300):
    """Fetch georeferenced occurrence records for one taxon from the GBIF API.

    Issues a single request to the GBIF occurrence search endpoint (no
    pagination is performed) and flattens each result into a small dict.

    Args:
        taxon_key: GBIF taxon key, sent as the ``taxonKey`` query parameter.
        taxon_name: Name used in error output and as the fallback value for
            ``scientificName`` / ``species`` when a record lacks them.
        limit: Maximum number of records to request. NOTE(review): GBIF
            documents a per-request cap on ``limit`` (300 at the time of
            writing) -- confirm before passing larger values.

    Returns:
        A list of dicts with the keys ``scientificName``, ``species``,
        ``genus``, ``family``, ``latitude``, ``longitude``, ``country``,
        ``stateProvince``, ``locality``, ``basisOfRecord``, ``year``,
        ``month``, ``coordinateUncertainty`` and ``gbifID``. Records missing
        either coordinate are skipped. An empty list is returned when the
        request fails or the response body is not valid JSON; the error is
        printed rather than raised so one failing taxon does not abort a run.
    """
    url = "https://api.gbif.org/v1/occurrence/search"
    # Let requests build and encode the query string instead of formatting
    # it by hand.
    params = {
        'taxonKey': taxon_key,
        'limit': limit,
        'hasCoordinate': 'true',
    }

    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"  Error fetching {taxon_name}: {e}")
        return []

    # A payload without a 'results' list yields no records, as before.
    results = data.get('results', []) if isinstance(data, dict) else []

    records = []
    for record in results:
        latitude = record.get('decimalLatitude')
        longitude = record.get('decimalLongitude')

        # Compare against None rather than testing truthiness: 0.0 is a
        # valid coordinate (equator / prime meridian) and must be kept.
        if latitude is None or longitude is None:
            continue

        records.append({
            'scientificName': record.get('scientificName', taxon_name),
            'species': record.get('species', taxon_name),
            'genus': record.get('genus', ''),
            'family': record.get('family', ''),
            'latitude': latitude,
            'longitude': longitude,
            'country': record.get('country', 'Unknown'),
            'stateProvince': record.get('stateProvince', ''),
            'locality': record.get('locality', ''),
            'basisOfRecord': record.get('basisOfRecord', 'Unknown'),
            'year': record.get('year'),
            'month': record.get('month'),
            'coordinateUncertainty': record.get('coordinateUncertaintyInMeters'),
            'gbifID': record.get('gbifID'),
        })

    return records

def main():
    """Fetch occurrences for every configured taxon and write JSON outputs.

    Queries GBIF once per entry in ``CARNIVOROUS_TAXA``, merges the results
    (dropping repeated occurrences that share a ``gbifID``), and writes two
    files next to this script:

    * ``carnivorous_plants_real.json`` -- the merged occurrence records
    * ``carnivorous_plants_real_metadata.json`` -- a description of the dataset

    Progress and a short summary are printed to stdout.
    """
    print("Fetching carnivorous plant data from GBIF API...")
    print("=" * 60)

    all_records = []
    # The configured taxa can overlap (a species queried alongside its own
    # family), so the same occurrence may come back from two queries.
    seen_ids = set()

    for taxon in CARNIVOROUS_TAXA:
        print(f"Fetching {taxon['name']} ({taxon['common']})...")
        records = fetch_gbif_occurrences(taxon['key'], taxon['name'])
        print(f"  Found {len(records)} records")

        duplicates = 0
        for record in records:
            gbif_id = record.get('gbifID')
            # Records without an ID cannot be matched, so they are kept.
            if gbif_id is not None:
                if gbif_id in seen_ids:
                    duplicates += 1
                    continue
                seen_ids.add(gbif_id)
            all_records.append(record)

        if duplicates:
            print(f"  Skipped {duplicates} duplicate records")

    print("=" * 60)
    print(f"Total records collected: {len(all_records)}")

    # Save data
    output_file = Path(__file__).parent / 'carnivorous_plants_real.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_records, f, indent=2)

    # Save metadata
    metadata = {
        'filename': 'carnivorous_plants_real.json',
        'title': 'Carnivorous Plants - GBIF Occurrence Database',
        'description': 'Real occurrence records of carnivorous plants from GBIF. Includes sundews (Droseraceae), pitcher plants (Nepenthaceae, Sarraceniaceae), Venus flytraps, bladderworts, and other insectivorous plants.',
        'source': 'GBIF (Global Biodiversity Information Facility) API',
        'source_url': 'https://www.gbif.org',
        # astimezone() attaches the local UTC offset so the timestamp is
        # unambiguous when the file is read on another machine.
        'date_fetched': datetime.now().astimezone().isoformat(),
        'record_count': len(all_records),
        'taxa_queried': [{'name': t['name'], 'common': t['common']} for t in CARNIVOROUS_TAXA],
        'geographic_scope': 'Global',
        'fields': list(all_records[0]) if all_records else [],
        # NOTE(review): basisOfRecord looks like the record's evidence type
        # rather than its license, and no license field is collected here --
        # confirm and reword this string if so.
        'license': 'CC0 / CC-BY (varies by record - see basisOfRecord)',
    }

    metadata_file = Path(__file__).parent / 'carnivorous_plants_real_metadata.json'
    with open(metadata_file, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)

    print(f"\nSaved to: {output_file}")
    print(f"Metadata: {metadata_file}")

    # Summary statistics
    if all_records:
        species = {r['species'] for r in all_records if r['species']}
        families = {r['family'] for r in all_records if r['family']}
        countries = {r['country'] for r in all_records if r['country'] != 'Unknown'}

        print("\nDataset Summary:")
        print(f"  Unique species: {len(species)}")
        print(f"  Unique families: {len(families)}")
        print(f"  Countries represented: {len(countries)}")

# Run the fetch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
