{
  "dataset_name": "NOAA Lightning Strike Data",
  "description": "Daily lightning strike counts with geographic center points from NOAA's National Lightning Detection Network. Covers continental United States.",
  "last_updated": "2026-01-21",
  "source": "Kaggle (noaa/lightning-strikes)",
  "original_source": "NOAA National Lightning Detection Network",
  "source_url": "https://www.kaggle.com/datasets/noaa/lightning-strikes",
  "license": "Public Domain (US Government Data)",
  "record_count": 3401012,
  "file_format": "CSV",
  "file_size_mb": 106,
  "date_range": "2018-01-01 to approximately 2018-12-31",
  "fields": {
    "date": "Date of observation (YYYY-MM-DD format)",
    "number_of_strikes": "Count of lightning strikes for that date/location",
    "center_point_geom": "Geographic center point (WKT POINT format, e.g., 'POINT(-75 27)')"
  },
  "geographic_coverage": {
    "region": "Continental United States",
    "coordinate_system": "WGS84 (EPSG:4326)",
    "longitude_range": "Approximately -130 to -60 (West to East)",
    "latitude_range": "Approximately 20 to 50 (South to North)"
  },
  "data_characteristics": {
    "granularity": "Daily aggregates by geographic cell",
    "typical_daily_records": "~9,000-15,000 records per day (varies by season)",
    "peak_season": "June-August (summer thunderstorm season)",
    "low_season": "December-February (winter)"
  },
  "processing_notes": [
    "Large dataset (3.4M rows) - consider sampling for initial exploration",
    "Parse center_point_geom to extract longitude/latitude for mapping",
    "Aggregate by month for seasonal analysis",
    "Grid size appears to be approximately 1-degree cells"
  ],
  "visualization_suggestions": [
    "Heatmap of lightning strike density",
    "Seasonal patterns (animated monthly map)",
    "Lightning alley visualization (Florida, Gulf Coast)",
    "Daily strike count time series",
    "Correlation with weather events"
  ],
  "sample_python_code": "import pandas as pd\n\n# Load data (large file - may take a moment)\ndf = pd.read_csv('lightning_strikes_noaa.csv')\n\n# Parse coordinates\ndf[['lon', 'lat']] = df['center_point_geom'].str.extract(r'POINT\\(([\\-\\d.]+) ([\\-\\d.]+)\\)').astype(float)\n\n# Aggregate by month\ndf['month'] = pd.to_datetime(df['date']).dt.month\nmonthly = df.groupby('month')['number_of_strikes'].sum()"
}
