{
  "dataset_name": "Global Shark Attack File (GSAF)",
  "description": "Shark attack records from the Shark Research Institute. Covers provoked, unprovoked, and boat attacks with detailed incident information.",
  "last_updated": "2026-01-21",
  "source": "Kaggle (teajay/global-shark-attacks)",
  "original_source": "Shark Research Institute - Global Shark Attack File",
  "source_url": "https://www.kaggle.com/datasets/teajay/global-shark-attacks",
  "license": "CC BY-NC-SA 4.0",
  "record_count": 6463,
  "file_format": "CSV",
  "file_size_mb": 3.5,
  "date_range": "1845 to 2020",
  "files": {
    "shark_attacks_gsaf.csv": {
      "description": "Main GSAF database with all incident records",
      "records": 6463,
      "notes": "Contains many empty trailing columns (Unnamed: 9 onwards) - these can be ignored"
    },
    "shark_attacks_analysis.csv": {
      "description": "Supplementary analysis: gender breakdown of shark attacks",
      "records": 8,
      "notes": "Summary statistics on male vs female attack victims"
    },
    "shark_attacks_analysis.json": {
      "description": "JSON version of gender analysis",
      "records": 8
    }
  },
  "key_fields": {
    "Case Number": "Unique identifier for the incident (format: YYYY.MM.DD.X)",
    "Date": "Date of incident",
    "Year": "Year of incident",
    "Type": "Attack type (Provoked, Unprovoked, Boat, Sea Disaster, Invalid, Questionable)",
    "Country": "Country where attack occurred",
    "Area": "State/province/region",
    "Location": "Specific location description",
    "Activity": "What victim was doing (Swimming, Surfing, Diving, Fishing, etc.)",
    "Name": "Victim name (if known)",
    "Age": "Victim age",
    "Injury": "Description of injury",
    "Fatal (Y/N)": "Whether attack was fatal",
    "Time": "Time of day",
    "Species": "Shark species (if identified)",
    "Investigator or Source": "Source of the report"
  },
  "data_quality_notes": [
    "Historical records (pre-1900) may be incomplete",
    "Species identification often uncertain",
    "Many columns after column 21 are empty and can be dropped",
    "Some dates are approximate or estimated"
  ],
  "visualization_suggestions": [
    "Geographic hotspots (choropleth map)",
    "Activity type risk comparison",
    "Fatal vs non-fatal trends over time",
    "Shark species involvement",
    "Seasonal patterns",
    "Male vs female victim demographics"
  ]
}
