{"attributions":[{"component":"fastText lid.176 language identification model","license":"CC-BY-SA-3.0","note":"Language counts in this report were produced with the fastText lid.176 model, licensed CC-BY-SA-3.0. This report is a derivative work and carries the same license for those figures.","url":"https://fasttext.cc/docs/en/language-identification.html"}],"columns":[{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5268,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[9.5,9.525,9.55,9.575,9.6,9.625,9.65,9.675,9.7,9.725,9.75,9.775,9.8,9.825,9.85,9.875,9.9,9.925,9.95,9.975,10.0,10.025,10.05,10.075,10.1,10.125,10.15,10.175,10.2,10.225,10.25,10.275,10.3,10.325,10.35,10.375,10.4,10.425,10.45,10.475,10.5]},"near_unique":false,"sample":["09/03/1915","07/02/1990","12/05/1997","01/14/1995","03/09/1968","03/07/2006","12/31/1968","03/17/2000","04/23/1995","06/29/1929","09/14/1979","10/22/1974","10/28/1956","04/07/1918","01/16/1958","07/01/1948","03/08/1938","02/11/1996","05/02/2008","12/18/1966","02/03/1959","09/17/1929","12/10/1946","10/10/1938","12/25/1986","06/01/1991","04/05/1976","11/04/1948","07/20/1935","06/02/2006","08/17/1983","02/20/2009","02/24/1984","02/04/1966","10/18/1963","10/25/1968","06/04/2003","05/19/1973","07/13/1969","09/15/1974","10/12/1994","07/04/2002","07/13/1956","06/04/2002","01/13/2005","03/17/2007","06/30/1962","07/11/1966","09/19/1946","08/07/1999"],"top_values":[["06/18/1972",4],["02/28/1973",4],["08/28/1976",4],["08/31/1988",4],["08/27/1992",4],["09/11/2001",4],["09/17/1929",3],["11/15/1934",3],["06/06/1944",3],["11/29/1944",3],["11/11/1945",3],["12/28/1946",3],["01/25/1947",3],["05/29/1947",3],["11/27/1947
",3],["07/12/1951",3],["05/13/1957",3],["09/02/1958",3],["02/26/1960",3],["03/08/1962",3]],"top_words":[["06/18/1972",4],["02/28/1973",4],["08/28/1976",4],["08/31/1988",4],["08/27/1992",4],["09/11/2001",4],["09/17/1929",3],["11/15/1934",3],["06/06/1944",3],["11/29/1944",3],["11/11/1945",3],["12/28/1946",3],["01/25/1947",3],["05/29/1947",3],["11/27/1947",3],["07/12/1951",3],["05/13/1957",3],["09/02/1958",3],["02/26/1960",3],["03/08/1962",3],["09/10/1962",3],["11/23/1962",3],["01/08/1968",3],["04/02/1969",3],["07/30/1971",3]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5268,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":5268,"n_null":0,"n_unique":4753,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.09776006074411542,"emoji_rate":0.0,"len_max":10,"len_mean":10.0,"len_median":10.0,"len_min":10,"len_p95":10.0,"n_duplicates":515,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4753,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.7% rows are all-caps"},{"code":"null_rate","level":"warn","message":"42.1% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"67.0% duplicate 
strings"}],"column":"Time","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[7,0,0,0,0,0,0,0,0,0,0,0,0,3033,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,6],"edges":[4.0,4.075,4.15,4.225,4.3,4.375,4.45,4.525,4.6,4.675,4.75,4.825,4.9,4.975,5.05,5.125,5.2,5.275,5.35,5.425,5.5,5.575,5.65,5.725,5.8,5.875,5.95,6.025,6.1,6.175,6.25,6.324999999999999,6.4,6.475,6.55,6.625,6.699999999999999,6.775,6.85,6.925,7.0]},"near_unique":false,"sample":["10:30","22:00","18:21","09:40","12:45","05:08","07:34","09:32","17:54","16:35","06:50","114:20","09:22","09:18","17:22","03:36","00:20","08:00","15:06","23:17","12:20","23:00","22:23","07:40","15:40","17:01","23:17","23:19","07:42","17:18","09:29","00:34","c: 2:00","22:00","16:00","09:40","15:45","09:32","13:42","18:40","12:00","22:18","05:29","10:45","07:30","11:00","22:02","20:48","02:00","11:35"],"top_values":[["15:00",32],["12:00",31],["11:00",29],["16:00",26],["19:30",26],["14:00",25],["19:00",24],["10:30",22],["17:00",22],["09:30",22],["13:00",20],["08:30",19],["17:30",19],["14:30",19],["20:30",19],["08:00",18],["09:00",17],["20:00",17],["12:30",17],["10:15",17]],"top_words":[["15:00",32],["12:00",31],["11:00",29],["16:00",26],["19:30",26],["14:00",25],["19:00",24],["10:30",22],["17:00",22],["09:30",22],["13:00",20],["08:30",19],["17:30",19],["14:30",19],["20:30",19],["08:00",18],["09:00",17],["20:00",17],["12:30",17],["10:15",17],["18:00",17],["16:30",17],["23:00",16],["07:00",15],["11:30",15]],"vocab_skipped":null,"word_histogram":{"counts":[3046,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1
.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":5268,"n_null":2219,"n_unique":1005,"null_rate":0.4212224753227031,"stats":{"allcaps_rate":0.9973761889143982,"boilerplate_rate":0.0,"duplicate_rate":0.6703837323712692,"emoji_rate":0.0,"len_max":7,"len_mean":5.002623811085602,"len_median":5.0,"len_min":4,"len_p95":5.0,"n_duplicates":2044,"n_empty":0,"one_word_rate":0.9990160708428993,"readability_flesch_mean":121.21492500000004,"url_rate":0.0,"vocab_size":1004,"word_mean":1.0009839291571008,"word_median":1.0}},{"alerts":[],"column":"Location","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[21,8,22,16,61,326,280,289,753,413,828,383,313,479,189,170,244,85,114,36,28,52,25,23,26,11,18,3,13,3,1,5,2,1,3,1,0,0,1,2],"edges":[5.0,6.375,7.75,9.125,10.5,11.875,13.25,14.625,16.0,17.375,18.75,20.125,21.5,22.875,24.25,25.625,27.0,28.375,29.75,31.125,32.5,33.875,35.25,36.625,38.0,39.375,40.75,42.125,43.5,44.875,46.25,47.625,49.0,50.375,51.75,53.125,54.5,55.875,57.25,58.625,60.0]},"near_unique":false,"sample":["Off Cuxhaven, Germany","Near Port Morseby, New Guinea","Little Grand Rapids, Canada","Kathmandu, Nepal","St. 
Louis, Missouri","Labiano, Spain","Near Bradford, Pennsylvania","Ennadai Lake, Canada","Near Palaly AFB, Sri Lanka","Lake Constance, Switzerland","Blink Horn Point, Canada","Blair, Oklahoma","Hommelfjell, Norway","Over the Mediterranean","Norwalk, California","PacifiOcean between Hong Kong and Macao","Near Formia, Italy","Port-au-Prince, Hati","Near Rumbek, Sudan","Hilo, Hawaii","Beirut, Lebanon","Jacumba, California","Near Rio de Janeiro, Brazil","Soest, Germany","Ay, Saudi Arabia","Off Matthewtown, Great Inagua","Friday Harbor, Washington","Kiangwan, China","Off Chandeleur Island, Mississippi","Yaocun, China","Near Dundo, Angola","Luxor, Egypt","Colombia","Near Tokyo, Japan","Near Tisbury, England","Savannakhet, Laos","Milan, Italy","Near New Delhi, India","Kekaha, Hawaii","Fairbanks, Alaska","Near Formoso do Aragala, Brazil","San Dimas, California","Cold Bay, Alaska","Oiapoque, Amapa, Brazil","Manguipayan, Colombia","Samara, Russia","Near Junnar, Maharashtra, India","Athens, Greece","Stephenville, Newfoundland","Sept-\u00celes, Canada"],"top_values":[["Sao Paulo, Brazil",15],["Moscow, Russia",15],["Rio de Janeiro, Brazil",14],["Bogota, Colombia",13],["Manila, Philippines",13],["Anchorage, Alaska",13],["New York, New York",12],["Cairo, Egypt",12],["Chicago, Illinois",11],["Near Moscow, Russia",9],["AtlantiOcean",9],["Tehran, Iran",9],["Paris, France",8],["Amsterdam, Netherlands",8],["Denver, Colorado",8],["Ankara, Turkey",8],["Rome, Italy",8],["Cleveland, Ohio",7],["Bucharest, Romania",7],["Burbank, 
California",7]],"top_words":[["near",1272],["off",355],["new",257],["brazil",190],["alaska",178],["russia",174],["colombia",150],["canada",147],["california",144],["france",133],["mexico",122],["england",105],["india",100],["south",94],["germany",92],["island,",89],["china",89],["indonesia",87],["city,",85],["san",84],["australia",81],["italy",79],["vietnam",74],["york",68],["de",67]],"vocab_skipped":null,"word_histogram":{"counts":[59,0,0,2136,0,0,0,2001,0,0,0,711,0,0,0,239,0,0,67,0,0,0,22,0,0,0,7,0,0,6],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":5268,"n_null":20,"n_unique":4303,"null_rate":0.0037965072133637054,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.1800685975609756,"emoji_rate":0.0,"len_max":60,"len_mean":20.379954268292682,"len_median":19.0,"len_min":5,"len_p95":31.0,"n_duplicates":945,"n_empty":0,"one_word_rate":0.011242378048780487,"readability_flesch_mean":24.03101428571431,"url_rate":0.0,"vocab_size":4541,"word_mean":2.8656631097560976,"word_median":3.0}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"duplicates","level":"warn","message":"52.8% duplicate 
strings"}],"column":"Operator","extras":{"language_counts":{"__engine":"fasttext:4,537","bs":3,"ca":22,"ceb":6,"cs":4,"da":2,"de":202,"en":3340,"es":224,"eu":1,"fi":3,"fr":183,"gl":2,"hr":6,"hu":5,"id":40,"it":278,"ku":1,"lt":2,"ms":3,"nl":25,"no":11,"pl":20,"pt":55,"ro":2,"ru":27,"sl":17,"sv":34,"tr":9,"uk":6,"vi":2},"language_sample_size":5000,"length_histogram":{"counts":[96,233,140,462,169,184,128,395,270,447,407,143,340,205,542,166,229,102,194,54,127,62,35,30,13,25,11,5,7,7,7,0,9,1,3,0,1,0,0,1],"edges":[3.0,4.55,6.1,7.65,9.2,10.75,12.3,13.85,15.4,16.950000000000003,18.5,20.05,21.6,23.150000000000002,24.7,26.25,27.8,29.35,30.900000000000002,32.45,34.0,35.550000000000004,37.1,38.65,40.2,41.75,43.300000000000004,44.85,46.4,47.95,49.5,51.050000000000004,52.6,54.15,55.7,57.25,58.800000000000004,60.35,61.9,63.45,65.0]},"near_unique":false,"sample":["Military - German Navy","Eagle Air","Military - Russian Air Force","Air Taxi - Wolfe Air Aviation Ltd.","Military - Russian Air Force","TriCoastal Air","China Air Lines","Aeroperlas","Bristow Helicopters","Deutsche Lufthansa","Military - U.S. Air Force","Willow Air Service - Air Taxi","Linee Aeree Italiane","Military - German Navy","Military - U.S. Navy","Civil Air Transport","Cruzeiro do Sul","Military - Taliban Militia","Southern Sudan Air Connection","Air Taxi","Aviaco","Bremerton-Seattle Air Taxi / Gorst Air Lines","Kansas City Southern Skyways","Trans Canada Air Lines","Military - Spanish Air Force.","Carib Air Transport","Chartair - Air Taxi","China National Aviation Corporation","Private","International Jet Charter","Korean Airlines","Aerolift","Military - U.S. Air Force","Canadian PacifiAir Lines","Pan American World Airways","Stikine Air Service - Air Taxi","Air Taxi - Marco Zero Air Taxi","Aeroflot","Air Algerie","North Cay Airways","Iran Asseman Airlines","Prestige Airlines (Cargo) Charter","Pan American World Airways","Keystone Air Services Ltd. 
- Air Taxi","Airline Transport","UTAir Airlines","United Arab Airlines","Servicios Americanos","Air Ocean","Regionnair"],"top_values":[["Aeroflot",179],["Military - U.S. Air Force",176],["Air France",70],["Deutsche Lufthansa",65],["China National Aviation Corporation",44],["United Air Lines",44],["Air Taxi",44],["Military - U.S. Army Air Forces",43],["Pan American World Airways",41],["Military - U.S. Navy",36],["US Aerial Mail Service",36],["American Airlines",36],["Military - Royal Air Force",36],["Indian Airlines",34],["KLM Royal Dutch Airlines",33],["Philippine Air Lines",33],["Private",31],["Aeropostale",26],["Northwest Orient Airlines",25],["Eastern Air Lines",25]],"top_words":[["air",1911],["-",1172],["airlines",815],["military",798],["force",577],["airways",447],["taxi",425],["u.s.",345],["aviation",190],["aeroflot",190],["lines",182],["service",165],["royal",129],["american",122],["army",102],["transport",100],["national",91],["/",90],["de",86],["inc.",85],["services",82],["private",80],["navy",79],["china",79],["british",77]],"vocab_skipped":null,"word_histogram":{"counts":[867,0,1450,0,0,1244,0,584,0,0,692,0,297,0,0,67,0,17,0,0,16,0,6,0,0,8,0,1,0,1],"edges":[1.0,1.4,1.8,2.2,2.6,3.0,3.4000000000000004,3.8000000000000003,4.2,4.6,5.0,5.4,5.800000000000001,6.2,6.6000000000000005,7.0,7.4,7.800000000000001,8.2,8.600000000000001,9.0,9.4,9.8,10.200000000000001,10.600000000000001,11.0,11.4,11.8,12.200000000000001,12.600000000000001,13.0]}},"kind":"text","n":5268,"n_null":18,"n_unique":2476,"null_rate":0.003416856492027335,"stats":{"allcaps_rate":0.037333333333333336,"boilerplate_rate":0.0,"duplicate_rate":0.5283809523809524,"emoji_rate":0.0,"len_max":65,"len_mean":19.493904761904762,"len_median":19.0,"len_min":3,"len_p95":35.0,"n_duplicates":2774,"n_empty":0,"one_word_rate":0.16514285714285715,"readability_flesch_mean":19.610950000000024,"url_rate":0.0,"vocab_size":2370,"word_mean":3.0474285714285716,"word_median":3.0}},{"alerts":[{"code":"long_tail","level":"info"
,"message":"543 singleton categories"},{"code":"null_rate","level":"warn","message":"79.7% null"}],"column":"Flight #","extras":{"singletons":543,"top_values":[["-",67],["1",10],["4",7],["6",6],["21",6],["101",6],["901",6],["7",5],["201",5],["701",5],["706",5],["703",5],["2",4],["203",4],["304",4],["601",4],["514",4],["11",4],["217",4],["114",4]]},"kind":"categorical","n":5268,"n_null":4199,"n_unique":724,"null_rate":0.79707668944571,"stats":{"cardinality":724,"entropy":9.05754931252951,"entropy_ratio":0.9534417105486859,"top_rate":0.0626753975678204,"top_value":"-"}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"null_rate","level":"warn","message":"32.4% null"}],"column":"Route","extras":{"language_counts":{"__engine":"fasttext:3,375","ca":17,"ceb":8,"cs":7,"da":3,"de":88,"en":2567,"eo":7,"es":237,"et":3,"fi":3,"fr":64,"gd":1,"hr":8,"hu":1,"id":53,"it":88,"ja":1,"la":2,"ms":3,"nl":48,"no":11,"pl":11,"pt":100,"ro":4,"sh":3,"sl":3,"sv":17,"te":1,"tr":9,"vi":1},"language_sample_size":5000,"length_histogram":{"counts":[8,4,93,6,5,100,99,155,452,247,443,170,179,286,155,135,245,94,213,71,49,74,39,40,51,20,27,12,10,19,6,4,17,9,6,3,4,8,2,2],"edges":[4.0,5.375,6.75,8.125,9.5,10.875,12.25,13.625,15.0,16.375,17.75,19.125,20.5,21.875,23.25,24.625,26.0,27.375,28.75,30.125,31.5,32.875,34.25,35.625,37.0,38.375,39.75,41.125,42.5,43.875,45.25,46.625,48.0,49.375,50.75,52.125,53.5,54.875,56.25,57.625,59.0]},"near_unique":false,"sample":["Lympne, England - Rotterdam, The Netherlands","Isfahan - Terhan","Mexico City - Reynosa - Matamoros","Anchorage, AK - Hoholitna River, AK","Panchkhal - Tribuvan","Kongolo - Goma","Honolulu - Lihue","Jomsom - Pokhara","Jaffna - Colombo","El Paso, TX - Pueblo, CO","Tallin - L'vov","Greenville, MS - Charlotte, NC","Sydney - Canberra","London, England - India","Chicago - Burbank","Monterrey - Falcon Dam","Gander - Shannon","Kathmandu - New Delhi","Sabine Pass - Oil Platform","Valdez, AK - 
Local","London - Innsbruck","Santiago, Chili - Cristobal, Panama","Kildala - Vancouver","Akron, OH - Winston/Salem, NC","Kira-Kira - Honiara","Buenos Aires - La Paz - Quito","Dhahran - Karachi","Blackbushe, UK - Beogard, Serbia","Pittsburgh - Morgantown","Sightseeing","Montlu\u00e7on - Paris","Cucuta - Arauca","Aqua Caliente, Mexico - Los Angeles","Leningrad - Erevan","Reynosa - Tampico","Amman, Jordan - Beiruit, Lebanon - Yerevan, Armenia","Bucharest -Lasi","Geneva - Funchal","Thief River, MN - Owatonna, MN","Clarksburg - Washington DC","Denpasar - Taipei","Katmandou - Lukla","Ankara - Adana","Sao Paulo - Tefe - Manaus","Yerevan, Armenia - Sochi, Russia","Cordova - Ancorage","Athens - Nicosia","La Paz - San Borja","Training","Iyachisakus Lake - Mollet Lake"],"top_values":[["Training",81],["Sightseeing",29],["Test flight",17],["Test",6],["Sao Paulo - Rio de Janeiro",5],["Saigon - Paris",4],["Bogota - Barranquilla",4],["Sao Paulo - Porto Alegre",4],["Villavicencio - Mitu",4],["Demonstration",3],["Cleveland - Chicago",3],["Santiago de Cuba - Havana",3],["Buenos Aires - Santiago",3],["Rio de Janeiro - Sao Paulo",3],["Guayaquil - Cuenca",3],["Rome - Cairo",3],["Barranquilla - Bogota",3],["Villavicencio - Bogota",3],["Burbank - Oakland",3],["Rome - 
Athens",3]],"top_words":[["-",3658],["ak",158],["city",154],["new",140],["san",133],["york",96],["training",86],["de",78],["ca",74],["paris",69],["chicago",61],["la",61],["los",60],["london",57],["afb",56],["angeles",51],["rio",51],["rome",48],["bogota",48],["lake",46],["ny",44],["st.",43],["miami",41],["tx",41],["moscow",41]],"vocab_skipped":null,"word_histogram":{"counts":[146,0,49,0,0,1506,0,0,576,0,736,0,0,318,0,0,113,0,0,67,0,24,0,0,18,0,0,4,0,5],"edges":[1.0,1.3666666666666667,1.7333333333333334,2.0999999999999996,2.466666666666667,2.833333333333333,3.1999999999999997,3.5666666666666664,3.933333333333333,4.3,4.666666666666666,5.033333333333333,5.3999999999999995,5.766666666666667,6.133333333333333,6.5,6.866666666666666,7.2333333333333325,7.6,7.966666666666666,8.333333333333332,8.7,9.066666666666666,9.433333333333334,9.799999999999999,10.166666666666666,10.533333333333333,10.899999999999999,11.266666666666666,11.633333333333333,12.0]}},"kind":"text","n":5268,"n_null":1706,"n_unique":3244,"null_rate":0.32384206529992404,"stats":{"allcaps_rate":0.00028074115665356543,"boilerplate_rate":0.0,"duplicate_rate":0.0892756878158338,"emoji_rate":0.0,"len_max":59,"len_mean":22.088152723189218,"len_median":20.0,"len_min":4,"len_p95":37.0,"n_duplicates":318,"n_empty":0,"one_word_rate":0.04098820887142055,"readability_flesch_mean":27.15470892857145,"url_rate":0.0,"vocab_size":3647,"word_mean":4.065412689500281,"word_median":4.0}},{"alerts":[{"code":"duplicates","level":"warn","message":"53.3% duplicate 
strings"}],"column":"Type","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[6,5,6,19,32,57,178,255,685,0,522,331,441,369,208,158,154,166,154,0,109,120,158,188,174,107,73,85,39,0,66,58,55,43,16,25,16,9,21,133],"edges":[4.0,4.9,5.8,6.7,7.6,8.5,9.4,10.3,11.2,12.1,13.0,13.9,14.8,15.700000000000001,16.6,17.5,18.4,19.3,20.2,21.1,22.0,22.900000000000002,23.8,24.7,25.6,26.5,27.400000000000002,28.3,29.2,30.1,31.0,31.900000000000002,32.8,33.7,34.6,35.5,36.4,37.300000000000004,38.2,39.1,40.0]},"near_unique":false,"sample":["Zeppelin L-10 (airship)","Beech King Air B90","Swearingen SA-226T Metro II","de Havilland Canada DHC-6 Twin Otter 300","Bell UH-1H / Bell UH-1H (helicopter)","Swearingen SA.226TC Metro II","Handley Page Dart Herald 201","de Havilland Canada DHC-6 Twin Otter 300","Hawker Siddeley HS-748-357/2B SCD","Lockheed Vega","McDonnell Douglas DC-8-62","Douglas DC-3","Convair CV-240-0","Zeppelin L-59 (airship)","Vickers Viscount 745D","Douglas DC-3 (Douglas C-47A-10-DK)","Heinkel 116","de Havilland Canada DHC-6 Twin Otter 300","de Havilland Canada DHC-2 Mark I Beaver","Antonov AN-12","Vickers Viscount 745D","Junkers G-24","Douglas C-47A-1-DK","Junkers JU90V2","Antonov AN-12","Antonov AN-24","Britten-Norman BN-2A-21 Islander","Avro 685 York 1","Boeing 247D","KJ-2000","Boeing B-737-2P6","Antonov 12V","Fairchild F-27M","Cessna 172D","Beech D18S","Boeing 707-321CF","Piper PA-31-350 Navajo Chieftain","Fairchild-Hiller FH-227B","Boeing B-707-328B","Cessna 172M","Mil Mi-8MTV (helicopter)","de Havilland DHC-2","Ilyushin IL-12B","PA- 31-350 Chieftain","Black Hawk helicopter","Tupelov 134AK","Tupolev TU-104A","Cessna 180","Douglas DC-3","McDonnell Douglas MD-90-30"],"top_values":[["Douglas DC-3",334],["de Havilland Canada DHC-6 Twin Otter 300",81],["Douglas C-47A",74],["Douglas C-47",62],["Douglas DC-4",40],["Yakovlev YAK-40",37],["Antonov AN-26",36],["Junkers JU-52/3m",32],["Douglas C-47B",29],["De Havilland DH-4",28],["Douglas 
DC-6B",27],["Breguet 14",23],["Curtiss C-46A",21],["Douglas C-47-DL",20],["Douglas DC-6",20],["Antonov AN-12",19],["Antonov AN-24",19],["McDonnell Douglas DC-9-32",19],["Curtiss C-46",18],["Embraer 110P1 Bandeirante",18]],"top_words":[["douglas",1113],["boeing",384],["dc-3",376],["lockheed",343],["cessna",307],["de",301],["havilland",300],["antonov",248],["canada",159],["otter",147],["fokker",133],["piper",133],["dhc-6",131],["twin",129],["mcdonnell",125],["curtiss",122],["beechcraft",121],["/",111],["ilyushin",98],["tupolev",97],["vickers",95],["300",91],["convair",86],["junkers",84],["hercules",83]],"vocab_skipped":null,"word_histogram":{"counts":[39,0,0,0,3095,0,0,0,1147,0,0,0,557,0,0,0,0,205,0,0,0,66,0,0,0,129,0,0,0,3],"edges":[1.0,1.2333333333333334,1.4666666666666668,1.7,1.9333333333333333,2.166666666666667,2.4,2.6333333333333333,2.8666666666666667,3.1,3.3333333333333335,3.566666666666667,3.8,4.033333333333333,4.266666666666667,4.5,4.733333333333333,4.966666666666667,5.2,5.433333333333334,5.666666666666667,5.9,6.133333333333334,6.366666666666667,6.6,6.833333333333333,7.066666666666666,7.3,7.533333333333333,7.766666666666667,8.0]}},"kind":"text","n":5268,"n_null":27,"n_unique":2446,"null_rate":0.005125284738041002,"stats":{"allcaps_rate":0.009540164090822362,"boilerplate_rate":0.0,"duplicate_rate":0.53329517267697,"emoji_rate":0.0,"len_max":40,"len_mean":18.325701202060674,"len_median":16.0,"len_min":4,"len_p95":34.0,"n_duplicates":2795,"n_empty":0,"one_word_rate":0.007441327990841442,"readability_flesch_mean":69.25886785714287,"url_rate":0.0,"vocab_size":2534,"word_mean":2.7181835527571074,"word_median":2.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.4% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.0% rows are a single word"},{"code":"allcaps","level":"info","message":"99.2% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 
chars"}],"column":"Registration","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,36,0,0,64,0,0,69,0,0,398,0,0,3228,0,0,512,0,0,267,0,42,0,0,206,0,0,10,0,0,12,0,0,41,0,0,8,0,39],"edges":[1.0,1.35,1.7,2.05,2.4,2.75,3.0999999999999996,3.4499999999999997,3.8,4.15,4.5,4.85,5.199999999999999,5.55,5.8999999999999995,6.25,6.6,6.949999999999999,7.3,7.6499999999999995,8.0,8.35,8.7,9.049999999999999,9.399999999999999,9.75,10.1,10.45,10.799999999999999,11.149999999999999,11.5,11.85,12.2,12.549999999999999,12.899999999999999,13.25,13.6,13.95,14.299999999999999,14.649999999999999,15.0]},"near_unique":true,"sample":["77","FAC-1150","HP-986PS","4R-HVA","PP-SAD","P4-AOD","PI-C1131","LV-ZSR","RA-65617","P-BALSA","FAP-348","68-218","N5904","F-ALAI","N7840B","PI-C22","CF-TCP","N35207","3C-5GE","B-112","N1554V","NC191E","NC36498","40-2370","N144SP","PK-VIP","C-FAWF","N95425","D-AXAV","ZS-PDV","N1910L","I-FEEV","64-0624","PP-SDJ","CP-639","N558MA","I-ERJC","OO-SRD","145927","5063","7T-VEE","PK-TAR","G-ALZU","RA85816/A9C-DHL","GN-97121","N45MF","CP-45","HR-SAG","42-72572","SX-ECH"],"top_values":[],"top_words":[["/",36],["49",3],["hk-",3],["82",2],["32",2],["77",2],["204",2],["305",2],["223",2],["nc10809",2],["vh-usg",2],["cf-tcl",2],["g-aeuh",2],["w4026",2],["n91303",2],["xa-got",2],["128441",2],["cccp",2],["cp-45",2],["n67941",2],["cp-",2],["65-0990",2],["cccp-09303",2],["012",2],["n864ja",2]],"vocab_skipped":null,"word_histogram":{"counts":[4883,0,0,0,0,0,0,14,0,0,0,0,0,0,0,35,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":
5268,"n_null":335,"n_unique":4905,"null_rate":0.06359149582384206,"stats":{"allcaps_rate":0.9918913440097303,"boilerplate_rate":0.0,"duplicate_rate":0.005676059193188729,"emoji_rate":0.0,"len_max":15,"len_mean":6.393877964727347,"len_median":6.0,"len_min":1,"len_p95":10.0,"n_duplicates":28,"n_empty":0,"one_word_rate":0.989864180012163,"readability_flesch_mean":103.02592500000003,"url_rate":0.0,"vocab_size":4948,"word_mean":1.017839043178593,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"98.4% rows are a single word"},{"code":"allcaps","level":"info","message":"96.6% rows are all-caps"},{"code":"null_rate","level":"warn","message":"23.3% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"cn/In","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[23,0,113,0,604,0,866,0,895,0,268,0,269,0,281,0,457,0,125,0,0,92,0,14,0,9,0,2,0,5,0,4,0,5,0,2,0,2,0,4],"edges":[1.0,1.475,1.95,2.425,2.9,3.375,3.8499999999999996,4.324999999999999,4.8,5.2749999999999995,5.75,6.225,6.699999999999999,7.175,7.6499999999999995,8.125,8.6,9.075,9.549999999999999,10.025,10.5,10.975,11.45,11.924999999999999,12.399999999999999,12.875,13.35,13.825,14.299999999999999,14.774999999999999,15.25,15.725,16.2,16.674999999999997,17.15,17.625,18.099999999999998,18.575,19.05,19.525,20.0]},"near_unique":false,"sample":["HP-25","24805/1878","12","10670","20436/788","742","3817","45108","31-033B","1957","45754/224","10570","45290 
/4021","30","4817","43057/73","11714","53-13","45-028","556","0402104","1914","43144/155","42-68715","U197","MA036","9721753","2628","640601","25664/2393","77A254","48050/989","439","18712/373","176","2109","120070","61427","20494/850","45972/357","6344506","208B-0549","17629/8","31-7405203","525-0176","1100","17515/124","4491","10171","8275013"],"top_values":[["178",6],["19",5],["229",5],["125",5],["213",5],["1",4],["31",4],["160",4],["4",4],["439",4],["44",4],["442",4],["195",4],["1965",4],["212",4],["55",4],["103",4],["6",4],["36",3],["2",3]],"top_words":[["/",49],["178",6],["1",5],["19",5],["229",5],["125",5],["213",5],["31",4],["160",4],["4",4],["439",4],["44",4],["442",4],["195",4],["1965",4],["212",4],["55",4],["103",4],["6",4],["36",3],["2",3],["86",3],["1567",3],["053",3],["151",3]],"vocab_skipped":null,"word_histogram":{"counts":[3976,0,0,0,0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0,38,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":5268,"n_null":1228,"n_unique":3707,"null_rate":0.2331055429005315,"stats":{"allcaps_rate":0.9663366336633663,"boilerplate_rate":0.0,"duplicate_rate":0.08242574257425743,"emoji_rate":0.0,"len_max":20,"len_mean":5.64480198019802,"len_median":5.0,"len_min":1,"len_p95":10.0,"n_duplicates":333,"n_empty":0,"one_word_rate":0.9841584158415841,"readability_flesch_mean":121.20477500000001,"url_rate":0.0,"vocab_size":3739,"word_mean":1.0257425742574258,"word_median":1.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+4.25"},{"code":"outliers","level":"warn","message":"10.1% rows beyond 1.5 
IQR"}],"column":"Aboard","extras":{"histogram":{"counts":[2978,1055,430,230,129,105,75,56,46,35,27,16,8,7,9,4,9,3,9,3,2,3,1,1,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1],"edges":[0.0,16.1,32.2,48.300000000000004,64.4,80.5,96.60000000000001,112.70000000000002,128.8,144.9,161.0,177.10000000000002,193.20000000000002,209.3,225.40000000000003,241.50000000000003,257.6,273.70000000000005,289.8,305.90000000000003,322.0,338.1,354.20000000000005,370.3,386.40000000000003,402.50000000000006,418.6,434.70000000000005,450.80000000000007,466.90000000000003,483.00000000000006,499.1,515.2,531.3000000000001,547.4000000000001,563.5,579.6,595.7,611.8000000000001,627.9000000000001,644.0]},"sample":[1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,6.0,1.0,8.0,8.0,16.0,1.0,6.0,1.0,2.0,10.0,7.0,10.0,5.0,6.0,2.0,15.0,15.0,7.0,12.0,12.0,7.0,14.0,3.0,5.0,19.0,12.0,3.0,17.0,4.0,14.0,17.0,9.0,16.0,2.0,16.0,10.0,22.0,13.0,17.0,37.0,19.0,17.0,27.0,20.0,15.0,25.0,2.0,7.0,6.0,6.0,3.0,11.0,10.0,22.0,23.0,3.0,20.0,25.0,24.0,12.0,14.0,12.0,49.0,13.0,22.0,21.0,44.0,25.0,18.0,2.0,23.0,53.0,48.0,26.0,3.0,43.0,44.0,9.0,18.0,2.0,15.0,19.0,17.0,24.0,26.0,9.0,17.0,21.0,4.0,44.0,13.0,52.0,38.0,20.0,23.0,29.0,23.0,16.0,2.0,50.0,27.0,1.0,3.0,8.0,35.0,3.0,21.0,16.0,28.0,18.0,27.0,5.0,26.0,26.0,6.0,14.0,3.0,22.0,36.0,25.0,62.0,21.0,23.0,12.0,35.0,3.0,67.0,44.0,26.0,49.0,11.0,15.0,45.0,3.0,17.0,18.0,4.0,5.0,18.0,14.0,6.0,3.0,40.0,45.0,10.0,9.0,44.0,4.0,72.0,48.0,23.0,12.0,28.0,14.0,18.0,5.0,107.0,132.0,84.0,13.0,8.0,6.0,29.0,4.0,22.0,83.0,3.0,3.0,80.0,2.0,49.0,7.0,30.0,32.0,3.0,9.0,2.0,38.0,29.0,2.0,25.0,9.0,15.0,4.0,3.0,80.0,82.0,7.0,66.0,127.0,130.0,3.0,6.0,63.0,5.0,29.0,7.0,3.0,21.0,7.0,9.0,25.0,4.0,23.0,11.0,12.0,5.0,12.0,31.0,33.0,18.0,14.0,2.0,4.0,6.0,6.0,8.0,31.0,229.0,79.0,60.0,12.0,15.0,64.0,4.0,12.0,83.0,34.0,126.0,111.0,8.0,25.0,8.0,2.0,3.0,176.0,5.0,15.0,3.0,100.0,5.0,5.0,2.0,73.0,2.0,107.0,12.0,3.0,5.0,5.0,11.0,82.0,3.0,4.0,2.0,3.0,4.0,21.0,6.0,2.0,76.0,5.0,10.0,45.0,20.0,4.0,77.0,10.0,44.0,30.0,165.0,2.0,
200.0,134.0,9.0,18.0,3.0,7.0,6.0,4.0,64.0,12.0,7.0,3.0,10.0,6.0,10.0,3.0,7.0,7.0,34.0,89.0,45.0,4.0,50.0,8.0,24.0,7.0,66.0,167.0,12.0,4.0,13.0,18.0,79.0,35.0,118.0,10.0,15.0,10.0,2.0,50.0,3.0,46.0,12.0,4.0,30.0,192.0,50.0,4.0,9.0,10.0,4.0,89.0,8.0,4.0,87.0,14.0,8.0,1.0,11.0,3.0,9.0,4.0,6.0,9.0,82.0,21.0,15.0,12.0,11.0,2.0,5.0,10.0,104.0,34.0,29.0,12.0,5.0,8.0,22.0,2.0,11.0,30.0,10.0,16.0,5.0,63.0,21.0,42.0,71.0,28.0,1.0,46.0,21.0,85.0,5.0,129.0,130.0,38.0,13.0,3.0,3.0,10.0,53.0,15.0,13.0,7.0,2.0,15.0,13.0,22.0,6.0,126.0,17.0,43.0,2.0,11.0,3.0,3.0,110.0,7.0,3.0,2.0,3.0,9.0,2.0,1.0,293.0,28.0,62.0,29.0,24.0,10.0,189.0,32.0,19.0,349.0,8.0,36.0,4.0,22.0,33.0,57.0,26.0,10.0,6.0,31.0,5.0,1.0,5.0,4.0,12.0,20.0,6.0,4.0,9.0,143.0,9.0,17.0,18.0,12.0,3.0,8.0,18.0,2.0,16.0,2.0,9.0,7.0,6.0,71.0,10.0,16.0,5.0,4.0,2.0,46.0,25.0,161.0,148.0,10.0,6.0,7.0,7.0,2.0,6.0,7.0,53.0,2.0,11.0,15.0,10.0,39.0,14.0,11.0,100.0,117.0,3.0,6.0,4.0,11.0,3.0,5.0,11.0,10.0,18.0,10.0,1.0,3.0,20.0,8.0,13.0,6.0,1.0,13.0]},"kind":"numeric","n":5268,"n_null":22,"n_unique":239,"null_rate":0.004176157934700076,"stats":{"iqr":25.0,"kurtosis":28.413952537101586,"max":644.0,"mean":27.554517727792604,"median":13.0,"min":0.0,"n_outliers":529,"outlier_rate":0.10083873427373237,"q1":5.0,"q3":30.0,"skew":4.246965214115307,"std":43.076711027774856,"zero_rate":0.0003812428516965307}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+4.95"},{"code":"outliers","level":"warn","message":"8.4% rows beyond 1.5 
IQR"}],"column":"Fatalities","extras":{"histogram":{"counts":[3314,980,343,215,96,90,51,42,39,19,18,9,11,3,2,6,2,5,4,1,1,0,1,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1],"edges":[0.0,14.575,29.15,43.724999999999994,58.3,72.875,87.44999999999999,102.02499999999999,116.6,131.17499999999998,145.75,160.325,174.89999999999998,189.475,204.04999999999998,218.625,233.2,247.77499999999998,262.34999999999997,276.925,291.5,306.075,320.65,335.22499999999997,349.79999999999995,364.375,378.95,393.525,408.09999999999997,422.67499999999995,437.25,451.825,466.4,480.97499999999997,495.54999999999995,510.125,524.6999999999999,539.275,553.85,568.425,583.0]},"sample":[1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0,3.0,3.0,2.0,1.0,3.0,2.0,1.0,2.0,1.0,2.0,10.0,5.0,1.0,3.0,6.0,2.0,1.0,11.0,7.0,12.0,12.0,5.0,14.0,3.0,4.0,19.0,10.0,3.0,2.0,4.0,14.0,16.0,5.0,14.0,1.0,12.0,10.0,18.0,2.0,7.0,25.0,35.0,24.0,20.0,24.0,2.0,3.0,5.0,24.0,21.0,22.0,23.0,4.0,3.0,11.0,30.0,28.0,29.0,59.0,22.0,15.0,8.0,20.0,14.0,25.0,13.0,20.0,24.0,17.0,31.0,22.0,87.0,2.0,2.0,25.0,1.0,3.0,0.0,53.0,25.0,5.0,14.0,30.0,6.0,25.0,2.0,3.0,12.0,8.0,24.0,14.0,7.0,15.0,31.0,14.0,17.0,25.0,23.0,31.0,12.0,29.0,11.0,18.0,37.0,2.0,8.0,10.0,25.0,36.0,5.0,20.0,3.0,15.0,0.0,16.0,1.0,16.0,28.0,4.0,18.0,2.0,34.0,14.0,3.0,1.0,23.0,8.0,7.0,3.0,5.0,35.0,32.0,49.0,11.0,4.0,7.0,24.0,3.0,51.0,65.0,15.0,8.0,68.0,6.0,5.0,3.0,34.0,61.0,9.0,31.0,10.0,8.0,3.0,6.0,72.0,5.0,6.0,11.0,15.0,26.0,81.0,12.0,44.0,2.0,101.0,2.0,31.0,20.0,28.0,0.0,2.0,2.0,2.0,37.0,68.0,58.0,2.0,1.0,24.0,11.0,4.0,2.0,4.0,25.0,5.0,126.0,4.0,35.0,24.0,42.0,18.0,66.0,1.0,1.0,5.0,2.0,82.0,123.0,29.0,7.0,21.0,40.0,4.0,47.0,2.0,13.0,25.0,23.0,39.0,12.0,12.0,1.0,15.0,31.0,1.0,14.0,2.0,15.0,6.0,6.0,5.0,14.0,47.0,2.0,12.0,15.0,64.0,4.0,12.0,78.0,32.0,97.0,111.0,1.0,22.0,1.0,2.0,3.0,103.0,5.0,15.0,3.0,11.0,89.0,5.0,66.0,2.0,107.0,22.0,29.0,3.0,5.0,9.0,75.0,3.0,2.0,2.0,3.0,3.0,21.0,3.0,2.0,28.0,5.0,4.0,45.0,4.0,73.0,77.0,20.0,2.0,30.0,100.0,2.0,2.0,4.0,9.0,18.0,3.0,2.0,3.0,4.0,45.0,86.0,
2.0,3.0,61.0,6.0,10.0,3.0,7.0,6.0,34.0,89.0,3.0,2.0,7.0,11.0,2.0,1.0,6.0,99.0,4.0,2.0,12.0,18.0,74.0,35.0,2.0,20.0,2.0,46.0,2.0,6.0,11.0,8.0,0.0,12.0,9.0,10.0,8.0,1.0,12.0,3.0,1.0,14.0,19.0,11.0,55.0,2.0,87.0,21.0,38.0,3.0,11.0,50.0,9.0,2.0,50.0,37.0,2.0,18.0,15.0,12.0,23.0,1.0,6.0,1.0,35.0,33.0,12.0,4.0,5.0,77.0,22.0,6.0,176.0,6.0,3.0,2.0,34.0,54.0,2.0,6.0,4.0,9.0,19.0,1.0,3.0,21.0,23.0,5.0,8.0,4.0,2.0,38.0,4.0,223.0,2.0,15.0,14.0,13.0,2.0,51.0,7.0,4.0,3.0,167.0,12.0,3.0,76.0,132.0,11.0,41.0,55.0,7.0,18.0,8.0,80.0,28.0,8.0,4.0,22.0,15.0,6.0,10.0,42.0,63.0,4.0,3.0,6.0,143.0,349.0,14.0,30.0,10.0,22.0,5.0,39.0,26.0,8.0,5.0,6.0,5.0,9.0,7.0,4.0,12.0,10.0,3.0,4.0,9.0,3.0,9.0,16.0,18.0,145.0,10.0,1.0,18.0,2.0,6.0,16.0,2.0,7.0,6.0,3.0,10.0,18.0,1.0,2.0,1.0,2.0,8.0,1.0,21.0,46.0,275.0,17.0,140.0,10.0,6.0,7.0,7.0,7.0,53.0,2.0,15.0,10.0,0.0,11.0,2.0,117.0,3.0,6.0,4.0,15.0,3.0,1.0,187.0,8.0,2.0,6.0,3.0,0.0,11.0]},"kind":"numeric","n":5268,"n_null":12,"n_unique":191,"null_rate":0.002277904328018223,"stats":{"iqr":20.0,"kurtosis":42.79146214638747,"max":583.0,"mean":20.06830289193303,"median":9.0,"min":0.0,"n_outliers":444,"outlier_rate":0.08447488584474885,"q1":3.0,"q3":23.0,"skew":4.948312044472851,"std":33.199952080203964,"zero_rate":0.011035007610350075}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+50.34"}],"column":"Ground","extras":{"histogram":{"counts":[5235,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2],"edges":[0.0,68.75,137.5,206.25,275.0,343.75,412.5,481.25,550.0,618.75,687.5,756.25,825.0,893.75,962.5,1031.25,1100.0,1168.75,1237.5,1306.25,1375.0,1443.75,1512.5,1581.25,1650.0,1718.75,1787.5,1856.25,1925.0,1993.75,2062.5,2131.25,2200.0,2268.75,2337.5,2406.25,2475.0,2543.75,2612.5,2681.25,2750.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
0.0,0.0,0.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":5268,"n_null":22,"n_unique":50,"null_rate":0.004176157934700076,"stats":{"iqr":0.0,"kurtosis":2558.595146965409,"max":2750.0,"mean":1.6088448341593595,"median":0.0,"min":0.0,"n_outliers":2
19,"outlier_rate":0.04174609226077011,"q1":0.0,"q3":0.0,"skew":50.33625228107394,"std":53.987827158856334,"zero_rate":0.9582539077392299}},{"alerts":[{"code":"near_unique","level":"info","message":"95.8% of rows are unique strings"}],"column":"Summary","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[822,1039,800,547,364,280,231,172,123,128,86,50,57,33,37,19,15,16,11,10,5,1,6,3,4,3,2,1,1,3,1,2,1,4,0,0,0,0,0,1],"edges":[6.0,54.7,103.4,152.10000000000002,200.8,249.5,298.20000000000005,346.90000000000003,395.6,444.3,493.0,541.7,590.4000000000001,639.1,687.8000000000001,736.5,785.2,833.9000000000001,882.6,931.3000000000001,980.0,1028.7,1077.4,1126.1000000000001,1174.8000000000002,1223.5,1272.2,1320.9,1369.6000000000001,1418.3000000000002,1467.0,1515.7,1564.4,1613.1000000000001,1661.8000000000002,1710.5,1759.2,1807.9,1856.6000000000001,1905.3000000000002,1954.0]},"near_unique":true,"sample":["Crashed into trees while attempting to land after being shot down by British and French aircraft.","Flew into a box canyon and crashed at an elevation of 4,000 ft.  VFR flight by the pilot into instrument meteorological conditions, and the pilot's failure to maintain sufficient altitude and/or clearance from mountainous terrain. Factors related to the accident were: the adverse weather and terrain conditions.","Midair collision. The Beechcraft was on a flight from Lyon to Lorient, approaching Lorient, when it requested permission to fly over the ocean liner Norway. While circling the Norway, it collided with the Cessna. One killed aboard the Cessna, 14 aboard the Beechcraft.  Failure of both pilots to 'see and avoid' each other under VFR condition.","The aircraft crashed into a 8,000 ft. mountain  in the Sierra Grande range while climbing en route from Comodoro Rivadavia to Cordoba in heavy rain and strong turbulence. 
The passengers included military personnel and their dependents.","The helicopter collided with trees after experiencing engine failure. Pilot overshot two suitable landing areas.","The jetliner crashed into the Black Sea and broke up in driving rain and low visibility after making a second attempt to land. The plane disappeared from radar screens just under four miles from shore and crashed after making a turn and heading toward Adler airport for a landing. Pilot error. The pilots of the Airbus-320  allowed the plane to descend too low as it faced bad weather on its approach to the airport.","Due to heavy traffic, the flight was diverted from the planned route. The aircraft failed to follow the assigned airway and crashed into a cloud obscured Montseny Mountain while on approach. The deviation from the assigned airway may have been caused by malfunctioning equipment. In addition, the ATC did not realize the aircraft was deviating from its assigned course.","The aircraft crashed into the Persian Gulf and exploded in flames while attempting to land at Bahrain International Airport. The crew decided to perform a missed approach after it was determined the aircraft was coming in too high and fast. Instructions were given for a 180 degree turn and climb to 2,500 feet. While performing the missed approach the plane suddenly descended rapidly from an altitude of 1,000 feet and crashed into the shallow waters of the gulf approximately 1 mile from the airport. The accident was a result of a fatal combination of factors, including the captain's failure to comply with standard operating procedures and the copilot's actions in not drawing the captain's attention to the deviations of the aircraft from the standard flight parameters. The captain may have suffered a 'spatial disorientation' to ground warning systems, which could have made him falsely perceive the aircraft was pitching up. 
He responded by making a nose down input, resulting in the aircraft starting to descend, when aircraft warning systems were saying he should increase altitude.","Diverted from Madang to Bagasin, overran the runway and crashed.","Crashed into a radio antenna tower and tore off a wing  in dense  fog.","Crashed on final approach after encountering windshear. Pilot not briefed on possibility of turbulence and windshear by FSS personnel.","Crashed during takeoff.","Crashed into a  mountain while en route..","Shot down by British aircraft.","Crashed onto a mountainside at 9,000 ft. The pilot misjudged the weather conditions and continued to fly into deteriorating weather conditions while trying to maintain VFR. The pilot attempted to climb at a speed below the minimum safe climbing speed of the aircraft.","The aircraft crashed while attempting to land at Kirkland AFB in gusty winds. The pilot appeared to have difficulty keeping the wings level. The plane crossed the threshold left of the center line. When the pilot corrected, the right starboard jet pod and No. 6 propeller struck the runway. The pilot attempted a go-around with the No.6 engine afire but crashed to earth bursting into flames.","Crashed shortly after takeoff in a snow storm. Failure of the pilot to recognize his proximity to the ground due to heavy snow which entirely covered the terrain.","Crashed short of the runway in dense fog while attempting to land a second time at Lucapa.","The tour helicopter crashed near Banning House lodge in rain and poor visibility. The engine appears to have failed.","Engine failure on takeoff. Pilot failed to follow proper emergency procedures. Cause of engine failure unknown.","Failed to gain altitude after a missed approach and crashed. The malfunctioning of the automatipitch coarsening unit of the starboard propeller. 
This deprived the captain of the necessary degree of control of the aircraft at a critical stage of the flight.","Crashed into Mar Chiquita Lagoon during a heavy rainstorm.","The aircraft crashed into a mountain shortly after taking off from Florianpolis.","Missing on a flight from France to Spain. Shot down by an American Bristol Beau allied night fighter.","The aircraft struck a mountain at 6,200 feet after taking off in mist and haze. The crew took off under VFR conditions during adverse weather conditions. Inappropriate presence of a non-crew pilot in the cockpit and his great talkativeness distracting part of the crew which resulted in carelessness and disorientation in monitoring the heading and altitude necessary to maneuver the plane. VFR flight into IFR conditions.","Crashed into Blewett Falls Lake.","Experienced engine loss on final approach. Wrong engine feathered. Cashed. Inadequate maintenance and inspection. Engine fuel manifold valve defective.","Cleared for an approach to Oakland Municipal Airport, the aircraft crashed 15 miles SSE of the airport into Tolman peak. Patches of fog obscured the terrain. The aircraft struck a hill at an elevation of about 1,000 ft. at 225 to 240 mph. Neither of its 2 low-frequency receivers were tuned to the Oakland station and the captain may have attempted to fly by visual reference using the ADF.  As a result, the flight was 3 miles off course and well below the minimum prescribed altitude. The failure of the captain to adhere to instrument procedures in the Newark area during an approach to the Oakland Municipal  Airport.","One Swedish passenger was killed when the plane was attacked by German fighters. The plane was able to land safely in Amsterdam.","The cargo plane was on final approach when it hit a antenna tower and crashed. Fog and poor visibility prevailed at the time of the accident.","Crashed into trees while attempting a go-around in poor weather. Improper minimum descent atlitiude and missed approach. 
Improper inflight decision. Improper missed approach. Minimum descent altitude not maintained.","While attemping to take off from Luxor, the cargo plane slammed to the ground, slid down the runway and caught fire.","The aircraft suffered an aft pressure bulkhead failure at 23,900 ft. The aircraft had severe control difficulties with loss of all controls and eventually after 40 minutes, collided with a mountain. Improper repair of the  bulkhead while being supervised by Boeing engineers after a tail strike in 1978. Worst single plane disaster in aviation history. Kyu Sakamoto, 43, famous for his Japanese song 'Sukiyaki' was killed in the accident.","Lost an engine on approach and was unable to maintain altitude. Aircraft overloaded by 827 lbs.","Airframe failure after flying into adverse weather. VFR flight into adverse weather conditions.","Shot down by enemy fire.","The sightseeing helicopter, headed for the bottom of the Grand Canyon, hit the face of a cliff and crashed in rugged terrain killing all aboard. Tourists were supposed to board a pontoon boat when the accident occurred 2/3 of the way down the canyon. The pilot's disregard of safe flying procedures and misjudgment of the helicopter's proximity to terrain. Contributing to the accident was the failure of Sundance Helicopters and the FAA to provide adequate surveillance of Sundance's air tour operations in Descent Canyon.","Crashed into the ocean.","Crashed 7 kms from the runway in a sandstorm. Decision to land while visibility was below company minimums for that particular airport at night.","The plane rolled to the left and crashed into houses after takeoff. Failure of both artificial horizons.","During the takeoff cargo shifted in the plane causing the aircraft to lose altitude and crash into power lines.","The aircraft flew into the side of a cliff during it's inaugural flight from Gohu Airstrip in the Finisterre Mountains.","Exlpoded and caught fire near Bahia de Kino.  
A bomb is believed to have exploded onboard.","While on a positioning flight the plane entered an uncontrolled descent and crashed. The pilot's inadvertent flight into mountain wave weather conditions while IMC, resulting in a loss of aircraft control.","While on a training mission the aircraft crashed into a mountainous area and was destroyed.","The pilot issued a distress signal within five minutes of taking off from General Mitchell International Airport in Milwaukee, requesting an emergency return to the airport. The plane then crashed into Lake Michigan two miles off shore. The plane was carrying an organ transplant team. The pilot reported to ATC he had runaway trim prior to the accident.","Crashed into Mt. Nova. Deviated off course for unknown reasons.","Crashed in icing conditions at a low altitude. Evasive maneuver to avoid trees. Pilot not instrument rated. Continued VFR flight into adverse weather conditions.","The aircraft crashed shortly after taking off. Engine failure due to fuel contamination.","The aircraft crashed near a pond 25 km from Cubuk, 20 minutes after taking off. 
Failure to use de-icing and poor weather was to blame."],"top_values":[],"top_words":[["the",14455],["a",4621],["and",4405],["to",4347],["of",4337],["in",2951],["crashed",2925],["into",2300],["aircraft",2031],["was",2002],["plane",1685],["after",1658],["while",1548],["on",1474],["an",1320],["at",1160],["from",1128],["pilot",891],["by",829],["flight",793],["with",754],["engine",749],["during",746],["off",701],["failure",674]],"vocab_skipped":null,"word_histogram":{"counts":[1150,1277,789,469,314,262,167,148,83,54,39,35,20,18,15,8,4,7,2,3,1,1,2,5,0,3,1,0,0,1],"edges":[1.0,11.8,22.6,33.400000000000006,44.2,55.0,65.80000000000001,76.60000000000001,87.4,98.2,109.0,119.80000000000001,130.60000000000002,141.4,152.20000000000002,163.0,173.8,184.60000000000002,195.4,206.20000000000002,217.0,227.8,238.60000000000002,249.4,260.20000000000005,271.0,281.8,292.6,303.40000000000003,314.20000000000005,325.0]}},"kind":"text","n":5268,"n_null":390,"n_unique":4673,"null_rate":0.07403189066059225,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.04202542025420254,"emoji_rate":0.0,"len_max":1954,"len_mean":200.73575235752358,"len_median":136.0,"len_min":6,"len_p95":584.0,"n_duplicates":205,"n_empty":0,"one_word_rate":0.0004100041000410004,"readability_flesch_mean":61.67790515313969,"url_rate":0.0,"vocab_size":12513,"word_mean":33.23964739647396,"word_median":23.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["Fatalities.stats.mean","Fatalities.stats.max","Fatalities.stats.skew","Aboard.stats.mean","Aboard.stats.max","Aboard.stats.skew","Ground.stats.median","Ground.stats.max","Ground.stats.zero_rate","Operator.top_values","Type.top_values","row_count"],"featured_charts":[{"caption":"Expect a sharp right skew \u2014 the vast majority of crashes kill fewer than 25 people, but rare outliers reach 583; look for the long tail.","column":"Fatalities","kind":"histogram"},{"caption":"Distribution of passengers aboard mirrors 
fatalities' skew, with most flights carrying under 30 people and a handful of jumbo-jet disasters pushing past 600.","column":"Aboard","kind":"histogram"},{"caption":"Aeroflot and U.S. military operators lead crash counts by a wide margin \u2014 check whether this reflects era bias or operational volume.","column":"Operator","kind":"bar"},{"caption":"Douglas DC-3 appears 334 times, far ahead of any other aircraft type, reflecting its dominance in mid-20th-century aviation.","column":"Type","kind":"bar"},{"caption":"Ground casualties are zero in over 95% of incidents, but extreme outliers exist \u2014 spot the rare catastrophic urban crash events.","column":"Ground","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset catalogues 5,268 aviation accidents spanning roughly a century, recording details such as date, operator, aircraft type, location, passengers aboard, fatalities, and ground casualties. Two numeric columns stand out immediately: Fatalities (mean 20, max 583) and Aboard (mean 28, max 644) are both highly right-skewed with significant outliers, suggesting a small number of catastrophic mass-casualty events dominate the tail. The Operator column reveals that Aeroflot (179 incidents) and U.S. military branches collectively account for a large share of recorded crashes, worth examining for era-specific clustering. Ground fatalities are near-zero in 95% of cases but spike dramatically in rare events (max 2,750), likely reflecting high-profile urban crashes.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["top_values","null_rate","duplicate_rate","n_unique","len_min","len_max","allcaps_rate"],"model":"anthropic:default","narrative":"This column contains clock times in HH:MM format (lengths 4\u20137 characters), almost certainly representing scheduled or recorded event times. 
Two signals warrant attention: the null rate is high at 42.12%, meaning nearly half of all 5,268 rows carry no time value, and the duplicate rate is 67.04% \u2014 expected for a time-of-day field with only 1,005 distinct values across non-null rows. The 'allcaps' alert is a false positive from saturn misclassifying colon-separated digit strings.","role":"feature","scope":"column","target":"Time","treatment":"Parse to datetime.time or extract hour/minute as numeric features; investigate the 42.12% null rate before deciding on imputation or exclusion strategy."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","one_word_rate","allcaps_rate","len_median","len_max","n_unique","n_duplicates","duplicate_rate","top_values","top_words"],"model":"anthropic:default","narrative":"This column ('cn/In') appears to be a short coded identifier or reference field \u2014 likely an aircraft construction (serial) number, optionally paired with a line number \u2014 given its near-universal single-word (98.4%), all-caps (96.6%) character and very short values (median length 5, max 20). The top word '/' appearing 49 times suggests some values are compound codes using slash-delimited notation (e.g., 'CN/IN' style references), while most top values are pure numeric strings ('178', '19', '229', etc.). 
Two signals warrant attention: the null rate is high at 23.3%, and despite 3,707 unique values across 5,268 rows, there are 333 duplicates, indicating this is not a strict unique identifier.","role":"label","scope":"column","target":"cn/In","treatment":"Investigate nulls (23.3% missing) before use; treat as categorical label or join key after resolving slash-delimited compound values."},{"confidence":"high","critiques":[],"evidence_keys":["allcaps_rate","one_word_rate","len_median","len_max","n_unique","n","duplicate_rate","n_duplicates","null_rate","top_words"],"model":"anthropic:default","narrative":"This column contains vehicle or aircraft registration codes \u2014 short, almost entirely uppercase alphanumeric identifiers (allcaps_rate 99.2%, median length 6 characters) consistent with licence plates or tail numbers. With 4905 unique values out of 5268 rows and only 28 duplicates, it behaves as a near-unique identifier, though the 6.36% null rate and occasional slash-containing entries (top word '/' appears 36 times) suggest some composite or malformed registrations worth inspecting. The presence of tokens like 'HK-' (a Colombian aviation prefix) and 'NC10809' hints at international aircraft tail numbers rather than road vehicle plates.","role":"identifier","scope":"column","target":"Registration","treatment":"Use as a near-unique entity key; cleanse slash-delimited entries and nulls before joining or deduplicating on this field."},{"confidence":"high","critiques":[],"evidence_keys":["len_min","len_max","len_mean","null_rate","duplicate_rate","n_duplicates","n_unique","n","top_values","allcaps_rate"],"model":"anthropic:default","narrative":"This column contains dates stored as text strings in MM/DD/YYYY format, with every value exactly 10 characters long and zero nulls across 5,268 rows. 
The duplicate rate of ~9.8% (515 duplicates across only 4,753 unique values) is notable \u2014 multiple records share the same date, with the most frequent dates appearing up to 4 times, including historically significant dates like 09/11/2001 and 06/06/1944, suggesting the dataset may track events tied to recurring or landmark dates. The 'allcaps' alert is a false positive from the date format containing no letters.","role":"timestamp","scope":"column","target":"Date","treatment":"Parse to datetime dtype (strptime MM/DD/YYYY) before any time-based analysis or feature engineering."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","n_unique","stats.duplicate_rate","stats.n_duplicates","language_counts","alerts","n","null_rate"],"model":"anthropic:default","narrative":"This column contains the name of the airline or military branch operating an aircraft involved in an incident, making it a categorical label field. With 2,476 unique values across 5,268 rows, the duplicate rate of 52.8% is expected for a label of this type \u2014 operators recur across multiple incidents. The multilingual alert is a natural artifact of international airline names (German, French, Italian, Spanish, Russian operators all present), not a data quality issue per se, though analysts should be aware that variant spellings of the same operator may inflate cardinality. Top values (Aeroflot at 179, U.S. 
Air Force at 176) reveal a mix of commercial and military operators.","role":"label","scope":"column","target":"Operator","treatment":"Normalize operator name variants, then encode as categorical (target-encode or embed) for modelling; consider grouping military sub-branches under a single 'Military' category."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","n","stats.duplicate_rate","stats.n_duplicates","top_values","top_words","language_counts","alerts"],"model":"anthropic:default","narrative":"This column represents aviation route descriptions, capturing both origin-destination pairs (e.g., 'Saigon - Paris', 'Bogota - Barranquilla') and flight purpose labels (e.g., 'Training', 'Sightseeing', 'Test flight'). The null rate of 32.38% is a significant concern, meaning roughly one-third of records lack route information. The multilingual alert is expected given the international nature of routes \u2014 English dominates at 2,567 detections but Spanish (237), Portuguese (100), German (88), and French (64) are well-represented, reflecting global aviation data. The high n_unique count (3,244 of 3,562 non-null values) with a duplicate rate of 8.93% (318 duplicates) confirms this is a descriptive label field with many distinct routes but some recurring purpose/training entries.","role":"label","scope":"column","target":"Route","treatment":"Impute or flag nulls (32.38% missing); split into 'purpose' vs 'OD-pair' subtypes using presence of '-' delimiter before encoding or embedding."},{"confidence":"high","critiques":[],"evidence_keys":["stats.skew","stats.kurtosis","stats.median","stats.mean","stats.max","stats.iqr","stats.std","n_outliers","outlier_rate"],"model":"anthropic:default","narrative":"This column records the number of fatalities per incident (likely aviation accidents, conflicts, or similar events). 
The distribution is extremely right-skewed (skew = 4.95, kurtosis = 42.79): the median is only 9 fatalities while the mean is 20.07 and the maximum reaches 583, indicating a long tail of mass-casualty events. 444 rows (8.4%) are flagged as outliers, and the IQR of 20 against a std of 33.2 confirms that most incidents are low-fatality but a meaningful minority are catastrophic.","role":"numeric_target","scope":"column","target":"Fatalities","treatment":"Log-transform (log1p) before regression or modelling to reduce skew; retain outliers as they represent real high-severity events."},{"confidence":"high","critiques":[],"evidence_keys":["stats.skew","stats.kurtosis","stats.median","stats.mean","stats.max","stats.q1","stats.q3","stats.iqr","stats.n_outliers","stats.outlier_rate","alerts"],"model":"anthropic:default","narrative":"This column records the number of people aboard a vehicle (likely an aircraft or ship) at the time of an incident. The distribution is severely right-skewed (skew=4.25, kurtosis=28.41): the median is only 13 passengers while the mean is 27.6, and the max reaches 644 \u2014 consistent with a few large commercial aircraft disasters pulling the tail far right. Roughly 10% of rows (529) are flagged as outliers, and the IQR spans just 5\u201330, meaning the vast majority of incidents involve small craft.","role":"feature","scope":"column","target":"Aboard","treatment":"Log-transform (log1p) before regression or modelling to reduce skew; retain outliers as they represent real large-scale events."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","n","entropy_ratio","alerts"],"model":"anthropic:default","narrative":"This column represents a flight number identifier, likely recording the flight designation for each row in the dataset. 
Two major issues stand out: 79.71% of values are null, making the column largely unpopulated, and the most frequent non-null value is a placeholder dash ('-') appearing 67 times, suggesting systematic missing-data encoding. With 724 unique values across only 1,073 non-null rows and an entropy ratio of 0.953, the distribution is near-uniform with a pronounced long tail \u2014 no single flight number dominates meaningfully beyond the placeholder.","role":"label","scope":"column","target":"Flight #","treatment":"Treat '-' as null, impute or drop rows depending on whether flight number is required; with 79.71% nulls, consider dropping this column unless the analysis specifically targets flight-level granularity."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","n_unique","duplicate_rate","n_duplicates","n","null_rate","vocab_size"],"model":"anthropic:default","narrative":"This column captures aircraft model designations (e.g., 'Douglas DC-3', 'de Havilland Canada DHC-6 Twin Otter 300'), making it an aircraft type label in what appears to be an aviation incident or accident dataset. The duplicate rate of 53.3% (2,795 of 5,268 rows) is expected for a categorical-like field where many incidents share the same aircraft type, with 'Douglas DC-3' alone appearing 334 times. There are 2,446 unique values against a vocabulary of 2,534 words, indicating many near-unique variant spellings or sub-model suffixes (e.g., 'Douglas C-47', 'Douglas C-47A', 'Douglas C-47B' are counted separately), which is the key analyst surprise. 
Null rate is negligible at 0.51%.","role":"label","scope":"column","target":"Type","treatment":"Normalize variant spellings and sub-model suffixes into canonical families before grouping or encoding; consider a manufacturer + model hierarchy for feature engineering."},{"confidence":"high","critiques":[],"evidence_keys":["top_words","n","n_unique","stats.duplicate_rate","stats.n_duplicates","stats.len_min","stats.len_median","stats.len_max","stats.readability_flesch_mean","null_rate","alerts"],"model":"anthropic:default","narrative":"This column contains free-text narrative summaries of aviation incidents or accidents, as evidenced by dominant domain terms 'crashed', 'into', and 'aircraft' appearing thousands of times across 5,268 records. Text length varies widely (min 6, median 136, max 1,954 characters), suggesting entries range from brief one-liners to detailed multi-sentence accounts. A duplicate rate of 4.2% (205 duplicates) is mildly surprising for free-text summaries and may indicate repeated incident templates or copy-paste entries. Flesch readability of 61.7 indicates moderate accessibility, consistent with factual incident reporting prose.","role":"free_text","scope":"column","target":"Summary","treatment":"Tokenize and embed (e.g., TF-IDF or sentence transformer) for modelling; deduplicate 205 exact-match rows before training."},{"confidence":"medium","critiques":[],"evidence_keys":["zero_rate","max","skew","kurtosis","n_unique","iqr","q1","q3","n_outliers","outlier_rate","median","mean","std"],"model":"anthropic:default","narrative":"This column most likely records ground fatalities \u2014 people killed on the ground rather than aboard the aircraft \u2014 for each incident. The distribution is extreme: 95.8% of values are exactly zero, yet the maximum reaches 2750.0 with a skew of 50.34 and kurtosis of 2558.60, indicating a tiny fraction of records carry very large non-zero values. 
Only 50 unique values exist across 5,268 rows, and 219 observations (4.17%) are flagged as outliers \u2014 the near-zero IQR (Q1=Q3=0) confirms the overwhelming concentration at zero.","role":"feature","scope":"column","target":"Ground","treatment":"Treat as sparse indicator/feature; consider binarizing (zero vs. non-zero) or log1p-transforming the non-zero subset, and investigate whether the 2750.0 outliers are valid or data-entry errors."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","duplicate_rate","n_duplicates","word_mean","word_median","len_mean","len_median","top_values","top_words","vocab_size"],"model":"anthropic:default","narrative":"This column contains free-text geographic location descriptions, most commonly in 'City, Country/State' format (mean ~2.9 words, median length 19 characters), representing where individual events occurred. The high frequency of the word 'near' (1,272 occurrences out of 5,268 rows) indicates a substantial proportion of entries are approximate locations rather than precise place names, which could complicate geocoding. 
The duplicate rate of 18% (945 duplicates across 4,303 unique values) is expected for a location field but the long tail of near-unique entries (vocab size 4,541) suggests significant free-text variation in how locations are recorded.","role":"feature","scope":"column","target":"Location","treatment":"Normalize 'near X' / 'off X' patterns, then geocode or extract country/region via NLP before modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4421,"prompt_tokens":18907,"total_tokens":23328}},"language_counts":{"bs":3,"ca":39,"ceb":14,"cs":11,"da":5,"de":290,"en":5907,"eo":7,"es":461,"et":3,"eu":1,"fi":6,"fr":247,"gd":1,"gl":2,"hr":14,"hu":6,"id":93,"it":366,"ja":1,"ku":1,"la":2,"lt":2,"ms":6,"nl":73,"no":22,"pl":31,"pt":155,"ro":6,"ru":27,"sh":3,"sl":20,"sv":51,"te":1,"tr":18,"uk":6,"vi":3},"meta":{"generated_at":"2026-06-21T23:24:17+00:00","mode":"full","row_count":5268,"sampled_rows":5268,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/wild/disasters/airplane_crashes.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"Aboard":"numeric","Date":"text","Fatalities":"numeric","Flight #":"categorical","Ground":"numeric","Location":"text","Operator":"text","Registration":"text","Route":"text","Summary":"text","Time":"text","Type":"text","cn/In":"text"}}
