{"columns":[{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5268,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[9.5,9.525,9.55,9.575,9.6,9.625,9.65,9.675,9.7,9.725,9.75,9.775,9.8,9.825,9.85,9.875,9.9,9.925,9.95,9.975,10.0,10.025,10.05,10.075,10.1,10.125,10.15,10.175,10.2,10.225,10.25,10.275,10.3,10.325,10.35,10.375,10.4,10.425,10.45,10.475,10.5]},"near_unique":false,"sample":["09/03/1915","07/02/1990","12/05/1997","01/14/1995","03/09/1968","03/07/2006","12/31/1968","03/17/2000","04/23/1995","06/29/1929","09/14/1979","10/22/1974","10/28/1956","04/07/1918","01/16/1958","07/01/1948","03/08/1938","02/11/1996","05/02/2008","12/18/1966","02/03/1959","09/17/1929","12/10/1946","10/10/1938","12/25/1986","06/01/1991","04/05/1976","11/04/1948","07/20/1935","06/02/2006","08/17/1983","02/20/2009","02/24/1984","02/04/1966","10/18/1963","10/25/1968","06/04/2003","05/19/1973","07/13/1969","09/15/1974","10/12/1994","07/04/2002","07/13/1956","06/04/2002","01/13/2005","03/17/2007","06/30/1962","07/11/1966","09/19/1946","08/07/1999"],"top_values":[["06/18/1972",4],["02/28/1973",4],["08/28/1976",4],["08/31/1988",4],["08/27/1992",4],["09/11/2001",4],["09/17/1929",3],["11/15/1934",3],["06/06/1944",3],["11/29/1944",3],["11/11/1945",3],["12/28/1946",3],["01/25/1947",3],["05/29/1947",3],["11/27/1947",3],["07/12/1951",3],["05/13/1957",3],["09/02/1958",3],["02/26/1960",3],["03/08/1962",3]],"top_words":[["06/18/1972",4],["02/28/1973",4],["08/28/1976",4],["08/31/1988",4],["08/27/1992",4],["09/11/2001",4],["09/17/1929",3],["11/15/1934",3],["06/06/1944",3],["11/29/1944",3],["11/11/1945",3],["12/28/1946",3],["01/25/1947",3],["05/29/1947",3],["11/27/1947",3],["07/12/1951",3],["05/13/1957",3],["09/02/1958",3],["02/26/1960",3],["03/08/1962",3],["09/10/1962",3],["11/23/1962",3],["01/08/1968",3],["04/02/1969",3],["07/30/1971",3]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5268,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":5268,"n_null":0,"n_unique":4753,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.09776006074411542,"emoji_rate":0.0,"len_max":10,"len_mean":10.0,"len_median":10.0,"len_min":10,"len_p95":10.0,"n_duplicates":515,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4753,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.7% rows are all-caps"},{"code":"null_rate","level":"warn","message":"42.1% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"67.0% duplicate strings"}],"column":"Time","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[7,0,0,0,0,0,0,0,0,0,0,0,0,3033,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,6],"edges":[4.0,4.075,4.15,4.225,4.3,4.375,4.45,4.525,4.6,4.675,4.75,4.825,4.9,4.975,5.05,5.125,5.2,5.275,5.35,5.425,5.5,5.575,5.65,5.725,5.8,5.875,5.95,6.025,6.1,6.175,6.25,6.324999999999999,6.4,6.475,6.55,6.625,6.699999999999999,6.775,6.85,6.925,7.0]},"near_unique":false,"sample":["10:30","22:00","18:21","09:40","12:45","05:08","07:34","09:32","17:54","16:35","06:50","114:20","09:22","09:18","17:22","03:36","00:20","08:00","15:06","23:17","12:20","23:00","22:23","07:40","15:40","17:01","23:17","23:19","07:42","17:18","09:29","00:34","c: 2:00","22:00","16:00","09:40","15:45","09:32","13:42","18:40","12:00","22:18","05:29","10:45","07:30","11:00","22:02","20:48","02:00","11:35"],"top_values":[["15:00",32],["12:00",31],["11:00",29],["16:00",26],["19:30",26],["14:00",25],["19:00",24],["10:30",22],["17:00",22],["09:30",22],["13:00",20],["08:30",19],["17:30",19],["14:30",19],["20:30",19],["08:00",18],["09:00",17],["20:00",17],["12:30",17],["10:15",17]],"top_words":[["15:00",32],["12:00",31],["11:00",29],["16:00",26],["19:30",26],["14:00",25],["19:00",24],["10:30",22],["17:00",22],["09:30",22],["13:00",20],["08:30",19],["17:30",19],["14:30",19],["20:30",19],["08:00",18],["09:00",17],["20:00",17],["12:30",17],["10:15",17],["18:00",17],["16:30",17],["23:00",16],["07:00",15],["11:30",15]],"vocab_skipped":null,"word_histogram":{"counts":[3046,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":5268,"n_null":2219,"n_unique":1005,"null_rate":0.4212224753227031,"stats":{"allcaps_rate":0.9973761889143982,"boilerplate_rate":0.0,"duplicate_rate":0.6703837323712692,"emoji_rate":0.0,"len_max":7,"len_mean":5.002623811085602,"len_median":5.0,"len_min":4,"len_p95":5.0,"n_duplicates":2044,"n_empty":0,"one_word_rate":0.9990160708428993,"readability_flesch_mean":121.21492500000004,"url_rate":0.0,"vocab_size":1004,"word_mean":1.0009839291571008,"word_median":1.0}},{"alerts":[],"column":"Location","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[21,8,22,16,61,326,280,289,753,413,828,383,313,479,189,170,244,85,114,36,28,52,25,23,26,11,18,3,13,3,1,5,2,1,3,1,0,0,1,2],"edges":[5.0,6.375,7.75,9.125,10.5,11.875,13.25,14.625,16.0,17.375,18.75,20.125,21.5,22.875,24.25,25.625,27.0,28.375,29.75,31.125,32.5,33.875,35.25,36.625,38.0,39.375,40.75,42.125,43.5,44.875,46.25,47.625,49.0,50.375,51.75,53.125,54.5,55.875,57.25,58.625,60.0]},"near_unique":false,"sample":["Off Cuxhaven, Germany","Near Port Morseby, New Guinea","Little Grand Rapids, Canada","Kathmandu, Nepal","St. Louis, Missouri","Labiano, Spain","Near Bradford, Pennsylvania","Ennadai Lake, Canada","Near Palaly AFB, Sri Lanka","Lake Constance, Switzerland","Blink Horn Point, Canada","Blair, Oklahoma","Hommelfjell, Norway","Over the Mediterranean","Norwalk, California","PacifiOcean between Hong Kong and Macao","Near Formia, Italy","Port-au-Prince, Hati","Near Rumbek, Sudan","Hilo, Hawaii","Beirut, Lebanon","Jacumba, California","Near Rio de Janeiro, Brazil","Soest, Germany","Ay, Saudi Arabia","Off Matthewtown, Great Inagua","Friday Harbor, Washington","Kiangwan, China","Off Chandeleur Island, Mississippi","Yaocun, China","Near Dundo, Angola","Luxor, Egypt","Colombia","Near Tokyo, Japan","Near Tisbury, England","Savannakhet, Laos","Milan, Italy","Near New Delhi, India","Kekaha, Hawaii","Fairbanks, Alaska","Near Formoso do Aragala, Brazil","San Dimas, California","Cold Bay, Alaska","Oiapoque, Amapa, Brazil","Manguipayan, Colombia","Samara, Russia","Near Junnar, Maharashtra, India","Athens, Greece","Stephenville, Newfoundland","Sept-\u00celes, Canada"],"top_values":[["Sao Paulo, Brazil",15],["Moscow, Russia",15],["Rio de Janeiro, Brazil",14],["Bogota, Colombia",13],["Manila, Philippines",13],["Anchorage, Alaska",13],["New York, New York",12],["Cairo, Egypt",12],["Chicago, Illinois",11],["Near Moscow, Russia",9],["AtlantiOcean",9],["Tehran, Iran",9],["Paris, France",8],["Amsterdam, Netherlands",8],["Denver, Colorado",8],["Ankara, Turkey",8],["Rome, Italy",8],["Cleveland, Ohio",7],["Bucharest, Romania",7],["Burbank, California",7]],"top_words":[["near",1272],["off",355],["new",257],["brazil",190],["alaska",178],["russia",174],["colombia",150],["canada",147],["california",144],["france",133],["mexico",122],["england",105],["india",100],["south",94],["germany",92],["island,",89],["china",89],["indonesia",87],["city,",85],["san",84],["australia",81],["italy",79],["vietnam",74],["york",68],["de",67]],"vocab_skipped":null,"word_histogram":{"counts":[59,0,0,2136,0,0,0,2001,0,0,0,711,0,0,0,239,0,0,67,0,0,0,22,0,0,0,7,0,0,6],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":5268,"n_null":20,"n_unique":4303,"null_rate":0.0037965072133637054,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.1800685975609756,"emoji_rate":0.0,"len_max":60,"len_mean":20.379954268292682,"len_median":19.0,"len_min":5,"len_p95":31.0,"n_duplicates":945,"n_empty":0,"one_word_rate":0.011242378048780487,"readability_flesch_mean":24.03101428571431,"url_rate":0.0,"vocab_size":4541,"word_mean":2.8656631097560976,"word_median":3.0}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"duplicates","level":"warn","message":"52.8% duplicate strings"}],"column":"Operator","extras":{"language_counts":{"__engine":"fasttext:4,537","bs":3,"ca":22,"ceb":6,"cs":4,"da":2,"de":202,"en":3340,"es":224,"eu":1,"fi":3,"fr":183,"gl":2,"hr":6,"hu":5,"id":40,"it":278,"ku":1,"lt":2,"ms":3,"nl":25,"no":11,"pl":20,"pt":55,"ro":2,"ru":27,"sl":17,"sv":34,"tr":9,"uk":6,"vi":2},"language_sample_size":5000,"length_histogram":{"counts":[96,233,140,462,169,184,128,395,270,447,407,143,340,205,542,166,229,102,194,54,127,62,35,30,13,25,11,5,7,7,7,0,9,1,3,0,1,0,0,1],"edges":[3.0,4.55,6.1,7.65,9.2,10.75,12.3,13.85,15.4,16.950000000000003,18.5,20.05,21.6,23.150000000000002,24.7,26.25,27.8,29.35,30.900000000000002,32.45,34.0,35.550000000000004,37.1,38.65,40.2,41.75,43.300000000000004,44.85,46.4,47.95,49.5,51.050000000000004,52.6,54.15,55.7,57.25,58.800000000000004,60.35,61.9,63.45,65.0]},"near_unique":false,"sample":["Military - German Navy","Eagle Air","Military - Russian Air Force","Air Taxi - Wolfe Air Aviation Ltd.","Military - Russian Air Force","TriCoastal Air","China Air Lines","Aeroperlas","Bristow Helicopters","Deutsche Lufthansa","Military - U.S. Air Force","Willow Air Service - Air Taxi","Linee Aeree Italiane","Military - German Navy","Military - U.S. Navy","Civil Air Transport","Cruzeiro do Sul","Military - Taliban Militia","Southern Sudan Air Connection","Air Taxi","Aviaco","Bremerton-Seattle Air Taxi / Gorst Air Lines","Kansas City Southern Skyways","Trans Canada Air Lines","Military - Spanish Air Force.","Carib Air Transport","Chartair - Air Taxi","China National Aviation Corporation","Private","International Jet Charter","Korean Airlines","Aerolift","Military - U.S. Air Force","Canadian PacifiAir Lines","Pan American World Airways","Stikine Air Service - Air Taxi","Air Taxi - Marco Zero Air Taxi","Aeroflot","Air Algerie","North Cay Airways","Iran Asseman Airlines","Prestige Airlines (Cargo) Charter","Pan American World Airways","Keystone Air Services Ltd. - Air Taxi","Airline Transport","UTAir Airlines","United Arab Airlines","Servicios Americanos","Air Ocean","Regionnair"],"top_values":[["Aeroflot",179],["Military - U.S. Air Force",176],["Air France",70],["Deutsche Lufthansa",65],["China National Aviation Corporation",44],["United Air Lines",44],["Air Taxi",44],["Military - U.S. Army Air Forces",43],["Pan American World Airways",41],["Military - U.S. Navy",36],["US Aerial Mail Service",36],["American Airlines",36],["Military - Royal Air Force",36],["Indian Airlines",34],["KLM Royal Dutch Airlines",33],["Philippine Air Lines",33],["Private",31],["Aeropostale",26],["Northwest Orient Airlines",25],["Eastern Air Lines",25]],"top_words":[["air",1911],["-",1172],["airlines",815],["military",798],["force",577],["airways",447],["taxi",425],["u.s.",345],["aviation",190],["aeroflot",190],["lines",182],["service",165],["royal",129],["american",122],["army",102],["transport",100],["national",91],["/",90],["de",86],["inc.",85],["services",82],["private",80],["navy",79],["china",79],["british",77]],"vocab_skipped":null,"word_histogram":{"counts":[867,0,1450,0,0,1244,0,584,0,0,692,0,297,0,0,67,0,17,0,0,16,0,6,0,0,8,0,1,0,1],"edges":[1.0,1.4,1.8,2.2,2.6,3.0,3.4000000000000004,3.8000000000000003,4.2,4.6,5.0,5.4,5.800000000000001,6.2,6.6000000000000005,7.0,7.4,7.800000000000001,8.2,8.600000000000001,9.0,9.4,9.8,10.200000000000001,10.600000000000001,11.0,11.4,11.8,12.200000000000001,12.600000000000001,13.0]}},"kind":"text","n":5268,"n_null":18,"n_unique":2476,"null_rate":0.003416856492027335,"stats":{"allcaps_rate":0.037333333333333336,"boilerplate_rate":0.0,"duplicate_rate":0.5283809523809524,"emoji_rate":0.0,"len_max":65,"len_mean":19.493904761904762,"len_median":19.0,"len_min":3,"len_p95":35.0,"n_duplicates":2774,"n_empty":0,"one_word_rate":0.16514285714285715,"readability_flesch_mean":19.610950000000024,"url_rate":0.0,"vocab_size":2370,"word_mean":3.0474285714285716,"word_median":3.0}},{"alerts":[{"code":"long_tail","level":"info","message":"543 singleton categories"},{"code":"null_rate","level":"warn","message":"79.7% null"}],"column":"Flight #","extras":{"singletons":543,"top_values":[["-",67],["1",10],["4",7],["6",6],["21",6],["101",6],["901",6],["7",5],["201",5],["701",5],["706",5],["703",5],["2",4],["203",4],["304",4],["601",4],["514",4],["11",4],["217",4],["114",4]]},"kind":"categorical","n":5268,"n_null":4199,"n_unique":724,"null_rate":0.79707668944571,"stats":{"cardinality":724,"entropy":9.05754931252951,"entropy_ratio":0.9534417105486859,"top_rate":0.0626753975678204,"top_value":"-"}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"null_rate","level":"warn","message":"32.4% null"}],"column":"Route","extras":{"language_counts":{"__engine":"fasttext:3,375","ca":17,"ceb":8,"cs":7,"da":3,"de":88,"en":2567,"eo":7,"es":237,"et":3,"fi":3,"fr":64,"gd":1,"hr":8,"hu":1,"id":53,"it":88,"ja":1,"la":2,"ms":3,"nl":48,"no":11,"pl":11,"pt":100,"ro":4,"sh":3,"sl":3,"sv":17,"te":1,"tr":9,"vi":1},"language_sample_size":5000,"length_histogram":{"counts":[8,4,93,6,5,100,99,155,452,247,443,170,179,286,155,135,245,94,213,71,49,74,39,40,51,20,27,12,10,19,6,4,17,9,6,3,4,8,2,2],"edges":[4.0,5.375,6.75,8.125,9.5,10.875,12.25,13.625,15.0,16.375,17.75,19.125,20.5,21.875,23.25,24.625,26.0,27.375,28.75,30.125,31.5,32.875,34.25,35.625,37.0,38.375,39.75,41.125,42.5,43.875,45.25,46.625,48.0,49.375,50.75,52.125,53.5,54.875,56.25,57.625,59.0]},"near_unique":false,"sample":["Lympne, England - Rotterdam, The Netherlands","Isfahan - Terhan","Mexico City - Reynosa - Matamoros","Anchorage, AK - Hoholitna River, AK","Panchkhal - Tribuvan","Kongolo - Goma","Honolulu - Lihue","Jomsom - Pokhara","Jaffna - Colombo","El Paso, TX - Pueblo, CO","Tallin - L'vov","Greenville, MS - Charlotte, NC","Sydney - Canberra","London, England - India","Chicago - Burbank","Monterrey - Falcon Dam","Gander - Shannon","Kathmandu - New Delhi","Sabine Pass - Oil Platform","Valdez, AK - Local","London - Innsbruck","Santiago, Chili - Cristobal, Panama","Kildala - Vancouver","Akron, OH - Winston/Salem, NC","Kira-Kira - Honiara","Buenos Aires - La Paz - Quito","Dhahran - Karachi","Blackbushe, UK - Beogard, Serbia","Pittsburgh - Morgantown","Sightseeing","Montlu\u00e7on - Paris","Cucuta - Arauca","Aqua Caliente, Mexico - Los Angeles","Leningrad - Erevan","Reynosa - Tampico","Amman, Jordan - Beiruit, Lebanon - Yerevan, Armenia","Bucharest -Lasi","Geneva - Funchal","Thief River, MN - Owatonna, MN","Clarksburg - Washington DC","Denpasar - Taipei","Katmandou - Lukla","Ankara - Adana","Sao Paulo - Tefe - Manaus","Yerevan, Armenia - Sochi, Russia","Cordova - Ancorage","Athens - Nicosia","La Paz - San Borja","Training","Iyachisakus Lake - Mollet Lake"],"top_values":[["Training",81],["Sightseeing",29],["Test flight",17],["Test",6],["Sao Paulo - Rio de Janeiro",5],["Saigon - Paris",4],["Bogota - Barranquilla",4],["Sao Paulo - Porto Alegre",4],["Villavicencio - Mitu",4],["Demonstration",3],["Cleveland - Chicago",3],["Santiago de Cuba - Havana",3],["Buenos Aires - Santiago",3],["Rio de Janeiro - Sao Paulo",3],["Guayaquil - Cuenca",3],["Rome - Cairo",3],["Barranquilla - Bogota",3],["Villavicencio - Bogota",3],["Burbank - Oakland",3],["Rome - Athens",3]],"top_words":[["-",3658],["ak",158],["city",154],["new",140],["san",133],["york",96],["training",86],["de",78],["ca",74],["paris",69],["chicago",61],["la",61],["los",60],["london",57],["afb",56],["angeles",51],["rio",51],["rome",48],["bogota",48],["lake",46],["ny",44],["st.",43],["miami",41],["tx",41],["moscow",41]],"vocab_skipped":null,"word_histogram":{"counts":[146,0,49,0,0,1506,0,0,576,0,736,0,0,318,0,0,113,0,0,67,0,24,0,0,18,0,0,4,0,5],"edges":[1.0,1.3666666666666667,1.7333333333333334,2.0999999999999996,2.466666666666667,2.833333333333333,3.1999999999999997,3.5666666666666664,3.933333333333333,4.3,4.666666666666666,5.033333333333333,5.3999999999999995,5.766666666666667,6.133333333333333,6.5,6.866666666666666,7.2333333333333325,7.6,7.966666666666666,8.333333333333332,8.7,9.066666666666666,9.433333333333334,9.799999999999999,10.166666666666666,10.533333333333333,10.899999999999999,11.266666666666666,11.633333333333333,12.0]}},"kind":"text","n":5268,"n_null":1706,"n_unique":3244,"null_rate":0.32384206529992404,"stats":{"allcaps_rate":0.00028074115665356543,"boilerplate_rate":0.0,"duplicate_rate":0.0892756878158338,"emoji_rate":0.0,"len_max":59,"len_mean":22.088152723189218,"len_median":20.0,"len_min":4,"len_p95":37.0,"n_duplicates":318,"n_empty":0,"one_word_rate":0.04098820887142055,"readability_flesch_mean":27.15470892857145,"url_rate":0.0,"vocab_size":3647,"word_mean":4.065412689500281,"word_median":4.0}},{"alerts":[{"code":"duplicates","level":"warn","message":"53.3% duplicate strings"}],"column":"Type","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[6,5,6,19,32,57,178,255,685,0,522,331,441,369,208,158,154,166,154,0,109,120,158,188,174,107,73,85,39,0,66,58,55,43,16,25,16,9,21,133],"edges":[4.0,4.9,5.8,6.7,7.6,8.5,9.4,10.3,11.2,12.1,13.0,13.9,14.8,15.700000000000001,16.6,17.5,18.4,19.3,20.2,21.1,22.0,22.900000000000002,23.8,24.7,25.6,26.5,27.400000000000002,28.3,29.2,30.1,31.0,31.900000000000002,32.8,33.7,34.6,35.5,36.4,37.300000000000004,38.2,39.1,40.0]},"near_unique":false,"sample":["Zeppelin L-10 (airship)","Beech King Air B90","Swearingen SA-226T Metro II","de Havilland Canada DHC-6 Twin Otter 300","Bell UH-1H / Bell UH-1H (helicopter)","Swearingen SA.226TC Metro II","Handley Page Dart Herald 201","de Havilland Canada DHC-6 Twin Otter 300","Hawker Siddeley HS-748-357/2B SCD","Lockheed Vega","McDonnell Douglas DC-8-62","Douglas DC-3","Convair CV-240-0","Zeppelin L-59 (airship)","Vickers Viscount 745D","Douglas DC-3 (Douglas C-47A-10-DK)","Heinkel 116","de Havilland Canada DHC-6 Twin Otter 300","de Havilland Canada DHC-2 Mark I Beaver","Antonov AN-12","Vickers Viscount 745D","Junkers G-24","Douglas C-47A-1-DK","Junkers JU90V2","Antonov AN-12","Antonov AN-24","Britten-Norman BN-2A-21 Islander","Avro 685 York 1","Boeing 247D","KJ-2000","Boeing B-737-2P6","Antonov 12V","Fairchild F-27M","Cessna 172D","Beech D18S","Boeing 707-321CF","Piper PA-31-350 Navajo Chieftain","Fairchild-Hiller FH-227B","Boeing B-707-328B","Cessna 172M","Mil Mi-8MTV (helicopter)","de Havilland DHC-2","Ilyushin IL-12B","PA- 31-350 Chieftain","Black Hawk helicopter","Tupelov 134AK","Tupolev TU-104A","Cessna 180","Douglas DC-3","McDonnell Douglas MD-90-30"],"top_values":[["Douglas DC-3",334],["de Havilland Canada DHC-6 Twin Otter 300",81],["Douglas C-47A",74],["Douglas C-47",62],["Douglas DC-4",40],["Yakovlev YAK-40",37],["Antonov AN-26",36],["Junkers JU-52/3m",32],["Douglas C-47B",29],["De Havilland DH-4",28],["Douglas DC-6B",27],["Breguet 14",23],["Curtiss C-46A",21],["Douglas C-47-DL",20],["Douglas DC-6",20],["Antonov AN-12",19],["Antonov AN-24",19],["McDonnell Douglas DC-9-32",19],["Curtiss C-46",18],["Embraer 110P1 Bandeirante",18]],"top_words":[["douglas",1113],["boeing",384],["dc-3",376],["lockheed",343],["cessna",307],["de",301],["havilland",300],["antonov",248],["canada",159],["otter",147],["fokker",133],["piper",133],["dhc-6",131],["twin",129],["mcdonnell",125],["curtiss",122],["beechcraft",121],["/",111],["ilyushin",98],["tupolev",97],["vickers",95],["300",91],["convair",86],["junkers",84],["hercules",83]],"vocab_skipped":null,"word_histogram":{"counts":[39,0,0,0,3095,0,0,0,1147,0,0,0,557,0,0,0,0,205,0,0,0,66,0,0,0,129,0,0,0,3],"edges":[1.0,1.2333333333333334,1.4666666666666668,1.7,1.9333333333333333,2.166666666666667,2.4,2.6333333333333333,2.8666666666666667,3.1,3.3333333333333335,3.566666666666667,3.8,4.033333333333333,4.266666666666667,4.5,4.733333333333333,4.966666666666667,5.2,5.433333333333334,5.666666666666667,5.9,6.133333333333334,6.366666666666667,6.6,6.833333333333333,7.066666666666666,7.3,7.533333333333333,7.766666666666667,8.0]}},"kind":"text","n":5268,"n_null":27,"n_unique":2446,"null_rate":0.005125284738041002,"stats":{"allcaps_rate":0.009540164090822362,"boilerplate_rate":0.0,"duplicate_rate":0.53329517267697,"emoji_rate":0.0,"len_max":40,"len_mean":18.325701202060674,"len_median":16.0,"len_min":4,"len_p95":34.0,"n_duplicates":2795,"n_empty":0,"one_word_rate":0.007441327990841442,"readability_flesch_mean":69.25886785714287,"url_rate":0.0,"vocab_size":2534,"word_mean":2.7181835527571074,"word_median":2.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.4% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.0% rows are a single word"},{"code":"allcaps","level":"info","message":"99.2% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Registration","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,36,0,0,64,0,0,69,0,0,398,0,0,3228,0,0,512,0,0,267,0,42,0,0,206,0,0,10,0,0,12,0,0,41,0,0,8,0,39],"edges":[1.0,1.35,1.7,2.05,2.4,2.75,3.0999999999999996,3.4499999999999997,3.8,4.15,4.5,4.85,5.199999999999999,5.55,5.8999999999999995,6.25,6.6,6.949999999999999,7.3,7.6499999999999995,8.0,8.35,8.7,9.049999999999999,9.399999999999999,9.75,10.1,10.45,10.799999999999999,11.149999999999999,11.5,11.85,12.2,12.549999999999999,12.899999999999999,13.25,13.6,13.95,14.299999999999999,14.649999999999999,15.0]},"near_unique":true,"sample":["77","FAC-1150","HP-986PS","4R-HVA","PP-SAD","P4-AOD","PI-C1131","LV-ZSR","RA-65617","P-BALSA","FAP-348","68-218","N5904","F-ALAI","N7840B","PI-C22","CF-TCP","N35207","3C-5GE","B-112","N1554V","NC191E","NC36498","40-2370","N144SP","PK-VIP","C-FAWF","N95425","D-AXAV","ZS-PDV","N1910L","I-FEEV","64-0624","PP-SDJ","CP-639","N558MA","I-ERJC","OO-SRD","145927","5063","7T-VEE","PK-TAR","G-ALZU","RA85816/A9C-DHL","GN-97121","N45MF","CP-45","HR-SAG","42-72572","SX-ECH"],"top_values":[],"top_words":[["/",36],["49",3],["hk-",3],["82",2],["32",2],["77",2],["204",2],["305",2],["223",2],["nc10809",2],["vh-usg",2],["cf-tcl",2],["g-aeuh",2],["w4026",2],["n91303",2],["xa-got",2],["128441",2],["cccp",2],["cp-45",2],["n67941",2],["cp-",2],["65-0990",2],["cccp-09303",2],["012",2],["n864ja",2]],"vocab_skipped":null,"word_histogram":{"counts":[4883,0,0,0,0,0,0,14,0,0,0,0,0,0,0,35,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":5268,"n_null":335,"n_unique":4905,"null_rate":0.06359149582384206,"stats":{"allcaps_rate":0.9918913440097303,"boilerplate_rate":0.0,"duplicate_rate":0.005676059193188729,"emoji_rate":0.0,"len_max":15,"len_mean":6.393877964727347,"len_median":6.0,"len_min":1,"len_p95":10.0,"n_duplicates":28,"n_empty":0,"one_word_rate":0.989864180012163,"readability_flesch_mean":103.02592500000003,"url_rate":0.0,"vocab_size":4948,"word_mean":1.017839043178593,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"98.4% rows are a single word"},{"code":"allcaps","level":"info","message":"96.6% rows are all-caps"},{"code":"null_rate","level":"warn","message":"23.3% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"cn/In","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[23,0,113,0,604,0,866,0,895,0,268,0,269,0,281,0,457,0,125,0,0,92,0,14,0,9,0,2,0,5,0,4,0,5,0,2,0,2,0,4],"edges":[1.0,1.475,1.95,2.425,2.9,3.375,3.8499999999999996,4.324999999999999,4.8,5.2749999999999995,5.75,6.225,6.699999999999999,7.175,7.6499999999999995,8.125,8.6,9.075,9.549999999999999,10.025,10.5,10.975,11.45,11.924999999999999,12.399999999999999,12.875,13.35,13.825,14.299999999999999,14.774999999999999,15.25,15.725,16.2,16.674999999999997,17.15,17.625,18.099999999999998,18.575,19.05,19.525,20.0]},"near_unique":false,"sample":["HP-25","24805/1878","12","10670","20436/788","742","3817","45108","31-033B","1957","45754/224","10570","45290 /4021","30","4817","43057/73","11714","53-13","45-028","556","0402104","1914","43144/155","42-68715","U197","MA036","9721753","2628","640601","25664/2393","77A254","48050/989","439","18712/373","176","2109","120070","61427","20494/850","45972/357","6344506","208B-0549","17629/8","31-7405203","525-0176","1100","17515/124","4491","10171","8275013"],"top_values":[["178",6],["19",5],["229",5],["125",5],["213",5],["1",4],["31",4],["160",4],["4",4],["439",4],["44",4],["442",4],["195",4],["1965",4],["212",4],["55",4],["103",4],["6",4],["36",3],["2",3]],"top_words":[["/",49],["178",6],["1",5],["19",5],["229",5],["125",5],["213",5],["31",4],["160",4],["4",4],["439",4],["44",4],["442",4],["195",4],["1965",4],["212",4],["55",4],["103",4],["6",4],["36",3],["2",3],["86",3],["1567",3],["053",3],["151",3]],"vocab_skipped":null,"word_histogram":{"counts":[3976,0,0,0,0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0,38,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":5268,"n_null":1228,"n_unique":3707,"null_rate":0.2331055429005315,"stats":{"allcaps_rate":0.9663366336633663,"boilerplate_rate":0.0,"duplicate_rate":0.08242574257425743,"emoji_rate":0.0,"len_max":20,"len_mean":5.64480198019802,"len_median":5.0,"len_min":1,"len_p95":10.0,"n_duplicates":333,"n_empty":0,"one_word_rate":0.9841584158415841,"readability_flesch_mean":121.20477500000001,"url_rate":0.0,"vocab_size":3739,"word_mean":1.0257425742574258,"word_median":1.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+4.25"},{"code":"outliers","level":"warn","message":"10.1% rows beyond 1.5 IQR"}],"column":"Aboard","extras":{"histogram":{"counts":[2978,1055,430,230,129,105,75,56,46,35,27,16,8,7,9,4,9,3,9,3,2,3,1,1,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1],"edges":[0.0,16.1,32.2,48.300000000000004,64.4,80.5,96.60000000000001,112.70000000000002,128.8,144.9,161.0,177.10000000000002,193.20000000000002,209.3,225.40000000000003,241.50000000000003,257.6,273.70000000000005,289.8,305.90000000000003,322.0,338.1,354.20000000000005,370.3,386.40000000000003,402.50000000000006,418.6,434.70000000000005,450.80000000000007,466.90000000000003,483.00000000000006,499.1,515.2,531.3000000000001,547.4000000000001,563.5,579.6,595.7,611.8000000000001,627.9000000000001,644.0]},"sample":[1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,6.0,1.0,8.0,8.0,16.0,1.0,6.0,1.0,2.0,10.0,7.0,10.0,5.0,6.0,2.0,15.0,15.0,7.0,12.0,12.0,7.0,14.0,3.0,5.0,19.0,12.0,3.0,17.0,4.0,14.0,17.0,9.0,16.0,2.0,16.0,10.0,22.0,13.0,17.0,37.0,19.0,17.0,27.0,20.0,15.0,25.0,2.0,7.0,6.0,6.0,3.0,11.0,10.0,22.0,23.0,3.0,20.0,25.0,24.0,12.0,14.0,12.0,49.0,13.0,22.0,21.0,44.0,25.0,18.0,2.0,23.0,53.0,48.0,26.0,3.0,43.0,44.0,9.0,18.0,2.0,15.0,19.0,17.0,24.0,26.0,9.0,17.0,21.0,4.0,44.0,13.0,52.0,38.0,20.0,23.0,29.0,23.0,16.0,2.0,50.0,27.0,1.0,3.0,8.0,35.0,3.0,21.0,16.0,28.0,18.0,27.0,5.0,26.0,26.0,6.0,14.0,3.0,22.0,36.0,25.0,62.0,21.0,23.0,12.0,35.0,3.0,67.0,44.0,26.0,49.0,11.0,15.0,45.0,3.0,17.0,18.0,4.0,5.0,18.0,14.0,6.0,3.0,40.0,45.0,10.0,9.0,44.0,4.0,72.0,48.0,23.0,12.0,28.0,14.0,18.0,5.0,107.0,132.0,84.0,13.0,8.0,6.0,29.0,4.0,22.0,83.0,3.0,3.0,80.0,2.0,49.0,7.0,30.0,32.0,3.0,9.0,2.0,38.0,29.0,2.0,25.0,9.0,15.0,4.0,3.0,80.0,82.0,7.0,66.0,127.0,130.0,3.0,6.0,63.0,5.0,29.0,7.0,3.0,21.0,7.0,9.0,25.0,4.0,23.0,11.0,12.0,5.0,12.0,31.0,33.0,18.0,14.0,2.0,4.0,6.0,6.0,8.0,31.0,229.0,79.0,60.0,12.0,15.0,64.0,4.0,12.0,83.0,34.0,126.0,111.0,8.0,25.0,8.0,2.0,3.0,176.0,5.0,15.0,3.0,100.0,5.0,5.0,2.0,73.0,2.0,107.0,12.0,3.0,5.0,5.0,11.0,82.0,3.0,4.0,2.0,3.0,4.0,21.0,6.0,2.0,76.0,5.0,10.0,45.0,20.0,4.0,77.0,10.0,44.0,30.0,165.0,2.0,200.0,134.0,9.0,18.0,3.0,7.0,6.0,4.0,64.0,12.0,7.0,3.0,10.0,6.0,10.0,3.0,7.0,7.0,34.0,89.0,45.0,4.0,50.0,8.0,24.0,7.0,66.0,167.0,12.0,4.0,13.0,18.0,79.0,35.0,118.0,10.0,15.0,10.0,2.0,50.0,3.0,46.0,12.0,4.0,30.0,192.0,50.0,4.0,9.0,10.0,4.0,89.0,8.0,4.0,87.0,14.0,8.0,1.0,11.0,3.0,9.0,4.0,6.0,9.0,82.0,21.0,15.0,12.0,11.0,2.0,5.0,10.0,104.0,34.0,29.0,12.0,5.0,8.0,22.0,2.0,11.0,30.0,10.0,16.0,5.0,63.0,21.0,42.0,71.0,28.0,1.0,46.0,21.0,85.0,5.0,129.0,130.0,38.0,13.0,3.0,3.0,10.0,53.0,15.0,13.0,7.0,2.0,15.0,13.0,22.0,6.0,126.0,17.0,43.0,2.0,11.0,3.0,3.0,110.0,7.0,3.0,2.0,3.0,9.0,2.0,1.0,293.0,28.0,62.0,29.0,24.0,10.0,189.0,32.0,19.0,349.0,8.0,36.0,4.0,22.0,33.0,57.0,26.0,10.0,6.0,31.0,5.0,1.0,5.0,4.0,12.0,20.0,6.0,4.0,9.0,143.0,9.0,17.0,18.0,12.0,3.0,8.0,18.0,2.0,16.0,2.0,9.0,7.0,6.0,71.0,10.0,16.0,5.0,4.0,2.0,46.0,25.0,161.0,148.0,10.0,6.0,7.0,7.0,2.0,6.0,7.0,53.0,2.0,11.0,15.0,10.0,39.0,14.0,11.0,100.0,117.0,3.0,6.0,4.0,11.0,3.0,5.0,11.0,10.0,18.0,10.0,1.0,3.0,20.0,8.0,13.0,6.0,1.0,13.0]},"kind":"numeric","n":5268,"n_null":22,"n_unique":239,"null_rate":0.004176157934700076,"stats":{"iqr":25.0,"kurtosis":28.413952537101586,"max":644.0,"mean":27.554517727792604,"median":13.0,"min":0.0,"n_outliers":529,"outlier_rate":0.10083873427373237,"q1":5.0,"q3":30.0,"skew":4.246965214115307,"std":43.076711027774856,"zero_rate":0.0003812428516965307}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+4.95"},{"code":"outliers","level":"warn","message":"8.4% rows beyond 1.5 IQR"}],"column":"Fatalities","extras":{"histogram":{"counts":[3314,980,343,215,96,90,51,42,39,19,18,9,11,3,2,6,2,5,4,1,1,0,1,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1],"edges":[0.0,14.575,29.15,43.724999999999994,58.3,72.875,87.44999999999999,102.02499999999999,116.6,131.17499999999998,145.75,160.325,174.89999999999998,189.475,204.04999999999998,218.625,233.2,247.77499999999998,262.34999999999997,276.925,291.5,306.075,320.65,335.22499999999997,349.79999999999995,364.375,378.95,393.525,408.09999999999997,422.67499999999995,437.25,451.825,466.4,480.97499999999997,495.54999999999995,510.125,524.6999999999999,539.275,553.85,568.425,583.0]},"sample":[1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0,3.0,3.0,2.0,1.0,3.0,2.0,1.0,2.0,1.0,2.0,10.0,5.0,1.0,3.0,6.0,2.0,1.0,11.0,7.0,12.0,12.0,5.0,14.0,3.0,4.0,19.0,10.0,3.0,2.0,4.0,14.0,16.0,5.0,14.0,1.0,12.0,10.0,18.0,2.0,7.0,25.0,35.0,24.0,20.0,24.0,2.0,3.0,5.0,24.0,21.0,22.0,23.0,4.0,3.0,11.0,30.0,28.0,29.0,59.0,22.0,15.0,8.0,20.0,14.0,25.0,13.0,20.0,24.0,17.0,31.0,22.0,87.0,2.0,2.0,25.0,1.0,3.0,0.0,53.0,25.0,5.0,14.0,30.0,6.0,25.0,2.0,3.0,12.0,8.0,24.0,14.0,7.0,15.0,31.0,14.0,17.0,25.0,23.0,31.0,12.0,29.0,11.0,18.0,37.0,2.0,8.0,10.0,25.0,36.0,5.0,20.0,3.0,15.0,0.0,16.0,1.0,16.0,28.0,4.0,18.0,2.0,34.0,14.0,3.0,1.0,23.0,8.0,7.0,3.0,5.0,35.0,32.0,49.0,11.0,4.0,7.0,24.0,3.0,51.0,65.0,15.0,8.0,68.0,6.0,5.0,3.0,34.0,61.0,9.0,31.0,10.0,8.0,3.0,6.0,72.0,5.0,6.0,11.0,15.0,26.0,81.0,12.0,44.0,2.0,101.0,2.0,31.0,20.0,28.0,0.0,2.0,2.0,2.0,37.0,68.0,58.0,2.0,1.0,24.0,11.0,4.0,2.0,4.0,25.0,5.0,126.0,4.0,35.0,24.0,42.0,18.0,66.0,1.0,1.0,5.0,2.0,82.0,123.0,29.0,7.0,21.0,40.0,4.0,47.0,2.0,13.0,25.0,23.0,39.0,12.0,12.0,1.0,15.0,31.0,1.0,14.0,2.0,15.0,6.0,6.0,5.0,14.0,47.0,2.0,12.0,15.0,64.0,4.0,12.0,78.0,32.0,97.0,111.0,1.0,22.0,1.0,2.0,3.0,103.0,5.0,15.0,3.0,11.0,89.0,5.0,66.0,2.0,107.0,22.0,29.0,3.0,5.0,9.0,75.0,3.0,2.0,2.0,3.0,3.0,21.0,3.0,2.0,28.0,5.0,4.0,45.0,4.0,73.0,77.0,20.0,2.0,30.0,100.0,2.0,2.0,4.0,9.0,18.0,3.0,2.0,3.0,4.0,45.0,86.0,2.0,3.0,61.0,6.0,10.0,3.0,7.0,6.0,34.0,89.0,3.0,2.0,7.0,11.0,2.0,1.0,6.0,99.0,4.0,2.0,12.0,18.0,74.0,35.0,2.0,20.0,2.0,46.0,2.0,6.0,11.0,8.0,0.0,12.0,9.0,10.0,8.0,1.0,12.0,3.0,1.0,14.0,19.0,11.0,55.0,2.0,87.0,21.0,38.0,3.0,11.0,50.0,9.0,2.0,50.0,37.0,2.0,18.0,15.0,12.0,23.0,1.0,6.0,1.0,35.0,33.0,12.0,4.0,5.0,77.0,22.0,6.0,176.0,6.0,3.0,2.0,34.0,54.0,2.0,6.0,4.0,9.0,19.0,1.0,3.0,21.0,23.0,5.0,8.0,4.0,2.0,38.0,4.0,223.0,2.0,15.0,14.0,13.0,2.0,51.0,7.0,4.0,3.0,167.0,12.0,3.0,76.0,132.0,11.0,41.0,55.0,7.0,18.0,8.0,80.0,28.0,8.0,4.0,22.0,15.0,6.0,10.0,42.0,63.0,4.0,3.0,6.0,143.0,349.0,14.0,30.0,10.0,22.0,5.0,39.0,26.0,8.0,5.0,6.0,5.0,9.0,7.0,4.0,12.0,10.0,3.0,4.0,9.0,3.0,9.0,16.0,18.0,145.0,10.0,1.0,18.0,2.0,6.0,16.0,2.0,7.0,6.0,3.0,10.0,18.0,1.0,2.0,1.0,2.0,8.0,1.0,21.0,46.0,275.0,17.0,140.0,10.0,6.0,7.0,7.0,7.0,53.0,2.0,15.0,10.0,0.0,11.0,2.0,117.0,3.0,6.0,4.0,15.0,3.0,1.0,187.0,8.0,2.0,6.0,3.0,0.0,11.0]},"kind":"numeric","n":5268,"n_null":12,"n_unique":191,"null_rate":0.002277904328018223,"stats":{"iqr":20.0,"kurtosis":42.79146214638747,"max":583.0,"mean":20.06830289193303,"median":9.0,"min":0.0,"n_outliers":444,"outlier_rate":0.08447488584474885,"q1":3.0,"q3":23.0,"skew":4.948312044472851,"std":33.199952080203964,"zero_rate":0.011035007610350075}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+50.34"}],"column":"Ground","extras":{"histogram":{"counts":[5235,8,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2],"edges":[0.0,68.75,137.5,206.25,275.0,343.75,412.5,481.25,550.0,618.75,687.5,756.25,825.0,893.75,962.5,1031.25,1100.0,1168.75,1237.5,1306.25,1375.0,1443.75,1512.5,1581.25,1650.0,1718.75,1787.5,1856.25,1925.0,1993.75,2062.5,2131.25,2200.0,2268.75,2337.5,2406.25,2475.0,2543.75,2612.5,2681.25,2750.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":5268,"n_null":22,"n_unique":50,"null_rate":0.004176157934700076,"stats":{"iqr":0.0,"kurtosis":2558.595146965409,"max":2750.0,"mean":1.6088448341593595,"median":0.0,"min":0.0,"n_outliers":219,"outlier_rate":0.04174609226077011,"q1":0.0,"q3":0.0,"skew":50.33625228107394,"std":53.987827158856334,"zero_rate":0.9582539077392299}},{"alerts":[{"code":"near_unique","level":"info","message":"95.8% of rows are unique strings"}],"column":"Summary","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[822,1039,800,547,364,280,231,172,123,128,86,50,57,33,37,19,15,16,11,10,5,1,6,3,4,3,2,1,1,3,1,2,1,4,0,0,0,0,0,1],"edges":[6.0,54.7,103.4,152.10000000000002,200.8,249.5,298.20000000000005,346.90000000000003,395.6,444.3,493.0,541.7,590.4000000000001,639.1,687.8000000000001,736.5,785.2,833.9000000000001,882.6,931.3000000000001,980.0,1028.7,1077.4,1126.1000000000001,1174.8000000000002,1223.5,1272.2,1320.9,1369.6000000000001,1418.3000000000002,1467.0,1515.7,1564.4,1613.1000000000001,1661.8000000000002,1710.5,1759.2,1807.9,1856.6000000000001,1905.3000000000002,1954.0]},"near_unique":true,"sample":["Crashed into trees while attempting to land after being shot down by British and French aircraft.","Flew into a box canyon and crashed at an elevation of 4,000 ft.  VFR flight by the pilot into instrument meteorological conditions, and the pilot's failure to maintain sufficient altitude and/or clearance from mountainous terrain. Factors related to the accident were: the adverse weather and terrain conditions.","Midair collision. The Beechcraft was on a flight from Lyon to Lorient, approaching Lorient, when it requested permission to fly over the ocean liner Norway. While circling the Norway, it collided with the Cessna. One killed aboard the Cessna, 14 aboard the Beechcraft.  Failure of both pilots to 'see and avoid' each other under VFR condition.","The aircraft crashed into a 8,000 ft. mountain  in the Sierra Grande range while climbing en route from Comodoro Rivadavia to Cordoba in heavy rain and strong turbulence. The passengers included military personnel and their dependents.","The helicopter collided with trees after experiencing engine failure. Pilot overshot two suitable landing areas.","The jetliner crashed into the Black Sea and broke up in driving rain and low visibility after making a second attempt to land. The plane disappeared from radar screens just under four miles from shore and crashed after making a turn and heading toward Adler airport for a landing. Pilot error. The pilots of the Airbus-320  allowed the plane to descend too low as it faced bad weather on its approach to the airport.","Due to heavy traffic, the flight was diverted from the planned route. The aircraft failed to follow the assigned airway and crashed into a cloud obscured Montseny Mountain while on approach. The deviation from the assigned airway may have been caused by malfunctioning equipment. In addition, the ATC did not realize the aircraft was deviating from its assigned course.","The aircraft crashed into the Persian Gulf and exploded in flames while attempting to land at Bahrain International Airport. The crew decided to perform a missed approach after it was determined the aircraft was coming in too high and fast. Instructions were given for a 180 degree turn and climb to 2,500 feet. While performing the missed approach the plane suddenly descended rapidly from an altitude of 1,000 feet and crashed into the shallow waters of the gulf approximately 1 mile from the airport. The accident was a result of a fatal combination of factors, including the captain's failure to comply with standard operating procedures and the copilot's actions in not drawing the captain's attention to the deviations of the aircraft from the standard flight parameters. The captain may have suffered a 'spatial disorientation' to ground warning systems, which could have made him falsely perceive the aircraft was pitching up. He responded by making a nose down input, resulting in the aircraft starting to descend, when aircraft warning systems were saying he should increase altitude.","Diverted from Madang to Bagasin, overran the runway and crashed.","Crashed into a radio antenna tower and tore off a wing  in dense  fog.","Crashed on final approach after encountering windshear. Pilot not briefed on possibility of turbulence and windshear by FSS personnel.","Crashed during takeoff.","Crashed into a  mountain while en route..","Shot down by British aircraft.","Crashed onto a mountainside at 9,000 ft. The pilot misjudged the weather conditions and continued to fly into deteriorating weather conditions while trying to maintain VFR. The pilot attempted to climb at a speed below the minimum safe climbing speed of the aircraft.","The aircraft crashed while attempting to land at Kirkland AFB in gusty winds. The pilot appeared to have difficulty keeping the wings level. The plane crossed the threshold left of the center line. When the pilot corrected, the right starboard jet pod and No. 6 propeller struck the runway. The pilot attempted a go-around with the No.6 engine afire but crashed to earth bursting into flames.","Crashed shortly after takeoff in a snow storm. Failure of the pilot to recognize his proximity to the ground due to heavy snow which entirely covered the terrain.","Crashed short of the runway in dense fog while attempting to land a second time at Lucapa.","The tour helicopter crashed near Banning House lodge in rain and poor visibility. The engine appears to have failed.","Engine failure on takeoff. Pilot failed to follow proper emergency procedures. Cause of engine failure unknown.","Failed to gain altitude after a missed approach and crashed. The malfunctioning of the automatipitch coarsening unit of the starboard propeller. This deprived the captain of the necessary degree of control of the aircraft at a critical stage of the flight.","Crashed into Mar Chiquita Lagoon during a heavy rainstorm.","The aircraft crashed into a mountain shortly after taking off from Florianpolis.","Missing on a flight from France to Spain. Shot down by an American Bristol Beau allied night fighter.","The aircraft struck a mountain at 6,200 feet after taking off in mist and haze. The crew took off under VFR conditions during adverse weather conditions. Inappropriate presence of a non-crew pilot in the cockpit and his great talkativeness distracting part of the crew which resulted in carelessness and disorientation in monitoring the heading and altitude necessary to maneuver the plane. VFR flight into IFR conditions.","Crashed into Blewett Falls Lake.","Experienced engine loss on final approach. Wrong engine feathered. Cashed. Inadequate maintenance and inspection. Engine fuel manifold valve defective.","Cleared for an approach to Oakland Municipal Airport, the aircraft crashed 15 miles SSE of the airport into Tolman peak. Patches of fog obscured the terrain. The aircraft struck a hill at an elevation of about 1,000 ft. at 225 to 240 mph. Neither of its 2 low-frequency receivers were tuned to the Oakland station and the captain may have attempted to fly by visual reference using the ADF.  As a result, the flight was 3 miles off course and well below the minimum prescribed altitude. The failure of the captain to adhere to instrument procedures in the Newark area during an approach to the Oakland Municipal  Airport.","One Swedish passenger was killed when the plane was attacked by German fighters. The plane was able to land safely in Amsterdam.","The cargo plane was on final approach when it hit a antenna tower and crashed. Fog and poor visibility prevailed at the time of the accident.","Crashed into trees while attempting a go-around in poor weather. Improper minimum descent atlitiude and missed approach. Improper inflight decision. Improper missed approach. Minimum descent altitude not maintained.","While attemping to take off from Luxor, the cargo plane slammed to the ground, slid down the runway and caught fire.","The aircraft suffered an aft pressure bulkhead failure at 23,900 ft. The aircraft had severe control difficulties with loss of all controls and eventually after 40 minutes, collided with a mountain. Improper repair of the  bulkhead while being supervised by Boeing engineers after a tail strike in 1978. Worst single plane disaster in aviation history. Kyu Sakamoto, 43, famous for his Japanese song 'Sukiyaki' was killed in the accident.","Lost an engine on approach and was unable to maintain altitude. Aircraft overloaded by 827 lbs.","Airframe failure after flying into adverse weather. VFR flight into adverse weather conditions.","Shot down by enemy fire.","The sightseeing helicopter, headed for the bottom of the Grand Canyon, hit the face of a cliff and crashed in rugged terrain killing all aboard. Tourists were supposed to board a pontoon boat when the accident occurred 2/3 of the way down the canyon. The pilot's disregard of safe flying procedures and misjudgment of the helicopter's proximity to terrain. Contributing to the accident was the failure of Sundance Helicopters and the FAA to provide adequate surveillance of Sundance's air tour operations in Descent Canyon.","Crashed into the ocean.","Crashed 7 kms from the runway in a sandstorm. Decision to land while visibility was below company minimums for that particular airport at night.","The plane rolled to the left and crashed into houses after takeoff. Failure of both artificial horizons.","During the takeoff cargo shifted in the plane causing the aircraft to lose altitude and crash into power lines.","The aircraft flew into the side of a cliff during it's inaugural flight from Gohu Airstrip in the Finisterre Mountains.","Exlpoded and caught fire near Bahia de Kino.  A bomb is believed to have exploded onboard.","While on a positioning flight the plane entered an uncontrolled descent and crashed. The pilot's inadvertent flight into mountain wave weather conditions while IMC, resulting in a loss of aircraft control.","While on a training mission the aircraft crashed into a mountainous area and was destroyed.","The pilot issued a distress signal within five minutes of taking off from General Mitchell International Airport in Milwaukee, requesting an emergency return to the airport. The plane then crashed into Lake Michigan two miles off shore. The plane was carrying an organ transplant team. The pilot reported to ATC he had runaway trim prior to the accident.","Crashed into Mt. Nova. Deviated off course for unknown reasons.","Crashed in icing conditions at a low altitude. Evasive maneuver to avoid trees. Pilot not instrument rated. Continued VFR flight into adverse weather conditions.","The aircraft crashed shortly after taking off. Engine failure due to fuel contamination.","The aircraft crashed near a pond 25 km from Cubuk, 20 minutes after taking off. Failure to use de-icing and poor weather was to blame."],"top_values":[],"top_words":[["the",14455],["a",4621],["and",4405],["to",4347],["of",4337],["in",2951],["crashed",2925],["into",2300],["aircraft",2031],["was",2002],["plane",1685],["after",1658],["while",1548],["on",1474],["an",1320],["at",1160],["from",1128],["pilot",891],["by",829],["flight",793],["with",754],["engine",749],["during",746],["off",701],["failure",674]],"vocab_skipped":null,"word_histogram":{"counts":[1150,1277,789,469,314,262,167,148,83,54,39,35,20,18,15,8,4,7,2,3,1,1,2,5,0,3,1,0,0,1],"edges":[1.0,11.8,22.6,33.400000000000006,44.2,55.0,65.80000000000001,76.60000000000001,87.4,98.2,109.0,119.80000000000001,130.60000000000002,141.4,152.20000000000002,163.0,173.8,184.60000000000002,195.4,206.20000000000002,217.0,227.8,238.60000000000002,249.4,260.20000000000005,271.0,281.8,292.6,303.40000000000003,314.20000000000005,325.0]}},"kind":"text","n":5268,"n_null":390,"n_unique":4673,"null_rate":0.07403189066059225,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.04202542025420254,"emoji_rate":0.0,"len_max":1954,"len_mean":200.73575235752358,"len_median":136.0,"len_min":6,"len_p95":584.0,"n_duplicates":205,"n_empty":0,"one_word_rate":0.0004100041000410004,"readability_flesch_mean":61.67790515313969,"url_rate":0.0,"vocab_size":12513,"word_mean":33.23964739647396,"word_median":23.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.Aboard.stats","columns.Fatalities.stats","columns.Ground.stats","columns.Operator.top_values","columns.Type.top_values","columns.Flight #.null_rate","columns.Time.null_rate"],"featured_charts":[{"caption":"Heavily right-skewed: most crashes kill under 10 people but a long tail reaches 583.","column":"Fatalities","kind":"histogram"},{"caption":"Shows passenger load distribution; median 13 with rare wide-body events near 644.","column":"Aboard","kind":"histogram"},{"caption":"Aeroflot and U.S. military operators dominate \u2014 check for over-representation before averaging.","column":"Operator","kind":"bar"},{"caption":"Douglas DC-3 alone accounts for 334 crashes; aircraft type is highly concentrated at the top.","column":"Type","kind":"bar"},{"caption":"Almost all values are zero; the few non-zero entries (up to 2,750) are extreme outliers worth flagging.","column":"Ground","kind":"histogram"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset records 5,268 airplane crashes across 13 columns, mixing dates and times with operator, aircraft type, route, location, and casualty counts (Aboard, Fatalities, Ground). Casualty figures are highly skewed: Aboard averages 27.5 with a median of 13 and a maximum of 644, while Fatalities averages 20.1 with a median of 9 and a max of 583, and Ground deaths are zero in roughly 96% of rows but spike to 2,750 \u2014 clear outliers worth investigating. Operator and Type are dominated by a few heavy hitters (Aeroflot and U.S. military operators; Douglas DC-3 alone appears 334 times), suggesting concentration that could bias any aggregate analysis. Note also that Flight # is missing in nearly 80% of rows and Time in 42%, so those fields are weak for filtering. Start by looking at the Fatalities distribution and the top operators and aircraft types.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.word_mean","stats.duplicate_rate","stats.n_duplicates","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds dates stored as text in MM/DD/YYYY format \u2014 every one of 5268 values is exactly 10 characters and a single token. There are 515 duplicates (9.8%) with repeats clustering on historically notable days such as 09/11/2001 and 06/06/1944, suggesting the rows describe events tied to those dates rather than unique daily records. The text alerts (allcaps, one_word, short_text) are artifacts of the date formatting, not real free-text content.","role":"timestamp","scope":"column","target":"Date","treatment":"parse to a proper date type (MM/DD/YYYY) before any temporal analysis."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.duplicate_rate","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Clock times in HH:MM format, stored as text rather than a temporal type \u2014 values like '15:00', '12:00' and '11:00' top the list and lengths sit tightly between 4 and 7 characters (mean 5.0). Roughly 42% of rows are null and 67% of the non-null values are duplicates across only 1,005 distinct times, suggesting times cluster on the half hour. Despite being numeric-looking, it tripped allcaps and one-word alerts because the profiler treats the strings as tokens.","role":"timestamp","scope":"column","target":"Time","treatment":"parse to a time-of-day type and impute or flag the 42% missing before use."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.n_duplicates","stats.word_median","stats.len_median","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Free-text place names, typically 'City, Country/State' (word_median 3, len_median 19), with 4303 unique values across 5268 rows. Top entries cluster on major world cities (Sao Paulo, Moscow, Rio), but 'near' appears 1272 times suggesting many entries are approximate locations rather than exact place names. Duplicate rate of 18% and 945 repeated strings indicate moderate reusability, though high cardinality limits direct grouping.","role":"feature","scope":"column","target":"Location","treatment":"Parse into city/region/country components and geocode before use; raw strings are too high-cardinality to one-hot."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.n_duplicates","stats.word_mean","stats.len_mean","stats.one_word_rate","top_values","language_counts","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds the airline or military operator name for each record, with 2476 unique values across 5268 rows and only a 0.0034 null rate. It is heavily duplicated (duplicate_rate 0.528, n_duplicates 2774), led by Aeroflot (179) and Military - U.S. Air Force (176), and the language detector flags a multilingual mix dominated by English (3340) but with sizable Italian (278), Spanish (224), German (202), and French (183) counts \u2014 likely an artifact of short proper nouns rather than true translations. Entries are short (word_mean 3.05, len_mean 19.5) and one_word_rate is 0.165, consistent with brand-style names.","role":"feature","scope":"column","target":"Operator","treatment":"Normalize casing and consolidate Military - * variants, then treat as a high-cardinality categorical (target/frequency encode)."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Likely a flight number identifier attached to records (probably aviation incidents). Nearly 80% of rows are null (null_rate 0.7971) and the most common non-null value is the placeholder '-' at 67 occurrences (6.27% of present values), suggesting missing-data sentinels mixed with real codes. Cardinality is high (724 unique across 5268 rows) with entropy_ratio 0.953, so among populated rows values are nearly uniformly distributed.","role":"identifier","scope":"column","target":"Flight #","treatment":"Normalize '-' to null and treat as a high-cardinality identifier; drop from modelling or use only as a join key."},{"confidence":"high","critiques":[],"evidence_keys":["alerts","n","n_unique","null_rate","language_counts","stats.len_mean","stats.word_mean","stats.duplicate_rate","stats.n_duplicates","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Short free-text describing a flight route, typically formatted as 'Origin - Destination' (the hyphen appears 3658 times across 5268 rows) with occasional non-route labels like 'Training' (81), 'Sightseeing' (29), or 'Test flight' (17). Values are short (mean 22 chars, 4 words) and highly varied (3244 unique out of 5268), but 32.38% are null and 318 duplicates exist. Language detection flags a multilingual mix dominated by English (2567) with notable Spanish (237), Portuguese (100), German (88) and Italian (88), reflecting place names rather than true prose.","role":"free_text","scope":"column","target":"Route","treatment":"Parse on ' - ' to split origin/destination and bucket non-route labels separately; impute or flag the 32% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_mean","stats.word_median","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column records aircraft make-and-model designations, dominated by manufacturer-plus-type strings like 'Douglas DC-3' (334 occurrences) and 'de Havilland Canada DHC-6 Twin Otter 300'. Values are short (mean 18.3 chars, median 2 words) but highly repetitive: 53.3% duplicate rate across 2,446 unique types, with Douglas alone appearing in 1,113 rows. Watch for near-duplicate variants of the same airframe ('Douglas C-47', 'Douglas C-47A', 'Douglas C-47B') that will fragment any group-by unless normalised.","role":"feature","scope":"column","target":"Type","treatment":"Normalise manufacturer/variant strings (e.g. collapse C-47 sub-variants) before using as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.one_word_rate","stats.len_mean","stats.len_max","stats.duplicate_rate","stats.n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Almost certainly aircraft tail/registration codes: 4905 unique values across 5268 rows, 99% all-caps single tokens with mean length 6.4 (max 15), and top tokens like 'hk-' and 'nc10809' resemble registration prefixes. Near-unique (n_unique/n \u2248 0.93) with a 6.36% null rate and only 28 duplicates, so it behaves as an identifier rather than a feature. The lone '/' appearing 36 times suggests a placeholder for split/unknown registrations worth inspecting.","role":"identifier","scope":"column","target":"Registration","treatment":"Treat as an identifier: drop from modelling or use only for joins/lookup after normalising case and the '/' placeholder."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.one_word_rate","stats.len_mean","stats.duplicate_rate","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Despite the text classification, 'cn/In' looks like a short numeric code field \u2014 values are predominantly one-word, all-caps tokens with a mean length of 5.6 characters and the top values ('178', '19', '229', '125') all being integers. About 23.3% of rows are null and only 333 duplicates (8.2%) appear across 3,707 unique values, so cardinality is high relative to 5,268 rows. The '/' character showing up 49 times in top_words hints at occasional composite values (e.g., 'a/b'), which the column name 'cn/In' also suggests.","role":"feature","scope":"column","target":"cn/In","treatment":"Cast to numeric where possible and split composite '/'-separated entries; impute or flag the 23% nulls before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.median","stats.mean","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","stats.q1","stats.q3"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the number of people aboard, with values ranging from 0 to 644 and a median of 13. The distribution is heavily right-skewed (skew 4.25, kurtosis 28.4) and roughly 10% of rows (529) are flagged as outliers, indicating a long tail of very large flights against a typical small-aircraft baseline. Nulls are negligible (0.42%) and only 239 distinct values appear across 5268 rows.","role":"feature","scope":"column","target":"Aboard","treatment":"log-transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.kurtosis","stats.iqr","stats.n_outliers","stats.outlier_rate","stats.zero_rate","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Counts of deaths per event, ranging from 0 to 583 with a median of 9 and mean of 20.07. The distribution is heavily right-skewed (skew 4.95, kurtosis 42.79) with 444 outliers (8.4% of rows) and a small zero rate of 1.1%. The IQR of 20 against a max of 583 confirms a long tail driven by rare catastrophic events.","role":"numeric_target","scope":"column","target":"Fatalities","treatment":"log1p-transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.zero_rate","stats.median","stats.q1","stats.q3","stats.max","stats.mean","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","stats.std"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric field 'Ground' is overwhelmingly zero (zero_rate 0.9583) with median, q1, and q3 all at 0.0 and only 50 unique values across 5268 rows. The non-zero tail is extreme: max 2750.0 against a mean of 1.61, skew 50.3, and kurtosis 2559, producing 219 outliers (4.17%). This looks like a sparse count or charge-style feature where almost every record has no ground value but a few carry very large magnitudes.","role":"feature","scope":"column","target":"Ground","treatment":"Split into a zero/non-zero indicator and log-transform the non-zero magnitudes before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_median","stats.len_max","stats.readability_flesch_mean","stats.duplicate_rate","stats.n_duplicates","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Free-text incident summaries averaging 201 characters (median 136, max 1954) with a Flesch readability of 61.7, suggesting short narrative paragraphs. Domain vocabulary is clearly aviation-accident: 'crashed' (2925), 'aircraft' (2031), and 'into' (2300) dominate after stopwords. Near-unique (4673 of 5268) but with 205 exact duplicates (4.2%) and a 7.4% null rate worth checking before modelling.","role":"free_text","scope":"column","target":"Summary","treatment":"Tokenize and embed (or TF-IDF) for downstream NLP; dedupe the 205 exact repeats first."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4968,"prompt_tokens":22248,"total_tokens":27216}},"language_counts":{"bs":3,"ca":39,"ceb":14,"cs":11,"da":5,"de":290,"en":5907,"eo":7,"es":461,"et":3,"eu":1,"fi":6,"fr":247,"gd":1,"gl":2,"hr":14,"hu":6,"id":93,"it":366,"ja":1,"ku":1,"la":2,"lt":2,"ms":6,"nl":73,"no":22,"pl":31,"pt":155,"ro":6,"ru":27,"sh":3,"sl":20,"sv":51,"te":1,"tr":18,"uk":6,"vi":3},"meta":{"generated_at":"2026-05-01T18:06:16+00:00","mode":"full","row_count":5268,"sampled_rows":5268,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/wild/disasters/airplane_crashes.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"Aboard":"numeric","Date":"text","Fatalities":"numeric","Flight #":"categorical","Ground":"numeric","Location":"text","Operator":"text","Registration":"text","Route":"text","Summary":"text","Time":"text","Type":"text","cn/In":"text"}}
