{"attributions":[{"component":"fastText lid.176 language identification model","license":"CC-BY-SA-3.0","note":"Language counts in this report were produced with the fastText lid.176 model, licensed CC-BY-SA-3.0. This report is a derivative work and carries the same license for those figures.","url":"https://fasttext.cc/docs/en/language-identification.html"}],"columns":[{"alerts":[],"column":"category","extras":{"singletons":0,"top_values":[["aviation_accident",32410],["storm",14770],["earthquake",3742],["shipwreck",3653]]},"kind":"categorical","n":54575,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":1.4829928639010839,"entropy_ratio":0.7414964319505419,"top_rate":0.593861658268438,"top_value":"aviation_accident"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-2.51"},{"code":"outliers","level":"warn","message":"7.9% rows beyond 1.5 IQR"}],"column":"latitude","extras":{"histogram":{"counts":[1,0,0,1,1,5,30,22,37,79,176,103,35,108,56,20,23,41,66,64,28,171,70,112,346,895,4049,9547,10928,12642,7278,2535,1308,1095,1761,697,220,22,2,1],"edges":[-77.42499,-73.43508684999999,-69.4451837,-65.45528055,-61.465377399999994,-57.47547424999999,-53.485571099999994,-49.49566795,-45.505764799999994,-41.51586164999999,-37.525958499999994,-33.53605535,-29.546152199999995,-25.55624904999999,-21.566345899999995,-17.57644275,-13.586539599999995,-9.596636449999991,-5.606733299999988,-1.6168301499999984,2.373073000000005,6.362976150000009,10.352879299999998,14.342782450000001,18.332685600000005,22.32258875000001,26.312491900000012,30.30239505,34.292298200000005,38.28220135000001,42.2721045,46.26200765,50.251910800000005,54.24181395000001,58.23171710000001,62.221620250000015,66.21152340000002,70.20142655,74.1913297,78.18123285,82.171136]},"sample":[25.448333,25.7,25.848889,26.116667,26.518333,26.922778,26.966111,27.191944,27.411389,27.458611,27.490278,27.495,27.764167,27.765,28.047222,28.158611,28.368056,28.6275,28.683611,29.0,29.108611,29.179722,29.18,29.881111,30.166667,30.219722,30.351944,30.356389,30.711389,30.911667,30.930556,31.151389,31.258889,31.410833,31.431111,31.509444,31.637778,31.793611,31.847778,31.868611,31.886944,31.938056,32.0625,32.116111,32.152778,32.203611,32.490278,32.579167,32.597778,32.683889,32.700833,32.733611,32.859167,32.925278,32.966667,32.997222,33.01,33.114444,33.133889,33.169722,33.200556,33.233056,33.236667,33.281667,33.317222,33.35,33.356944,33.526667,33.526944,33.543333,33.5525,33.564722,33.574167,33.636667,33.639167,33.734444,33.810278,33.813889,33.8175,33.848056,33.85,33.945833,33.968889,33.97,34.0425,34.055833,34.087778,34.1,34.101111,34.198611,34.259722,34.262222,34.274444,34.359722,34.376944,34.421111,34.436667,34.4525,34.629444,34.65,34.743611,34.841944,34.882222,34.904722,34.905556,34.9775,34.988056,35.219167,35.5375,35.616944,35.617222,35.763056,35.766667,35.8775,35.880556,35.9625,36.001944,36.021667,36.08,36.174722,36.184722,36.404167,36.633056,36.787778,36.893333,36.895556,36.993889,37.083611,37.324444,37.486389,37.5,37.511944,37.659167,37.679167,37.691944,37.693333,37.828611,37.94,37.940278,37.996111,38.05,38.058333,38.1275,38.193889,38.266389,38.323333,38.344444,38.376667,38.418611,38.440833,38.484444,38.545556,38.646667,38.683611,38.707222,38.716389,38.801389,38.819167,38.838056,38.871944,38.941667,38.945556,38.945833,39.000556,39.122222,39.136944,39.175278,39.223056,39.269444,39.297778,39.34,39.467222,39.57,39.57,39.705278,39.916944,39.945833,39.996111,40.024444,40.038611,40.093611,40.124722,40.125,40.170556,40.260278,40.317222,40.453333,40.468611,40.617222,40.864167,40.893056,41.066389,41.113333,41.134167,41.153333,41.196111,41.307222,41.338333,41.368611,41.399722,41.401111,41.45,41.618333,41.7075,41.730556,41.781667,41.788889,41.790556,41.8325,41.85,41.872222,41.872778,41.874167,41.908889,41.968611,41.979444,42.05,42.053333,42.1575,42.2075,42.25,42.259722,42.33,42.337222,42.351111,42.565833,42.5925,42.621667,42.729444,42.742778,42.751111,42.797222,42.898333,42.9,42.953889,42.9975,43.0575,43.0875,43.106667,43.113889,43.243333,43.276944,43.631389,43.851389,44.001667,44.03,44.094722,44.201944,44.228611,44.256667,44.3125,44.343056,44.449722,44.568333,44.593889,44.620278,44.669167,44.787222,44.793056,44.906389,45.025556,45.081111,45.343889,45.588611,45.665,45.7775,45.849167,46.705278,46.82,47.481944,47.495,47.51,47.62,47.695278,47.819444,47.866111,48.160278,48.160278,48.310556,48.390278,48.483333,48.498889,48.601111,48.708056,57.566667,58.420556,58.566667,59.753889,60.183333,60.766667,60.905833,61.394444,61.565556,61.646111,61.79,62.534444,64.453611,18.534488,42.2513,50.784167,35.818996,54.630421,48.509314,-20.516638,-22.871332,-32.031481,40.683242,44.125131,44.318737,-21.12714,53.611806,-23.117833,46.671462,59.311063,10.308767,64.178421,15.721305,57.652918,12.470711,-32.088847,-41.636156,42.236849,8.84276,8.7405,44.342633,56.436781,53.833634,35.53,42.17,40.18,40.27,35.47,47.67,35.07,35.75,43.03,40.12,35.1,43.82,34.95,29.9,32.5,43.82,34.17,35.9,33.72,45.35,26.63,33.97,36.73,34.5,35.23,32.6,27.37,35.53,41.27,41.7,35.23,42.27,42.95,41.23,32.9,45.4,43.88,44.02,33.13,34.77,34.68,42.27,41.33,31.95,44.83,44.36667,48.06667,33.5,34.85,31.26667,33.01667,38.53333,42.96667,36.1,43.93333,44.28333,38.08333,40.63333,39.51667,41.33333,34.5,36.86667,43.36667,43.21667,38.63333,38.96667,34.34,30.53,31.43,38.77,38.4946,40.1,43.45,34.1434,31.7913,45.0,30.5379,39.3205,41.465,43.5179,37.4758,32.95,29.6308,60.9546,33.7615,32.5515,44.2315,31.0859,41.1323,34.5903,44.2491,40.75,27.8766,30.219,39.5667,38.2393,35.4944,34.93,42.0958,38.9,43.3644,30.0133,33.2037,30.7951,44.4693,43.62,41.2239,35.94,37.384,34.1511,39.4971,40.4914,35.2713,41.63,36.1289,34.0269,41.656,43.001,35.8275,45.1061,35.7886,35.934,35.414,40.6414,42.4357,32.0611,38.9174,29.91,36.314,42.9117,36.15,37.3324,36.8525,38.752,52.1171,56.2682,54.0969,27.6867,53.5821,67.8223,47.5643,32.766,42.374,54.4318,35.803167,35.891,56.3576,56.5691,56.0338,59.7953,36.6602,24.9039,52.2065,61.9449,21.1592,24.161,51.829,52.659,52.335,54.16,52.579,43.541,55.194,20.128167,54.045,60.7708,53.927,56.2692,43.737,52.829,51.941,64.22,57.5586]},"kind":"numeric","n":54575,"n_null":0,"n_unique":32209,"null_rate":0.0,"stats":{"iqr":9.120139000000002,"kurtosis":15.966290789665784,"max":82.171136,"mean":38.158956169986254,"median":38.376667,"min":-77.42499,"n_outliers":4302,"outlier_rate":0.07882730187814933,"q1":33.654028,"q3":42.774167,"skew":-2.509865078573989,"std":11.958473080750375,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+2.84"},{"code":"outliers","level":"warn","message":"7.9% rows beyond 1.5 IQR"}],"column":"longitude","extras":{"histogram":{"counts":[49,1005,1182,1679,289,833,6128,4912,3964,10929,12439,7045,1013,178,139,143,40,15,39,348,275,834,267,121,36,80,53,2,20,2,4,19,26,37,18,41,59,22,140,150],"edges":[-179.283333,-170.330553025,-161.37777305,-152.424993075,-143.4722131,-134.519433125,-125.56665315,-116.613873175,-107.6610932,-98.708313225,-89.75553325,-80.802753275,-71.84997329999999,-62.89719332499999,-53.94441334999999,-44.99163337499999,-36.038853399999994,-27.086073424999995,-18.133293449999996,-9.180513474999998,-0.22773349999999937,8.725046475,17.677826449999998,26.630606424999996,35.58338640000002,44.53616637500002,53.48894635000002,62.44172632500002,71.39450630000002,80.34728627500002,89.30006625000001,98.25284622500001,107.20562620000001,116.15840617500001,125.11118615000001,134.063966125,143.0167461,151.969526075,160.92230605,169.875086025,178.827866]},"sample":[-80.413056,-79.257222,-81.390278,-80.240556,-80.041667,-81.188611,-82.353056,-81.837222,-80.526111,-81.343056,-80.368333,-80.368333,-82.625,-82.626944,-82.405556,-82.638056,-80.6875,-81.802778,-82.1575,-82.9,-95.461944,-81.058056,-81.058056,-81.755278,-76.683333,-96.374444,-95.414444,-95.007778,-91.479167,-100.765833,-90.256944,-81.391389,-81.466111,-86.09,-110.841389,-106.141389,-97.073889,-98.956389,-99.314444,-107.615556,-81.557222,-111.118889,-91.254167,-110.941111,-91.698333,-96.7075,-80.993889,-96.718889,-93.805,-94.951111,-80.002778,-117.189722,-102.289167,-97.406944,-97.275278,-111.923056,-104.89,-115.365278,-116.919722,-86.303333,-97.197778,-84.280556,-116.224722,-112.504444,-84.528889,-110.666667,-86.947778,-112.295,-112.295278,-101.835556,-102.3725,-86.25,-117.128333,-95.450556,-84.428056,-117.026389,-111.6675,-78.728611,-118.151389,-111.925278,-116.483333,-118.279722,-112.798611,-117.619722,-112.149444,-117.601111,-118.035833,-117.683333,-84.161111,-96.674444,-118.413611,-91.3075,-78.713056,-84.916667,-117.315833,-119.8425,-119.27,-84.461111,-116.683889,-112.4275,-92.023889,-82.35,-95.783611,-85.459722,-111.874444,-80.564722,-78.874722,-101.705833,-115.416944,-106.086944,-106.089444,-80.955278,-115.328333,-86.3775,-86.380556,-95.341667,-77.683611,-115.119167,-115.152222,-89.163889,-113.043889,-88.015833,-95.151389,-98.670278,-121.41,-121.794167,-121.540833,-84.0675,-97.387222,-96.924722,-77.316667,-122.249444,-122.122222,-121.299722,-121.819444,-121.820278,-121.625833,-100.31,-78.548611,-122.061111,-84.933056,-80.8025,-91.769444,-121.700833,-108.1775,-88.858611,-121.810556,-121.962222,-82.500556,-105.11,-77.612778,-106.033056,-121.296944,-120.985833,-93.175833,-77.634722,-123.530556,-92.221667,-92.0025,-121.470833,-76.57,-104.569722,-104.569722,-119.751944,-123.2,-119.867222,-76.668333,-106.868889,-74.866111,-94.713889,-104.42,-78.957778,-104.849444,-104.849167,-75.032778,-76.867222,-105.024444,-105.189444,-82.461667,-105.229722,-105.293611,-67.792222,-76.3,-75.600833,-75.670833,-89.75,-105.018889,-105.126389,-74.244444,-85.47,-112.276111,-88.359722,-81.150556,-77.433611,-104.130278,-96.112222,-105.671667,-75.723333,-73.491389,-88.792778,-92.945833,-110.566667,-73.083333,-73.738056,-122.545556,-111.846667,-75.89,-111.854444,-85.668333,-88.475,-88.254444,-71.011111,-71.016389,-70.728611,-87.907778,-87.904444,-75.338889,-102.803889,-72.715556,-88.308889,-88.733333,-84.459167,-88.079167,-114.796667,-86.255556,-84.423056,-76.214722,-87.959722,-82.900556,-86.1075,-88.251111,-88.3725,-72.270833,-114.25,-85.343889,-72.620833,-74.196944,-88.177778,-76.103611,-89.531667,-88.368611,-116.996111,-71.360556,-96.401667,-86.663611,-88.19,-121.200556,-114.939444,-116.968611,-121.15,-89.904444,-88.790833,-68.361389,-119.465278,-114.818333,-73.305556,-115.674444,-88.56,-96.4575,-123.234444,-91.599444,-93.668333,-122.879722,-122.5975,-121.521111,-111.153056,-112.450556,-111.976944,-106.319722,-111.370556,-122.774444,-94.935556,-117.647778,-114.185833,-120.719722,-119.942778,-122.165,-122.158889,-114.256111,-101.499444,-122.9375,-122.662222,-113.115278,-122.910556,-157.016667,-135.432222,-153.916667,-154.911111,-154.333333,-161.833333,-161.426389,-149.845556,-149.851389,-149.2925,-156.588056,-147.609722,-147.563611,-72.37901,-81.889733,-0.37,14.449709,-1.013047,-58.541962,-40.066616,-43.13093,-52.039645,18.015617,9.64221,9.219052,-175.159952,11.487392,-44.282314,13.000658,18.455012,-109.202093,-51.699142,39.94777,-3.621948,-69.984873,115.754999,145.069112,12.508719,167.58521,167.71093,11.019559,10.950805,-79.006228,-90.42,-71.77,-88.22,-86.7,-96.9,-92.8,-95.05,-86.93,-88.88,-90.22,-90.38,-85.37,-101.1,-95.07,-99.65,-88.73,-85.2,-90.72,-93.08,-98.2,-80.17,-97.12,-96.02,-84.6,-84.82,-88.7,-97.42,-100.97,-95.87,-91.82,-80.23,-91.33,-96.82,-82.0,-88.3,-122.5,-103.47,-88.55,-87.57,-87.0,-92.23,-94.55,-91.7,-84.07,-72.22,-97.36667,-114.06667,-88.43333,-89.83333,-88.03333,-89.76667,-76.98333,-76.43333,-85.58333,-94.93333,-105.48333,-84.53333,-97.56667,-94.78333,-75.03333,-83.66667,-90.66667,-88.33333,-88.11667,-93.43333,-95.23333,-78.24,-92.08,-97.75,-86.1742,-98.2,-74.17,-90.73,-114.378,-89.0662,-105.568,-92.5113,-85.7591,-90.3227,-90.6715,-81.6686,-97.15,-95.5206,-161.2463,-91.9082,-93.7309,-88.4441,-97.8964,-83.7858,-93.039,-89.9702,-75.58,-82.5025,-92.3142,-99.8036,-95.7604,-90.6592,-92.64,-78.1429,-81.42,-91.2195,-93.206,-107.3048,-88.0417,-93.9386,-84.23,-85.8081,-79.0,-94.2915,-79.6079,-121.0364,-87.5269,-79.2278,-99.37,-96.1142,-95.1537,-70.1883,-99.5323,-78.4399,-87.6602,-86.8441,-78.4739,-82.82,-74.0847,-83.5514,-95.522,-77.0898,-98.82,-94.588,-95.8571,-82.42,-87.0957,-88.6389,-77.05,-169.4631,-152.1728,-166.602,-102.7938,-163.248,-157.0397,-128.8932,-100.659,-126.6594,-159.7133,-117.617833,-117.7365,-149.2464,-148.6628,-149.9167,-136.6481,-98.4594,-109.3154,-169.4714,-151.816,-106.2015,-108.794,-130.654,-133.069,-131.36,-162.489,-169.311,-127.313,-159.748,-155.9835,-164.057,-149.3106,-165.28,-157.2292,-128.072,-166.768,-169.997,-130.92,-152.6298]},"kind":"numeric","n":54575,"n_null":0,"n_unique":34804,"null_rate":0.0,"stats":{"iqr":29.864582999999996,"kurtosis":15.128289761542558,"max":178.827866,"mean":-92.97330393641776,"median":-92.8126,"min":-179.283333,"n_outliers":4320,"outlier_rate":0.07915712322491983,"q1":-112.04208299999999,"q3":-82.1775,"skew":2.8427602667192335,"std":39.50480965321675,"zero_rate":0.0}},{"alerts":[{"code":"multilingual","level":"info","message":"18 languages detected in sample"},{"code":"duplicates","level":"warn","message":"62.3% duplicate strings"}],"column":"name","extras":{"language_counts":{"__engine":"fasttext:4,964","de":58,"en":4726,"es":46,"eu":3,"fr":60,"ht":1,"id":3,"it":13,"ja":32,"lv":1,"pl":2,"pt":3,"ru":7,"sr":1,"sv":1,"uk":1,"zh":6},"language_sample_size":5000,"length_histogram":{"counts":[129,357,2564,297,296,2319,6611,20097,7724,5480,3498,2132,1396,827,490,191,82,19,19,14,5,8,2,5,1,8,0,0,0,0,1,0,0,0,2,0,0,0,0,1],"edges":[2.0,5.775,9.55,13.325,17.1,20.875,24.65,28.425,32.2,35.975,39.75,43.525,47.3,51.074999999999996,54.85,58.625,62.4,66.175,69.95,73.725,77.5,81.27499999999999,85.05,88.825,92.6,96.375,100.14999999999999,103.925,107.7,111.475,115.25,119.02499999999999,122.8,126.575,130.35,134.125,137.9,141.67499999999998,145.45,149.225,153.0]},"near_unique":false,"sample":["Aviation Accident - Bombardier,_Inc. DHC-8-103","Thunderstorm Wind in TEXAS, LUBBOCK","Flash Flood in SOUTH CAROLINA, HORRY","Flash Flood in ILLINOIS, JO DAVIESS","Aviation Accident - Cessna 172","107 km NNE of Los Barriles, Mexico","Aviation Accident - McDonnell_Douglas MD_83","Thunderstorm Wind in PENNSYLVANIA, ELK","Flood in VERMONT, CHITTENDEN","Aviation Accident - SCHWEIZER 269C-1","Unnamed Wreck","Aviation Accident - PIPER PA-12","Aviation Accident - PIPER PA-28-161","Aviation Accident - Bell 412HP","Aviation Accident - CESSNA 172","Aviation Accident - PIPER PA28","Aviation Accident - Price Cuby_Acro_Trainer","Thunderstorm Wind in NEW JERSEY, ATLANTIC","78 km ENE of Loreto, Mexico","Aviation Accident - Cessna 150J","Aviation Accident - Cessna 182S","Aviation Accident - SMITH_ALLEN STORCH","Aviation Accident - Hopkins Rutan_VariViggen","Aviation Accident - CESSNA 150F","Tornado in WISCONSIN, DUNN","Flash Flood in MISSISSIPPI, COPIAH","Aviation Accident - SWEARINGEN SA227-AC","Aviation Accident - Piper PA-28R-200","Aviation Accident - Mooney M20R","54 km SW of Campo Pesquero el Colorado, Mexico","Unnamed Wreck","MV Alta","Unnamed Wreck","Aviation Accident - Beech 35-A33","Aviation Accident - Swearingen SA226TC","Aviation Accident - Villarubia RV-6","66 km SE of Kokhanok, Alaska","Aviation Accident - Cessna 182L","Aviation Accident - MOONEY M20F","Aviation Accident - REWEY_WILLIAM_M CH_801","Thunderstorm Wind in ILLINOIS, MOULTRIE","Thunderstorm Wind in NEBRASKA, NUCKOLLS","Aviation Accident - MOONEY M20J","Flood in VIRGINIA, PULASKI","North Pacific Ocean","260 km SSW of Prince Rupert, Canada","Aviation Accident - Cessna 172R","Aviation Accident - PIPER PA28R","Aviation Accident - PIPER PA-28RT-201","Flash Flood in OHIO, OTTAWA"],"top_values":[["Unnamed Wreck",2184],["Aviation Accident - CESSNA 172",360],["Aviation Accident - Cessna 152",251],["Aviation Accident - Cessna 172N",238],["Aviation Accident - Cessna 172S",209],["Aviation Accident - CESSNA 172S",202],["Aviation Accident - Cessna 172",189],["Aviation Accident - CESSNA 152",176],["Aviation Accident - CESSNA 172N",162],["off the coast of Oregon",151],["Aviation Accident - Cessna 172M",150],["Aviation Accident - Piper PA-28-140",143],["Aviation Accident - Cessna 172P",133],["Aviation Accident - CESSNA 182",131],["Aviation Accident - CESSNA 180",131],["Aviation Accident - CIRRUS_DESIGN_CORP SR22",130],["Aviation Accident - Piper PA-28-180",125],["Aviation Accident - ROBINSON_HELICOPTER R22_BETA",124],["Aviation Accident - BEECH A36",118],["Aviation Accident - CESSNA 172M",116]],"top_words":[["-",11883],["aviation",11880],["accident",11880],["in",5428],["cessna",3163],["tornado",2311],["piper",1719],["flood",1544],["of",1376],["km",1198],["flash",878],["wind",834],["thunderstorm",817],["wreck",806],["unnamed",777],["alaska",742],["beech",679],["texas,",546],["hail",470],["bell",269],["carolina,",256],["new",249],["arkansas,",238],["lightning",228],["north",222]],"vocab_skipped":null,"word_histogram":{"counts":[435,2794,327,8386,0,37852,3053,1483,0,85,75,8,0,15,19,8,9,0,11,4,3,0,1,5,0,0,1,0,0,1],"edges":[1.0,1.7666666666666666,2.533333333333333,3.3000000000000003,4.066666666666666,4.833333333333334,5.6000000000000005,6.366666666666667,7.133333333333334,7.9,8.666666666666668,9.433333333333334,10.200000000000001,10.966666666666667,11.733333333333334,12.5,13.266666666666667,14.033333333333335,14.8,15.566666666666668,16.333333333333336,17.1,17.866666666666667,18.633333333333333,19.400000000000002,20.166666666666668,20.933333333333334,21.700000000000003,22.46666666666667,23.233333333333334,24.0]}},"kind":"text","n":54575,"n_null":0,"n_unique":20587,"null_rate":0.0,"stats":{"allcaps_rate":0.0012643151626202474,"boilerplate_rate":0.0,"duplicate_rate":0.6227759963353183,"emoji_rate":0.0,"len_max":153,"len_mean":32.38147503435639,"len_median":31.0,"len_min":2,"len_p95":48.0,"n_duplicates":33988,"n_empty":0,"one_word_rate":0.007970682546953734,"readability_flesch_mean":8.743982142857158,"url_rate":0.0,"vocab_size":8062,"word_mean":4.782464498396702,"word_median":5.0}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.8% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"82.0% duplicate strings"}],"column":"date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,0,0,0,0,0,0,0,0,151,0,0,0,0,13,0,0,0,0,1,0,0,0,0,19,0,0,0,0,5,0,0,0,0,3,0,0,0,51248],"edges":[2.0,2.2,2.4,2.6,2.8,3.0,3.2,3.4000000000000004,3.6,3.8,4.0,4.2,4.4,4.6,4.800000000000001,5.0,5.2,5.4,5.6,5.800000000000001,6.0,6.2,6.4,6.6000000000000005,6.800000000000001,7.0,7.2,7.4,7.6000000000000005,7.800000000000001,8.0,8.2,8.4,8.600000000000001,8.8,9.0,9.2,9.4,9.600000000000001,9.8,10.0]},"near_unique":false,"sample":["2017-01-01","2005-04-02","2017-04-29","2012-06-29","2015-01-01","1278558949","2011-01-01","2021-12-10","2012-03-02","2011-01-01","2007-01-01","2014-01-01","2005-01-01","2007-01-01","2004-01-01","2018-01-01","2012-01-01","2014-09-28","1042679190","2005-01-01","2010-01-01","2005-01-01","2008-01-01","2006-01-01","1987-07-26","2006-10-26","2001-01-01","2002-01-01","2004-01-01","1286967023","1965-05-06","1970-04-18","1970-07-04","2011-01-01","2002-01-01","2014-01-01","1641997692","2016-01-01","2011-01-01","2007-01-01","2011-03-07","2025-08-09","2015-01-01","2025-05-16","1474779710","1242617976","2006-01-01","2011-01-01","2018-01-01","2019-05-27"],"top_values":[["2003-01-01",2396],["2005-01-01",2220],["2004-01-01",2150],["2007-01-01",2137],["2008-01-01",2050],["2002-01-01",2021],["2006-01-01",1962],["2009-01-01",1937],["2011-01-01",1934],["2012-01-01",1886],["2010-01-01",1835],["2013-01-01",1519],["2016-01-01",1497],["2015-01-01",1468],["2017-01-01",1435],["2014-01-01",1398],["2018-01-01",1283],["2001-01-01",1088],["2000-01-01",191],["1974-04-03",126]],"top_words":[["2003-01-01",943],["2007-01-01",857],["2005-01-01",847],["2004-01-01",838],["2002-01-01",823],["2008-01-01",802],["2006-01-01",754],["2011-01-01",751],["2012-01-01",734],["2009-01-01",731],["2010-01-01",705],["2015-01-01",586],["2016-01-01",567],["2013-01-01",563],["2017-01-01",550],["2014-01-01",531],["2018-01-01",502],["2001-01-01",411],["2000-01-01",78],["1974-04-03",55],["2011-04-27",39],["2011-05-01",29],["2021-09-01",23],["2012-06-29",22],["2011-08-28",20]],"vocab_skipped":null,"word_histogram":{"counts":[51365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,0,0,0,0,32],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":54575,"n_null":3134,"n_unique":9264,"null_rate":0.057425561154374714,"stats":{"allcaps_rate":0.9982504228144865,"boilerplate_rate":0.0,"duplicate_rate":0.8199101883711436,"emoji_rate":0.0,"len_max":10,"len_mean":9.979529946929492,"len_median":10.0,"len_min":2,"len_p95":10.0,"n_duplicates":42177,"n_empty":0,"one_word_rate":0.9985225792655664,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4710,"word_mean":1.002099492622616,"word_median":1.0}},{"alerts":[],"column":"subcategory","extras":{"singletons":4,"top_values":[["aviation",32410],["Tornado",6334],["seismic",3742],["maritime",3653],["Flash Flood",2358],["Thunderstorm Wind",2257],["Flood",1777],["Hail",1246],["Lightning",574],["Heavy Rain",99],["Marine Strong Wind",43],["Debris Flow",43],["Marine Thunderstorm Wind",25],["Marine High Wind",5],["Dust Devil",3],["Waterspout",2],["Tropical Storm",1],["High Wind",1],["Heat",1],["Marine Lightning",1]]},"kind":"categorical","n":54575,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"cardinality":20,"entropy":2.1152209812804603,"entropy_ratio":0.4894160510867053,"top_rate":0.593861658268438,"top_value":"aviation"}},{"alerts":[{"code":"null_rate","level":"warn","message":"80.1% null"}],"column":"magnitude","extras":{"singletons":80,"top_values":[["0",3863],["4.5",686],["4.6",558],["4.7",415],["1.75",383],["4.8",317],["4.9",261],["5",238],["2.75",220],["5.1",202],["5.2",167],["70.00",162],["50.00",151],["2.00",150],["5.3",126],["2.50",123],["61.00",122],["65.00",104],["52.00",95],["5.4",95]]},"kind":"categorical","n":54575,"n_null":43711,"n_unique":291,"null_rate":0.8009344938158498,"stats":{"cardinality":291,"entropy":4.731914759687593,"entropy_ratio":0.5781291176031802,"top_rate":0.3555780559646539,"top_value":"0"}},{"alerts":[{"code":"null_rate","level":"warn","message":"72.9% null"}],"column":"fatalities","extras":{"singletons":18,"top_values":[["0",10209],["1",3208],["2",649],["3",222],["4",112],["5",74],["6",66],["7",38],["9",25],["10",24],["8",21],["11",20],["13",11],["16",10],["12",9],["14",8],["17",6],["20",6],["25",4],["23",3]]},"kind":"categorical","n":54575,"n_null":39805,"n_unique":49,"null_rate":0.7293632615666514,"stats":{"cardinality":49,"entropy":1.4234519366880767,"entropy_ratio":0.2535219051755631,"top_rate":0.691198375084631,"top_value":"0"}},{"alerts":[{"code":"null_rate","level":"warn","message":"72.9% null"}],"column":"injuries","extras":{"singletons":69,"top_values":[["0",10064],["1",893],["2",552],["3",343],["4",236],["5",234],["10",219],["6",196],["12",158],["7",134],["8",121],["20",114],["15",111],["11",90],["9",85],["13",70],["14",69],["30",68],["25",56],["16",48]]},"kind":"categorical","n":54575,"n_null":39805,"n_unique":178,"null_rate":0.7293632615666514,"stats":{"cardinality":178,"entropy":2.468064058708073,"entropy_ratio":0.33014340084475474,"top_rate":0.6813811780636425,"top_value":"0"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"87.2% rows are all-caps"},{"code":"null_rate","level":"warn","message":"72.9% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"93.1% duplicate strings"}],"column":"damage","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[368,0,0,0,0,264,0,0,0,0,1252,0,0,0,0,1172,0,0,0,0,3414,0,0,0,0,6075,0,0,0,0,1450,0,0,0,0,514,0,0,0,261],"edges":[0.0,0.2,0.4,0.6000000000000001,0.8,1.0,1.2000000000000002,1.4000000000000001,1.6,1.8,2.0,2.2,2.4000000000000004,2.6,2.8000000000000003,3.0,3.2,3.4000000000000004,3.6,3.8000000000000003,4.0,4.2,4.4,4.6000000000000005,4.800000000000001,5.0,5.2,5.4,5.6000000000000005,5.800000000000001,6.0,6.2,6.4,6.6000000000000005,6.800000000000001,7.0,7.2,7.4,7.6000000000000005,7.800000000000001,8.0]},"near_unique":false,"sample":["250K","40.00M","3.00M","20.00K","3.17M","15.00M","1.00M","0.00K","0.00K","25K","3.00M","10.00M","1M","25K",".15M","2.5M","2.5M","5.40M","0.00K","1.5M","2.7M","2.5M","2.5M","2.5M","1.50M","986.00K","2.50M","0","2.5M","","5.00M","161.10M","3.00M","150K","3.5M","13M","8.00M","6.50M","0.00K","5.60M","9.20M","0.00K","15M","3.10M","","3.50M","5K","1M","2.5M","15.00K"],"top_values":[["2.5M",2278],["1.00M",1306],["0.00K",1229],["2.00M",553],["25M",530],["250K",471],["1M",457],["",368],["1.50M",334],["5.00M",325],["3.00M",297],["0",264],["2M",231],["10.00M",212],["5M",182],["25K",178],["4.00M",155],["2.50M",136],["1.5M",123],["10.00K",113]],"top_words":[["2.5m",2278],["1.00m",1306],["0.00k",1229],["2.00m",553],["25m",530],["250k",471],["1m",457],["1.50m",334],["5.00m",325],["3.00m",297],["0",264],["2m",231],["10.00m",212],["5m",182],["25k",178],["4.00m",155],["2.50m",136],["1.5m",123],["10.00k",113],["0k",111],["3m",109],["1.20m",109],["1000.00k",102],["10m",92],["50.00m",78]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14770,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":54575,"n_null":39805,"n_unique":1014,"null_rate":0.7293632615666514,"stats":{"allcaps_rate":0.872444143534191,"boilerplate_rate":0.0,"duplicate_rate":0.9313473256601219,"emoji_rate":0.0,"len_max":8,"len_mean":4.380568720379147,"len_median":5.0,"len_min":0,"len_p95":7.0,"n_duplicates":13756,"n_empty":368,"one_word_rate":1.0,"readability_flesch_mean":116.97730000000003,"url_rate":0.0,"vocab_size":1013,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"72.9% null"}],"column":"state","extras":{"singletons":3,"top_values":[["TEXAS",1450],["MISSOURI",648],["ARKANSAS",602],["MISSISSIPPI",570],["GEORGIA",562],["ILLINOIS",560],["IOWA",527],["LOUISIANA",507],["TENNESSEE",499],["FLORIDA",498],["OKLAHOMA",490],["NEBRASKA",486],["ALABAMA",469],["WISCONSIN",463],["OHIO",441],["MICHIGAN",426],["NORTH CAROLINA",422],["KANSAS",418],["INDIANA",408],["KENTUCKY",383]]},"kind":"categorical","n":54575,"n_null":39805,"n_unique":65,"null_rate":0.7293632615666514,"stats":{"cardinality":65,"entropy":5.182276527947146,"entropy_ratio":0.8605048195057264,"top_rate":0.0981719702098849,"top_value":"TEXAS"}},{"alerts":[{"code":"allcaps","level":"info","message":"49.5% rows are all-caps"},{"code":"null_rate","level":"warn","message":"40.6% null"},{"code":"duplicates","level":"warn","message":"70.8% duplicate strings"}],"column":"aircraft_type","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[592,1240,3852,7354,2473,1129,1380,3032,1131,853,1024,779,670,1117,947,528,507,403,455,447,312,352,258,282,261,276,283,145,56,46,35,48,51,38,19,5,7,6,2,15],"edges":[7.0,8.075,9.15,10.225,11.3,12.375,13.45,14.524999999999999,15.6,16.674999999999997,17.75,18.825,19.9,20.975,22.049999999999997,23.125,24.2,25.275,26.349999999999998,27.425,28.5,29.575,30.65,31.724999999999998,32.8,33.875,34.95,36.025,37.099999999999994,38.175,39.25,40.324999999999996,41.4,42.475,43.55,44.625,45.699999999999996,46.775,47.85,48.925,50.0]},"near_unique":false,"sample":["Piper PA-28-140","Robinson R-44","Cessna 180H","WACO QCF-2","PIPER PA_22","PIPER PA_18-150","Piper PA-38-112","Cessna 182K","Rotary_Air_Force_Marketing RAF_2000","Bell UH-1B","RYAN NAVION","CESSNA T210M","PIPER PA-12","BEECH C23","Grumman G-164A","Cessna 170A","EMBRAER_S.A. EMB-500","CESSNA R172K","Aviat A-1B","Preceptor_Aircraft Ultra_pup","FLIGHTWORKS_CORP CAPELLA_XS_FW2C80TD","CESSNA 182T","CESSNA 172","CESSNA 182F","FLIGHT_DESIGN CTLS","Air_Tractor AT-301","Piper PA38-112","Schempp-Hirth Nimbus-3DM","Eurocopter EC120B","PIPER PA-24-180","WINGS_AN_THINGS_INC AVID_FLYER","Embraer EMB-135","McDonnell_Douglas MD-82","Cessna T207A","PIPER PA28","JOHNSON_GLENN_L ZODIAC_601_XL","PIPER PA-28-181","CESSNA 180","CESSNA A185F","PIPER PA_28-161","PIPER PA-28-180","BELL BHT407","Ercoupe_(Eng_&_Research_Corp.) 415-D","MAULE M-4-220C","CESSNA 208B","CESSNA 182","Cessna 180","Christen_Industries A1B","CESSNA TR182","Piper PA-23-250"],"top_values":[["CESSNA 172",360],["Cessna 152",251],["Cessna 172N",238],["Cessna 172S",209],["CESSNA 172S",202],["Cessna 172",189],["CESSNA 152",176],["CESSNA 172N",162],["Cessna 172M",150],["Piper PA-28-140",143],["Cessna 172P",133],["CESSNA 182",131],["CESSNA 180",131],["CIRRUS_DESIGN_CORP SR22",130],["Piper PA-28-180",125],["ROBINSON_HELICOPTER R22_BETA",124],["BEECH A36",118],["CESSNA 172M",116],["PIPER PA-18-150",115],["Cessna 180",112]],"top_words":[["cessna",5421],["piper",2956],["beech",1151],["bell",474],["172",334],["boeing",317],["152",295],["mooney",294],["robinson",282],["172n",250],["172s",243],["schweizer",203],["bellanca",185],["hughes",185],["pa-28-140",164],["180",160],["robinson_helicopter",154],["maule",153],["aeronca",150],["sr22",150],["172m",148],["a36",147],["pa-18-150",143],["r22_beta",143],["182",136]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32410,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[1.5,1.5333333333333334,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666667,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333333,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0,2.033333333333333,2.0666666666666664,2.1,2.1333333333333333,2.1666666666666665,2.2,2.2333333333333334,2.2666666666666666,2.3,2.3333333333333335,2.3666666666666667,2.4,2.4333333333333336,2.466666666666667,2.5]}},"kind":"text","n":54575,"n_null":22165,"n_unique":9478,"null_rate":0.40613834173156205,"stats":{"allcaps_rate":0.4954952175254551,"boilerplate_rate":0.0,"duplicate_rate":0.7075593952483801,"emoji_rate":0.0,"len_max":50,"len_mean":15.850848503548288,"len_median":13.0,"len_min":7,"len_p95":31.0,"n_duplicates":22932,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":45.12250000000002,"url_rate":0.0,"vocab_size":7261,"word_mean":2.0,"word_median":2.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"40.6% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"event_id","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32410,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[13.5,13.525,13.55,13.575,13.6,13.625,13.65,13.675,13.7,13.725,13.75,13.775,13.8,13.825,13.85,13.875,13.9,13.925,13.95,13.975,14.0,14.025,14.05,14.075,14.1,14.125,14.15,14.175,14.2,14.225,14.25,14.275,14.3,14.325,14.35,14.375,14.4,14.425,14.45,14.475,14.5]},"near_unique":false,"sample":["20010627X01274","20060814X01163","20040930X01545","20091006X62448","20160516X70808","20150608X04410","20001212X20849","20050216X00203","20070322X00321","20020717X01150","20130318X45644","20120704X23310","20110718X01748","20160531X61809","20060601X00665","20030403X00426","20141123X91658","20140707X70004","20051031X01757","20030103X00006","20180323X13524","20180619X51517","20180611X50108","20180509X00947","20121023X11953","20030815X01344","20070531X00669","20010611X01142","20060516X00584","20090429X91658","20160803X11028","20031010X01706","20021118X05480","20060707X00894","20171211X74152","20160111X90408","20090723X54623","20150110X51117","20110701X55718","20150816X35140","20130809X95359","20130423X65502","20080411X00461","20090402X03139","20121204X63622","20150723X32606","20181217X25746","20040830X01307","20150909X52721","20021203X05548"],"top_values":[["20010519X00967",8],["20070111X00042",8],["20160718X62313",8],["20080509X00643",6],["20070614X00722",6],["20020731X01266",6],["20160216X30546",6],["20040910X01395",6],["20130614X22020",6],["20090808X42846",6],["20030605X00800",6],["20061015X01521",6],["20070618X00759",6],["20051213X01965",6],["20141023X80357",6],["20100224X50823",6],["20090518X11549",6],["20030903X01437",5],["20051013X01648",5],["20031229X02093",5]],"top_words":[["20160718x62313",5],["20030903x01437",5],["20061015x01521",5],["20070614x00722",5],["20070111x00042",5],["20080509x00643",5],["20141023x80357",5],["20030605x00800",5],["20060908x01324",5],["20131111x10005",5],["20060824x01236",4],["20030826x01400",4],["20140525x21418",4],["20051013x01648",4],["20080324x00372",4],["20170710x64407",4],["20050609x00745",4],["20111117x82932",4],["20110220x82019",4],["20041019x01650",4],["20030731x01236",4],["20100322x44107",4],["20090808x42846",4],["20110224x01309",4],["20030422x00544",4]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32410,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":54575,"n_null":22165,"n_unique":26427,"null_rate":0.40613834173156205,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.18460351743289108,"emoji_rate":0.0,"len_max":14,"len_mean":14.0,"len_median":14.0,"len_min":14,"len_p95":14.0,"n_duplicates":5983,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":17535,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"14 singleton categories"},{"code":"null_rate","level":"warn","message":"93.3% null"}],"column":"vessel_type","extras":{"singletons":14,"top_values":[["",3311],["ship",275],["submarine",18],["aircraft",16],["plane",10],["boat",3],["schooner",2],["car",2],["sailboat",2],["steamer",1],["airplane",1],["freightcar",1],["train",1],["paddle steamer",1],["vehicle",1],["motorbike",1],["helicopter",1],["Steam hoist",1],["tractor",1],["Airplane",1]]},"kind":"categorical","n":54575,"n_null":50922,"n_unique":23,"null_rate":0.9330645900137425,"stats":{"cardinality":23,"entropy":0.576412765750529,"entropy_ratio":0.12742453211649218,"top_rate":0.9063783191897071,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"13 singleton categories"},{"code":"null_rate","level":"warn","message":"93.3% null"},{"code":"imbalance","level":"warn","message":"top value is 99.4% of rows"}],"column":"cargo","extras":{"singletons":13,"top_values":[["",3632],["human",4],["timber",2],["coal",2],["fertilizer",1],["ore pellets",1],["Fischkutter (Stahl)",1],["seafood",1],["fish",1],["passengers",1],["mexican army supposed drugs, but the crew and cargo was not found",1],["iron ore",1],["pulp",1],["18 mines, 6 torpedos",1],["sugar",1],["containers;vehicles",1],["container;oil",1]]},"kind":"categorical","n":54575,"n_null":50922,"n_unique":17,"null_rate":0.9330645900137425,"stats":{"cardinality":17,"entropy":0.07301985554225272,"entropy_ratio":0.017864347243806682,"top_rate":0.9942513003011224,"top_value":""}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"depth_km","extras":{},"kind":"unknown","n":54575,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}}],"insights":{"errors":[],"insights":[{"confidence":"medium","critiques":[],"evidence_keys":["category.top_values","subcategory.top_values","fatalities.null_rate","injuries.null_rate","damage.null_rate","aircraft_type.top_words","state.top_values","fatalities.top_values","category.stats.top_rate"],"featured_charts":[{"caption":"Look for how heavily aviation accidents (59%) outweigh storms, earthquakes, and shipwrecks combined.","column":"category","kind":"donut"},{"caption":"Check whether aviation and Tornado dominate all other subcategories, signalling potential source imbalance.","column":"subcategory","kind":"bar"},{"caption":"Texas leads by a wide margin \u2014 look for whether the top states reflect tornado-prone and storm-prone regions of the US.","column":"state","kind":"bar"},{"caption":"The vast majority of recorded events show zero fatalities; look for how sharply the tail drops off beyond 1\u20132 deaths.","column":"fatalities","kind":"bar"},{"caption":"Event counts cluster around 2002\u20132012 \u2014 look for whether coverage drops off in more recent or earlier years.","column":"date","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset is a multi-hazard disaster event mashup of 54,575 records spanning aviation accidents, storms, earthquakes, and shipwrecks, each geolocated with latitude and longitude. Aviation accidents dominate heavily at nearly 59% of all records, with Cessna models being the most frequently involved aircraft \u2014 worth examining whether this reflects true prevalence or a reporting/sourcing bias. A second area of interest is the severity data: fatalities, injuries, and damage all carry a ~73% null rate, meaning consequence analysis is limited to roughly a quarter of the dataset and skewed toward zero-casualty events. The storm subcategory breakdown (Tornadoes, Flash Floods, Thunderstorm Wind) also deserves a closer look for geographic and seasonal clustering given the strong US state representation.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_values","allcaps_rate","one_word_rate","duplicate_rate","n_unique","n"],"model":"anthropic:default","narrative":"This column contains abbreviated monetary damage estimates (e.g., '2.5M', '250K', '0.00K') stored as free-form text, most likely representing financial loss or property damage figures from incident or insurance records. The null rate is extremely high at 72.94%, meaning nearly three-quarters of rows carry no damage value. The all-caps rate of 87.2% and one-word rate of 100% confirm a consistent but non-numeric encoding; the 1,014 unique values across 54,575 rows with a duplicate rate of 93.1% indicate a relatively coarse discrete scale. Analysts should note that string suffixes (K vs M) encode magnitude and must be parsed before any quantitative use.","role":"feature","scope":"column","target":"damage","treatment":"Parse magnitude suffixes (K=thousands, M=millions) and convert to a numeric column; impute or flag the 72.94% nulls before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","duplicate_rate","n_duplicates","n_unique","null_rate","len_median","len_max"],"model":"anthropic:default","narrative":"This column contains date strings in ISO-8601 format (YYYY-MM-DD), stored as text rather than a native date type. Nearly all top values fall on January 1st of their respective years (2002\u20132012), suggesting dates are truncated or snapped to year-start, which is analytically significant and likely not raw event timestamps. The duplicate rate is extremely high at 81.99%, consistent with annual granularity across 54,575 rows, and 9,264 unique values hint that some finer dates do exist beyond the dominant Jan-1 entries. Null rate is low at 5.74%.","role":"timestamp","scope":"column","target":"date","treatment":"Parse to date type, investigate year-start snapping before using as a time feature, and consider extracting year as an ordinal variable."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","duplicate_rate","n_duplicates","n_unique","len_min","len_max","len_mean","allcaps_rate","top_values"],"model":"anthropic:default","narrative":"This column is an aviation or safety incident event identifier \u2014 the 14-character format (e.g., '20010519X00967') encodes a date prefix followed by an alphanumeric case code, consistent with NTSB accident/incident IDs. Two signals are surprising: a null rate of 40.61% means nearly half of rows lack an event ID entirely, and the duplicate rate of 18.46% (5,983 duplicates across 26,427 unique values) indicates multiple rows share the same event ID, implying a one-to-many relationship where each event spawns several records. All values are exactly 14 characters and fully uppercase, confirming a tightly controlled format with no malformed entries.","role":"foreign_key","scope":"column","target":"event_id","treatment":"Left-join on this ID to an events dimension table; investigate and handle the 40.61% null rate before joining."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","allcaps_rate","duplicate_rate","n_unique","top_values","top_words","n_duplicates"],"model":"anthropic:default","narrative":"This column contains aircraft make-and-model designations (e.g., 'Cessna 172', 'Piper PA-28-140') from what appears to be an aviation incident or registration dataset. Two major surprises: first, 40.6% of rows are null, indicating substantial missing coverage; second, case inconsistency is severe \u2014 'CESSNA 172' (360 occurrences) and 'Cessna 172' (189 occurrences) are counted as distinct values despite being the same aircraft, with ~49.5% of values in all-caps, inflating n_unique (9,478) and the duplicate rate (70.8%) artificially. The top words confirm a GA-heavy dataset dominated by Cessna, Piper, and Beech.","role":"label","scope":"column","target":"aircraft_type","treatment":"Normalize case (lowercase or title-case), then deduplicate/consolidate variant spellings before using as a categorical feature or grouping key."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_values","entropy_ratio","top_rate","alerts"],"model":"anthropic:default","narrative":"This column records the type of cargo carried by vessels or vehicles, with 17 distinct categories including 'human', 'timber', 'coal', 'fertilizer', and 'fish'. It is overwhelmingly sparse: 93.31% of rows are null, and among the non-null rows the top value is an empty string (3,632 occurrences), meaning genuinely populated values number only in the single digits each. The entropy ratio of 0.018 confirms near-total concentration, and the presence of a German-language entry ('Fischkutter (Stahl)') signals a language mix in the rare populated records.","role":"feature","scope":"column","target":"cargo","treatment":"Exclude from modelling unless the non-null subset is the analytic focus; treat empty strings as nulls, consolidate language variants, and flag the 93.31% missingness as likely structural (field not applicable to most records)."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","stats.duplicate_rate","stats.n_duplicates","n_unique","n","language_counts","alerts"],"model":"anthropic:default","narrative":"This column contains descriptive incident or event names, predominantly aviation accidents and natural disaster events (floods, tornadoes). The duplicate rate is strikingly high at 62.3% \u2014 with 33,988 duplicates across only 20,587 unique values out of 54,575 rows \u2014 largely driven by generic labels like 'Unnamed Wreck' (2,184 occurrences) and repeated aircraft model patterns (e.g., 'Aviation Accident - CESSNA 172' variants). While 86.6% of detected-language tokens are English, 14 other languages appear (French: 60, German: 58, Spanish: 46, Japanese: 32), indicating a multilingual dataset that may require language-aware processing.","role":"label","scope":"column","target":"name","treatment":"Normalize case variants (e.g., 'CESSNA 172' vs 'Cessna 172') before grouping or embedding; treat as a categorical label with high cardinality rather than free text."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","iqr","skew","kurtosis","n_outliers","outlier_rate","q1","q3"],"model":"anthropic:default","narrative":"This column contains geographic latitude values ranging from -77.42 to 82.17, consistent with global coordinate data. The median of 38.38 and IQR of 9.12 suggest the bulk of records cluster around mid-latitude Northern Hemisphere locations (roughly US/Europe), but the negative minimum (-77.42) indicates some Southern Hemisphere entries. Highly surprising is the negative skew of -2.51 combined with extreme kurtosis of 15.97 and 4,302 outliers (7.88% of rows), pointing to a heavy tail of anomalous low-latitude or Southern Hemisphere observations that likely warrant geographic subsetting or anomaly review.","role":"feature","scope":"column","target":"latitude","treatment":"Retain as-is for geo-spatial modelling; investigate the 4,302 outliers for data quality issues before binning or clustering by region."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","n_outliers","outlier_rate","iqr","q1","q3"],"model":"anthropic:default","narrative":"This column contains geographic longitude values, spanning from -179.28\u00b0 to +178.83\u00b0, consistent with worldwide coordinates. The mean (-92.97\u00b0) and median (-92.81\u00b0) are tightly clustered in the central United States, suggesting the bulk of records are North American, yet 4,320 outliers (7.9% of rows) and an extreme kurtosis of 15.13 indicate a heavy-tailed distribution with a substantial minority of globally dispersed points. The positive skew of 2.84 confirms an asymmetric pull toward higher (less-negative or positive) longitude values, i.e., non-US locations.","role":"feature","scope":"column","target":"longitude","treatment":"Retain as-is for geospatial modelling; consider pairing with latitude and clustering by region to handle the bimodal/heavy-tailed distribution before feeding into non-spatial models."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","n","top_values","alerts"],"model":"anthropic:default","narrative":"This column categorizes the type of vessel involved in an incident or record, with 23 distinct values including 'ship', 'submarine', 'aircraft', and oddly 'car'. Two major data quality issues stand out: the null rate is extreme at 93.31%, meaning only ~3,700 of 54,575 rows carry any value, and the top recorded value is an empty string (3,311 occurrences), which inflates the apparent top_rate to 90.6% \u2014 suggesting the true fill rate is even lower than the null_rate implies. The long-tail alert is consistent with rare values like 'schooner' (2), 'sailboat' (2), and 'steamer' (1), while 'car' appearing as a vessel type signals potential data entry errors or schema misuse.","role":"feature","scope":"column","target":"vessel_type","treatment":"Treat empty strings as nulls, impute or exclude before modelling given 93.31% missingness, and audit 'car' and 'aircraft' entries for schema validity."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","cardinality","top_values"],"model":"anthropic:default","narrative":"This column represents a fatality count per incident, stored as a categorical/string type despite being numeric in nature. The null rate is severe at 72.94%, meaning nearly three-quarters of records have no value recorded \u2014 this is the primary alert. Among non-null values, the distribution is heavily right-skewed: '0' dominates at 69.1% of non-null rows, with counts dropping sharply through 49 distinct values, indicating rare but high-fatality events exist in the tail.","role":"feature","scope":"column","target":"fatalities","treatment":"Cast to integer, treat nulls as unknown (not zero), then apply log1p-transform or use as-is for count-based modelling given heavy zero-inflation."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","cardinality","top_values"],"model":"anthropic:default","narrative":"This column represents an injury count per incident, stored as a categorical type despite containing integer values (0, 1, 2, \u2026). The dominant concern is an extreme null rate of 72.94%, meaning nearly three-quarters of rows carry no injury data at all. Among non-null rows, the value '0' accounts for 68.14% of responses, indicating most recorded incidents involved no injuries, with a long tail reaching at least 178 distinct values \u2014 suggesting occasional high-casualty outliers.","role":"feature","scope":"column","target":"injuries","treatment":"Cast to integer, impute or flag nulls explicitly, then consider log-transform or treat as a count target given heavy zero-inflation."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","entropy_ratio","cardinality"],"model":"anthropic:default","narrative":"This column contains US state names (full uppercase spellings), acting as a geographic feature for records in the dataset. The critical issue is a 72.94% null rate, meaning nearly three-quarters of all 54,575 rows carry no state value \u2014 this is a severe missingness alert. Among non-null values, cardinality is 65 (slightly above 50 US states, suggesting territories or data anomalies), and distribution is moderately spread (entropy ratio 0.86) with Texas as the dominant value at 9.82% of non-null records.","role":"feature","scope":"column","target":"state","treatment":"Investigate missingness mechanism before use; consider imputation or missingness indicator flag, and audit the 65 unique values to identify non-standard entries beyond the 50 states."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","n","top_values","cardinality"],"model":"anthropic:default","narrative":"This column represents earthquake or seismic event magnitude, stored as a categorical/string type despite being a numeric measurement with 291 distinct decimal values (e.g., 4.5, 4.6, 4.7). Two signals demand attention: the null rate is extremely high at 80.09%, meaning only ~10,866 of 54,575 rows carry a value. The dominant value '0' accounts for 35.56% of non-null records (3,863 occurrences), which is likely a sentinel or placeholder rather than a true zero magnitude, since genuine zero-magnitude events would be vanishingly rare and the next most frequent values cluster around 4.5\u20135.1.","role":"feature","scope":"column","target":"magnitude","treatment":"Cast to float after replacing '0' sentinel values with NaN; investigate whether 80.09% nulls are structurally missing or data quality issues before imputing or dropping rows."},{"confidence":"low","critiques":[],"evidence_keys":["column","n","null_rate","alerts"],"model":"anthropic:default","narrative":"This column represents earthquake or geological event depth in kilometres, a continuous numeric feature. The profiler skipped analysis entirely, so no distribution statistics, uniqueness counts, or range information are available. With 54,575 rows and a null rate of 0.0, the data is fully populated, but nothing can be said about skew, outliers, or value range from this evidence alone. An analyst should inspect the column directly before modelling.","role":"feature","scope":"column","target":"depth_km","treatment":"Manually profile for range and skew; apply log-transform if depth distribution is right-skewed before regression."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","null_rate","n"],"model":"anthropic:default","narrative":"This column is a disaster/incident type label with exactly 4 categories: aviation_accident, storm, earthquake, and shipwreck. The distribution is notably skewed \u2014 aviation_accident dominates at 59.4% of all 54,575 rows (32,410 records), while earthquake and shipwreck are each underrepresented at roughly 6.7% apiece. The entropy ratio of 0.74 confirms meaningful but unbalanced spread across classes, which could bias classifiers trained on this target without resampling.","role":"label","scope":"column","target":"category","treatment":"Use as classification target; apply class-weighting or oversampling for minority classes (earthquake, shipwreck) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","entropy_ratio","top_values","n","null_rate"],"model":"anthropic:default","narrative":"This column is a categorical event subcategory, most likely classifying incident or hazard reports across domains such as aviation, geophysical (seismic), meteorological (Tornado, Flash Flood, Thunderstorm Wind), and maritime events. 'aviation' dominates heavily at 59.4% of all 54,575 rows, creating pronounced class imbalance. A subtle data quality issue is present: some values use title case ('Tornado', 'Flash Flood', 'Thunderstorm Wind', 'Hail') while others are fully lowercase ('aviation', 'seismic', 'maritime'), suggesting records were ingested from at least two inconsistently formatted sources. Entropy ratio of 0.49 confirms the distribution is far from uniform.","role":"label","scope":"column","target":"subcategory","treatment":"Normalize casing before use, then one-hot encode or target-encode accounting for the heavy 'aviation' majority (59.4%)."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4791,"prompt_tokens":18749,"total_tokens":23540}},"language_counts":{"de":58,"en":4726,"es":46,"eu":3,"fr":60,"ht":1,"id":3,"it":13,"ja":32,"lv":1,"pl":2,"pt":3,"ru":7,"sr":1,"sv":1,"uk":1,"zh":6},"meta":{"generated_at":"2026-06-22T01:03:33+00:00","mode":"full","row_count":54575,"sampled_rows":54575,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/wild/disasters/disasters_mashup.json"},"notes":[],"saturn_version":"0.2.0","schema":{"aircraft_type":"text","cargo":"categorical","category":"categorical","damage":"text","date":"text","depth_km":"unknown","event_id":"text","fatalities":"categorical","injuries":"categorical","latitude":"numeric","longitude":"numeric","magnitude":"categorical","name":"text","state":"categorical","subcategory":"categorical","vessel_type":"categorical"}}
