{"attributions":[{"component":"fastText lid.176 language identification model","license":"CC-BY-SA-3.0","note":"Language counts in this report were produced with the fastText lid.176 model, licensed CC-BY-SA-3.0. This report is a derivative work and carries the same license for those figures.","url":"https://fasttext.cc/docs/en/language-identification.html"}],"columns":[{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[3,0,0,0,0,0,0,0,0,0,2,0,0,2,0,75,19,10,22,270,522,1240,2165,2333,1803,1901,2226,1382,515,232,0,0,0,5,6,15,11,8,2,1],"edges":[-14.3236,-12.2123375,-10.101075000000002,-7.989812500000001,-5.878550000000001,-3.7672875,-1.6560250000000014,0.4552374999999991,2.5664999999999996,4.677762499999998,6.7890250000000005,8.9002875,11.011549999999998,13.1228125,15.234074999999999,17.3453375,19.4566,21.567862500000004,23.679125,25.7903875,27.901650000000004,30.0129125,32.124175,34.2354375,36.3467,38.4579625,40.569225,42.680487500000005,44.79175,46.9030125,49.014275000000005,51.12553750000001,53.2368,55.3480625,57.45932500000001,59.5705875,61.68185,63.79311250000001,65.904375,68.0156375,70.1269]},"sample":[35.6,45.12,30.37,34.7,30.5,35.47,46.5,35.75,35.58,43.93,34.7,34.32,33.83,32.35,34.6,44.5,39.97,41.53,40.72,40.9,41.4,40.55,38.7,34.73,43.02,31.83,31.43,34.78,33.13,42.03,29.85,35.47,31.42,41.8,41.43,35.92,33.62,34.83,34.45,33.02,40.95,34.98,35.57,39.98,39.75,38.7,37.57,31.37,36.05,39.48,40.82,31.83,33.52,39.32,30.2,43.82,43.65,34.32,40.07,31.98,36.72,33.42,43.07,32.58,29.2,32.78,28.07,40.6,30.1,36.72,40.67,26.17,32.25,37.2,41.88,40.35,33.62,37.9,37.02,40.52,38.15,38.78,30.93,39.3,29.42,40.63,29.9,32.37,36.23,34.92,40.48,34.33,35.12,35.47,44.67,37.05,34.75,42.02,35.75,32.13,39.72,31.52,31.43,42.42,41.13,34.17,41.25,29.43,34.6,38.78,34.27,34.38,38.15,38.93,39.55,38.67,41.03,38.25,39.48,37.82,39.43,40.6,40.12,28.02,38.12,32.73,32.4,45.55,30.13,40.28,32.33,46.8,43.93,41.13,35.98,35.82,31.1,32.45,32.38,32.42,31.58,41.45,28.1,35.68,30.82,25.68,37.43,35.9,43.92,43.07,41.52,43.1,44.55,32.42,36.53,40.43,42.3,41.52,40.88,46.37,43.73,41.62,24.7,33.42,33.4,38.48,41.82,44.03,46.8,34.65,34.7,32.75,41.23333,30.9,35.26667,35.91667,33.51667,39.45,35.06667,32.38333,31.11667,32.18333,32.55,44.26667,40.83333,41.16667,32.55,42.0,43.38333,36.05,40.3,30.45,31.05,31.78333,32.05,40.7,40.51667,18.28333,48.08333,18.21667,32.3,45.5,31.25,43.21667,29.83333,34.4,33.38333,34.2,38.36667,44.31667,45.45,39.06667,45.2,47.75,29.71,40.87,35.4891,30.3,32.8378,46.55,46.5355,35.0502,31.4493,32.87,41.5004,41.32,40.75,34.5259,38.83,39.85,43.67,34.73,33.6952,43.45,26.35,43.9604,43.02,34.01,34.07,34.97,33.4139,37.501,45.0,39.0204,38.1939,35.3097,37.3964,34.98,32.79,35.3321,35.2677,35.2114,37.5642,34.0936,38.6597,34.236,42.7188,34.6111,39.3205,33.644,43.4988,43.5107,37.9503,41.7304,26.6241,40.43,30.2374,33.7934,35.8441,32.42,40.6612,31.64,34.43,46.8554,39.6597,42.36,39.9555,41.72,38.9228,46.8778,35.323,33.38,37.6046,39.78,32.9456,42.13,34.254,39.9408,45.008,43.49,44.1627,35.65,40.8436,43.9137,41.1045,43.5675,29.1216,42.21,34.8924,35.9657,48.98,48.98,42.8418,44.0964,36.9665,41.6739,41.8763,42.484,38.2393,42.2185,35.3632,34.74,34.939,33.5328,36.5972,34.8055,36.9736,40.55,39.27,60.468,61.225,30.18,33.49,39.95,38.4,37.1,43.52,42.8247,17.9757,39.0832,37.548,34.1784,48.0489,43.4889,38.665,43.7461,33.44,32.46,29.3482,38.818,38.7405,34.697,44.87,39.8967,38.3073,30.4,26.45,43.1613,42.0908,41.56,47.8,35.94,38.1975,30.27,39.8137,39.8394,40.7198,43.65,40.9711,39.7257,42.61,44.3595,37.9371,35.1508,38.3381,34.21,34.1733,34.3878,32.4866,33.5646,34.3402,38.97,33.7163,36.6589,26.57,32.4685,32.2478,34.2232,30.4907,33.1,33.1002,33.8585,33.8197,34.3425,32.7203,43.1925,43.2857,31.2634,30.2113,34.69,18.011,34.2492,34.7207,39.9408,36.6323,29.8193,18.2757,18.3044,18.4862,33.4156,31.9255,36.75,34.9474,37.7238,36.148,37.6547,37.4477,40.8668,35.58,36.4869,43.57,43.05,40.3459,33.4474,35.2907,37.3787,43.848,41.0587,39.4266,35.8649,35.14,30.65,30.6149,35.4601,33.7467,45.6629,36.62,35.17,33.8827,29.9071,43.5023,36.7669,41.65,33.5304,26.172,32.8685,32.7475,46.75,35.18,47.1226,33.51,35.414,28.9908,36.77,37.9683,40.87,30.5288,39.6399,39.1498,30.4003,34.3551,39.0723,36.352,29.9398,33.3905,36.817,47.6393,39.5649,43.018,39.3696,46.8681,36.0214,37.368,17.9376,29.9275,37.1,30.05,41.0615,36.95,46.4126,37.3501,39.6409,34.32,34.464,41.6534,41.28,36.8286,39.94,36.5683,42.4357,38.8016,33.489,30.9102,37.02,27.6233,37.59,33.5414,37.3905,35.4,18.3615,35.95,31.845,36.8669,36.0951,36.005,46.6294,36.3635,41.14,43.2789,41.7313]},"kind":"numeric","n":14770,"n_null":0,"n_unique":7810,"null_rate":0.0,"stats":{"iqr":7.4986999999999995,"kurtosis":3.3406489774886445,"max":70.1269,"mean":37.27773758287068,"median":37.12,"min":-14.3236,"n_outliers":159,"outlier_rate":0.010765064319566689,"q1":33.63,"q3":41.1287,"skew":-0.1786765983590032,"std":5.247184183716053,"zero_rate":0.0}},{"alerts":[],"column":"longitude","extras":{"histogram":{"counts":[4,33,28,4,11,305,466,544,3693,5377,3281,941,79,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1],"edges":[-170.7316,-162.17658749999998,-153.62157499999998,-145.06656249999997,-136.51155,-127.95653749999998,-119.40152499999999,-110.84651249999999,-102.29149999999998,-93.73648749999998,-85.18147499999998,-76.62646249999999,-68.07144999999998,-59.51643749999998,-50.96142499999999,-42.40641249999999,-33.851399999999984,-25.29638749999998,-16.741374999999977,-8.186362499999973,0.3686500000000308,8.923662500000006,17.47867500000001,26.033687500000013,34.58870000000002,43.14371250000002,51.698725000000024,60.25373750000003,68.80875,77.3637625,85.91877500000001,94.47378750000004,103.02880000000002,111.5838125,120.13882500000003,128.6938375,137.24885000000003,145.8038625,154.35887500000004,162.91388750000002,171.4689]},"sample":[-90.72,-93.08,-92.13,-87.7,-90.55,-100.0,-97.0,-96.65,-88.47,-88.97,-85.28,-86.98,-102.05,-95.32,-92.8,-107.55,-76.67,-85.42,-93.55,-81.4,-98.38,-98.37,-94.4,-88.62,-96.07,-86.63,-83.13,-90.97,-96.1,-87.85,-93.97,-76.7,-86.07,-72.53,-90.42,-85.5,-87.6,-86.78,-84.25,-80.18,-99.43,-90.78,-89.65,-88.23,-85.65,-95.97,-81.45,-84.17,-85.2,-84.12,-85.6,-99.43,-86.93,-94.0,-92.18,-88.73,-88.7,-90.42,-81.63,-90.42,-93.63,-92.33,-95.88,-94.8,-81.02,-95.52,-80.57,-79.55,-98.42,-84.47,-93.02,-80.17,-85.4,-89.72,-93.17,-91.45,-93.67,-93.87,-95.85,-88.88,-85.02,-90.7,-92.17,-90.27,-82.1,-95.67,-93.95,-86.4,-96.33,-78.92,-75.7,-90.53,-90.17,-91.05,-88.88,-93.13,-79.37,-79.57,-97.37,-100.12,-104.9,-90.37,-90.25,-96.38,-87.87,-87.38,-95.6,-98.52,-83.75,-99.55,-97.75,-97.58,-97.43,-96.48,-84.05,-86.65,-98.33,-88.47,-88.88,-95.45,-99.42,-96.4,-76.95,-82.78,-81.15,-97.12,-90.23,-88.93,-94.4,-86.52,-90.17,-100.77,-88.23,-104.82,-91.72,-92.35,-97.35,-85.13,-86.3,-87.23,-83.93,-75.38,-80.63,-82.02,-97.62,-80.52,-85.78,-92.63,-86.03,-77.0,-91.5,-84.87,-93.67,-99.85,-85.18,-79.7,-83.47,-82.95,-90.07,-94.2,-83.97,-91.62,-81.08,-111.82,-94.73,-100.9,-79.45,-92.47,-100.77,-112.35,-99.33,-97.35,-103.66667,-95.3,-89.0,-86.2,-88.05,-85.78333,-98.88333,-89.55,-94.76667,-90.31667,-101.05,-88.4,-99.26667,-83.4,-90.31667,-91.65,-97.13333,-89.81667,-81.91667,-85.05,-84.91667,-89.65,-81.08333,-99.08333,-98.06667,-66.51667,-99.85,-65.73333,-90.21667,-91.73333,-83.68333,-88.11667,-97.96667,-85.03333,-96.43333,-79.75,-81.66667,-93.56667,-94.53333,-76.55,-93.63333,-124.38,-85.0,-100.21,-114.05,-97.95,-91.9,-87.38,-87.38,-85.6855,-85.506,-97.37,-95.3773,-96.35,-74.88,-101.7822,-97.6,-95.53,-70.27,-92.4031,-111.8826,-90.73,-80.08,-91.8687,-91.18,-86.75,-84.3,-87.27,-80.31,-92.2247,-105.568,-86.9488,-87.8725,-89.7592,-92.9828,-93.3353,-96.77,-87.1979,-87.4027,-91.4832,-91.2167,-91.4365,-95.6002,-86.8635,-93.9304,-92.0297,-85.7591,-85.638,-91.648,-91.6754,-107.0308,-88.0307,-80.2348,-79.7,-92.0925,-84.75,-90.7185,-90.01,-75.2843,-93.64,-100.3,-102.8159,-86.2148,-93.1,-89.5611,-73.98,-76.45,-96.9276,-89.6526,-112.24,-114.0477,-97.89,-90.1539,-98.03,-96.432,-86.2367,-93.8562,-96.89,-92.5353,-97.58,-93.1129,-94.3451,-89.0723,-84.7918,-95.6492,-78.14,-85.423,-91.422,-102.25,-102.25,-85.9818,-102.5019,-91.4235,-72.9758,-87.6125,-70.869,-95.7604,-96.3817,-79.27,-80.18,-97.67,-89.153,-83.4922,-86.4641,-89.43,-82.62,-81.55,-150.5,-149.65,-93.58,-80.93,-82.02,-95.6,-76.53,-88.22,-102.68,-66.54,-82.7333,-97.348,-109.9434,-117.3422,-71.3451,-104.7677,-92.2092,-84.31,-84.99,-95.0186,-104.8316,-104.7458,-76.774,-88.63,-89.1125,-123.0501,-87.45,-81.95,-70.6446,-96.931,-85.24,-103.68,-79.0,-82.9471,-86.0,-75.4234,-75.6087,-74.7059,-83.93,-98.0,-86.3496,-83.51,-106.8055,-82.8799,-97.0142,-85.6392,-86.97,-101.7628,-97.3169,-97.4352,-116.1083,-90.1974,-90.45,-80.2181,-94.4463,-82.037,-88.8749,-92.9838,-83.9491,-87.2052,-96.68,-96.6288,-79.0687,-79.0829,-79.4344,-97.3554,-91.8536,-91.7242,-91.8498,-89.7614,-85.78,-65.9273,-77.9521,-79.0038,-79.7004,-87.6345,-96.7209,-66.1775,-67.2271,-66.8329,-89.5944,-93.7101,-90.4,-95.4836,-90.0055,-93.7322,-91.5341,-118.4093,-99.73,-78.83,-79.7452,-116.12,-86.24,-94.875,-94.6484,-90.4761,-79.162,-95.346,-80.5702,-77.4042,-79.0149,-79.121,-98.45,-98.3953,-94.3993,-116.9075,-118.8357,-88.32,-85.7823,-84.408,-96.9573,-97.1266,-93.175,-83.55,-90.9931,-97.9952,-96.9149,-93.9719,-118.32,-101.9,-104.7782,-89.92,-82.82,-90.194,-80.73,-83.6148,-98.0383,-103.8206,-75.7796,-77.2766,-88.5177,-86.4723,-77.1123,-88.522,-90.0672,-110.8224,-88.485,-122.4119,-99.4125,-83.235,-80.7593,-100.8664,-78.3321,-121.8806,-66.294,-90.0403,-92.92,-95.42,-92.3334,-88.61,-117.0225,-117.6781,-76.8703,-103.3,-88.789,-87.5248,-75.89,-119.3245,-75.91,-89.0403,-83.552,-81.3336,-97.487,-95.316,-92.66,-80.3957,-77.59,-111.4398,-91.1932,-97.242,-66.99,-81.944,-93.752,-88.3541,-94.8321,-114.967,-97.5341,-89.0436,-74.58,-88.0633,-87.9149]},"kind":"numeric","n":14770,"n_null":0,"n_unique":8828,"null_rate":0.0,"stats":{"iqr":12.170000000000002,"kurtosis":55.607955032539,"max":171.4689,"mean":-90.94044363452944,"median":-90.22,"min":-170.7316,"n_outliers":623,"outlier_rate":0.042180094786729856,"q1":-96.4,"q3":-84.23,"skew":1.2861616598782257,"std":11.695802025598638,"zero_rate":0.0}},{"alerts":[{"code":"multilingual","level":"info","message":"13 languages detected in sample"},{"code":"duplicates","level":"warn","message":"54.9% duplicate strings"}],"column":"name","extras":{"language_counts":{"__engine":"fasttext:5,000","de":25,"en":4796,"es":134,"eu":2,"fr":1,"id":6,"it":5,"ja":22,"pt":4,"ru":2,"sr":2,"zh":1},"language_sample_size":5000,"length_histogram":{"counts":[50,793,2165,3842,2969,1813,1442,915,493,153,45,4,3,4,6,9,5,7,3,10,7,5,6,3,3,3,1,5,3,0,0,0,0,0,1,0,0,0,1,1],"edges":[17.0,19.925,22.85,25.775,28.7,31.625,34.55,37.474999999999994,40.4,43.325,46.25,49.175,52.099999999999994,55.025,57.949999999999996,60.875,63.8,66.725,69.65,72.57499999999999,75.5,78.425,81.35,84.27499999999999,87.19999999999999,90.125,93.05,95.975,98.89999999999999,101.82499999999999,104.75,107.675,110.6,113.52499999999999,116.44999999999999,119.375,122.3,125.225,128.14999999999998,131.075,134.0]},"near_unique":false,"sample":["Tornado in LOUISIANA, CADDO","Flood in LOUISIANA, BEAUREGARD","Tornado in NORTH CAROLINA, ANSON","Thunderstorm Wind in GEORGIA, NEWTON","Hail in WISCONSIN, DODGE","Flash Flood in MISSOURI, DENT","Tornado in PENNSYLVANIA, LUZERNE","Flash Flood in KENTUCKY, FRANKLIN","Lightning in FLORIDA, PALM BEACH","Tornado in SOUTH CAROLINA, DARLINGTON","Tornado in MISSISSIPPI, ATTALA","Tornado in LOUISIANA, CADDO","Hail in MICHIGAN, BRANCH","Tornado in KANSAS, STEVENS","Thunderstorm Wind in CALIFORNIA, FRESNO","Tornado in ALABAMA, MARSHALL","Tornado in ALABAMA, MADISON","Flash Flood in MISSOURI, LINCOLN","Flash Flood in TEXAS, HIDALGO","Tornado in MASSACHUSETTS, NORFOLK","Thunderstorm Wind in WISCONSIN, JEFFERSON","Tornado in KENTUCKY, LEE","Tornado in LOUISIANA, ST. LANDRY","Tornado in FLORIDA, HILLSBOROUGH","Flash Flood in OHIO, SUMMIT","Flood in WISCONSIN, VERNON","Flash Flood in IOWA, WAYNE","Thunderstorm Wind in NORTH CAROLINA, DAVIDSON","Tornado in KANSAS, ELLSWORTH","Tornado in GEORGIA, WHEELER","Thunderstorm Wind in TEXAS, LUBBOCK","Hail in COLORADO, ARAPAHOE","Thunderstorm Wind in OHIO, WASHINGTON","Thunderstorm Wind in MISSOURI, CASS","Tornado in MISSISSIPPI, WASHINGTON","Thunderstorm Wind in ILLINOIS, JEFFERSON","Hail in NEBRASKA, BUFFALO","Flood in MINNESOTA, HOUSTON","Lightning in UTAH, SUMMIT","Flood in WASHINGTON, WHATCOM","Flash Flood in VIRGINIA, HENRY","Thunderstorm Wind in TEXAS, HARRIS","Thunderstorm Wind in PENNSYLVANIA, UNION","Flood in MICHIGAN, IRON","Tornado in IOWA, POTTAWATTAMIE","Thunderstorm Wind in GEORGIA, LOWNDES","Tornado in KENTUCKY, TAYLOR","Tornado in NEW JERSEY, MERCER","Tornado in ARKANSAS, WHITE","Thunderstorm Wind in GEORGIA, CLAYTON"],"top_values":[["Hail in TEXAS, TARRANT",59],["Tornado in TEXAS, HARRIS",30],["Thunderstorm Wind in ARIZONA, MARICOPA",29],["Flash Flood in CALIFORNIA, SAN BERNARDINO",28],["Tornado in OKLAHOMA, OKLAHOMA",26],["Hail in TEXAS, DENTON",26],["Hail in TEXAS, COLLIN",24],["Flash Flood in ILLINOIS, COOK",23],["Tornado in ALABAMA, JEFFERSON",22],["Tornado in TEXAS, DALLAS",22],["Tornado in ARKANSAS, PULASKI",19],["Hail in TEXAS, LUBBOCK",19],["Hail in TEXAS, RANDALL",18],["Flash Flood in NEVADA, CLARK",18],["Tornado in LOUISIANA, CADDO",17],["Tornado in MISSISSIPPI, JONES",17],["Thunderstorm Wind in ILLINOIS, COOK",17],["Flash Flood in TEXAS, HARRIS",17],["Hail in TEXAS, DALLAS",17],["Tornado in LOUISIANA, BOSSIER",16]],"top_words":[["in",14773],["tornado",6334],["flood",4135],["flash",2358],["wind",2332],["thunderstorm",2282],["texas,",1450],["hail",1246],["new",685],["carolina,",657],["missouri,",648],["arkansas,",602],["lightning",575],["mississippi,",570],["north",568],["georgia,",562],["illinois,",560],["iowa,",527],["louisiana,",507],["tennessee,",499],["florida,",498],["oklahoma,",490],["nebraska,",486],["alabama,",469],["wisconsin,",463]],"vocab_skipped":null,"word_histogram":{"counts":[8215,5152,0,1215,89,0,11,15,0,5,8,0,18,8,0,9,11,0,4,3,0,1,4,0,0,1,0,0,0,1],"edges":[4.0,4.666666666666667,5.333333333333333,6.0,6.666666666666666,7.333333333333333,8.0,8.666666666666666,9.333333333333332,10.0,10.666666666666666,11.333333333333332,12.0,12.666666666666666,13.333333333333332,14.0,14.666666666666666,15.333333333333332,16.0,16.666666666666664,17.333333333333332,18.0,18.666666666666664,19.333333333333332,20.0,20.666666666666664,21.333333333333332,22.0,22.666666666666664,23.333333333333332,24.0]}},"kind":"text","n":14770,"n_null":0,"n_unique":6660,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.5490859851049424,"emoji_rate":0.0,"len_max":134,"len_mean":30.219160460392686,"len_median":29.0,"len_min":17,"len_p95":41.0,"n_duplicates":8110,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":31.163353571428587,"url_rate":0.0,"vocab_size":1980,"word_mean":4.587610020311442,"word_median":4.0}},{"alerts":[{"code":"multilingual","level":"info","message":"5 languages detected in sample"},{"code":"duplicates","level":"warn","message":"60.8% duplicate strings"}],"column":"description","extras":{"language_counts":{"__engine":"fasttext:5,000","en":4984,"fr":5,"ja":1,"no":10},"language_sample_size":5000,"length_histogram":{"counts":[4,13,3101,1689,2230,1186,572,1589,2168,902,7,6,14,10,15,25,30,30,34,47,71,64,52,77,60,74,66,65,54,69,82,70,72,78,64,29,20,11,12,8],"edges":[3.0,9.4,15.8,22.200000000000003,28.6,35.0,41.400000000000006,47.800000000000004,54.2,60.6,67.0,73.4,79.80000000000001,86.2,92.60000000000001,99.0,105.4,111.80000000000001,118.2,124.60000000000001,131.0,137.4,143.8,150.20000000000002,156.60000000000002,163.0,169.4,175.8,182.20000000000002,188.60000000000002,195.0,201.4,207.8,214.20000000000002,220.60000000000002,227.0,233.4,239.8,246.20000000000002,252.60000000000002,259.0]},"near_unique":false,"sample":["Magnitude 0; 40 injuries, 9 fatalities; $250K property damage","$40.00M property damage","$3.00M property damage","EG55.00; 1 injuries, 1 fatalities; $20.00K property damage; A weak upper-level wave moving across the state resulted in widespread strong to severe thunderstorms across north and central Georgia.","Magnitude 1.75; $3.17M property damage","$15.00M property damage","5 injuries, 0 fatalities; $1.00M property damage","0 injuries, 1 fatalities","1 injuries, 1 fatalities","Magnitude 0; 0 injuries, 1 fatalities; $25K property damage","2 injuries, 0 fatalities; $3.00M property damage","2 injuries, 0 fatalities; $10.00M property damage","Magnitude 1.75; $1M property damage","Magnitude 0; 5 injuries, 1 fatalities; $25K property damage","Magnitude 30; 15 injuries, 0 fatalities; $.15M property damage","Magnitude 0; 5 injuries, 0 fatalities; $2.5M property damage","Magnitude 0; 3 injuries, 2 fatalities; $2.5M property damage","$5.40M property damage; The Pin Oak Levee in Winfield, MO was breached during the afternoon hours of June 2nd, flooding eastern portions of Winfield east of Highway 79.","0 injuries, 3 fatalities","$1.5M property damage","8 injuries, 0 fatalities; $2.7M property damage","Magnitude 0; 25 injuries, 1 fatalities; $2.5M property damage","Magnitude 0; 7 injuries, 0 fatalities; $2.5M property damage","Magnitude 0; 53 injuries, 0 fatalities; $2.5M property damage","$1.50M property damage","0 injuries, 1 fatalities; $986.00K property damage","$2.50M property damage","Magnitude 0; 1 injuries, 1 fatalities; $0 property damage","Magnitude 0; $2.5M property damage","0 injuries, 2 fatalities","MG83.00; $5.00M property damage","Magnitude 2.50; $161.10M property damage","EG56.00; $3.00M property damage","EG61; 2 injuries, 1 fatalities; $150K property damage","12 injuries, 0 fatalities; $3.5M property damage","EG78; 5 injuries, 0 fatalities; $13M property damage","Magnitude 1.50; $8.00M property damage","$6.50M property damage","2 injuries, 1 fatalities; Showers and thunderstorms occurred across much of northern Utah, and a man was killed by a lightning strike.","$5.60M property damage","$9.20M property damage","EG50.00; 1 injuries, 1 fatalities; A cluster of severe thunderstorms moved across the area and produced hail, damaging winds and flash flooding.","$15M property damage; Strong winds knocked trees down along Rte. 80 near West Buffalo Township.","$3.10M property damage","3 injuries, 1 fatalities","EG65.00; $3.50M property damage","E50; $5K property damage","$1M property damage","Magnitude 0; 7 injuries, 0 fatalities; $2.5M property damage","EG45.00; 0 injuries, 1 fatalities; $15.00K property damage"],"top_values":[["Magnitude 0; $2.5M property damage",1055],["$1.00M property damage",660],["0 injuries, 1 fatalities",488],["$2.00M property damage",292],["$1.50M property damage",178],["Magnitude 0; 1 injuries, 0 fatalities; $2.5M property damage",162],["Magnitude 0; $25M property damage",159],["$5.00M property damage",159],["$3.00M property damage",157],["$1M property damage",146],["Magnitude 0; 2 injuries, 0 fatalities; $2.5M property damage",115],["$10.00M property damage",101],["$4.00M property damage",87],["Magnitude 0; 3 injuries, 0 fatalities; $2.5M property damage",86],["Magnitude 0; 0 injuries, 1 fatalities; $0 property damage",85],["0 injuries, 2 fatalities",77],["Magnitude 0; 4 injuries, 0 fatalities; $2.5M property damage",75],["$1.20M property damage",73],["$2.50M property damage",67],["$2M property damage",66]],"top_words":[["property",13178],["damage",12415],["injuries,",7098],["fatalities;",5949],["magnitude",5463],["0",4929],["1",4111],["0;",3863],["$2.5m",2278],["$1.00m",1306],["the",1290],["and",1278],["2",1219],["a",1198],["fatalities",1153],["of",1026],["damage;",890],["in",622],["3",581],["$2.00m",553],["thunderstorms",546],["$25m",530],["across",508],["$250k",471],["$1m",457]],"vocab_skipped":null,"word_histogram":{"counts":[17,3777,4389,41,2427,2803,13,8,21,26,21,67,54,115,61,75,133,78,113,83,130,43,75,94,39,37,13,14,1,2],"edges":[1.0,2.4333333333333336,3.8666666666666667,5.3,6.733333333333333,8.166666666666668,9.6,11.033333333333333,12.466666666666667,13.9,15.333333333333334,16.766666666666666,18.2,19.633333333333333,21.066666666666666,22.5,23.933333333333334,25.366666666666667,26.8,28.233333333333334,29.666666666666668,31.1,32.53333333333333,33.96666666666667,35.4,36.833333333333336,38.266666666666666,39.7,41.13333333333333,42.56666666666667,44.0]}},"kind":"text","n":14770,"n_null":0,"n_unique":5796,"null_rate":0.0,"stats":{"allcaps_rate":0.00027081922816519973,"boilerplate_rate":0.0,"duplicate_rate":0.6075829383886255,"emoji_rate":0.0,"len_max":259,"len_mean":50.085240352065,"len_median":36.0,"len_min":3,"len_p95":166.0,"n_duplicates":8974,"n_empty":0,"one_word_rate":0.00027081922816519973,"readability_flesch_mean":29.857567232767256,"url_rate":0.0,"vocab_size":4289,"word_mean":7.393026404874746,"word_median":5.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"category","extras":{"singletons":0,"top_values":[["significant_us_storms",14770]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"significant_us_storms"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"65.8% duplicate strings"}],"column":"date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14770,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[9.5,9.525,9.55,9.575,9.6,9.625,9.65,9.675,9.7,9.725,9.75,9.775,9.8,9.825,9.85,9.875,9.9,9.925,9.95,9.975,10.0,10.025,10.05,10.075,10.1,10.125,10.15,10.175,10.2,10.225,10.25,10.275,10.3,10.325,10.35,10.375,10.4,10.425,10.45,10.475,10.5]},"near_unique":false,"sample":["1950-02-12","2016-03-10","2020-04-13","2018-06-25","2006-04-13","2024-11-05","2006-12-01","2021-12-11","2018-07-10","1961-04-12","2011-01-01","2009-04-09","1996-04-12","1951-06-23","1997-05-19","1985-04-05","1974-04-03","2019-06-02","2025-03-27","2004-08-21","1998-05-31","1961-06-09","1983-02-09","1974-02-19","2014-05-12","2016-09-22","2009-08-26","1985-05-15","1970-03-02","2024-09-26","2012-04-29","2012-06-07","2012-06-29","2003-08-21","2001-11-24","2006-07-21","2023-05-05","2008-06-08","2007-10-13","2009-01-07","2018-05-18","2023-05-08","1996-06-20","2023-04-13","2024-04-26","2024-03-27","2000-05-23","2003-09-23","1982-12-02","2021-03-01"],"top_values":[["1974-04-03",126],["2011-04-27",105],["2011-08-28",59],["2011-05-01",59],["2008-03-18",57],["2021-09-01",56],["1990-03-13",52],["1965-04-11",49],["2011-09-07",49],["2012-06-29",45],["2019-03-13",45],["2006-04-13",42],["1984-06-07",41],["1992-11-22",40],["2016-10-08",40],["1984-03-28",38],["2008-06-08",37],["2009-05-08",37],["1985-05-31",35],["2012-03-02",35]],"top_words":[["1974-04-03",126],["2011-04-27",105],["2011-08-28",59],["2011-05-01",59],["2008-03-18",57],["2021-09-01",56],["1990-03-13",52],["1965-04-11",49],["2011-09-07",49],["2012-06-29",45],["2019-03-13",45],["2006-04-13",42],["1984-06-07",41],["1992-11-22",40],["2016-10-08",40],["1984-03-28",38],["2008-06-08",37],["2009-05-08",37],["1985-05-31",35],["2012-03-02",35],["2024-09-27",35],["1998-05-31",34],["2003-05-04",34],["2011-04-26",34],["1990-06-02",32]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14770,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":14770,"n_null":0,"n_unique":5058,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.657549085985105,"emoji_rate":0.0,"len_max":10,"len_mean":10.0,"len_median":10.0,"len_min":10,"len_p95":10.0,"n_duplicates":9712,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":5058,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"country","extras":{"singletons":0,"top_values":[["USA",14770]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"USA"}},{"alerts":[],"column":"event_type","extras":{"singletons":4,"top_values":[["Tornado",6334],["Flash Flood",2358],["Thunderstorm Wind",2257],["Flood",1777],["Hail",1246],["Lightning",574],["Heavy Rain",99],["Marine Strong Wind",43],["Debris Flow",43],["Marine Thunderstorm Wind",25],["Marine High Wind",5],["Dust Devil",3],["Waterspout",2],["Tropical Storm",1],["High Wind",1],["Heat",1],["Marine Lightning",1]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":17,"null_rate":0.0,"stats":{"cardinality":17,"entropy":2.336076472984396,"entropy_ratio":0.5715223755452659,"top_rate":0.4288422477995938,"top_value":"Tornado"}},{"alerts":[],"column":"state","extras":{"singletons":3,"top_values":[["TEXAS",1450],["MISSOURI",648],["ARKANSAS",602],["MISSISSIPPI",570],["GEORGIA",562],["ILLINOIS",560],["IOWA",527],["LOUISIANA",507],["TENNESSEE",499],["FLORIDA",498],["OKLAHOMA",490],["NEBRASKA",486],["ALABAMA",469],["WISCONSIN",463],["OHIO",441],["MICHIGAN",426],["NORTH CAROLINA",422],["KANSAS",418],["INDIANA",408],["KENTUCKY",383]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":65,"null_rate":0.0,"stats":{"cardinality":65,"entropy":5.182276527947146,"entropy_ratio":0.8605048195057264,"top_rate":0.0981719702098849,"top_value":"TEXAS"}},{"alerts":[{"code":"null_rate","level":"warn","message":"51.8% null"}],"column":"magnitude","extras":{"singletons":48,"top_values":[["0",3863],["1.75",383],["2.75",220],["70.00",162],["50.00",151],["2.00",150],["2.50",123],["61.00",122],["65.00",104],["52.00",95],["78.00",80],["70",79],["3.00",77],["56.00",76],["87.00",65],["60.00",63],["50",59],["60",54],["1.50",50],["61",47]]},"kind":"categorical","n":14770,"n_null":7648,"n_unique":170,"null_rate":0.5178063642518619,"stats":{"cardinality":170,"entropy":3.586498005507387,"entropy_ratio":0.4840476142262948,"top_rate":0.5424038191519236,"top_value":"0"}},{"alerts":[],"column":"injuries","extras":{"singletons":69,"top_values":[["0",10064],["1",893],["2",552],["3",343],["4",236],["5",234],["10",219],["6",196],["12",158],["7",134],["8",121],["20",114],["15",111],["11",90],["9",85],["13",70],["14",69],["30",68],["25",56],["16",48]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":178,"null_rate":0.0,"stats":{"cardinality":178,"entropy":2.468064058708073,"entropy_ratio":0.33014340084475474,"top_rate":0.6813811780636425,"top_value":"0"}},{"alerts":[],"column":"fatalities","extras":{"singletons":18,"top_values":[["0",10209],["1",3208],["2",649],["3",222],["4",112],["5",74],["6",66],["7",38],["9",25],["10",24],["8",21],["11",20],["13",11],["16",10],["12",9],["14",8],["17",6],["20",6],["25",4],["23",3]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":49,"null_rate":0.0,"stats":{"cardinality":49,"entropy":1.4234519366880767,"entropy_ratio":0.2535219051755631,"top_rate":0.691198375084631,"top_value":"0"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"87.2% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"93.1% duplicate strings"}],"column":"damage_property","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[368,0,0,0,0,264,0,0,0,0,1252,0,0,0,0,1172,0,0,0,0,3414,0,0,0,0,6075,0,0,0,0,1450,0,0,0,0,514,0,0,0,261],"edges":[0.0,0.2,0.4,0.6000000000000001,0.8,1.0,1.2000000000000002,1.4000000000000001,1.6,1.8,2.0,2.2,2.4000000000000004,2.6,2.8000000000000003,3.0,3.2,3.4000000000000004,3.6,3.8000000000000003,4.0,4.2,4.4,4.6000000000000005,4.800000000000001,5.0,5.2,5.4,5.6000000000000005,5.800000000000001,6.0,6.2,6.4,6.6000000000000005,6.800000000000001,7.0,7.2,7.4,7.6000000000000005,7.800000000000001,8.0]},"near_unique":false,"sample":["250K","40.00M","3.00M","20.00K","3.17M","15.00M","1.00M","0.00K","0.00K","25K","3.00M","10.00M","1M","25K",".15M","2.5M","2.5M","5.40M","0.00K","1.5M","2.7M","2.5M","2.5M","2.5M","1.50M","986.00K","2.50M","0","2.5M","","5.00M","161.10M","3.00M","150K","3.5M","13M","8.00M","6.50M","0.00K","5.60M","9.20M","0.00K","15M","3.10M","","3.50M","5K","1M","2.5M","15.00K"],"top_values":[["2.5M",2278],["1.00M",1306],["0.00K",1229],["2.00M",553],["25M",530],["250K",471],["1M",457],["",368],["1.50M",334],["5.00M",325],["3.00M",297],["0",264],["2M",231],["10.00M",212],["5M",182],["25K",178],["4.00M",155],["2.50M",136],["1.5M",123],["10.00K",113]],"top_words":[["2.5m",2278],["1.00m",1306],["0.00k",1229],["2.00m",553],["25m",530],["250k",471],["1m",457],["1.50m",334],["5.00m",325],["3.00m",297],["0",264],["2m",231],["10.00m",212],["5m",182],["25k",178],["4.00m",155],["2.50m",136],["1.5m",123],["10.00k",113],["0k",111],["3m",109],["1.20m",109],["1000.00k",102],["10m",92],["50.00m",78]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14770,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":14770,"n_null":0,"n_unique":1014,"null_rate":0.0,"stats":{"allcaps_rate":0.872444143534191,"boilerplate_rate":0.0,"duplicate_rate":0.9313473256601219,"emoji_rate":0.0,"len_max":8,"len_mean":4.380568720379147,"len_median":5.0,"len_min":0,"len_p95":7.0,"n_duplicates":13756,"n_empty":368,"one_word_rate":1.0,"readability_flesch_mean":116.97730000000003,"url_rate":0.0,"vocab_size":1013,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"source","extras":{"singletons":0,"top_values":[["NOAA Storm Events Database",14770]]},"kind":"categorical","n":14770,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"NOAA Storm Events Database"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","event_type.top_values","date.top_values","damage_property.top_values","state.top_values","fatalities.top_values","injuries.top_values"],"featured_charts":[{"caption":"Look for the outsized dominance of Tornado versus all other storm types \u2014 it accounts for 43% of all records.","column":"event_type","kind":"bar"},{"caption":"Texas leads by a wide margin; compare the long tail of less-affected states to spot the core tornado-alley concentration.","column":"state","kind":"bar"},{"caption":"The distribution is heavily right-skewed \u2014 most events have zero fatalities, but look for the rare high-casualty outliers.","column":"fatalities","kind":"histogram"},{"caption":"Check whether damage clusters around round million-dollar values, which may indicate reporting thresholds or rounding conventions.","column":"damage_property","kind":"bar"},{"caption":"Like fatalities, injuries are zero for most events \u2014 scan the tail to identify the handful of mass-casualty storm incidents.","column":"injuries","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset contains 14,770 records of significant US storms sourced from the NOAA Storm Events Database, covering events across all 50+ states with dates, locations, event types, casualties, and property damage estimates. The most striking pattern is the dominance of tornadoes (6,334 events, 43% of all records), far outnumbering the next categories of Flash Flood and Thunderstorm Wind. Two dates worth flagging immediately are 1974-04-03 (126 events, the Super Outbreak) and 2011-04-27 (105 events, the 2011 Super Outbreak), suggesting this dataset captures landmark multi-tornado outbreaks disproportionately. Property damage skews heavily toward million-dollar figures, with '2.5M' being the single most common damage value (2,278 occurrences), hinting at possible rounding or a threshold-based inclusion criterion. Texas leads all states with 1,450 events, nearly double the next state (Missouri at 648), reflecting both its geographic size and exposure to severe weather corridors.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","duplicate_rate","n_duplicates","allcaps_rate","one_word_rate","n_empty","top_values","null_rate"],"model":"anthropic:default","narrative":"This column represents property damage amounts stored as formatted currency strings (e.g., '2.5M', '250K', '0.00K'), typical of NOAA storm event or similar disaster/insurance datasets. With only 1,014 unique values across 14,770 rows, the duplicate rate is extremely high at 93.1%, reflecting heavy rounding/bucketing of damage estimates rather than precise measurements. All values are single tokens (one_word_rate: 1.0) and 87.2% are uppercase, consistent with a coded categorical-style encoding of numeric magnitudes. There are 368 empty strings (null_rate reported as 0.0 but n_empty=368), which should be treated as missing values.","role":"feature","scope":"column","target":"damage_property","treatment":"Parse suffix notation (K=thousands, M=millions) to convert to numeric float, treat empty strings as null, then log-transform before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["len_min","len_max","len_mean","n","n_unique","null_rate","duplicate_rate","n_duplicates","top_values"],"model":"anthropic:default","narrative":"This column contains ISO-8601 date strings (YYYY-MM-DD format), stored as text rather than a native date type \u2014 all 14,770 values are exactly 10 characters with zero nulls. The duplicate rate of 65.75% (9,712 duplicates across only 5,058 unique dates) is notable and suggests this is a grouping/event date used as a foreign-key-style attribute rather than a unique record timestamp. The top date, 1974-04-03, appears 126 times, and several 2011 dates cluster heavily, which may reflect significant event concentrations worth investigating.","role":"timestamp","scope":"column","target":"date","treatment":"Parse to native date type, then use as a grouping/join key or engineer calendar features (year, month, day-of-week) for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["duplicate_rate","n_duplicates","n_unique","n","top_values","top_words","readability_flesch_mean","language_counts","alerts"],"model":"anthropic:default","narrative":"This column contains structured event descriptions summarising disaster or incident outcomes \u2014 specifically property damage amounts, injury counts, fatalities, and seismic magnitudes (e.g., 'Magnitude 0; $2.5M property damage'). The duplicate rate is strikingly high at 60.76%, with 8,974 duplicates across 14,770 rows and only 5,796 unique values, indicating these are templated strings generated from a small set of outcome combinations rather than free-form text. The Flesch readability mean of 29.86 reflects the dense, numeric, shorthand nature of the content. A small multilingual signal exists (10 Norwegian, 5 French, 1 Japanese entries) which may indicate data sourced from multiple regional systems and warrants review.","role":"label","scope":"column","target":"description","treatment":"Parse structured fields (damage amount, injuries, fatalities, magnitude) via regex into separate numeric columns rather than embedding as text."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","stats.duplicate_rate","stats.n_duplicates","n_unique","n","stats.vocab_size","stats.word_mean","language_counts","alerts"],"model":"anthropic:default","narrative":"This column contains structured event description labels of the form '[Weather Event Type] in [STATE, COUNTY]', effectively serving as a composite label combining event type and geographic location. The duplicate rate is strikingly high at 54.9%, with 8,110 duplicates across 14,770 rows and only 6,660 unique values, indicating that the same event type/location combinations recur frequently \u2014 consistent with repeated weather incidents in the same areas. The multilingual alert is almost certainly a false positive from language detection mis-classifying US place names and weather terminology as non-English; dominant language is English (4,796 of sampled values) and top values are entirely English-structured strings. Vocabulary size of 1,980 across ~14k rows and a mean of ~4.6 words per entry confirm the formulaic, low-variety nature of the text.","role":"label","scope":"column","target":"name","treatment":"Parse into two structured features (event_type, state_county) via regex split on ' in ' before modelling; do not embed as raw text."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy","null_rate","n"],"model":"anthropic:default","narrative":"This column is a dataset category tag, holding a single constant value 'significant_us_storms' across all 14,770 rows with no nulls. It carries zero information entropy (entropy = 0.0) and a top_rate of 1.0, meaning it is entirely invariant. This is a metadata label describing the dataset itself, not a feature with predictive or analytical value.","role":"metadata","scope":"column","target":"category","treatment":"Drop before modelling; constant column adds no signal and will cause issues with variance-based methods."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","top_rate","top_value","entropy","cardinality"],"model":"anthropic:default","narrative":"This column represents the country of origin or scope for all records in the dataset, and every single one of the 14,770 rows contains the value 'USA' \u2014 making it a zero-entropy constant. The column carries no discriminative information whatsoever and will contribute nothing to any model or analysis. Its uniformity may also indicate the dataset is intentionally scoped to a single country, which is worth confirming before joining with broader datasets.","role":"metadata","scope":"column","target":"country","treatment":"Drop before modelling; constant column with zero variance and entropy of 0.0."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","cardinality","top_rate","top_value","entropy","null_rate"],"model":"anthropic:default","narrative":"This column identifies the data source, and every single one of the 14,770 rows carries the identical value 'NOAA Storm Events Database' \u2014 cardinality of 1 with top_rate of 1.0 and entropy of 0.0. It is a constant metadata field, almost certainly a provenance tag added during ingestion. It carries zero predictive or analytical signal.","role":"metadata","scope":"column","target":"source","treatment":"Drop before modelling; constant column adds no variance."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","top_values","kind"],"model":"anthropic:default","narrative":"This column appears to represent a magnitude measure (likely seismic, stellar, or similar scientific scale) stored as a categorical type despite containing numeric-looking values spanning a wide range (e.g., 1.75, 2.75, 50.00, 70.00). Two surprises stand out: first, 51.78% of rows are null, triggering an alert; second, the dominant value '0' accounts for 54.24% of non-null rows (3,863 of ~7,124 non-null records), suggesting zero may encode 'none', 'unknown', or a sentinel rather than a true zero magnitude. The presence of both small decimal values (1.75, 2.00, 2.50) and large round integers (50.00, 61.00, 65.00, 70.00) hints at a possible mixed-scale or mixed-source column.","role":"feature","scope":"column","target":"magnitude","treatment":"Investigate zero sentinel vs. true zero, impute or drop nulls based on missingness mechanism, cast to float, then assess whether log-transform or binning is appropriate before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","q1","q3","iqr","skew","kurtosis","n_outliers","outlier_rate","null_rate"],"model":"anthropic:default","narrative":"This column contains geographic latitude values, spanning from -14.3236 to 70.1269 degrees, consistent with worldwide location data. The distribution is tightly clustered between Q1=33.63 and Q3=41.13 (IQR ~7.5), suggesting the bulk of records concentrate around mid-latitude Northern Hemisphere locations (roughly US/Europe range), with the mean (37.28) and median (37.12) nearly identical indicating only mild skew (-0.18). The leptokurtic shape (kurtosis 3.34) and 159 outliers (~1.1%) reflect a small tail of equatorial or high-latitude records that an analyst should verify are not geocoding errors.","role":"feature","scope":"column","target":"latitude","treatment":"Use as-is or pair with longitude for spatial modelling; consider binning into regions or projecting to avoid Euclidean distance distortion."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","q1","q3","iqr","kurtosis","skew","n_outliers","outlier_rate"],"model":"anthropic:default","narrative":"This column represents geographic longitude, with values spanning from -170.7316 to 171.4689 degrees. The bulk of observations cluster around the Americas (mean -90.94, IQR roughly -96.4 to -84.23, consistent with the central/eastern US or Caribbean), but the extreme kurtosis of 55.6 and 623 outliers (4.2%) indicate a heavy-tailed distribution with a notable minority of records far outside this core region \u2014 including values near +171, suggesting Pacific or Asian locations. The positive skew (1.29) and tight IQR relative to the full range confirm most records concentrate in a narrow band while a long right tail pulls toward positive (eastern hemisphere) longitudes.","role":"feature","scope":"column","target":"longitude","treatment":"Retain as-is for geospatial modelling; investigate the 623 outliers for data-entry errors or legitimate international records before clustering or bounding-box filtering."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy_ratio","null_rate","top_values"],"model":"anthropic:default","narrative":"This column contains categorical labels for natural weather/disaster event types across 14,770 records, with 17 distinct categories and no nulls. The dominant class is 'Tornado' at 42.9% (6,334 occurrences), creating notable class imbalance \u2014 the top 5 categories ('Tornado', 'Flash Flood', 'Thunderstorm Wind', 'Flood', 'Hail') account for the vast majority of records, while tail categories like 'Marine Thunderstorm Wind' (25) and 'Debris Flow' (43) are sparsely represented. The entropy ratio of 0.572 confirms moderate but uneven spread across classes, which will challenge classifiers without resampling or class-weight adjustment.","role":"label","scope":"column","target":"event_type","treatment":"Encode as nominal category; apply class weights or oversample minority classes (e.g., 'Marine Thunderstorm Wind' n=25) before classification modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","entropy","entropy_ratio","top_values","n","null_rate"],"model":"anthropic:default","narrative":"This column records fatality counts per incident, stored as strings but representing non-negative integers ranging from 0 to at least 10 across 49 distinct values. The dominant value is '0' at 69.1% of rows (10,209 of 14,770), indicating most incidents involve no fatalities. The distribution is heavily right-skewed, with counts dropping sharply: 1 fatality appears 3,208 times, 2 appears 649 times, and values thin out rapidly beyond that \u2014 yet 49 unique values suggests some high-count outliers exist beyond the top 10 shown. Low entropy (1.42, ratio 0.25) confirms the extreme concentration on zero.","role":"feature","scope":"column","target":"fatalities","treatment":"Cast to integer, treat as count variable; consider zero-inflated modelling or log1p-transform given severe right skew and 69.1% zero mass."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","top_values","kind"],"model":"anthropic:default","narrative":"This column represents a count of injuries per record, stored as a categorical type despite being fundamentally numeric. The dominant value is '0' appearing in 68.1% of rows (10,064 of 14,770), indicating most records involve no injuries. With 178 unique values and top counts following a steep drop-off consistent with a zero-inflated count distribution, the categorical encoding is likely a data-type artifact \u2014 the values are clearly ordinal integers and should be treated as numeric.","role":"feature","scope":"column","target":"injuries","treatment":"Cast to integer, then model with zero-inflated Poisson or apply log1p transform before regression given heavy zero inflation."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column represents the US state associated with each record, stored as full uppercase state names. With 65 unique values against the expected 50 US states, there are likely extra entries such as territories (e.g., Puerto Rico, Guam), non-standard labels, or minor data quality issues worth auditing. Texas dominates at 9.8% of records (1,450), and the top-10 states are heavily weighted toward the South and Midwest. The high entropy ratio of 0.86 indicates a relatively even spread across categories, though Texas is a clear outlier compared to the rest.","role":"label","scope":"column","target":"state","treatment":"Standardize to a canonical list (resolve the 65\u219250+ mapping), then one-hot encode or use target encoding for modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4291,"prompt_tokens":15701,"total_tokens":19992}},"language_counts":{"de":25,"en":9780,"es":134,"eu":2,"fr":6,"id":6,"it":5,"ja":23,"no":10,"pt":4,"ru":2,"sr":2,"zh":1},"meta":{"generated_at":"2026-06-21T23:29:41+00:00","mode":"full","row_count":14770,"sampled_rows":14770,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/wild/weather/noaa_significant_storms.json"},"notes":[],"saturn_version":"0.2.0","schema":{"category":"categorical","country":"categorical","damage_property":"text","date":"text","description":"text","event_type":"categorical","fatalities":"categorical","injuries":"categorical","latitude":"numeric","longitude":"numeric","magnitude":"categorical","name":"text","source":"categorical","state":"categorical"}}
