{"columns":[{"alerts":[{"code":"one_word","level":"warn","message":"58.5% rows are a single word"},{"code":"duplicates","level":"warn","message":"78.9% duplicate strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[72,190,110,466,942,2265,2027,1560,1821,1566,2019,626,923,808,1219,1117,747,889,549,531,719,307,119,178,63,52,22,8,2,17,19,11,50,13,9,2,1,3,0,1],"edges":[3.0,4.1,5.2,6.300000000000001,7.4,8.5,9.600000000000001,10.700000000000001,11.8,12.9,14.0,15.100000000000001,16.200000000000003,17.3,18.400000000000002,19.5,20.6,21.700000000000003,22.8,23.900000000000002,25.0,26.1,27.200000000000003,28.3,29.400000000000002,30.500000000000004,31.6,32.7,33.800000000000004,34.900000000000006,36.0,37.1,38.2,39.300000000000004,40.400000000000006,41.5,42.6,43.7,44.800000000000004,45.900000000000006,47.0]},"near_unique":false,"sample":["Animalia","Microtus ochrogaster","Acanthohoplites","Ammonoidea","Theropoda","Synphoroides","Hadrosauropodus leonardii","Moutoniceras moutonianum","Hoploscaphites","Palmatolepis","Carpodaptes rosei","Protungulatum","Lithostrotia","Polygnathus","Iguanodon","Sauropoda","Polygnathus","Anadesmoceras","Crassiproetus crassimarginatus","Dromaeosaurus albertensis","Phuwiangosaurus sirindhornae","Palmatolepis glabra","Mamenchisaurus","Anchisauripus bibractensis","Promartes lepidus","Thomomys bottae","Neoplagiaulax nanophus","Grallator tenuis","Polygnathus decorosa","Paladin","Xenocardia diversidens","Temnocyon percussor","Paramerychyus relictus","Sauroposeidon proteles","Tyrannosauridae","Dinosauria","Hystricurus","Ernestokokenia chaishoer","Iguanodontidae","Baioconodon nordicus","Thomomys","Cedaria","Apatosaurus","Syspacheilus","Cheirurus","Kettneraspis","Ankylosaurus magniventris","Sauropelta edwardsorum","Eubrontes","Scaphites meriani"],"top_values":[["Theropoda",768],["Dinosauria",512],["Sauropoda",426],["Hadrosauridae",411],["Palmatolepis",376],["Polygnathus",235],["Ammonoidea",195],["Ceratopsidae",169],["Ornithopoda",141],["Icriodus",135],["Ozarkodina",122],["Equus",119],["Grallator",118],["Ceratopsia",94],["Palmatolepis glabra",91],["Ornithischia",90],["Barremites",83],["Prosauropoda",81],["Tyrannosauridae",81],["Dromaeosauridae",80]],"top_words":[["theropoda",687],["palmatolepis",517],["dinosauria",465],["sauropoda",387],["hadrosauridae",376],["polygnathus",314],["grallator",286],["eubrontes",182],["equus",180],["ammonoidea",175],["icriodus",169],["ozarkodina",151],["ceratopsidae",147],["ornithopoda",133],["anomoepus",101],["baculites",100],["camarasaurus",90],["minuta",88],["allosaurus",85],["glabra",84],["merychyus",84],["ornithischia",83],["ceratopsia",83],["ptilodus",77],["tyrannosauridae",73]],"vocab_skipped":null,"word_histogram":{"counts":[12887,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8948,0,0,0,0,0,0,0,0,0,0,0,0,0,208],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":22043,"n_null":0,"n_unique":4660,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.7885950188268385,"emoji_rate":0.0,"len_max":47,"len_mean":15.094769314521617,"len_median":14.0,"len_min":3,"len_p95":26.0,"n_duplicates":17383,"n_empty":0,"one_word_rate":0.5846300412829469,"readability_flesch_mean":-4.12667499999997,"url_rate":0.0,"vocab_size":5140,"word_mean":1.4248060608810054,"word_median":1.0}},{"alerts":[],"column":"rank","extras":{"singletons":1,"top_values":[["species",9082],["genus",7342],["unranked clade",2828],["family",1716],["subfamily",272],["subclass",205],["class",134],["order",115],["infraorder",97],["superfamily",75],["subgenus",51],["kingdom",50],["suborder",29],["subspecies",23],["tribe",12],["subphylum",9],["superorder",2],["superclass",1]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":18,"null_rate":0.0,"stats":{"cardinality":18,"entropy":2.085282874450611,"entropy_ratio":0.5000768296142842,"top_rate":0.41201288390872387,"top_value":"species"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-2.44"},{"code":"outliers","level":"warn","message":"9.2% rows beyond 1.5 IQR"}],"column":"lat","extras":{"histogram":{"counts":[4,0,0,12,7,0,0,0,16,60,111,127,166,347,40,99,138,6,255,21,0,32,22,12,168,137,1224,669,1615,3128,4830,3520,2987,1322,253,303,99,118,180,15],"edges":[-84.333336,-80.2312526,-76.1291692,-72.02708580000001,-67.92500240000001,-63.822919000000006,-59.7208356,-55.6187522,-51.516668800000005,-47.41458540000001,-43.31250200000001,-39.210418600000004,-35.108335200000006,-31.00625180000001,-26.904168400000003,-22.802085000000005,-18.700001600000007,-14.59791820000001,-10.495834800000011,-6.3937514000000135,-2.2916680000000156,1.8104153999999966,5.9124987999999945,10.014582199999992,14.11666559999999,18.21874899999999,22.320832399999986,26.422915799999984,30.524999199999996,34.627082599999994,38.72916599999999,42.83124939999999,46.93333279999999,51.035416199999986,55.137499599999984,59.23958299999998,63.34166639999998,67.44374979999998,71.54583319999998,75.64791659999997,79.75]},"sample":[55.538334,49.23111,49.23111,41.349998,22.394722,39.216667,39.216667,39.216667,39.216667,39.216667,39.216667,62.480556,62.480556,49.5,49.5,62.733334,62.733334,62.733334,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,31.366667,31.366667,18.157778,18.157778,49.5,-33.200001,49.5,22.096666,39.433334,44.950001,44.950001,73.5,73.5,73.5,33.267223,46.841946,71.383614,-30.950001,47.700001,40.826389,51.60265,34.942501,49.0644,40.247086,44.081387,49.460835,49.233334,-28.416668,-29.033333,-29.883333,35.2374,34.43742,37.122406,37.997799,38.620537,33.107777,46.841946,47.5,30.8146,30.8146,40.095001,41.893101,-28.509443,36.099998,42.017776,42.017776,28.8333,30.984501,31.733433,41.893101,41.893101,41.893055,41.8825,32.108208,-9.679919,-9.679919,-9.709615,54.16,38.539501,-9.704556,-9.704556,-9.704556,42.017776,39.139999,40.586899,38.638058,43.409801,43.033333,48.650002,-14.6167,50.737499,42.517487,41.693111,42.273609,-16.48,38.566666,42.155437,40.441387,41.893055,50.849998,29.355556,43.622528,-9.708957,45.397652,42.629375,19.266666,-29.133333,-31.868889,-30.35,40.661667,45.147804,42.256668,42.57724,42.610104,42.294674,26.343056,44.051388,36.640278,25.173765,43.74881,44.270752,42.963055,32.410378,51.551666,51.819099,-17.687,23.380833,39.267223,47.5919,48.633301,48.966599,29.138056,25.505556,31.666668,19.317499,20.892221,38.936169,39.155956,29.316668,39.030499,42.664555,38.799999,49.119999,49.119999,16.700001,16.666668,36.269444,42.166668,42.166668,43.44128,36.121387,23.057222,-32.216702,-43.783333,16.695,39.905277,45.948799,40.611942,50.736988,50.749004,50.748341,46.3629,45.93903,49.127655,51.8046,41.627701,-38.200832,51.542702,36.343887,37.579166,50.688,47.96389,43.83139,43.741669,47.5667,47.549999,47.549999,47.516701,43.233299,43.116329,47.799999,40.285278,43.219398,45.468201,35.195,49.9147,62.500557,51.784698,50.606667,29.837761,41.25,50.599998,32.253334,50.655834,53.633331,34.907101,34.992802,24.2983,44.583328,55.903332,46.900002,37.257801,48.766666,40.950001,40.950001,50.815033,42.26667,36.4884,43.488888,48.900002,33.154167,42.165356,36.886551,36.886551,36.886539,49.283001,36.388332,36.294445,36.284443,36.270279,-2.4781,36.227501,36.024445,36.1548,35.991943,36.285702,36.271099,43.266701,43.266701,35.956108,42.117294,42.117294,42.117294,42.117294,51.9333,42.017159,40.116699,50.744595,50.738434,50.747437,44.433334,-43.238335,-33.799999,52.461189,47.730373,41.378132,41.378132,42.86639,-21.933332,16.416668,43.36861,34.906387,42.001389,50.666668,29.450001,39.700001,39.099998,47.799999,49.5,39.099998,41.934032,47.799999,36.299999,47.799999,47.799999,39.400002,36.299999,44.299999,35.700001,36.299999,36.5746,36.5746,36.299999,53.0,35.700001,35.700001,35.700001,41.799999,41.799999,35.5,35.5,36.271099,41.700001,41.301399,35.700001,51.0,43.849998,43.849998,45.799999,44.299999,45.799999,45.799999,51.900002,41.799999,41.799999,51.183334,51.5,52.200001,43.200001,41.583332,41.700001,45.799999,45.799999,43.849998,43.849998,45.200001,44.65723,44.299999,43.200001,43.200001,45.400002,32.683334,32.683334,43.647499,43.647499,43.5,43.5,40.0,40.0,3.3,43.299999,43.299999,43.299999,42.416668,43.299999,43.299999,49.599998,29.200001,29.200001,42.099998,42.0,42.400002,43.299999,30.299999,35.700001,35.700001,42.75,42.488899,42.200001,42.1856,43.133331,43.133331,43.133331,43.200001,42.799999,44.900002,42.400002,40.900002,42.400002,42.683334,42.683334,42.799999,49.700001,49.700001,49.599998,42.799999,42.700001,42.490002,42.490002,22.766666,35.700001,35.700001,32.578499,28.955299,37.200001,37.200001,32.900002,32.900002,37.047699,40.200001,40.200001,40.200001,35.0,35.0,35.0,33.799999,37.044701,37.044701,37.044701,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,34.799999,66.599998,39.0,21.799999,40.799999,38.700001,28.799168,36.0,38.299999,39.299999,64.800003,37.200001,39.691666,47.0,27.041668,27.041668,33.599998,37.056667,34.799999,64.800003,42.700001,39.299999,29.683332,70.480003,70.480003,43.269444,43.252499,43.269444,43.269444,43.269444,43.269444,50.599998,50.599998,50.599998,51.083332,51.083332,51.083332,51.083332,51.283333,51.283333,52.133331,52.200001,53.283333,43.30389,24.216667,46.833332,46.833332,37.551109,37.551109,37.867222,37.867222,37.867222,37.867222,38.052223,38.052223,38.052223,38.052223,38.05389,38.05389,38.05389,31.0,52.833332,44.475834,44.541668,52.833332,39.163891,45.763611,40.037777,51.400002,44.662224,44.523609,44.523609,47.15361,44.523609,39.096668,30.034721,39.096668,62.833332,50.200001,43.212776,38.533333,44.75,43.516666,39.37722,60.0,39.445278,39.445278,39.423058,43.212776,43.15472,35.075001,43.212776,75.099998,75.099998,75.0,75.0,75.0,35.799999,34.454166,-33.591667,52.700001,63.279167]},"kind":"numeric","n":22043,"n_null":0,"n_unique":4095,"null_rate":0.0,"stats":{"iqr":11.609027999999995,"kurtosis":7.054009852157675,"max":79.75,"mean":37.11953781236674,"median":41.700001,"min":-84.333336,"n_outliers":2019,"outlier_rate":0.0915937032164406,"q1":35.0,"q3":46.609027999999995,"skew":-2.4423416924720134,"std":19.371906015387005,"zero_rate":0.0}},{"alerts":[],"column":"lon","extras":{"histogram":{"counts":[4,13,18,32,91,163,1447,5659,3904,360,559,962,409,76,52,13,0,93,160,1918,932,817,253,447,311,71,90,315,306,107,78,553,1162,150,206,50,238,11,4,9],"edges":[-176.667007,-167.82356105000002,-158.9801151,-150.13666915000002,-141.2932232,-132.44977725,-123.60633130000002,-114.76288535000002,-105.91943940000002,-97.07599345000001,-88.23254750000001,-79.38910155000002,-70.54565560000002,-61.702209650000015,-52.858763700000026,-44.01531775000001,-35.17187180000002,-26.32842585000003,-17.484979900000013,-8.641533950000024,0.20191199999999299,9.045357949999982,17.88880389999997,26.732249849999988,35.57569579999998,44.419141749999966,53.26258769999998,62.10603364999997,70.94947959999996,79.79292554999995,88.6363715,97.47981744999998,106.32326339999997,115.16670934999996,124.01015529999995,132.85360125,141.6970472,150.54049314999997,159.38393909999996,168.22738504999995,177.070831]},"sample":[-133.154999,16.761944,16.761944,-82.724998,107.554443,-116.216667,-116.216667,-116.216667,-116.216667,-116.216667,-116.216667,-124.783333,-124.783333,14.333333,14.333333,54.378613,54.378613,54.378613,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,-7.3,-7.3,97.933609,97.933609,14.333333,148.683334,14.333333,96.625,8.5,-109.616669,-109.616669,-100.0,-100.0,-100.0,53.777222,4.432778,-22.568611,26.616667,8.4667,-74.106667,-2.629478,-109.763611,8.9764,-75.460152,9.882214,11.2875,11.166667,27.966667,27.483334,27.35,-105.793999,-109.462486,-112.537674,-102.973404,-108.940765,-101.449997,4.432778,8.016667,-9.1002,-9.1002,-75.469002,-106.041,28.623056,-110.300003,-106.048615,-106.048615,104.133301,56.5672,-6.876384,-106.041,-106.079803,-106.00222,-106.1073,-6.451011,39.213799,39.213799,39.224609,-0.915,-105.224701,39.22842,39.22842,39.22842,-106.048615,-9.19,-109.432198,-108.194443,-103.409698,90.75,9.5167,48.016701,-2.903611,-72.550308,-72.639221,-72.566666,47.400002,-8.733333,-105.90976,-109.300552,-105.979721,21.450001,104.709167,-108.184586,39.228508,-64.219345,-105.439041,79.51667,27.35,-55.897221,27.6,-0.87934,88.900627,-72.61528,-72.580963,-72.542458,-72.598877,102.144447,-107.458336,103.0,102.097412,3.30623,3.60363,90.573891,105.786247,-1.795,-112.981903,-65.940002,34.806946,-111.254166,84.0047,-113.75,-112.650002,-103.196945,-101.343887,-110.76667,6.381111,-0.033333,-111.066017,-110.857689,-103.51667,-76.8694,-2.634513,-9.383333,-110.470001,-110.470001,102.25,102.25,-108.166946,0.9,0.9,100.374809,136.544724,73.341942,-58.133301,-68.916664,31.145,108.900002,-103.9617,-0.111111,-111.494652,-111.458488,-111.510078,-103.897903,-103.946098,-110.872475,-112.946602,-109.090202,-64.493332,-112.865601,-108.084167,-111.830559,-111.616669,-106.455559,112.427498,112.183334,-107.099998,-107.099998,-107.099998,-106.400002,-104.583298,-104.637001,-106.099998,-106.860558,-107.572601,-108.120796,-107.047501,-112.993202,-150.005554,55.098598,-1.965,-98.22187,120.033302,-111.620003,-97.875557,-1.158333,-113.300003,128.153305,128.453094,115.360603,62.866669,87.957779,-101.5,126.757202,4.833333,-0.766667,-0.766667,-111.59536,0.73333,-104.053299,-104.610558,-112.800003,-107.192223,1.012499,120.658951,120.658951,120.658951,-112.577003,-108.073891,-108.102501,-108.102501,-108.144447,-44.461399,-107.941498,-107.798058,-107.816597,-107.582497,-108.244797,-108.244797,-104.616699,-104.650002,119.326942,62.655315,62.655315,62.655315,62.655315,116.25,63.3027,70.633301,-111.484749,-111.501808,-111.490646,5.933333,-68.776947,25.466667,64.629562,84.057571,72.193581,72.193581,59.245834,-68.73333,9.1,-104.697777,128.154999,121.692497,-1.55,30.583332,-104.699997,-111.800003,-106.099998,-109.199997,-111.800003,-106.863892,-106.099998,-108.199997,-106.099998,-106.099998,-104.5,-108.199997,-109.0,-107.0,-108.199997,-107.974701,-107.974701,-108.199997,-116.800003,-107.0,-107.0,-107.0,-107.0,-107.0,-117.76667,-117.76667,-107.763,-109.0,-107.735703,-107.0,-114.099998,-108.349998,-108.349998,-109.800003,-109.0,-109.800003,-109.800003,-113.300003,-110.699997,-110.699997,-114.433334,-112.300003,-113.599998,-108.199997,-110.683334,-109.0,-110.5,-109.800003,-108.349998,-108.349998,-109.0,-108.301559,-109.0,-107.099998,-107.099998,-105.599998,72.366669,72.366669,111.9767,111.9767,112.0,112.0,114.0,115.0,-75.116669,-102.5,-102.5,-102.5,-103.866669,-102.5,-102.5,-108.800003,-103.199997,-103.199997,-105.0,-104.199997,-103.800003,-102.5,-83.0,-107.0,-107.0,-102.01667,-102.960701,-103.099998,-103.732651,-101.866669,-101.866669,-101.866669,-101.800003,-103.099998,-120.199997,-103.800003,-103.283333,-103.800003,-102.849998,-102.849998,-103.099998,-109.0,-109.0,-109.0,-100.800003,-100.0,-98.110001,-98.110001,111.566666,-112.400002,-112.400002,-109.4842,-82.676903,-100.300003,-100.300003,-117.099998,-117.099998,-100.495201,-98.099998,-98.099998,-98.099998,-101.900002,-101.900002,-101.900002,-99.699997,-100.495201,-100.477097,-100.477097,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-161.899994,-114.099998,-102.300003,-124.199997,-79.300003,-82.070274,-93.199997,-97.599998,-105.699997,-147.800003,-100.300003,-78.787498,-118.5,-81.821114,-81.821114,-117.800003,-120.195831,-117.0,-147.800003,-102.5,-105.699997,-82.566666,-21.969999,-21.969999,-104.271667,-104.271667,-104.271667,-104.271667,-104.271667,-104.251945,-1.316667,-1.316667,-1.316667,1.183333,1.183333,1.083333,1.083333,0.533333,0.533333,-0.3,0.116667,0.183333,40.259724,-102.816666,6.516667,6.516667,-3.933611,-3.933611,-2.574167,-2.574167,-2.574167,-2.574167,-1.883333,-1.883333,-1.883333,-1.883333,-1.888611,-1.888611,-1.888611,-98.099998,-119.25,-73.212502,-73.195831,-119.25,-119.833054,-111.732224,-76.305832,-116.48333,-111.103333,-89.574448,-89.574448,-110.215553,-89.574448,-116.263885,-103.308334,-116.263885,-136.583328,-63.5,-75.456108,-78.599998,-79.333336,-91.76667,-116.138054,11.0,-83.828613,-83.828613,-85.012779,-75.456108,-77.615829,-98.241669,-75.456108,-94.0,-94.0,-95.0,-95.0,-95.0,-86.116669,-87.753891,148.891663,-3.25,-128.544449]},"kind":"numeric","n":22043,"n_null":0,"n_unique":4259,"null_rate":0.0,"stats":{"iqr":114.0398315,"kurtosis":-0.4931940816073621,"max":177.070831,"mean":-47.21219894846436,"median":-98.25,"min":-176.667007,"n_outliers":3,"outlier_rate":0.00013609762736469628,"q1":-108.1672245,"q3":5.872607,"skew":0.9275339699239383,"std":79.13473751117112,"zero_rate":0.00022682937894116047}},{"alerts":[{"code":"outliers","level":"warn","message":"11.6% rows beyond 1.5 IQR"}],"column":"early_age_mya","extras":{"histogram":{"counts":[2334,1665,85,12,2904,1302,2239,454,796,1645,410,1650,421,36,975,193,530,216,61,0,0,0,25,17,21,37,58,770,319,319,176,602,265,382,427,81,334,180,62,40],"edges":[0.0117,13.481407499999998,26.951114999999998,40.42082249999999,53.89052999999999,67.3602375,80.829945,94.2996525,107.76935999999999,121.23906749999999,134.70877499999997,148.17848249999997,161.64818999999997,175.11789749999997,188.58760499999997,202.05731249999997,215.52701999999996,228.99672749999996,242.46643499999996,255.93614249999996,269.40585,282.8755575,296.345265,309.81497249999995,323.28468,336.7543875,350.224095,363.69380249999995,377.16351,390.6332175,404.10292499999997,417.57263249999994,431.04233999999997,444.5120475,457.98175499999996,471.45146249999993,484.92116999999996,498.3908775,511.86058499999996,525.3302924999999,538.8]},"sample":[393.47,372.15,372.15,387.95,419.62,393.47,393.47,393.47,393.47,393.47,393.47,419.62,419.62,419.62,419.62,382.31,372.15,372.15,368.5,368.5,368.5,368.5,372.15,375.2,375.2,379.0,379.0,419.62,413.02,422.7,419.62,393.47,419.62,419.62,393.47,425.0,410.62,410.62,419.62,419.62,419.62,303.7,246.7,227.3,227.3,227.3,227.3,205.7,227.3,215.38,227.3,237.0,227.3,227.3,227.3,227.3,227.3,237.0,227.3,205.7,227.3,227.3,227.3,246.7,227.3,237.0,237.0,237.0,154.8,201.4,199.5,154.8,154.8,168.2,201.4,184.2,154.8,154.8,154.8,154.8,168.2,152.21,152.21,149.2,161.5,154.8,149.2,149.2,152.21,154.8,152.21,192.9,154.8,154.8,149.2,182.9,168.2,199.5,201.4,201.4,201.4,168.2,161.5,154.8,149.2,154.8,201.4,184.2,154.8,149.2,201.4,154.8,184.2,201.4,154.8,201.4,154.8,161.5,201.4,201.4,201.4,201.4,201.4,157.9,201.4,199.5,201.4,201.4,168.2,149.2,152.21,72.2,72.2,143.1,72.2,72.2,83.6,83.6,83.6,83.6,83.6,143.1,143.1,100.5,121.4,83.6,119.57,83.6,121.4,83.6,83.6,125.77,125.77,83.6,72.2,72.2,72.2,121.4,72.2,100.5,119.57,83.6,143.1,72.2,125.77,83.6,83.6,83.6,72.2,72.2,83.6,83.6,83.6,83.6,72.2,83.6,72.2,83.6,72.2,83.6,83.6,72.2,72.2,72.2,72.2,72.2,72.2,72.2,72.2,113.2,121.4,85.7,83.6,83.6,100.5,143.1,113.2,121.4,83.6,113.2,132.6,72.2,113.2,113.2,72.2,93.9,121.4,72.2,121.4,113.2,125.77,125.77,83.6,72.2,113.2,72.2,72.2,83.6,72.2,83.6,83.6,83.6,83.6,83.6,83.6,83.6,83.6,100.5,83.6,83.6,83.6,83.6,83.6,83.6,72.2,72.2,83.6,93.9,93.9,93.9,93.9,143.1,93.9,85.7,83.6,83.6,83.6,83.6,119.57,143.1,72.2,83.6,85.7,85.7,93.9,100.5,121.4,72.2,121.4,121.4,37.71,33.9,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,59.24,57.5,60.9,60.9,60.9,15.98,15.98,15.98,15.98,15.98,15.98,5.333,3.6,13.8,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,18.5,18.5,18.5,16.3,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,16.3,16.3,18.5,18.5,18.5,18.5,18.5,16.3,16.3,16.3,0.774,1.4,1.4,4.7,2.58,2.58,2.58,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,0.129,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,149.2,149.2,72.2,83.6,83.6,83.6,83.6,83.6,121.4,121.4,110.1,113.2,113.2,121.4,113.2,121.4,121.4,121.4,121.4,121.4,121.4,143.1,100.5,113.2,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,358.86,538.8,515.3,515.3,515.3,509.7,506.5,509.7,506.5,497.0,504.5,504.5,497.0,538.8,483.4,486.85,481.7,468.0,471.3,457.3,458.2,458.2,450.2,449.5,448.8,449.6,449.6,449.6,443.1,443.1,443.1,453.8,432.9,432.9,432.9,432.9,432.9,457.3,323.4,452.3,443.1,410.62]},"kind":"numeric","n":22043,"n_null":0,"n_unique":164,"null_rate":0.0,"stats":{"iqr":138.0,"kurtosis":0.07677114135836893,"max":538.8,"mean":154.67317342466995,"median":110.1,"min":0.0117,"n_outliers":2549,"outlier_rate":0.1156376173842036,"q1":63.4,"q3":201.4,"skew":1.1313968758308393,"std":143.08815184956683,"zero_rate":0.0}},{"alerts":[{"code":"outliers","level":"warn","message":"11.5% rows beyond 1.5 IQR"}],"column":"late_age_mya","extras":{"histogram":{"counts":[2732,1291,69,7,2911,3279,374,949,922,1043,1317,671,368,135,634,1003,35,123,63,2,0,0,12,34,19,4,83,133,828,467,190,530,170,141,529,299,111,310,191,64],"edges":[0.0,13.025,26.05,39.075,52.1,65.125,78.15,91.175,104.2,117.22500000000001,130.25,143.275,156.3,169.32500000000002,182.35,195.375,208.4,221.425,234.45000000000002,247.475,260.5,273.52500000000003,286.55,299.575,312.6,325.625,338.65000000000003,351.675,364.7,377.725,390.75,403.77500000000003,416.8,429.825,442.85,455.875,468.90000000000003,481.925,494.95,507.975,521.0]},"sample":[382.31,368.5,368.5,382.31,410.62,387.95,387.95,387.95,387.95,387.95,387.95,413.02,413.02,413.02,413.02,372.15,368.5,368.5,365.2,365.2,365.2,365.2,368.5,372.15,372.15,375.2,375.2,413.02,410.62,413.02,410.62,387.95,413.02,387.95,387.95,393.47,393.47,393.47,413.02,413.02,413.02,298.9,237.0,205.7,201.4,205.7,205.7,201.4,205.7,211.18,201.4,227.3,205.7,205.7,201.4,201.4,201.4,227.3,205.7,199.5,205.7,205.7,205.7,237.0,205.7,227.3,227.3,201.4,143.1,192.9,184.2,143.1,143.1,161.5,184.2,165.3,143.1,143.1,143.1,143.1,165.3,149.2,149.2,143.1,154.8,143.1,143.1,143.1,143.1,143.1,149.2,174.7,143.1,143.1,143.1,180.4,165.3,192.9,192.9,192.9,192.9,165.3,149.2,143.1,145.06,143.1,199.5,168.2,143.1,143.1,199.5,143.1,170.9,192.9,143.1,192.9,143.1,154.8,192.9,192.9,192.9,199.5,174.7,152.21,174.7,192.9,199.5,199.5,143.1,143.1,149.2,66.0,66.0,66.0,66.0,66.0,72.2,72.2,72.2,72.2,72.2,121.4,93.9,93.9,100.5,72.2,113.2,72.2,119.57,72.2,72.2,121.4,121.4,72.2,66.0,66.0,66.0,113.2,66.0,66.0,113.2,66.0,100.5,66.0,121.4,72.2,72.2,72.2,66.0,66.0,72.2,66.0,72.2,66.0,66.0,72.2,66.0,72.2,66.0,72.2,72.2,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,100.5,100.5,83.6,66.0,66.0,66.0,137.05,110.1,113.2,72.2,100.5,119.57,66.0,100.5,100.5,66.0,89.8,113.2,66.0,93.9,100.5,121.4,121.4,72.2,66.0,100.5,66.0,66.0,66.0,66.0,72.2,72.2,72.2,72.2,72.2,72.2,72.2,72.2,93.9,72.2,72.2,72.2,72.2,72.2,72.2,66.0,66.0,72.2,89.8,89.8,89.8,89.8,100.5,89.8,83.6,72.2,72.2,72.2,72.2,113.2,132.6,66.0,66.0,83.6,83.6,89.8,66.0,100.5,66.0,100.5,100.5,33.9,27.3,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,57.5,57.5,57.5,57.5,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,56.0,56.0,57.5,57.5,57.5,11.63,11.63,5.333,5.333,11.63,11.63,2.58,2.58,11.0,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,16.3,16.3,16.3,12.5,16.3,16.3,16.3,16.3,16.3,16.3,16.3,16.3,16.3,12.5,12.5,16.3,16.3,16.3,16.3,16.3,12.5,12.5,12.5,0.0117,0.21,0.21,0.21,1.8,1.8,1.8,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.0117,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.014,0.21,0.014,0.21,0.21,0.21,0.21,143.1,143.1,66.0,72.2,72.2,72.2,72.2,72.2,113.2,113.2,106.3,110.1,110.1,113.2,110.1,113.2,113.2,113.2,113.2,113.2,113.2,137.05,93.9,100.5,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,342.9,486.85,509.7,509.7,509.7,505.3,497.0,505.3,497.0,486.85,497.0,497.0,486.85,486.85,481.7,477.1,475.3,458.2,457.3,449.6,449.5,449.5,445.5,443.1,445.2,445.5,445.5,445.5,432.9,432.9,419.62,451.0,426.7,426.7,426.7,426.7,426.7,449.6,318.6,449.0,432.9,399.5]},"kind":"numeric","n":22043,"n_null":0,"n_unique":156,"null_rate":0.0,"stats":{"iqr":132.0,"kurtosis":0.123148868475361,"max":521.0,"mean":147.52259406160687,"median":93.9,"min":0.0,"n_outliers":2535,"outlier_rate":0.11500249512316835,"q1":60.9,"q3":192.9,"skew":1.1692870694044197,"std":141.7238408030002,"zero_rate":0.0011341468947058022}},{"alerts":[],"column":"period","extras":{"singletons":31,"top_values":[["Irvingtonian",1723],["Late Campanian",1088],["Torrejonian",935],["Tiffanian",923],["Puercan",778],["Kimmeridgian",636],["Hettangian",607],["Aptian",600],["Harrisonian",592],["Late Maastrichtian",544],["Norian",516],["Lochkovian",460],["Early Barremian",449],["Hemingfordian",441],["Tithonian",408],["Middle Campanian",359],["Early Famennian",346],["Early Albian",327],["Lancian",320],["Maastrichtian",314]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":298,"null_rate":0.0,"stats":{"cardinality":298,"entropy":6.421955981022376,"entropy_ratio":0.781338886656782,"top_rate":0.0781654039831239,"top_value":"Irvingtonian"}},{"alerts":[],"column":"late_interval","extras":{"singletons":22,"top_values":[["",18319],["Tithonian",548],["Sinemurian",430],["Late Campanian",183],["Early Cenomanian",132],["Albian",129],["Rhaetian",119],["Early Maastrichtian",111],["Early Tithonian",102],["Late Turonian",92],["Maastrichtian",72],["Harnagian",62],["Santonian",61],["Early Aptian",57],["Tiffanian",57],["Early Albian",56],["Barremian",54],["Pliensbachian",50],["Toarcian",50],["Cenomanian",45]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":138,"null_rate":0.0,"stats":{"cardinality":138,"entropy":1.5912749204207972,"entropy_ratio":0.22385446235659692,"top_rate":0.8310574785646236,"top_value":""}},{"alerts":[],"column":"phylum","extras":{"singletons":0,"top_values":[["Chordata",17993],["Mollusca",2000],["Arthropoda",2000],["",50]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":0.8872694459944851,"entropy_ratio":0.44363472299724255,"top_rate":0.81626820305766,"top_value":"Chordata"}},{"alerts":[],"column":"class","extras":{"singletons":2,"top_values":[["Mammalia",7015],["Saurischia",5507],["Ornithischia",2811],["Cephalopoda",2000],["Trilobita",2000],["Conodonta",1883],["Reptilia",568],["Aves",92],["",60],["NO_CLASS_SPECIFIED",26],["Pteraspidomorpha",24],["Placodermi",17],["Acanthodii",15],["Osteichthyes",11],["Thelodonti",4],["Osteostraci",4],["Chondrichthyes",4],["Actinopterygii",1],["Galeaspidomorphi",1]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":19,"null_rate":0.0,"stats":{"cardinality":19,"entropy":2.5789996336832743,"entropy_ratio":0.6071195013383377,"top_rate":0.3182416186544481,"top_value":"Mammalia"}},{"alerts":[],"column":"order","extras":{"singletons":25,"top_values":[["NO_ORDER_SPECIFIED",7117],["",3019],["Ammonitida",1572],["Ozarkodinida",1341],["Rodentia",1109],["Artiodactyla",951],["Carnivora",744],["Multituberculata",553],["Perissodactyla",517],["Phacopida",507],["Procreodi",503],["Prioniodontida",348],["Primates",315],["Asaphida",304],["Corynexochida",252],["Ammonoidea",246],["Ptychopariida",238],["Proetida",219],["Cimolesta",218],["Lagomorpha",187]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":99,"null_rate":0.0,"stats":{"cardinality":99,"entropy":3.886792157911311,"entropy_ratio":0.5863000560474654,"top_rate":0.3228689379848478,"top_value":"NO_ORDER_SPECIFIED"}},{"alerts":[],"column":"family","extras":{"singletons":113,"top_values":[["",3418],["NO_FAMILY_SPECIFIED",1996],["Hadrosauridae",689],["Grallatoridae",593],["Palmatolepidae",586],["Arctocyonidae",503],["Polygnathidae",459],["Cricetidae",407],["Equidae",360],["Canidae",358],["Ceratopsidae",336],["Dromaeosauridae",335],["Icriodontidae",272],["Periptychidae",249],["Neoplagiaulacidae",234],["Merycoidodontidae",231],["Camelidae",216],["Tyrannosauridae",184],["Diplodocidae",181],["Asaphidae",172]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":528,"null_rate":0.0,"stats":{"cardinality":528,"entropy":6.5662376262361,"entropy_ratio":0.7260008287544487,"top_rate":0.1550605634441773,"top_value":""}},{"alerts":[{"code":"one_word","level":"warn","message":"98.9% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"88.2% duplicate strings"}],"column":"genus","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[5545,0,0,13,31,0,372,188,704,1676,2307,0,2405,2504,2230,1696,1017,0,522,324,191,50,0,51,21,19,36,20,0,5,7,3,4,40,0,57,4,0,0,1],"edges":[0.0,0.825,1.65,2.4749999999999996,3.3,4.125,4.949999999999999,5.7749999999999995,6.6,7.425,8.25,9.075,9.899999999999999,10.725,11.549999999999999,12.375,13.2,14.024999999999999,14.85,15.674999999999999,16.5,17.325,18.15,18.974999999999998,19.799999999999997,20.625,21.45,22.275,23.099999999999998,23.924999999999997,24.75,25.575,26.4,27.224999999999998,28.049999999999997,28.875,29.7,30.525,31.349999999999998,32.175,33.0]},"near_unique":false,"sample":["","Microtus","Acanthohoplites","","","Synphoroides","Hadrosauropodus","Hemibaculites","Hoploscaphites","Palmatolepis","Carpodaptes","Protungulatum","","Polygnathus","Iguanodon","","Polygnathus","Anadesmoceras","Crassiproetus","Dromaeosaurus","Phuwiangosaurus","Palmatolepis","Mamenchisaurus","Anchisauripus","Promartes","Thomomys","Neoplagiaulax","Grallator","Polygnathus","Paladin","Xenocardia","Temnocyon","Paramerychyus","Sauroposeidon","","","Hystricurus","Ernestokokenia","","Baioconodon","Thomomys","Cedaria","Apatosaurus","Syspacheilus","Cheirurus","Kettneraspis","Ankylosaurus","Sauropelta","Eubrontes","Scaphites"],"top_values":[["",5545],["Palmatolepis",567],["Polygnathus",347],["Grallator",321],["Eubrontes",206],["Icriodus",186],["Equus",185],["Ozarkodina",179],["Baculites",115],["Anomoepus",108],["Camarasaurus",101],["Merychyus",95],["Allosaurus",92],["Chriacus",87],["Barremites",84],["Ptilodus",82],["Isotelus",81],["Richardoestesia",80],["Pelekysgnathus",76],["Triceratops",74]],"top_words":[["palmatolepis",517],["polygnathus",314],["grallator",289],["eubrontes",187],["equus",180],["icriodus",169],["ozarkodina",166],["anomoepus",103],["baculites",100],["camarasaurus",91],["allosaurus",85],["merychyus",84],["chriacus",77],["ptilodus",77],["isotelus",74],["barremites",73],["richardoestesia",70],["periptychus",70],["nicollidina",68],["phyllopachyceras",68],["mesodma",66],["pelekysgnathus",66],["saurornitholestes",65],["microtus",64],["neoplagiaulax",63]],"vocab_skipped":null,"word_histogram":{"counts":[21810,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,233],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":22043,"n_null":0,"n_unique":2608,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.8816857959442908,"emoji_rate":0.0,"len_max":33,"len_mean":8.188449848024316,"len_median":10.0,"len_min":0,"len_p95":15.0,"n_duplicates":19435,"n_empty":5545,"one_word_rate":0.9894297509413419,"readability_flesch_mean":-4.827074999999976,"url_rate":0.0,"vocab_size":2525,"word_mean":1.010570249058658,"word_median":1.0}},{"alerts":[],"column":"country","extras":{"singletons":13,"top_values":[["US",11218],["CA",1830],["CN",1661],["UK",983],["ES",841],["FR",390],["MA",303],["AR",292],["CZ",288],["AU",247],["TZ",218],["UZ",184],["MX",175],["KR",175],["SE",170],["CH",166],["MN",162],["ZA",159],["RU",156],["DE",152]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":93,"null_rate":0.0,"stats":{"cardinality":93,"entropy":3.2927149452236466,"entropy_ratio":0.5035379993570933,"top_rate":0.5089143945923876,"top_value":"US"}},{"alerts":[],"column":"state","extras":{"singletons":125,"top_values":[["Wyoming",1903],["Montana",1394],["",1082],["New Mexico",1048],["Alberta",1009],["Nebraska",950],["England",907],["Guangxi",861],["California",837],["Colorado",540],["Texas",530],["Utah",489],["Nevada",361],["Murcia",333],["North Dakota",325],["South Dakota",316],["Massachusetts",278],["Kansas",273],["Northwest Territories",246],["Arizona",226]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":519,"null_rate":0.0,"stats":{"cardinality":519,"entropy":6.287823408091024,"entropy_ratio":0.6971295702278271,"top_rate":0.08633126162500566,"top_value":"Wyoming"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"formation","extras":{"singletons":0,"top_values":[["",22043]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"collection","extras":{"singletons":0,"top_values":[["",22043]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"outliers","level":"warn","message":"7.0% rows beyond 1.5 IQR"}],"column":"paleolat","extras":{"histogram":{"counts":[13,17,17,11,12,19,13,87,82,185,454,285,350,508,462,669,554,173,144,184,345,212,504,275,888,1581,1491,2167,1711,787,2851,1762,1421,1132,152,8,4,6,3,13],"edges":[-86.16,-81.776,-77.392,-73.008,-68.624,-64.24,-59.855999999999995,-55.471999999999994,-51.087999999999994,-46.70399999999999,-42.31999999999999,-37.93599999999999,-33.55199999999999,-29.167999999999992,-24.783999999999992,-20.39999999999999,-16.01599999999999,-11.63199999999999,-7.2479999999999905,-2.86399999999999,1.5200000000000102,5.904000000000011,10.288000000000011,14.672000000000011,19.05600000000001,23.440000000000012,27.824000000000012,32.20800000000001,36.59200000000001,40.97600000000001,45.360000000000014,49.744,54.128000000000014,58.51200000000003,62.896000000000015,67.28,71.66400000000002,76.04800000000003,80.43200000000002,84.816,89.2]},"sample":[-29.25,-37.43,-31.58,-24.76,-22.3,-22.3,-22.3,-22.3,-22.3,-2.14,-2.14,-19.75,-19.75,-17.18,-16.91,0.94,-18.29,-18.29,-16.54,-16.54,-16.54,-16.54,-15.11,-13.01,-13.01,-13.01,-12.24,-37.6,2.62,2.62,2.89,-22.31,-73.67,-23.02,-26.49,-21.12,3.77,3.77,3.77,3.77,-37.31,27.19,25.38,-52.16,7.74,-12.58,34.3,33.99,13.79,11.81,19.88,29.89,12.19,12.12,-47.17,43.18,10.65,9.88,19.17,-52.22,15.25,16.88,25.23,28.61,30.33,-52.15,-52.15,21.08,28.8,30.53,30.69,28.61,-45.26,6.11,46.84,28.57,28.58,28.57,26.25,23.84,-37.95,-38.8,-38.8,32.68,25.2,-38.8,-38.82,-38.77,-41.81,24.53,24.64,25.97,31.26,31.15,-38.8,-31.38,34.26,22.11,26.14,28.84,6.11,-44.36,32.53,28.8,29.36,28.57,31.56,26.3,30.77,28.0,37.38,-43.2,15.0,39.7,27.49,21.3,26.84,20.52,22.11,20.52,-22.68,-38.8,13.13,29.95,37.38,22.55,-43.65,48.76,-28.27,28.8,19.57,27.97,50.66,60.93,37.45,59.56,59.56,59.56,56.43,12.67,19.56,-30.87,53.97,55.79,38.77,37.43,26.86,40.52,47.1,62.52,-54.87,35.88,10.98,10.98,61.01,39.13,39.15,-34.14,38.33,48.53,-80.63,36.49,56.04,56.42,56.5,56.59,37.47,60.91,56.46,59.28,60.93,53.68,59.31,64.06,60.91,60.91,58.55,53.8,58.5,43.96,30.96,31.35,40.47,58.36,58.44,58.36,58.34,48.89,41.53,31.01,88.97,88.97,88.78,45.38,12.94,-35.15,35.66,45.95,33.0,62.34,60.58,38.03,36.21,62.34,62.33,58.29,-39.72,49.46,56.11,56.11,35.04,35.04,49.84,49.82,36.64,-40.23,46.27,34.57,51.74,61.56,48.11,60.93,42.38,40.43,46.69,46.59,46.59,46.59,46.59,46.59,46.44,46.35,46.6,46.6,46.6,-25.68,53.75,37.49,38.04,53.81,35.62,60.76,-37.18,35.08,44.04,60.94,60.91,48.55,31.02,26.5,34.7,34.7,60.94,44.71,44.71,42.37,37.32,-36.58,35.88,31.09,52.45,20.56,11.74,-47.15,47.4,48.82,52.32,52.31,47.4,47.4,59.89,59.89,57.47,57.47,50.68,50.98,47.4,47.4,57.09,46.7,46.7,46.9,61.93,61.93,45.82,45.82,50.54,50.54,46.7,46.56,50.26,55.86,45.82,53.83,53.83,53.83,53.83,54.42,54.42,56.02,51.28,60.96,51.99,61.68,61.7,50.78,50.78,50.78,61.88,61.88,51.28,52.65,52.65,53.11,53.46,53.41,23.6,23.6,23.6,23.6,23.6,25.29,28.54,28.54,29.41,29.41,42.93,39.65,39.74,39.74,4.09,4.04,47.87,47.1,46.72,46.88,47.74,47.22,47.22,47.74,33.14,46.72,46.72,47.05,47.05,47.05,45.64,46.73,46.41,45.82,46.98,47.05,46.73,45.82,46.14,46.38,45.37,45.82,44.85,45.82,46.73,46.38,53.88,46.73,45.91,45.91,45.59,45.59,36.51,30.02,40.43,40.43,40.43,35.08,34.04,34.04,34.04,37.28,37.28,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,35.08,66.88,34.89,22.04,20.55,38.85,28.97,36.21,36.21,37.44,28.97,28.97,39.84,39.84,27.21,36.94,36.94,38.53,32.27,33.99,29.85,-6.9,-6.45,-3.24,47.18,53.79,52.1,51.85,51.86,51.86,51.86,52.09,41.07,41.04,41.28,41.28,41.28,41.28,45.82,45.82,43.29,51.73,51.72,33.84,42.09,42.09,32.02,26.08,25.75,25.75,25.75,25.61,25.61,25.61,25.61,25.61,25.61,25.61,25.61,-27.71,-19.68,7.68,7.21,7.68,39.62,30.11,8.22,37.83,30.02,19.53,26.4,26.4,18.51,31.07,29.84,18.7,27.85,-8.06,-8.06,-20.05,-20.05,-14.57,-18.32,-8.93,-27.67,-24.44,-17.08,-24.78,-28.14,-28.52,-31.66,-17.34,3.78,3.78,3.78,3.83,5.87,-18.49,-28.27,-24.26,12.0,-34.75,-34.75,-31.58,-4.74,-27.49,-26.9,-26.9]},"kind":"numeric","n":22043,"n_null":491,"n_unique":3214,"null_rate":0.022274645012021956,"stats":{"iqr":30.639999999999997,"kurtosis":0.26460067116566943,"max":89.2,"mean":26.45988864142539,"median":34.89,"min":-86.16,"n_outliers":1503,"outlier_rate":0.06973830734966592,"q1":16.34,"q3":46.98,"skew":-1.079632955397913,"std":29.543137135453637,"zero_rate":0.0}},{"alerts":[],"column":"paleolng","extras":{"histogram":{"counts":[3,12,4,31,31,121,345,968,726,1791,244,2539,3233,949,404,1084,478,154,57,626,210,1004,1648,1074,534,129,52,82,284,257,663,486,156,315,434,182,192,40,7,3],"edges":[-177.6,-168.9425,-160.285,-151.6275,-142.97,-134.3125,-125.655,-116.9975,-108.34,-99.6825,-91.025,-82.3675,-73.71000000000001,-65.05250000000001,-56.39500000000001,-47.73750000000001,-39.08000000000001,-30.422500000000014,-21.765000000000015,-13.107500000000016,-4.450000000000017,4.207499999999982,12.86499999999998,21.52249999999998,30.17999999999998,38.83749999999998,47.494999999999976,56.152499999999975,64.80999999999997,73.46749999999997,82.12499999999997,90.78249999999994,99.43999999999997,108.0975,116.75499999999997,125.41249999999994,134.06999999999996,142.7275,151.38499999999996,160.04249999999993,168.7]},"sample":[10.95,-84.88,-50.06,11.54,-77.85,-77.85,-77.85,-77.85,-77.85,-82.66,-82.66,-0.42,-0.42,9.31,9.52,0.82,92.81,92.81,91.22,91.22,91.22,91.22,90.03,88.5,88.5,88.5,87.97,-12.45,-84.66,-84.66,-69.65,126.28,-74.74,126.57,-75.39,16.73,-69.75,-69.75,-69.75,-69.75,-27.45,19.01,25.14,7.58,-30.75,36.62,8.84,8.32,-10.07,-11.61,30.48,12.1,-11.19,-11.14,16.76,122.21,-39.9,-37.44,35.77,7.99,-36.53,-10.44,25.35,21.05,16.02,-0.92,-0.92,-9.08,-40.29,-40.87,-41.97,-43.89,14.61,-36.66,96.31,-40.51,-40.43,-40.45,-40.53,2.54,31.29,31.25,31.25,12.74,-40.92,31.25,31.25,31.3,18.03,8.89,-36.14,-43.53,-38.06,120.27,31.26,22.74,18.27,-9.24,-43.78,19.67,-36.66,14.8,-34.27,-40.28,-43.88,-40.51,15.38,-40.53,17.7,-43.92,122.22,15.17,-39.65,21.5,33.42,-38.75,13.12,-9.58,-9.24,-9.58,37.19,31.25,-41.8,21.2,122.21,-37.65,14.98,92.47,33.5,-40.28,-9.47,14.69,127.48,-65.66,78.59,-71.97,-71.96,-71.96,-66.52,21.11,18.28,72.2,-65.56,-66.78,-69.01,-41.19,22.91,106.74,52.3,-66.82,-157.05,121.86,122.6,122.6,-65.7,98.75,98.4,69.9,96.71,118.25,114.67,93.77,-62.88,-62.81,-62.82,-62.11,24.74,-65.66,-62.06,-66.42,-65.65,-66.02,-66.1,-68.22,-65.55,-65.55,-65.58,-65.84,-64.0,16.07,10.77,9.81,112.9,-65.28,-65.37,-65.19,-64.03,-76.14,129.45,14.24,-107.56,-107.56,-116.22,-70.19,34.83,-38.54,-48.35,143.15,16.11,-67.67,-65.8,-46.35,138.18,-67.8,-67.7,-64.37,-40.01,97.36,-59.67,-59.67,138.78,138.78,-68.08,-68.04,23.7,66.05,-60.43,-68.21,-63.7,-71.56,124.45,-65.7,132.4,131.39,-70.95,-71.08,-71.08,-71.08,-71.08,-71.08,-70.9,-70.97,-71.05,-70.96,-70.98,-30.29,-65.79,-46.94,-46.34,-67.14,69.84,-68.14,-34.92,72.33,77.27,-65.76,-65.67,-75.01,23.11,19.52,-68.04,-68.04,-65.77,-36.63,-36.63,64.48,79.67,43.72,64.01,14.08,125.6,31.91,72.15,-57.58,-76.0,-70.92,-71.43,-71.42,-76.0,-76.0,-68.59,-68.59,-66.46,-66.46,-77.82,-78.69,-76.0,-76.0,-72.05,-77.16,-77.16,-76.78,-74.71,-74.71,-76.11,-76.11,-74.88,-74.88,-77.16,-76.69,-79.88,-67.09,-76.11,-73.27,-73.27,-73.27,-73.27,-73.72,-73.72,-73.65,-79.72,-78.13,-78.57,-77.31,-76.31,-77.86,-77.86,-77.86,-76.38,-76.38,-79.72,-75.98,-75.98,-77.05,-73.64,-72.03,101.5,101.5,101.5,101.5,101.5,73.87,73.71,73.71,73.8,73.8,110.18,113.23,114.42,114.42,-71.53,-71.56,-91.74,-93.37,-93.8,-94.64,-93.95,-90.01,-90.01,-93.95,-72.16,-93.8,-93.8,-93.54,-93.54,-93.54,-95.12,-94.98,-94.87,-97.28,-93.62,-93.54,-94.98,-97.28,-95.06,-95.78,-97.3,-97.28,-95.42,-97.28,-94.98,-95.78,-100.38,-94.98,-93.24,-93.24,-91.27,-91.27,-111.39,-81.8,-97.7,-97.7,-97.7,-116.69,-99.34,-99.34,-99.34,-100.12,-100.1,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.69,-161.94,-87.5,-101.99,-102.9,-78.85,-81.7,-92.81,-92.81,-99.92,-81.83,-81.83,-78.33,-78.33,-81.46,-99.52,-99.52,-97.21,-109.87,-99.14,-82.19,-31.32,-30.66,-28.53,17.16,-64.84,-63.02,-62.81,-62.78,-62.78,-62.78,-63.0,27.17,26.97,26.55,26.55,26.55,26.55,24.29,24.29,26.58,-47.66,-47.83,38.37,27.08,27.08,29.12,18.38,19.57,19.57,19.57,20.19,20.19,20.19,20.19,20.19,20.19,20.19,20.19,-65.96,-42.76,-125.02,-120.19,-125.02,-119.05,-123.96,-114.61,-118.48,-130.29,-134.41,-127.23,-127.23,-105.65,-117.22,-116.81,-107.27,-117.01,-111.05,-111.05,-110.11,-110.11,-106.99,-115.28,-131.34,-67.88,-112.52,-113.79,-112.52,-65.35,-95.11,-102.39,-105.36,-80.82,-80.82,-80.82,-81.1,-75.69,-117.58,-44.43,-31.52,117.39,-76.51,-76.51,-66.86,-76.31,-68.44,-66.43,-66.43]},"kind":"numeric","n":22043,"n_null":491,"n_unique":3715,"null_rate":0.022274645012021956,"stats":{"iqr":97.35,"kurtosis":-0.48113838740191683,"max":168.7,"mean":-28.57681282479584,"median":-62.150000000000006,"min":-177.6,"n_outliers":3,"outlier_rate":0.00013919821826280623,"q1":-77.16,"q3":20.19,"skew":0.737242760765059,"std":68.91097398994592,"zero_rate":0.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"89.8% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"83.1% duplicate strings"}],"column":"reference_no","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[23,0,0,0,0,0,0,0,0,0,2233,0,0,0,0,0,0,0,0,0,1274,0,0,0,0,0,0,0,0,0,8908,0,0,0,0,0,0,0,0,9605],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0,4.1,4.2,4.300000000000001,4.4,4.5,4.6,4.7,4.800000000000001,4.9,5.0]},"near_unique":false,"sample":["8880","3211","57","41","13037","36816","14666","70","45","4233","1039","1983","76304","136","29198","14071","7265","47","257","13817","9665","4233","12571","17957","1789","6294","1536","15230","4245","60994","6118","52421","6294","42441","13103","13022","61104","6147","14904","6294","1916","26476","53051","60580","17052","19","12202","13525","11946","66"],"top_values":[["4245",794],["6294",743],["70",567],["47",528],["16510",323],["15088",289],["2725",255],["3649",206],["13103",200],["45",199],["122",198],["11749",197],["2255",197],["3558",171],["1186",161],["1534",158],["11964",157],["1536",151],["66",144],["30862",143]],"top_words":[["4245",720],["6294",673],["70",526],["47",472],["16510",299],["15088",264],["2725",229],["3649",181],["2255",177],["13103",176],["122",176],["45",175],["11749",174],["3558",153],["11964",147],["1186",146],["1534",145],["1536",136],["66",132],["30862",129],["4216",125],["829",115],["13525",106],["6095",106],["3175",106]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22043,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":22043,"n_null":0,"n_unique":3725,"null_rate":0.0,"stats":{"allcaps_rate":0.8976545842217484,"boilerplate_rate":0.0,"duplicate_rate":0.8310121126888355,"emoji_rate":0.0,"len_max":5,"len_mean":4.172208864492129,"len_median":4.0,"len_min":1,"len_p95":5.0,"n_duplicates":18318,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3547,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"occurrence_no","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[8,0,0,0,0,0,15,0,0,0,0,0,0,26,0,0,0,0,0,0,1359,0,0,0,0,0,3185,0,0,0,0,0,0,16620,0,0,0,0,0,830],"edges":[1.0,1.15,1.3,1.45,1.6,1.75,1.9,2.05,2.2,2.3499999999999996,2.5,2.65,2.8,2.95,3.1,3.25,3.4,3.55,3.6999999999999997,3.85,4.0,4.15,4.3,4.449999999999999,4.6,4.75,4.9,5.05,5.2,5.35,5.5,5.6499999999999995,5.8,5.95,6.1,6.25,6.3999999999999995,6.55,6.7,6.85,7.0]},"near_unique":true,"sample":["361526","196124","27440","23237","519811","10365","533398","31658","23849","142746","165932","164237","149585","39860","280868","517672","274518","24634","47881","513167","380655","142799","475132","255937","183495","196341","164728","520152","148880","9930","151592","182612","182610","498922","483626","521531","2494","155004","535857","164148","197494","1860","1237630","1820","4630","11746","464436","498930","461468","30717"],"top_values":[],"top_words":[["164260",1],["11739",1],["37907",1],["621561",1],["197544",1],["24377",1],["1439335",1],["25196",1],["513513",1],["27500",1],["498914",1],["459554",1],["28257",1],["195921",1],["165364",1],["164387",1],["150846",1],["4993",1],["559720",1],["498896",1],["481940",1],["5599",1],["197002",1],["3019",1],["196076",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22043,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":22043,"n_null":0,"n_unique":22043,"null_rate":0.0,"stats":{"allcaps_rate":0.9989565848568707,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":7,"len_mean":5.761783786235993,"len_median":6.0,"len_min":1,"len_p95":6.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":20000,"word_mean":1.0,"word_median":1.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","phylum.top_values","class.top_values","country.top_values","state.top_values","early_age_mya.stats","late_age_mya.stats","rank.top_values","collection.stats","formation.stats"],"featured_charts":[{"caption":"Mammalia, Saurischia, and Ornithischia dominate; check how unevenly taxonomic classes are represented.","column":"class","kind":"bar"},{"caption":"Chordata accounts for roughly 82% of records \u2014 see how heavily vertebrate-skewed the dataset is.","column":"phylum","kind":"donut"},{"caption":"Look for the strong US concentration (~51%) before drawing global conclusions.","column":"country","kind":"bar"},{"caption":"Right-skewed distribution from recent fossils to ~539 Mya; watch for the long tail and outliers.","column":"early_age_mya","kind":"histogram"},{"caption":"Most records are identified to species or genus level \u2014 useful context for taxonomic resolution.","column":"rank","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset contains 22,043 fossil occurrence records with 21 columns spanning taxonomy (phylum, class, order, family, genus, name, rank), geography (country, state, lat/lon, paleolat/paleolng), and geologic age (early_age_mya, late_age_mya, period, late_interval). Taxonomy is dominated by Chordata (about 82% of rows) with Mammalia as the leading class (~32%) followed by Saurischia and Ornithischia, suggesting a strong vertebrate and dinosaur emphasis worth examining first. Geographically the data skews heavily to the US (~51%), with Wyoming, Montana, and New Mexico topping the state list, so any spatial analysis should account for this North American concentration. Age columns (early_age_mya, late_age_mya) are right-skewed with medians around 100 Mya and ~11% flagged as outliers, hinting at a long tail of very old records. Note that 'collection' and 'formation' are entirely empty and should be ignored.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","n_duplicates","one_word_rate","len_mean","top_values","top_words","readability_flesch_mean"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds taxonomic names of fossil organisms, dominated by dinosaur and conodont genera/clades like Theropoda (768), Dinosauria (512), and Palmatolepis (376). Values are short\u2014mean length 15 characters and 58% are single words\u2014so the Flesch readability score of -4.13 is a meaningless artifact of scientific Latin. With 4,660 uniques across 22,043 rows and a 78.9% duplicate rate, this behaves as a categorical taxon label rather than a free-text field.","role":"label","scope":"column","target":"name","treatment":"Treat as a high-cardinality categorical; group rare taxa or target-encode before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.cardinality","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the taxonomic rank of each record, with 18 distinct values and no nulls across 22,043 rows. 'Species' dominates at 41.2% (9,082 rows), followed by 'genus' (7,342) and 'unranked clade' (2,828); ranks below family drop off sharply, with 'subfamily' already at only 272. The presence of 'unranked clade' alongside formal Linnaean ranks is worth noting as a non-standard category.","role":"feature","scope":"column","target":"rank","treatment":"One-hot or ordinal-encode by taxonomic depth before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","kurtosis","iqr","outlier_rate","n_outliers","null_rate","n","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"This is a latitude coordinate in degrees, ranging from -84.33 to 79.75 with a median of 41.70 and IQR of 11.61. The strong negative skew (-2.44) and kurtosis (7.05) suggest most points cluster in the northern hemisphere with a long tail of southern-hemisphere outliers (9.16% flagged). No nulls and 4,095 distinct values across 22,043 rows indicate repeated locations rather than per-row unique geocoding.","role":"feature","scope":"column","target":"lat","treatment":"Pair with a longitude column for spatial features; avoid log-transform and keep raw degrees."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","n","n_unique","n_outliers","q1","q3"],"model":"anthropic:claude-opus-4-7","narrative":"This column captures longitude coordinates, with values spanning -176.67 to 177.07, consistent with the full global range. The distribution is right-skewed (skew 0.93) and centered on a median of -98.25, suggesting a heavy concentration of records in the Western Hemisphere (likely the Americas). Only 4,259 unique values across 22,043 rows indicate repeated location points, and just 3 outliers were flagged.","role":"feature","scope":"column","target":"lon","treatment":"Pair with latitude for geospatial features; consider binning or projecting rather than using raw degrees in linear models."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.skew","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric column representing the early bound of an age estimate in millions of years (mya), spanning 0.0117 to 538.8 across 22043 rows with no nulls. The distribution is right-skewed (skew 1.13) with median 110.1 well below mean 154.67, and saturn flags 2549 outlier rows (11.6%) \u2014 consistent with a long Paleozoic tail above the Q3 of 201.4. Only 164 unique values suggest ages are bucketed to standard stratigraphic boundaries rather than continuous measurements.","role":"feature","scope":"column","target":"early_age_mya","treatment":"Consider log-transform or binning to stratigraphic periods before modelling given the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.q1","stats.q3","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric column representing the younger bound of a geological age in millions of years (Mya), with 156 distinct values across 22,043 rows and no nulls. Distribution is right-skewed (skew 1.17) with median 93.9 well below the mean of 147.5, ranging from 0 to 521 Mya, and 11.5% of values (2,535) flagged as outliers on the high end. The bounded discrete value set suggests entries snap to standardized stratigraphic stage boundaries rather than continuous measurements.","role":"feature","scope":"column","target":"late_age_mya","treatment":"Keep as-is or pair with early_age_mya to derive a midpoint; consider log or sqrt transform before regression given the right skew."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records geologic time periods or stages, with 298 distinct values across 22,043 rows and no nulls. The distribution is moderately spread (entropy ratio 0.78), but 'Irvingtonian' dominates at 7.8% (1,723 rows), followed by 'Late Campanian' and various Paleocene/Mesozoic stages. The vocabulary mixes broad and finely-subdivided stage names (e.g., 'Late Maastrichtian' vs. 'Aptian'), so granularity is uneven.","role":"feature","scope":"column","target":"period","treatment":"Group rare stages or map to coarser epochs before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical geological stage marking the late end of a fossil/sample's age range, with 138 distinct stage names like Tithonian, Sinemurian, and Late Campanian. The column is dominated by empty strings (83.1% of 22,043 rows), leaving only ~3,724 records with an actual stage; entropy ratio of 0.22 reflects this sparsity. Among populated values, Tithonian (548) and Sinemurian (430) lead, suggesting Mesozoic specimens are over-represented.","role":"feature","scope":"column","target":"late_interval","treatment":"Treat empty string as missing and either impute from a paired early_interval or use as a sparse categorical with an explicit 'unknown' level."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic phylum label with only 4 distinct values across 22,043 rows. Chordata dominates at 81.6% (17,993 rows), with Mollusca and Arthropoda each at exactly 2,000 \u2014 suggesting deliberate sampling caps on the non-Chordata classes. 50 rows carry an empty-string phylum that is not counted as null.","role":"label","scope":"column","target":"phylum","treatment":"Recode the 50 empty strings to null and treat as a low-cardinality categorical; expect class imbalance if used as a target."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic class label for what appears to be a paleontological/zoological occurrence dataset, with 19 distinct values across 22,043 rows and no nulls. Mammalia dominates at 31.8% (7,015), followed by the dinosaur clades Saurischia (5,507) and Ornithischia (2,811), suggesting a fossil-heavy sample. Note two sentinel-style entries that should be cleaned: 60 empty strings and 26 'NO_CLASS_SPECIFIED' rows.","role":"label","scope":"column","target":"class","treatment":"Normalize the empty strings and 'NO_CLASS_SPECIFIED' to a single missing token, then one-hot or target-encode for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic order assignments for what appears to be a paleontology/biology specimen dataset, mixing extinct groups (Ammonitida, Ozarkodinida, Multituberculata, Phacopida) with extant mammalian orders (Rodentia, Artiodactyla, Carnivora). Coverage is poor: 32.3% are the sentinel 'NO_ORDER_SPECIFIED' and another 3019 rows are empty strings, so roughly 46% of records lack a real order. Across 22043 rows there are 99 distinct values with entropy ratio 0.586, indicating a few orders dominate the long tail.","role":"feature","scope":"column","target":"order","treatment":"Normalise empty strings and 'NO_ORDER_SPECIFIED' into a single missing category before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic family classification for biological specimens, with 528 distinct families across 22043 rows and no nulls. The top value is the empty string at 15.5% (3418 rows), and 'NO_FAMILY_SPECIFIED' adds another 1996 rows\u2014together roughly a quarter of records lack a real family assignment. Among populated values, families like Hadrosauridae (689), Grallatoridae (593), and Palmatolepidae (586) dominate, suggesting a paleontological dataset.","role":"feature","scope":"column","target":"family","treatment":"Normalize empty strings and 'NO_FAMILY_SPECIFIED' to a single missing category, then target- or frequency-encode before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.duplicate_rate","stats.n_duplicates","stats.n_empty","stats.len_mean","stats.len_max","stats.vocab_size","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds taxonomic genus names for fossil records \u2014 single-word Latinate identifiers like Palmatolepis, Polygnathus, and Grallator dominate, with one_word_rate at 0.989. Of 22,043 rows, 5,545 are empty strings and duplicate_rate is 0.88 across 2,608 unique values, so the field is heavily repeated and a quarter of rows carry no genus at all. Length stats (mean 8.2, max 33) and a vocab of 2,525 are consistent with controlled scientific nomenclature rather than free text.","role":"feature","scope":"column","target":"genus","treatment":"Treat as a high-cardinality categorical: normalize case, encode empties as missing, and target/frequency-encode rather than one-hot."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Two-letter ISO country codes across 93 distinct values with no nulls in 22,043 rows. The distribution is heavily US-dominated at 50.9% (11,218 rows), with CA, CN, UK, and ES rounding out a long tail; entropy ratio of 0.50 confirms the concentration. Worth noting 'UK' is used rather than the ISO-standard 'GB', which may complicate joins against canonical country tables.","role":"feature","scope":"column","target":"country","treatment":"Normalize codes (e.g., UK\u2192GB) and group the long tail before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic subdivision (state/province/region) with 519 distinct values spanning US states, Canadian provinces, English regions, and Chinese provinces \u2014 indicating an international dataset rather than US-only. Wyoming leads at 8.6% (1903 rows), followed by Montana and an empty string with 1082 occurrences that null_rate=0 misses. Entropy ratio of 0.70 shows a fairly even spread across the long tail.","role":"feature","scope":"column","target":"state","treatment":"Treat empty strings as missing, then group-encode or target-encode given high cardinality."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"The `formation` column is constant: all 22043 rows hold the empty string, giving cardinality 1 and entropy 0.0. It carries no information and the top_value being \"\" suggests the field was never populated rather than genuinely categorical.","role":"other","scope":"column","target":"formation","treatment":"Drop; single constant value provides no signal."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"The 'collection' column contains a single value across all 22043 rows, and that value is the empty string. Cardinality is 1, entropy is 0, and null_rate is 0.0, meaning the field is technically populated but carries no information. This is likely a vestigial schema field that was never filled in.","role":"metadata","scope":"column","target":"collection","treatment":"Drop; constant empty value provides no signal."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","outlier_rate","n_outliers","null_rate","n","n_unique","q1","q3"],"model":"anthropic:claude-opus-4-7","narrative":"Paleolatitude in degrees, ranging from -86.16 to 89.2 with a median of 34.89 \u2014 consistent with reconstructed latitudes of fossil or geological samples. The distribution is left-skewed (skew -1.08) and ~6.97% of values flag as outliers (1503 records), suggesting a heavy southern-hemisphere tail against a northern-hemisphere mode. Null rate is low at 2.23% and 3214 unique values across 22043 rows indicate substantial repetition, likely from shared site coordinates.","role":"feature","scope":"column","target":"paleolat","treatment":"Use as-is for geographic modelling; consider binning by hemisphere or absolute latitude given the left skew."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.iqr","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"This is paleolongitude \u2014 reconstructed longitudinal coordinates of fossil/sample locations on ancient continental configurations. Values span the full longitudinal range (-177.6 to 168.7) with a mean of -28.58 and median of -62.15, indicating a leftward (western) concentration and moderate positive skew (0.74). Null rate is 2.23% and only 3 outliers are flagged, so the distribution is well-behaved within plausible geographic bounds.","role":"feature","scope":"column","target":"paleolng","treatment":"Use as a geographic coordinate feature; pair with paleolat and consider cyclic encoding since longitude wraps at \u00b1180."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","n_duplicates","one_word_rate","len_mean","len_max","len_min","top_values","stats.allcaps_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Short numeric reference codes (length 1-5, mean 4.17, all single-word) stored as text. Despite the name 'reference_no', it is far from unique: 22,043 rows collapse to 3,725 distinct values with an 83.1% duplicate rate, and the top code '4245' alone appears 794 times. The 89.8% allcaps_rate is a quirk of the detector treating digit-only strings as uppercase.","role":"foreign_key","scope":"column","target":"reference_no","treatment":"Treat as a categorical foreign key and left-join to the reference dimension; do not assume uniqueness."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.duplicate_rate","stats.len_min","stats.len_max","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is almost certainly a primary-key style identifier: every one of the 22043 rows holds a unique single-token value (n_unique == n, one_word_rate 1.0), with no nulls or duplicates. Lengths range from 1 to 7 characters and the top words are all numeric strings like '164260' and '1439335', so the field is stored as text but contains integer occurrence numbers. The 'allcaps' alert is a quirk of the detector treating digit-only strings as uppercase and can be ignored.","role":"identifier","scope":"column","target":"occurrence_no","treatment":"Use as a row key for joins; do not feed into modelling."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":6649,"prompt_tokens":25445,"total_tokens":32094}},"language_counts":{},"meta":{"generated_at":"2026-05-01T23:08:24+00:00","mode":"full","row_count":22043,"sampled_rows":22043,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/fossils.json"},"notes":[],"saturn_version":"0.2.0","schema":{"class":"categorical","collection":"categorical","country":"categorical","early_age_mya":"numeric","family":"categorical","formation":"categorical","genus":"text","lat":"numeric","late_age_mya":"numeric","late_interval":"categorical","lon":"numeric","name":"text","occurrence_no":"text","order":"categorical","paleolat":"numeric","paleolng":"numeric","period":"categorical","phylum":"categorical","rank":"categorical","reference_no":"text","state":"categorical"}}
