{"columns":[{"alerts":[{"code":"one_word","level":"warn","message":"58.5% rows are a single word"},{"code":"duplicates","level":"warn","message":"78.9% duplicate strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[72,190,110,466,942,2265,2027,1560,1821,1566,2019,626,923,808,1219,1117,747,889,549,531,719,307,119,178,63,52,22,8,2,17,19,11,50,13,9,2,1,3,0,1],"edges":[3.0,4.1,5.2,6.300000000000001,7.4,8.5,9.600000000000001,10.700000000000001,11.8,12.9,14.0,15.100000000000001,16.200000000000003,17.3,18.400000000000002,19.5,20.6,21.700000000000003,22.8,23.900000000000002,25.0,26.1,27.200000000000003,28.3,29.400000000000002,30.500000000000004,31.6,32.7,33.800000000000004,34.900000000000006,36.0,37.1,38.2,39.300000000000004,40.400000000000006,41.5,42.6,43.7,44.800000000000004,45.900000000000006,47.0]},"near_unique":false,"sample":["Animalia","Microtus ochrogaster","Acanthohoplites","Ammonoidea","Theropoda","Synphoroides","Hadrosauropodus leonardii","Moutoniceras moutonianum","Hoploscaphites","Palmatolepis","Carpodaptes rosei","Protungulatum","Lithostrotia","Polygnathus","Iguanodon","Sauropoda","Polygnathus","Anadesmoceras","Crassiproetus crassimarginatus","Dromaeosaurus albertensis","Phuwiangosaurus sirindhornae","Palmatolepis glabra","Mamenchisaurus","Anchisauripus bibractensis","Promartes lepidus","Thomomys bottae","Neoplagiaulax nanophus","Grallator tenuis","Polygnathus decorosa","Paladin","Xenocardia diversidens","Temnocyon percussor","Paramerychyus relictus","Sauroposeidon proteles","Tyrannosauridae","Dinosauria","Hystricurus","Ernestokokenia chaishoer","Iguanodontidae","Baioconodon nordicus","Thomomys","Cedaria","Apatosaurus","Syspacheilus","Cheirurus","Kettneraspis","Ankylosaurus magniventris","Sauropelta edwardsorum","Eubrontes","Scaphites 
meriani"],"top_values":[["Theropoda",768],["Dinosauria",512],["Sauropoda",426],["Hadrosauridae",411],["Palmatolepis",376],["Polygnathus",235],["Ammonoidea",195],["Ceratopsidae",169],["Ornithopoda",141],["Icriodus",135],["Ozarkodina",122],["Equus",119],["Grallator",118],["Ceratopsia",94],["Palmatolepis glabra",91],["Ornithischia",90],["Barremites",83],["Prosauropoda",81],["Tyrannosauridae",81],["Dromaeosauridae",80]],"top_words":[["theropoda",687],["palmatolepis",517],["dinosauria",465],["sauropoda",387],["hadrosauridae",376],["polygnathus",314],["grallator",286],["eubrontes",182],["equus",180],["ammonoidea",175],["icriodus",169],["ozarkodina",151],["ceratopsidae",147],["ornithopoda",133],["anomoepus",101],["baculites",100],["camarasaurus",90],["minuta",88],["allosaurus",85],["glabra",84],["merychyus",84],["ornithischia",83],["ceratopsia",83],["ptilodus",77],["tyrannosauridae",73]],"vocab_skipped":null,"word_histogram":{"counts":[12887,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8948,0,0,0,0,0,0,0,0,0,0,0,0,0,208],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":22043,"n_null":0,"n_unique":4660,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.7885950188268385,"emoji_rate":0.0,"len_max":47,"len_mean":15.094769314521617,"len_median":14.0,"len_min":3,"len_p95":26.0,"n_duplicates":17383,"n_empty":0,"one_word_rate":0.5846300412829469,"readability_flesch_mean":-4.12667499999997,"url_rate":0.0,"vocab_size":5140,"word_mean":1.4248060608810054,"word_median":1.0}},{"alerts":[],"column":"rank","extras":{"singletons":1,"top_values":[["species",9082],["genus",7342],
["unranked clade",2828],["family",1716],["subfamily",272],["subclass",205],["class",134],["order",115],["infraorder",97],["superfamily",75],["subgenus",51],["kingdom",50],["suborder",29],["subspecies",23],["tribe",12],["subphylum",9],["superorder",2],["superclass",1]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":18,"null_rate":0.0,"stats":{"cardinality":18,"entropy":2.085282874450611,"entropy_ratio":0.5000768296142842,"top_rate":0.41201288390872387,"top_value":"species"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-2.44"},{"code":"outliers","level":"warn","message":"9.2% rows beyond 1.5 IQR"}],"column":"lat","extras":{"histogram":{"counts":[4,0,0,12,7,0,0,0,16,60,111,127,166,347,40,99,138,6,255,21,0,32,22,12,168,137,1224,669,1615,3128,4830,3520,2987,1322,253,303,99,118,180,15],"edges":[-84.333336,-80.2312526,-76.1291692,-72.02708580000001,-67.92500240000001,-63.822919000000006,-59.7208356,-55.6187522,-51.516668800000005,-47.41458540000001,-43.31250200000001,-39.210418600000004,-35.108335200000006,-31.00625180000001,-26.904168400000003,-22.802085000000005,-18.700001600000007,-14.59791820000001,-10.495834800000011,-6.3937514000000135,-2.2916680000000156,1.8104153999999966,5.9124987999999945,10.014582199999992,14.11666559999999,18.21874899999999,22.320832399999986,26.422915799999984,30.524999199999996,34.627082599999994,38.72916599999999,42.83124939999999,46.93333279999999,51.035416199999986,55.137499599999984,59.23958299999998,63.34166639999998,67.44374979999998,71.54583319999998,75.64791659999997,79.75]},"sample":[55.538334,49.23111,49.23111,41.349998,22.394722,39.216667,39.216667,39.216667,39.216667,39.216667,39.216667,62.480556,62.480556,49.5,49.5,62.733334,62.733334,62.733334,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,24.450001,31.366667,31.366667,18.157778,18.157778,49.5,-33.200001,49.5,22.096666,39.433334,44.950001,44.950001,73.5,73.5,73.5,33.267223,46.841946,71.383614,-30.950001,47.700001,40.82638
9,51.60265,34.942501,49.0644,40.247086,44.081387,49.460835,49.233334,-28.416668,-29.033333,-29.883333,35.2374,34.43742,37.122406,37.997799,38.620537,33.107777,46.841946,47.5,30.8146,30.8146,40.095001,41.893101,-28.509443,36.099998,42.017776,42.017776,28.8333,30.984501,31.733433,41.893101,41.893101,41.893055,41.8825,32.108208,-9.679919,-9.679919,-9.709615,54.16,38.539501,-9.704556,-9.704556,-9.704556,42.017776,39.139999,40.586899,38.638058,43.409801,43.033333,48.650002,-14.6167,50.737499,42.517487,41.693111,42.273609,-16.48,38.566666,42.155437,40.441387,41.893055,50.849998,29.355556,43.622528,-9.708957,45.397652,42.629375,19.266666,-29.133333,-31.868889,-30.35,40.661667,45.147804,42.256668,42.57724,42.610104,42.294674,26.343056,44.051388,36.640278,25.173765,43.74881,44.270752,42.963055,32.410378,51.551666,51.819099,-17.687,23.380833,39.267223,47.5919,48.633301,48.966599,29.138056,25.505556,31.666668,19.317499,20.892221,38.936169,39.155956,29.316668,39.030499,42.664555,38.799999,49.119999,49.119999,16.700001,16.666668,36.269444,42.166668,42.166668,43.44128,36.121387,23.057222,-32.216702,-43.783333,16.695,39.905277,45.948799,40.611942,50.736988,50.749004,50.748341,46.3629,45.93903,49.127655,51.8046,41.627701,-38.200832,51.542702,36.343887,37.579166,50.688,47.96389,43.83139,43.741669,47.5667,47.549999,47.549999,47.516701,43.233299,43.116329,47.799999,40.285278,43.219398,45.468201,35.195,49.9147,62.500557,51.784698,50.606667,29.837761,41.25,50.599998,32.253334,50.655834,53.633331,34.907101,34.992802,24.2983,44.583328,55.903332,46.900002,37.257801,48.766666,40.950001,40.950001,50.815033,42.26667,36.4884,43.488888,48.900002,33.154167,42.165356,36.886551,36.886551,36.886539,49.283001,36.388332,36.294445,36.284443,36.270279,-2.4781,36.227501,36.024445,36.1548,35.991943,36.285702,36.271099,43.266701,43.266701,35.956108,42.117294,42.117294,42.117294,42.117294,51.9333,42.017159,40.116699,50.744595,50.738434,50.747437,44.433334,-43.238335,-33.799999,52.461189,47.730373,41.378132
,41.378132,42.86639,-21.933332,16.416668,43.36861,34.906387,42.001389,50.666668,29.450001,39.700001,39.099998,47.799999,49.5,39.099998,41.934032,47.799999,36.299999,47.799999,47.799999,39.400002,36.299999,44.299999,35.700001,36.299999,36.5746,36.5746,36.299999,53.0,35.700001,35.700001,35.700001,41.799999,41.799999,35.5,35.5,36.271099,41.700001,41.301399,35.700001,51.0,43.849998,43.849998,45.799999,44.299999,45.799999,45.799999,51.900002,41.799999,41.799999,51.183334,51.5,52.200001,43.200001,41.583332,41.700001,45.799999,45.799999,43.849998,43.849998,45.200001,44.65723,44.299999,43.200001,43.200001,45.400002,32.683334,32.683334,43.647499,43.647499,43.5,43.5,40.0,40.0,3.3,43.299999,43.299999,43.299999,42.416668,43.299999,43.299999,49.599998,29.200001,29.200001,42.099998,42.0,42.400002,43.299999,30.299999,35.700001,35.700001,42.75,42.488899,42.200001,42.1856,43.133331,43.133331,43.133331,43.200001,42.799999,44.900002,42.400002,40.900002,42.400002,42.683334,42.683334,42.799999,49.700001,49.700001,49.599998,42.799999,42.700001,42.490002,42.490002,22.766666,35.700001,35.700001,32.578499,28.955299,37.200001,37.200001,32.900002,32.900002,37.047699,40.200001,40.200001,40.200001,35.0,35.0,35.0,33.799999,37.044701,37.044701,37.044701,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,33.900002,34.799999,66.599998,39.0,21.799999,40.799999,38.700001,28.799168,36.0,38.299999,39.299999,64.800003,37.200001,39.691666,47.0,27.041668,27.041668,33.599998,37.056667,34.799999,64.800003,42.700001,39.299999,29.683332,70.480003,70.480003,43.269444,43.252499,43.269444,43.269444,43.269444,43.269444,50.599998,50.599998,50.599998,51.083332,51.083332,51.083332,51.083332,51.283333,51.283333,52.133331,52.200001,53.283333,43.30389,24.216667,46.833332,46.833332,37.551109,37.551109,37.867222,37.867222,37.867222,37.867222,38.052223,38.052223,38.052223,38.052223,38.05389,38.05389,38.05389,31.0,52.833332,44.475834,44.541668,52.833332,39.163891,4
5.763611,40.037777,51.400002,44.662224,44.523609,44.523609,47.15361,44.523609,39.096668,30.034721,39.096668,62.833332,50.200001,43.212776,38.533333,44.75,43.516666,39.37722,60.0,39.445278,39.445278,39.423058,43.212776,43.15472,35.075001,43.212776,75.099998,75.099998,75.0,75.0,75.0,35.799999,34.454166,-33.591667,52.700001,63.279167]},"kind":"numeric","n":22043,"n_null":0,"n_unique":4095,"null_rate":0.0,"stats":{"iqr":11.609027999999995,"kurtosis":7.054009852157675,"max":79.75,"mean":37.11953781236674,"median":41.700001,"min":-84.333336,"n_outliers":2019,"outlier_rate":0.0915937032164406,"q1":35.0,"q3":46.609027999999995,"skew":-2.4423416924720134,"std":19.371906015387005,"zero_rate":0.0}},{"alerts":[],"column":"lon","extras":{"histogram":{"counts":[4,13,18,32,91,163,1447,5659,3904,360,559,962,409,76,52,13,0,93,160,1918,932,817,253,447,311,71,90,315,306,107,78,553,1162,150,206,50,238,11,4,9],"edges":[-176.667007,-167.82356105000002,-158.9801151,-150.13666915000002,-141.2932232,-132.44977725,-123.60633130000002,-114.76288535000002,-105.91943940000002,-97.07599345000001,-88.23254750000001,-79.38910155000002,-70.54565560000002,-61.702209650000015,-52.858763700000026,-44.01531775000001,-35.17187180000002,-26.32842585000003,-17.484979900000013,-8.641533950000024,0.20191199999999299,9.045357949999982,17.88880389999997,26.732249849999988,35.57569579999998,44.419141749999966,53.26258769999998,62.10603364999997,70.94947959999996,79.79292554999995,88.6363715,97.47981744999998,106.32326339999997,115.16670934999996,124.01015529999995,132.85360125,141.6970472,150.54049314999997,159.38393909999996,168.22738504999995,177.070831]},"sample":[-133.154999,16.761944,16.761944,-82.724998,107.554443,-116.216667,-116.216667,-116.216667,-116.216667,-116.216667,-116.216667,-124.783333,-124.783333,14.333333,14.333333,54.378613,54.378613,54.378613,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,108.300003,-7.3,-7.3,97.933609,97.933609,14.333333,148.683334
,14.333333,96.625,8.5,-109.616669,-109.616669,-100.0,-100.0,-100.0,53.777222,4.432778,-22.568611,26.616667,8.4667,-74.106667,-2.629478,-109.763611,8.9764,-75.460152,9.882214,11.2875,11.166667,27.966667,27.483334,27.35,-105.793999,-109.462486,-112.537674,-102.973404,-108.940765,-101.449997,4.432778,8.016667,-9.1002,-9.1002,-75.469002,-106.041,28.623056,-110.300003,-106.048615,-106.048615,104.133301,56.5672,-6.876384,-106.041,-106.079803,-106.00222,-106.1073,-6.451011,39.213799,39.213799,39.224609,-0.915,-105.224701,39.22842,39.22842,39.22842,-106.048615,-9.19,-109.432198,-108.194443,-103.409698,90.75,9.5167,48.016701,-2.903611,-72.550308,-72.639221,-72.566666,47.400002,-8.733333,-105.90976,-109.300552,-105.979721,21.450001,104.709167,-108.184586,39.228508,-64.219345,-105.439041,79.51667,27.35,-55.897221,27.6,-0.87934,88.900627,-72.61528,-72.580963,-72.542458,-72.598877,102.144447,-107.458336,103.0,102.097412,3.30623,3.60363,90.573891,105.786247,-1.795,-112.981903,-65.940002,34.806946,-111.254166,84.0047,-113.75,-112.650002,-103.196945,-101.343887,-110.76667,6.381111,-0.033333,-111.066017,-110.857689,-103.51667,-76.8694,-2.634513,-9.383333,-110.470001,-110.470001,102.25,102.25,-108.166946,0.9,0.9,100.374809,136.544724,73.341942,-58.133301,-68.916664,31.145,108.900002,-103.9617,-0.111111,-111.494652,-111.458488,-111.510078,-103.897903,-103.946098,-110.872475,-112.946602,-109.090202,-64.493332,-112.865601,-108.084167,-111.830559,-111.616669,-106.455559,112.427498,112.183334,-107.099998,-107.099998,-107.099998,-106.400002,-104.583298,-104.637001,-106.099998,-106.860558,-107.572601,-108.120796,-107.047501,-112.993202,-150.005554,55.098598,-1.965,-98.22187,120.033302,-111.620003,-97.875557,-1.158333,-113.300003,128.153305,128.453094,115.360603,62.866669,87.957779,-101.5,126.757202,4.833333,-0.766667,-0.766667,-111.59536,0.73333,-104.053299,-104.610558,-112.800003,-107.192223,1.012499,120.658951,120.658951,120.658951,-112.577003,-108.073891,-108.102501,-108.102501,-108.1444
47,-44.461399,-107.941498,-107.798058,-107.816597,-107.582497,-108.244797,-108.244797,-104.616699,-104.650002,119.326942,62.655315,62.655315,62.655315,62.655315,116.25,63.3027,70.633301,-111.484749,-111.501808,-111.490646,5.933333,-68.776947,25.466667,64.629562,84.057571,72.193581,72.193581,59.245834,-68.73333,9.1,-104.697777,128.154999,121.692497,-1.55,30.583332,-104.699997,-111.800003,-106.099998,-109.199997,-111.800003,-106.863892,-106.099998,-108.199997,-106.099998,-106.099998,-104.5,-108.199997,-109.0,-107.0,-108.199997,-107.974701,-107.974701,-108.199997,-116.800003,-107.0,-107.0,-107.0,-107.0,-107.0,-117.76667,-117.76667,-107.763,-109.0,-107.735703,-107.0,-114.099998,-108.349998,-108.349998,-109.800003,-109.0,-109.800003,-109.800003,-113.300003,-110.699997,-110.699997,-114.433334,-112.300003,-113.599998,-108.199997,-110.683334,-109.0,-110.5,-109.800003,-108.349998,-108.349998,-109.0,-108.301559,-109.0,-107.099998,-107.099998,-105.599998,72.366669,72.366669,111.9767,111.9767,112.0,112.0,114.0,115.0,-75.116669,-102.5,-102.5,-102.5,-103.866669,-102.5,-102.5,-108.800003,-103.199997,-103.199997,-105.0,-104.199997,-103.800003,-102.5,-83.0,-107.0,-107.0,-102.01667,-102.960701,-103.099998,-103.732651,-101.866669,-101.866669,-101.866669,-101.800003,-103.099998,-120.199997,-103.800003,-103.283333,-103.800003,-102.849998,-102.849998,-103.099998,-109.0,-109.0,-109.0,-100.800003,-100.0,-98.110001,-98.110001,111.566666,-112.400002,-112.400002,-109.4842,-82.676903,-100.300003,-100.300003,-117.099998,-117.099998,-100.495201,-98.099998,-98.099998,-98.099998,-101.900002,-101.900002,-101.900002,-99.699997,-100.495201,-100.477097,-100.477097,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-117.0,-161.899994,-114.099998,-102.300003,-124.199997,-79.300003,-82.070274,-93.199997,-97.599998,-105.699997,-147.800003,-100.300003,-78.787498,-118.5,-81.821114,-81.821114,-117.800003,-120.195831,-117.0,-147.800003,-102.5,-105.699997,-82.566666,-21.969999,
-21.969999,-104.271667,-104.271667,-104.271667,-104.271667,-104.271667,-104.251945,-1.316667,-1.316667,-1.316667,1.183333,1.183333,1.083333,1.083333,0.533333,0.533333,-0.3,0.116667,0.183333,40.259724,-102.816666,6.516667,6.516667,-3.933611,-3.933611,-2.574167,-2.574167,-2.574167,-2.574167,-1.883333,-1.883333,-1.883333,-1.883333,-1.888611,-1.888611,-1.888611,-98.099998,-119.25,-73.212502,-73.195831,-119.25,-119.833054,-111.732224,-76.305832,-116.48333,-111.103333,-89.574448,-89.574448,-110.215553,-89.574448,-116.263885,-103.308334,-116.263885,-136.583328,-63.5,-75.456108,-78.599998,-79.333336,-91.76667,-116.138054,11.0,-83.828613,-83.828613,-85.012779,-75.456108,-77.615829,-98.241669,-75.456108,-94.0,-94.0,-95.0,-95.0,-95.0,-86.116669,-87.753891,148.891663,-3.25,-128.544449]},"kind":"numeric","n":22043,"n_null":0,"n_unique":4259,"null_rate":0.0,"stats":{"iqr":114.0398315,"kurtosis":-0.4931940816073621,"max":177.070831,"mean":-47.21219894846436,"median":-98.25,"min":-176.667007,"n_outliers":3,"outlier_rate":0.00013609762736469628,"q1":-108.1672245,"q3":5.872607,"skew":0.9275339699239383,"std":79.13473751117112,"zero_rate":0.00022682937894116047}},{"alerts":[{"code":"outliers","level":"warn","message":"11.6% rows beyond 1.5 
IQR"}],"column":"early_age_mya","extras":{"histogram":{"counts":[2334,1665,85,12,2904,1302,2239,454,796,1645,410,1650,421,36,975,193,530,216,61,0,0,0,25,17,21,37,58,770,319,319,176,602,265,382,427,81,334,180,62,40],"edges":[0.0117,13.481407499999998,26.951114999999998,40.42082249999999,53.89052999999999,67.3602375,80.829945,94.2996525,107.76935999999999,121.23906749999999,134.70877499999997,148.17848249999997,161.64818999999997,175.11789749999997,188.58760499999997,202.05731249999997,215.52701999999996,228.99672749999996,242.46643499999996,255.93614249999996,269.40585,282.8755575,296.345265,309.81497249999995,323.28468,336.7543875,350.224095,363.69380249999995,377.16351,390.6332175,404.10292499999997,417.57263249999994,431.04233999999997,444.5120475,457.98175499999996,471.45146249999993,484.92116999999996,498.3908775,511.86058499999996,525.3302924999999,538.8]},"sample":[393.47,372.15,372.15,387.95,419.62,393.47,393.47,393.47,393.47,393.47,393.47,419.62,419.62,419.62,419.62,382.31,372.15,372.15,368.5,368.5,368.5,368.5,372.15,375.2,375.2,379.0,379.0,419.62,413.02,422.7,419.62,393.47,419.62,419.62,393.47,425.0,410.62,410.62,419.62,419.62,419.62,303.7,246.7,227.3,227.3,227.3,227.3,205.7,227.3,215.38,227.3,237.0,227.3,227.3,227.3,227.3,227.3,237.0,227.3,205.7,227.3,227.3,227.3,246.7,227.3,237.0,237.0,237.0,154.8,201.4,199.5,154.8,154.8,168.2,201.4,184.2,154.8,154.8,154.8,154.8,168.2,152.21,152.21,149.2,161.5,154.8,149.2,149.2,152.21,154.8,152.21,192.9,154.8,154.8,149.2,182.9,168.2,199.5,201.4,201.4,201.4,168.2,161.5,154.8,149.2,154.8,201.4,184.2,154.8,149.2,201.4,154.8,184.2,201.4,154.8,201.4,154.8,161.5,201.4,201.4,201.4,201.4,201.4,157.9,201.4,199.5,201.4,201.4,168.2,149.2,152.21,72.2,72.2,143.1,72.2,72.2,83.6,83.6,83.6,83.6,83.6,143.1,143.1,100.5,121.4,83.6,119.57,83.6,121.4,83.6,83.6,125.77,125.77,83.6,72.2,72.2,72.2,121.4,72.2,100.5,119.57,83.6,143.1,72.2,125.77,83.6,83.6,83.6,72.2,72.2,83.6,83.6,83.6,83.6,72.2,83.6,72.2,83.6,72.2,83.6,83.6,72.2,72.2,72.2,72.2,72.2
,72.2,72.2,72.2,113.2,121.4,85.7,83.6,83.6,100.5,143.1,113.2,121.4,83.6,113.2,132.6,72.2,113.2,113.2,72.2,93.9,121.4,72.2,121.4,113.2,125.77,125.77,83.6,72.2,113.2,72.2,72.2,83.6,72.2,83.6,83.6,83.6,83.6,83.6,83.6,83.6,83.6,100.5,83.6,83.6,83.6,83.6,83.6,83.6,72.2,72.2,83.6,93.9,93.9,93.9,93.9,143.1,93.9,85.7,83.6,83.6,83.6,83.6,119.57,143.1,72.2,83.6,85.7,85.7,93.9,100.5,121.4,72.2,121.4,121.4,37.71,33.9,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,59.24,57.5,60.9,60.9,60.9,15.98,15.98,15.98,15.98,15.98,15.98,5.333,3.6,13.8,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,23.1,18.5,18.5,18.5,16.3,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,16.3,16.3,18.5,18.5,18.5,18.5,18.5,16.3,16.3,16.3,0.774,1.4,1.4,4.7,2.58,2.58,2.58,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,0.129,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,149.2,149.2,72.2,83.6,83.6,83.6,83.6,83.6,121.4,121.4,110.1,113.2,113.2,121.4,113.2,121.4,121.4,121.4,121.4,121.4,121.4,143.1,100.5,113.2,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,125.77,358.86,538.8,515.3,515.3,515.3,509.7,506.5,509.7,506.5,497.0,504.5,504.5,497.0,538.8,483.4,486.85,481.7,468.0,471.3,457.3,458.2,458.2,450.2,449.5,448.8,449.6,449.6,449.6,443.1,443.1,443.1,453.8,432.9,432.9,432.9,432.9,432.9,457.3,323.4,452.3,443.1,410.62]},"kind":"numeric","n":22043,"n_null":0,"n_unique":164,"null_rate":0.0,"stats":{"iqr":138.0,"kurtosis":0.07677114135836893,"max":538.8,"mean":154.67317342466995,"median":110.1,"min":0.0117,"n_outliers":2549,"outlier_rate":0.1156376173842036,"q1":63.4,"q3":201.4,"skew":1.1313968758308393,"std":143.08815184956683,"zero_rate":0.0}},{"alerts":[{"code":"ou
tliers","level":"warn","message":"11.5% rows beyond 1.5 IQR"}],"column":"late_age_mya","extras":{"histogram":{"counts":[2732,1291,69,7,2911,3279,374,949,922,1043,1317,671,368,135,634,1003,35,123,63,2,0,0,12,34,19,4,83,133,828,467,190,530,170,141,529,299,111,310,191,64],"edges":[0.0,13.025,26.05,39.075,52.1,65.125,78.15,91.175,104.2,117.22500000000001,130.25,143.275,156.3,169.32500000000002,182.35,195.375,208.4,221.425,234.45000000000002,247.475,260.5,273.52500000000003,286.55,299.575,312.6,325.625,338.65000000000003,351.675,364.7,377.725,390.75,403.77500000000003,416.8,429.825,442.85,455.875,468.90000000000003,481.925,494.95,507.975,521.0]},"sample":[382.31,368.5,368.5,382.31,410.62,387.95,387.95,387.95,387.95,387.95,387.95,413.02,413.02,413.02,413.02,372.15,368.5,368.5,365.2,365.2,365.2,365.2,368.5,372.15,372.15,375.2,375.2,413.02,410.62,413.02,410.62,387.95,413.02,387.95,387.95,393.47,393.47,393.47,413.02,413.02,413.02,298.9,237.0,205.7,201.4,205.7,205.7,201.4,205.7,211.18,201.4,227.3,205.7,205.7,201.4,201.4,201.4,227.3,205.7,199.5,205.7,205.7,205.7,237.0,205.7,227.3,227.3,201.4,143.1,192.9,184.2,143.1,143.1,161.5,184.2,165.3,143.1,143.1,143.1,143.1,165.3,149.2,149.2,143.1,154.8,143.1,143.1,143.1,143.1,143.1,149.2,174.7,143.1,143.1,143.1,180.4,165.3,192.9,192.9,192.9,192.9,165.3,149.2,143.1,145.06,143.1,199.5,168.2,143.1,143.1,199.5,143.1,170.9,192.9,143.1,192.9,143.1,154.8,192.9,192.9,192.9,199.5,174.7,152.21,174.7,192.9,199.5,199.5,143.1,143.1,149.2,66.0,66.0,66.0,66.0,66.0,72.2,72.2,72.2,72.2,72.2,121.4,93.9,93.9,100.5,72.2,113.2,72.2,119.57,72.2,72.2,121.4,121.4,72.2,66.0,66.0,66.0,113.2,66.0,66.0,113.2,66.0,100.5,66.0,121.4,72.2,72.2,72.2,66.0,66.0,72.2,66.0,72.2,66.0,66.0,72.2,66.0,72.2,66.0,72.2,72.2,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,100.5,100.5,83.6,66.0,66.0,66.0,137.05,110.1,113.2,72.2,100.5,119.57,66.0,100.5,100.5,66.0,89.8,113.2,66.0,93.9,100.5,121.4,121.4,72.2,66.0,100.5,66.0,66.0,66.0,66.0,72.2,72.2,72.2,72.2,72.2,72.2,72.2,72.2,93.9,72.2,72.2,
72.2,72.2,72.2,72.2,66.0,66.0,72.2,89.8,89.8,89.8,89.8,100.5,89.8,83.6,72.2,72.2,72.2,72.2,113.2,132.6,66.0,66.0,83.6,83.6,89.8,66.0,100.5,66.0,100.5,100.5,33.9,27.3,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,63.4,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,57.5,57.5,57.5,57.5,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,60.9,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,57.5,56.0,56.0,57.5,57.5,57.5,11.63,11.63,5.333,5.333,11.63,11.63,2.58,2.58,11.0,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,18.5,16.3,16.3,16.3,12.5,16.3,16.3,16.3,16.3,16.3,16.3,16.3,16.3,16.3,12.5,12.5,16.3,16.3,16.3,16.3,16.3,12.5,12.5,12.5,0.0117,0.21,0.21,0.21,1.8,1.8,1.8,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.0117,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.21,0.014,0.21,0.014,0.21,0.21,0.21,0.21,143.1,143.1,66.0,72.2,72.2,72.2,72.2,72.2,113.2,113.2,106.3,110.1,110.1,113.2,110.1,113.2,113.2,113.2,113.2,113.2,113.2,137.05,93.9,100.5,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,121.4,342.9,486.85,509.7,509.7,509.7,505.3,497.0,505.3,497.0,486.85,497.0,497.0,486.85,486.85,481.7,477.1,475.3,458.2,457.3,449.6,449.5,449.5,445.5,443.1,445.2,445.5,445.5,445.5,432.9,432.9,419.62,451.0,426.7,426.7,426.7,426.7,426.7,449.6,318.6,449.0,432.9,399.5]},"kind":"numeric","n":22043,"n_null":0,"n_unique":156,"null_rate":0.0,"stats":{"iqr":132.0,"kurtosis":0.123148868475361,"max":521.0,"mean":147.52259406160687,"median":93.9,"min":0.0,"n_outliers":2535,"outlier_rate":0.11500249512316835,"q1":60.9,"q3":192.9,"skew":1.1692870694044197,"std":141.7238408030002,"zero_rate":0.0011341468947058022}},{"alerts":[],"column":"period","extras":{"singletons":31,"top_values":[["Irvingtonian",1723],["Late 
Campanian",1088],["Torrejonian",935],["Tiffanian",923],["Puercan",778],["Kimmeridgian",636],["Hettangian",607],["Aptian",600],["Harrisonian",592],["Late Maastrichtian",544],["Norian",516],["Lochkovian",460],["Early Barremian",449],["Hemingfordian",441],["Tithonian",408],["Middle Campanian",359],["Early Famennian",346],["Early Albian",327],["Lancian",320],["Maastrichtian",314]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":298,"null_rate":0.0,"stats":{"cardinality":298,"entropy":6.421955981022376,"entropy_ratio":0.781338886656782,"top_rate":0.0781654039831239,"top_value":"Irvingtonian"}},{"alerts":[],"column":"late_interval","extras":{"singletons":22,"top_values":[["",18319],["Tithonian",548],["Sinemurian",430],["Late Campanian",183],["Early Cenomanian",132],["Albian",129],["Rhaetian",119],["Early Maastrichtian",111],["Early Tithonian",102],["Late Turonian",92],["Maastrichtian",72],["Harnagian",62],["Santonian",61],["Early Aptian",57],["Tiffanian",57],["Early Albian",56],["Barremian",54],["Pliensbachian",50],["Toarcian",50],["Cenomanian",45]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":138,"null_rate":0.0,"stats":{"cardinality":138,"entropy":1.5912749204207972,"entropy_ratio":0.22385446235659692,"top_rate":0.8310574785646236,"top_value":""}},{"alerts":[],"column":"phylum","extras":{"singletons":0,"top_values":[["Chordata",17993],["Mollusca",2000],["Arthropoda",2000],["",50]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":0.8872694459944851,"entropy_ratio":0.44363472299724255,"top_rate":0.81626820305766,"top_value":"Chordata"}},{"alerts":[],"column":"class","extras":{"singletons":2,"top_values":[["Mammalia",7015],["Saurischia",5507],["Ornithischia",2811],["Cephalopoda",2000],["Trilobita",2000],["Conodonta",1883],["Reptilia",568],["Aves",92],["",60],["NO_CLASS_SPECIFIED",26],["Pteraspidomorpha",24],["Placodermi",17],["Acanthodii",15],["Osteichthyes",11],["Thelodonti",4],["Osteostraci",4],["Ch
ondrichthyes",4],["Actinopterygii",1],["Galeaspidomorphi",1]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":19,"null_rate":0.0,"stats":{"cardinality":19,"entropy":2.5789996336832743,"entropy_ratio":0.6071195013383377,"top_rate":0.3182416186544481,"top_value":"Mammalia"}},{"alerts":[],"column":"order","extras":{"singletons":25,"top_values":[["NO_ORDER_SPECIFIED",7117],["",3019],["Ammonitida",1572],["Ozarkodinida",1341],["Rodentia",1109],["Artiodactyla",951],["Carnivora",744],["Multituberculata",553],["Perissodactyla",517],["Phacopida",507],["Procreodi",503],["Prioniodontida",348],["Primates",315],["Asaphida",304],["Corynexochida",252],["Ammonoidea",246],["Ptychopariida",238],["Proetida",219],["Cimolesta",218],["Lagomorpha",187]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":99,"null_rate":0.0,"stats":{"cardinality":99,"entropy":3.886792157911311,"entropy_ratio":0.5863000560474654,"top_rate":0.3228689379848478,"top_value":"NO_ORDER_SPECIFIED"}},{"alerts":[],"column":"family","extras":{"singletons":113,"top_values":[["",3418],["NO_FAMILY_SPECIFIED",1996],["Hadrosauridae",689],["Grallatoridae",593],["Palmatolepidae",586],["Arctocyonidae",503],["Polygnathidae",459],["Cricetidae",407],["Equidae",360],["Canidae",358],["Ceratopsidae",336],["Dromaeosauridae",335],["Icriodontidae",272],["Periptychidae",249],["Neoplagiaulacidae",234],["Merycoidodontidae",231],["Camelidae",216],["Tyrannosauridae",184],["Diplodocidae",181],["Asaphidae",172]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":528,"null_rate":0.0,"stats":{"cardinality":528,"entropy":6.5662376262361,"entropy_ratio":0.7260008287544487,"top_rate":0.1550605634441773,"top_value":""}},{"alerts":[{"code":"one_word","level":"warn","message":"98.9% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"88.2% duplicate 
strings"}],"column":"genus","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[5545,0,0,13,31,0,372,188,704,1676,2307,0,2405,2504,2230,1696,1017,0,522,324,191,50,0,51,21,19,36,20,0,5,7,3,4,40,0,57,4,0,0,1],"edges":[0.0,0.825,1.65,2.4749999999999996,3.3,4.125,4.949999999999999,5.7749999999999995,6.6,7.425,8.25,9.075,9.899999999999999,10.725,11.549999999999999,12.375,13.2,14.024999999999999,14.85,15.674999999999999,16.5,17.325,18.15,18.974999999999998,19.799999999999997,20.625,21.45,22.275,23.099999999999998,23.924999999999997,24.75,25.575,26.4,27.224999999999998,28.049999999999997,28.875,29.7,30.525,31.349999999999998,32.175,33.0]},"near_unique":false,"sample":["","Microtus","Acanthohoplites","","","Synphoroides","Hadrosauropodus","Hemibaculites","Hoploscaphites","Palmatolepis","Carpodaptes","Protungulatum","","Polygnathus","Iguanodon","","Polygnathus","Anadesmoceras","Crassiproetus","Dromaeosaurus","Phuwiangosaurus","Palmatolepis","Mamenchisaurus","Anchisauripus","Promartes","Thomomys","Neoplagiaulax","Grallator","Polygnathus","Paladin","Xenocardia","Temnocyon","Paramerychyus","Sauroposeidon","","","Hystricurus","Ernestokokenia","","Baioconodon","Thomomys","Cedaria","Apatosaurus","Syspacheilus","Cheirurus","Kettneraspis","Ankylosaurus","Sauropelta","Eubrontes","Scaphites"],"top_values":[["",5545],["Palmatolepis",567],["Polygnathus",347],["Grallator",321],["Eubrontes",206],["Icriodus",186],["Equus",185],["Ozarkodina",179],["Baculites",115],["Anomoepus",108],["Camarasaurus",101],["Merychyus",95],["Allosaurus",92],["Chriacus",87],["Barremites",84],["Ptilodus",82],["Isotelus",81],["Richardoestesia",80],["Pelekysgnathus",76],["Triceratops",74]],"top_words":[["palmatolepis",517],["polygnathus",314],["grallator",289],["eubrontes",187],["equus",180],["icriodus",169],["ozarkodina",166],["anomoepus",103],["baculites",100],["camarasaurus",91],["allosaurus",85],["merychyus",84],["chriacus",77],["ptilodus",77],["isotelus",74],["barremites",7
3],["richardoestesia",70],["periptychus",70],["nicollidina",68],["phyllopachyceras",68],["mesodma",66],["pelekysgnathus",66],["saurornitholestes",65],["microtus",64],["neoplagiaulax",63]],"vocab_skipped":null,"word_histogram":{"counts":[21810,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,233],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":22043,"n_null":0,"n_unique":2608,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.8816857959442908,"emoji_rate":0.0,"len_max":33,"len_mean":8.188449848024316,"len_median":10.0,"len_min":0,"len_p95":15.0,"n_duplicates":19435,"n_empty":5545,"one_word_rate":0.9894297509413419,"readability_flesch_mean":-4.827074999999976,"url_rate":0.0,"vocab_size":2525,"word_mean":1.010570249058658,"word_median":1.0}},{"alerts":[],"column":"country","extras":{"singletons":13,"top_values":[["US",11218],["CA",1830],["CN",1661],["UK",983],["ES",841],["FR",390],["MA",303],["AR",292],["CZ",288],["AU",247],["TZ",218],["UZ",184],["MX",175],["KR",175],["SE",170],["CH",166],["MN",162],["ZA",159],["RU",156],["DE",152]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":93,"null_rate":0.0,"stats":{"cardinality":93,"entropy":3.2927149452236466,"entropy_ratio":0.5035379993570933,"top_rate":0.5089143945923876,"top_value":"US"}},{"alerts":[],"column":"state","extras":{"singletons":125,"top_values":[["Wyoming",1903],["Montana",1394],["",1082],["New 
Mexico",1048],["Alberta",1009],["Nebraska",950],["England",907],["Guangxi",861],["California",837],["Colorado",540],["Texas",530],["Utah",489],["Nevada",361],["Murcia",333],["North Dakota",325],["South Dakota",316],["Massachusetts",278],["Kansas",273],["Northwest Territories",246],["Arizona",226]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":519,"null_rate":0.0,"stats":{"cardinality":519,"entropy":6.287823408091024,"entropy_ratio":0.6971295702278271,"top_rate":0.08633126162500566,"top_value":"Wyoming"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"formation","extras":{"singletons":0,"top_values":[["",22043]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"collection","extras":{"singletons":0,"top_values":[["",22043]]},"kind":"categorical","n":22043,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"outliers","level":"warn","message":"7.0% rows beyond 1.5 
IQR"}],"column":"paleolat","extras":{"histogram":{"counts":[13,17,17,11,12,19,13,87,82,185,454,285,350,508,462,669,554,173,144,184,345,212,504,275,888,1581,1491,2167,1711,787,2851,1762,1421,1132,152,8,4,6,3,13],"edges":[-86.16,-81.776,-77.392,-73.008,-68.624,-64.24,-59.855999999999995,-55.471999999999994,-51.087999999999994,-46.70399999999999,-42.31999999999999,-37.93599999999999,-33.55199999999999,-29.167999999999992,-24.783999999999992,-20.39999999999999,-16.01599999999999,-11.63199999999999,-7.2479999999999905,-2.86399999999999,1.5200000000000102,5.904000000000011,10.288000000000011,14.672000000000011,19.05600000000001,23.440000000000012,27.824000000000012,32.20800000000001,36.59200000000001,40.97600000000001,45.360000000000014,49.744,54.128000000000014,58.51200000000003,62.896000000000015,67.28,71.66400000000002,76.04800000000003,80.43200000000002,84.816,89.2]},"sample":[-29.25,-37.43,-31.58,-24.76,-22.3,-22.3,-22.3,-22.3,-22.3,-2.14,-2.14,-19.75,-19.75,-17.18,-16.91,0.94,-18.29,-18.29,-16.54,-16.54,-16.54,-16.54,-15.11,-13.01,-13.01,-13.01,-12.24,-37.6,2.62,2.62,2.89,-22.31,-73.67,-23.02,-26.49,-21.12,3.77,3.77,3.77,3.77,-37.31,27.19,25.38,-52.16,7.74,-12.58,34.3,33.99,13.79,11.81,19.88,29.89,12.19,12.12,-47.17,43.18,10.65,9.88,19.17,-52.22,15.25,16.88,25.23,28.61,30.33,-52.15,-52.15,21.08,28.8,30.53,30.69,28.61,-45.26,6.11,46.84,28.57,28.58,28.57,26.25,23.84,-37.95,-38.8,-38.8,32.68,25.2,-38.8,-38.82,-38.77,-41.81,24.53,24.64,25.97,31.26,31.15,-38.8,-31.38,34.26,22.11,26.14,28.84,6.11,-44.36,32.53,28.8,29.36,28.57,31.56,26.3,30.77,28.0,37.38,-43.2,15.0,39.7,27.49,21.3,26.84,20.52,22.11,20.52,-22.68,-38.8,13.13,29.95,37.38,22.55,-43.65,48.76,-28.27,28.8,19.57,27.97,50.66,60.93,37.45,59.56,59.56,59.56,56.43,12.67,19.56,-30.87,53.97,55.79,38.77,37.43,26.86,40.52,47.1,62.52,-54.87,35.88,10.98,10.98,61.01,39.13,39.15,-34.14,38.33,48.53,-80.63,36.49,56.04,56.42,56.5,56.59,37.47,60.91,56.46,59.28,60.93,53.68,59.31,64.06,60.91,60.91,58.55,53.8,58.5,43.96,30.96,31.35,4
0.47,58.36,58.44,58.36,58.34,48.89,41.53,31.01,88.97,88.97,88.78,45.38,12.94,-35.15,35.66,45.95,33.0,62.34,60.58,38.03,36.21,62.34,62.33,58.29,-39.72,49.46,56.11,56.11,35.04,35.04,49.84,49.82,36.64,-40.23,46.27,34.57,51.74,61.56,48.11,60.93,42.38,40.43,46.69,46.59,46.59,46.59,46.59,46.59,46.44,46.35,46.6,46.6,46.6,-25.68,53.75,37.49,38.04,53.81,35.62,60.76,-37.18,35.08,44.04,60.94,60.91,48.55,31.02,26.5,34.7,34.7,60.94,44.71,44.71,42.37,37.32,-36.58,35.88,31.09,52.45,20.56,11.74,-47.15,47.4,48.82,52.32,52.31,47.4,47.4,59.89,59.89,57.47,57.47,50.68,50.98,47.4,47.4,57.09,46.7,46.7,46.9,61.93,61.93,45.82,45.82,50.54,50.54,46.7,46.56,50.26,55.86,45.82,53.83,53.83,53.83,53.83,54.42,54.42,56.02,51.28,60.96,51.99,61.68,61.7,50.78,50.78,50.78,61.88,61.88,51.28,52.65,52.65,53.11,53.46,53.41,23.6,23.6,23.6,23.6,23.6,25.29,28.54,28.54,29.41,29.41,42.93,39.65,39.74,39.74,4.09,4.04,47.87,47.1,46.72,46.88,47.74,47.22,47.22,47.74,33.14,46.72,46.72,47.05,47.05,47.05,45.64,46.73,46.41,45.82,46.98,47.05,46.73,45.82,46.14,46.38,45.37,45.82,44.85,45.82,46.73,46.38,53.88,46.73,45.91,45.91,45.59,45.59,36.51,30.02,40.43,40.43,40.43,35.08,34.04,34.04,34.04,37.28,37.28,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,34.09,35.08,66.88,34.89,22.04,20.55,38.85,28.97,36.21,36.21,37.44,28.97,28.97,39.84,39.84,27.21,36.94,36.94,38.53,32.27,33.99,29.85,-6.9,-6.45,-3.24,47.18,53.79,52.1,51.85,51.86,51.86,51.86,52.09,41.07,41.04,41.28,41.28,41.28,41.28,45.82,45.82,43.29,51.73,51.72,33.84,42.09,42.09,32.02,26.08,25.75,25.75,25.75,25.61,25.61,25.61,25.61,25.61,25.61,25.61,25.61,-27.71,-19.68,7.68,7.21,7.68,39.62,30.11,8.22,37.83,30.02,19.53,26.4,26.4,18.51,31.07,29.84,18.7,27.85,-8.06,-8.06,-20.05,-20.05,-14.57,-18.32,-8.93,-27.67,-24.44,-17.08,-24.78,-28.14,-28.52,-31.66,-17.34,3.78,3.78,3.78,3.83,5.87,-18.49,-28.27,-24.26,12.0,-34.75,-34.75,-31.58,-4.74,-27.49,-26.9,-26.9]},"kind":"numeric","n":22043,"n_null":491,"n_unique":3214,"null_rate":0.022274645012021956,"stats":{"iqr"
:30.639999999999997,"kurtosis":0.26460067116566943,"max":89.2,"mean":26.45988864142539,"median":34.89,"min":-86.16,"n_outliers":1503,"outlier_rate":0.06973830734966592,"q1":16.34,"q3":46.98,"skew":-1.079632955397913,"std":29.543137135453637,"zero_rate":0.0}},{"alerts":[],"column":"paleolng","extras":{"histogram":{"counts":[3,12,4,31,31,121,345,968,726,1791,244,2539,3233,949,404,1084,478,154,57,626,210,1004,1648,1074,534,129,52,82,284,257,663,486,156,315,434,182,192,40,7,3],"edges":[-177.6,-168.9425,-160.285,-151.6275,-142.97,-134.3125,-125.655,-116.9975,-108.34,-99.6825,-91.025,-82.3675,-73.71000000000001,-65.05250000000001,-56.39500000000001,-47.73750000000001,-39.08000000000001,-30.422500000000014,-21.765000000000015,-13.107500000000016,-4.450000000000017,4.207499999999982,12.86499999999998,21.52249999999998,30.17999999999998,38.83749999999998,47.494999999999976,56.152499999999975,64.80999999999997,73.46749999999997,82.12499999999997,90.78249999999994,99.43999999999997,108.0975,116.75499999999997,125.41249999999994,134.06999999999996,142.7275,151.38499999999996,160.04249999999993,168.7]},"sample":[10.95,-84.88,-50.06,11.54,-77.85,-77.85,-77.85,-77.85,-77.85,-82.66,-82.66,-0.42,-0.42,9.31,9.52,0.82,92.81,92.81,91.22,91.22,91.22,91.22,90.03,88.5,88.5,88.5,87.97,-12.45,-84.66,-84.66,-69.65,126.28,-74.74,126.57,-75.39,16.73,-69.75,-69.75,-69.75,-69.75,-27.45,19.01,25.14,7.58,-30.75,36.62,8.84,8.32,-10.07,-11.61,30.48,12.1,-11.19,-11.14,16.76,122.21,-39.9,-37.44,35.77,7.99,-36.53,-10.44,25.35,21.05,16.02,-0.92,-0.92,-9.08,-40.29,-40.87,-41.97,-43.89,14.61,-36.66,96.31,-40.51,-40.43,-40.45,-40.53,2.54,31.29,31.25,31.25,12.74,-40.92,31.25,31.25,31.3,18.03,8.89,-36.14,-43.53,-38.06,120.27,31.26,22.74,18.27,-9.24,-43.78,19.67,-36.66,14.8,-34.27,-40.28,-43.88,-40.51,15.38,-40.53,17.7,-43.92,122.22,15.17,-39.65,21.5,33.42,-38.75,13.12,-9.58,-9.24,-9.58,37.19,31.25,-41.8,21.2,122.21,-37.65,14.98,92.47,33.5,-40.28,-9.47,14.69,127.48,-65.66,78.59,-71.97,-71.96,-71.96,-66.52,21.
11,18.28,72.2,-65.56,-66.78,-69.01,-41.19,22.91,106.74,52.3,-66.82,-157.05,121.86,122.6,122.6,-65.7,98.75,98.4,69.9,96.71,118.25,114.67,93.77,-62.88,-62.81,-62.82,-62.11,24.74,-65.66,-62.06,-66.42,-65.65,-66.02,-66.1,-68.22,-65.55,-65.55,-65.58,-65.84,-64.0,16.07,10.77,9.81,112.9,-65.28,-65.37,-65.19,-64.03,-76.14,129.45,14.24,-107.56,-107.56,-116.22,-70.19,34.83,-38.54,-48.35,143.15,16.11,-67.67,-65.8,-46.35,138.18,-67.8,-67.7,-64.37,-40.01,97.36,-59.67,-59.67,138.78,138.78,-68.08,-68.04,23.7,66.05,-60.43,-68.21,-63.7,-71.56,124.45,-65.7,132.4,131.39,-70.95,-71.08,-71.08,-71.08,-71.08,-71.08,-70.9,-70.97,-71.05,-70.96,-70.98,-30.29,-65.79,-46.94,-46.34,-67.14,69.84,-68.14,-34.92,72.33,77.27,-65.76,-65.67,-75.01,23.11,19.52,-68.04,-68.04,-65.77,-36.63,-36.63,64.48,79.67,43.72,64.01,14.08,125.6,31.91,72.15,-57.58,-76.0,-70.92,-71.43,-71.42,-76.0,-76.0,-68.59,-68.59,-66.46,-66.46,-77.82,-78.69,-76.0,-76.0,-72.05,-77.16,-77.16,-76.78,-74.71,-74.71,-76.11,-76.11,-74.88,-74.88,-77.16,-76.69,-79.88,-67.09,-76.11,-73.27,-73.27,-73.27,-73.27,-73.72,-73.72,-73.65,-79.72,-78.13,-78.57,-77.31,-76.31,-77.86,-77.86,-77.86,-76.38,-76.38,-79.72,-75.98,-75.98,-77.05,-73.64,-72.03,101.5,101.5,101.5,101.5,101.5,73.87,73.71,73.71,73.8,73.8,110.18,113.23,114.42,114.42,-71.53,-71.56,-91.74,-93.37,-93.8,-94.64,-93.95,-90.01,-90.01,-93.95,-72.16,-93.8,-93.8,-93.54,-93.54,-93.54,-95.12,-94.98,-94.87,-97.28,-93.62,-93.54,-94.98,-97.28,-95.06,-95.78,-97.3,-97.28,-95.42,-97.28,-94.98,-95.78,-100.38,-94.98,-93.24,-93.24,-91.27,-91.27,-111.39,-81.8,-97.7,-97.7,-97.7,-116.69,-99.34,-99.34,-99.34,-100.12,-100.1,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.58,-116.69,-161.94,-87.5,-101.99,-102.9,-78.85,-81.7,-92.81,-92.81,-99.92,-81.83,-81.83,-78.33,-78.33,-81.46,-99.52,-99.52,-97.21,-109.87,-99.14,-82.19,-31.32,-30.66,-28.53,17.16,-64.84,-63.02,-62.81,-62.78,-62.78,-62.78,-63.0,27.17,26.97,26.55,26.55,26.55,26.55,24.29,24.29,26.58,-47.66,-47
.83,38.37,27.08,27.08,29.12,18.38,19.57,19.57,19.57,20.19,20.19,20.19,20.19,20.19,20.19,20.19,20.19,-65.96,-42.76,-125.02,-120.19,-125.02,-119.05,-123.96,-114.61,-118.48,-130.29,-134.41,-127.23,-127.23,-105.65,-117.22,-116.81,-107.27,-117.01,-111.05,-111.05,-110.11,-110.11,-106.99,-115.28,-131.34,-67.88,-112.52,-113.79,-112.52,-65.35,-95.11,-102.39,-105.36,-80.82,-80.82,-80.82,-81.1,-75.69,-117.58,-44.43,-31.52,117.39,-76.51,-76.51,-66.86,-76.31,-68.44,-66.43,-66.43]},"kind":"numeric","n":22043,"n_null":491,"n_unique":3715,"null_rate":0.022274645012021956,"stats":{"iqr":97.35,"kurtosis":-0.48113838740191683,"max":168.7,"mean":-28.57681282479584,"median":-62.150000000000006,"min":-177.6,"n_outliers":3,"outlier_rate":0.00013919821826280623,"q1":-77.16,"q3":20.19,"skew":0.737242760765059,"std":68.91097398994592,"zero_rate":0.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"89.8% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"83.1% duplicate 
strings"}],"column":"reference_no","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[23,0,0,0,0,0,0,0,0,0,2233,0,0,0,0,0,0,0,0,0,1274,0,0,0,0,0,0,0,0,0,8908,0,0,0,0,0,0,0,0,9605],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0,4.1,4.2,4.300000000000001,4.4,4.5,4.6,4.7,4.800000000000001,4.9,5.0]},"near_unique":false,"sample":["8880","3211","57","41","13037","36816","14666","70","45","4233","1039","1983","76304","136","29198","14071","7265","47","257","13817","9665","4233","12571","17957","1789","6294","1536","15230","4245","60994","6118","52421","6294","42441","13103","13022","61104","6147","14904","6294","1916","26476","53051","60580","17052","19","12202","13525","11946","66"],"top_values":[["4245",794],["6294",743],["70",567],["47",528],["16510",323],["15088",289],["2725",255],["3649",206],["13103",200],["45",199],["122",198],["11749",197],["2255",197],["3558",171],["1186",161],["1534",158],["11964",157],["1536",151],["66",144],["30862",143]],"top_words":[["4245",720],["6294",673],["70",526],["47",472],["16510",299],["15088",264],["2725",229],["3649",181],["2255",177],["13103",176],["122",176],["45",175],["11749",174],["3558",153],["11964",147],["1186",146],["1534",145],["1536",136],["66",132],["30862",129],["4216",125],["829",115],["13525",106],["6095",106],["3175",106]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22043,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.333333333333
3335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":22043,"n_null":0,"n_unique":3725,"null_rate":0.0,"stats":{"allcaps_rate":0.8976545842217484,"boilerplate_rate":0.0,"duplicate_rate":0.8310121126888355,"emoji_rate":0.0,"len_max":5,"len_mean":4.172208864492129,"len_median":4.0,"len_min":1,"len_p95":5.0,"n_duplicates":18318,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3547,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"occurrence_no","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[8,0,0,0,0,0,15,0,0,0,0,0,0,26,0,0,0,0,0,0,1359,0,0,0,0,0,3185,0,0,0,0,0,0,16620,0,0,0,0,0,830],"edges":[1.0,1.15,1.3,1.45,1.6,1.75,1.9,2.05,2.2,2.3499999999999996,2.5,2.65,2.8,2.95,3.1,3.25,3.4,3.55,3.6999999999999997,3.85,4.0,4.15,4.3,4.449999999999999,4.6,4.75,4.9,5.05,5.2,5.35,5.5,5.6499999999999995,5.8,5.95,6.1,6.25,6.3999999999999995,6.55,6.7,6.85,7.0]},"near_unique":true,"sample":["361526","196124","27440","23237","519811","10365","533398","31658","23849","142746","165932","164237","149585","39860","280868","517672","274518","24634","47881","513167","380655","142799","475132","255937","183495","196341","164728","520152","148880","9930","151592","182612","182610","498922","483626","521531","2494","155004","535857","164148","197494","1860","1237630","1820","4630","11746","464436","498930","461468","30717"],"top_values":[],"top_words":[["164260",1],["11739",1],["37907",1],["621561",1],["197544",1],["24377",1],["1439335",1],["25196",1],["513513",1],["27500",1],["498914",1],["459554",1],["28257",1],["195921",1],["165364",1],["164
387",1],["150846",1],["4993",1],["559720",1],["498896",1],["481940",1],["5599",1],["197002",1],["3019",1],["196076",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22043,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":22043,"n_null":0,"n_unique":22043,"null_rate":0.0,"stats":{"allcaps_rate":0.9989565848568707,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":7,"len_mean":5.761783786235993,"len_median":6.0,"len_min":1,"len_p95":6.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":20000,"word_mean":1.0,"word_median":1.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["phylum.top_values","class.top_values","country.top_rate","country.top_value","early_age_mya.max","early_age_mya.mean","early_age_mya.n_outliers","rank.top_values","lat.skew","lat.n_outliers","row_count"],"featured_charts":[{"caption":"Look for how heavily Mammalia and the dinosaur classes (Saurischia, Ornithischia) dominate over marine and invertebrate groups like Cephalopoda and Trilobita.","column":"class","kind":"bar"},{"caption":"Notice the strong US bias (50.9% of records) versus all other countries, which may reflect collection effort rather than true fossil distribution.","column":"country","kind":"bar"},{"caption":"Look for the wide spread from near-zero to 538 million years ago, and the outlier spike of older Paleozoic occurrences pulling the tail 
right.","column":"early_age_mya","kind":"histogram"},{"caption":"Check the split between species- and genus-level identifications, as this reveals how precisely occurrences have been classified taxonomically.","column":"rank","kind":"donut"},{"caption":"Chordata overwhelmingly dominates at 81.6%, with Mollusca and Arthropoda each contributing exactly 2,000 records \u2014 a suspiciously round number worth investigating.","column":"phylum","kind":"donut"}],"model":"anthropic:default","narrative":"This is a fossil occurrence dataset containing 22,043 records spanning taxonomic classifications, geographic coordinates, and geological time ranges for paleontological finds. The taxonomic breakdown is dominated by Chordata (81.6%) with Mammalia, Saurischia, and Ornithischia as the leading classes, while over half of all occurrences (50.9%) come from the United States \u2014 worth examining for geographic bias. The geological age columns (early_age_mya and late_age_mya) span from near-present to over 500 million years ago with high spread and outliers, suggesting the dataset mixes very different eras of life. Taxonomic rank is split between species (41%) and genus (33%), meaning precision of identification varies considerably across records and may affect comparative analyses.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["duplicate_rate","n_duplicates","n_unique","n","allcaps_rate","len_mean","len_max","top_values"],"model":"anthropic:default","narrative":"This column is a reference number field \u2014 short numeric-looking codes (mean length 4.17 characters, max 5) that, given the fossil-occurrence context, most likely identify the published source (bibliographic reference) each record was drawn from rather than a case, order, or account. Despite the name implying uniqueness, the duplicate rate is extremely high at 83.1%, with only 3,725 unique values across 22,043 rows, meaning the same reference numbers recur many times (e.g., '4245' appears 794 times, '6294' 743 times). 
The allcaps alert (89.8%) is likely a false positive triggered by numeric-only strings. This column appears to be a grouping or foreign-key identifier rather than a row-level unique reference.","role":"foreign_key","scope":"column","target":"reference_no","treatment":"Use as a grouping key or left-join foreign key; do not treat as a unique row identifier."},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","duplicate_rate","n_duplicates","n_empty","one_word_rate","vocab_size","top_values","null_rate"],"model":"anthropic:default","narrative":"This column contains biological genus names from a paleontological or natural history dataset, as evidenced by taxa such as Palmatolepis (conodonts), Grallator/Eubrontes (dinosaur tracks), Baculites (ammonites), and Equus (horses). The duplicate rate is extremely high at 88.2% (19,435 of 22,043 rows share a value), which is expected for a categorical taxonomic label with only 2,608 unique genera. Notably, 5,545 rows (25.2% of the dataset) have an empty string rather than a null \u2014 a data quality issue that should be treated as missing. The vocabulary of 2,525 single-word tokens aligns tightly with the 2,608 unique values, confirming these are clean, single-word Latin genus names.","role":"label","scope":"column","target":"genus","treatment":"Replace empty strings with NaN, then encode as a categorical feature (e.g. label or target encode) or use as a grouping key for taxonomic analysis."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","n","duplicate_rate","null_rate","len_mean","len_min","len_max","one_word_rate","allcaps_rate","top_words"],"model":"anthropic:default","narrative":"This column is a unique occurrence identifier \u2014 likely a numeric reference code stored as text, given the all-caps alert (which reflects purely digit characters) and mean length of ~5.76 characters. 
All 22,043 rows are non-null, non-duplicate, and every value is a single token, confirming it as a primary key-style field. The values range from short (min length 1) to 7 characters and appear to be plain integers (e.g., '164260', '1439335'), with no patterns suggesting a structured prefix scheme.","role":"identifier","scope":"column","target":"occurrence_no","treatment":"Retain as a row identifier for joins or traceability; drop from any model feature matrix."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","n_duplicates","one_word_rate","word_mean","word_median","top_values"],"model":"anthropic:default","narrative":"This column contains taxonomic names of fossil organisms \u2014 dinosaurs (Theropoda, Sauropoda, Hadrosauridae), conodonts (Palmatolepis, Polygnathus, Icriodus), and other paleontological taxa \u2014 making it a biological classification label rather than a unique specimen identifier. The duplicate rate is extremely high at 78.9%, with only 4,660 unique values across 22,043 rows, reflecting that many specimens share the same taxon name. Over half of values (58.5%) are single words (genus or clade names), with a mean word count of 1.42, consistent with Linnaean binomial or higher-rank nomenclature. 
The top value 'Theropoda' alone appears 768 times, confirming this is a categorical grouping field, not a unique label.","role":"label","scope":"column","target":"name","treatment":"Encode as a categorical feature (ordinal or target-encode by frequency); consider hierarchical grouping by taxonomic rank for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["stats.min","stats.max","stats.median","stats.skew","stats.kurtosis","stats.outlier_rate","n_outliers","alerts"],"model":"anthropic:default","narrative":"This column contains geographic latitude values, with a valid range from -84.33\u00b0 to 79.75\u00b0 and a median of 41.70\u00b0, consistent with mid-northern hemisphere locations (e.g., Europe or northern US). The distribution is strongly left-skewed (skew = -2.44) with high kurtosis (7.05), indicating a heavy tail of unusually southern or southern-hemisphere values \u2014 9.2% of rows (2,019 records) are flagged as outliers. The -84.33\u00b0 minimum lies well south of the Antarctic Circle (about -66.6\u00b0), deep in the Antarctic interior; fossil localities are known from such latitudes, so it is not necessarily an error, but it should be verified against the source record.","role":"feature","scope":"column","target":"lat","treatment":"Verify extreme negative values (especially near -84.33\u00b0) against source records before treating them as errors; use as-is or pair with longitude for geospatial features after outlier review."},{"confidence":"high","critiques":[],"evidence_keys":["mean","median","max","min","n_unique","n","skew","outlier_rate","n_outliers","iqr","q1","q3"],"model":"anthropic:default","narrative":"This column represents the early (older) age boundary of a geological time range in millions of years ago (Mya), likely for fossil taxa or stratigraphic intervals. With only 164 unique values across 22,043 rows, most records share standardized geological stage boundaries rather than continuous age estimates. 
The distribution is right-skewed (skew 1.13) with a mean of ~154.7 Mya but a median of only 110.1 Mya, and 11.6% of values (2,549 rows) are flagged as outliers\u2014driven by a long tail extending to 538.8 Mya representing Cambrian or older occurrences against a bulk of Mesozoic/Cenozoic records.","role":"feature","scope":"column","target":"early_age_mya","treatment":"Use as-is or bin into geological periods; log-transform if used in regression due to right skew and wide range (0.0117\u2013538.8)."},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.iqr","stats.outlier_rate","stats.zero_rate","null_rate"],"model":"anthropic:default","narrative":"This column records the late (younger) age boundary of a fossil taxon's stratigraphic range in millions of years ago (Mya), a standard field in paleontological occurrence databases. With only 156 unique values across 22,043 rows, ages are drawn from a discrete set of stage/interval boundaries rather than continuous measurements. The distribution is right-skewed (skew 1.17, mean 147.5 vs. 
median 93.9) and 11.5% of rows are flagged as outliers, driven by a long tail extending to 521 Mya \u2014 likely Cambrian/Ordovician taxa pulling the upper end \u2014 while the near-zero minimum (0.0, ~0.1% of rows) represents taxa surviving to the Recent.","role":"feature","scope":"column","target":"late_age_mya","treatment":"Use as-is or bin into geological periods; consider log-transform or pair with early_age_mya to compute range duration before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["column","min","max","mean","median","skew","n_outliers","outlier_rate","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column represents paleolatitude \u2014 the reconstructed geographic latitude of a fossil or geological sample at the time of its deposition, ranging from -86.16\u00b0 (near the South Pole) to 89.2\u00b0 (near the North Pole). The distribution is moderately left-skewed (skew = -1.08) with a mean of 26.46\u00b0 but a median of 34.89\u00b0, indicating a pull toward southern or equatorial values in the tail. Notably, 6.97% of records (1,503) are flagged as outliers; because the upper 1.5\u00d7IQR fence (about 92.9\u00b0) lies above the maximum possible latitude, all of them are southern-hemisphere paleolatitudes below roughly -29.6\u00b0, which more plausibly reflect genuine southern-continent occurrences than data errors. The 3,214 unique values across 22,043 records suggest coordinate rounding or discrete sampling grids.","role":"feature","scope":"column","target":"paleolat","treatment":"Retain as-is for spatial or paleogeographic modelling; consider binning into latitudinal zones (e.g., polar, temperate, tropical) if used as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","top_value","top_rate","entropy","cardinality"],"model":"anthropic:default","narrative":"This column is a categorical field named 'collection' that is entirely empty strings across all 22,043 rows \u2014 it has cardinality of 1 and a top_value of '' with top_rate of 1.0. 
There are no nulls, meaning the field was populated with blank strings rather than left absent. The column carries zero information (entropy = 0.0) and is completely useless for analysis in its current state.","role":"other","scope":"column","target":"collection","treatment":"Drop this column; it is a constant empty string with no analytical value."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","top_rate","top_value","cardinality","entropy"],"model":"anthropic:default","narrative":"This column, 'formation', is a categorical field that is entirely empty: all 22,043 rows contain a blank string, giving it a cardinality of 1 and an entropy of 0. There is no null rate, meaning the field was populated with empty strings rather than true nulls. It carries zero information and should be dropped.","role":"other","scope":"column","target":"formation","treatment":"Drop entirely; column contains only empty strings across all 22,043 rows and provides no analytical value."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","iqr","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column contains geographic longitude values, spanning from -176.7\u00b0 to 177.1\u00b0, consistent with near-global coverage. The median of -98.25\u00b0 and mean of -47.2\u00b0 indicate a strong concentration of records in the Americas (particularly North/Central America), which explains the positive skew of 0.93 \u2014 the distribution is pulled rightward toward European/African longitudes. 
The wide IQR of 114\u00b0 and only 4,259 unique values across 22,043 rows suggests coordinates are rounded or snapped to coarse grid points rather than being fully precise.","role":"feature","scope":"column","target":"lon","treatment":"Use as-is or pair with latitude for geospatial modelling; consider binning into regions if cardinality reduction is needed."},{"confidence":"high","critiques":[],"evidence_keys":["column","min","max","median","mean","skew","iqr","n_unique","null_rate","n_outliers"],"model":"anthropic:default","narrative":"This column represents paleogeographic longitude \u2014 the reconstructed east-west position of a sample location at the time of fossil deposition, typically ranging from \u2212180\u00b0 to +180\u00b0. The values span \u2212177.6 to 168.7, consistent with valid global longitude, and the 3,715 unique values across 22,043 records suggest discretized but reasonably fine-grained spatial resolution. The distribution is moderately right-skewed (skew 0.737) with a wide IQR of 97.35\u00b0, indicating samples are spread broadly but with more density in the western/negative hemisphere \u2014 the median of \u221262.15 is well below the mean of \u221228.58. Null rate of 2.23% is minor but worth flagging for paleogeographic reconstructions where completeness matters.","role":"feature","scope":"column","target":"paleolng","treatment":"Use as-is for spatial modelling; consider sin/cos encoding if used in circular/angular context to handle the \u2212180\u00b0/+180\u00b0 boundary."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","n_unique","null_rate","top_rate","top_value","entropy_ratio","n"],"model":"anthropic:default","narrative":"This column contains the biological/taxonomic class of fossil or paleontological specimens, with 19 distinct values across 22,043 records and no nulls. 
The top value 'Mammalia' accounts for 31.8% of records, followed by dinosaur orders 'Saurischia' and 'Ornithischia' \u2014 notably these are clades, not true classes, mixed in with proper classes like Mammalia and Reptilia, suggesting taxonomic rank inconsistency. Two sentinel-style values ('NO_CLASS_SPECIFIED' with 26 occurrences and 60 empty-string entries) represent ~0.4% of records and should be treated as missing. Entropy ratio of 0.61 indicates moderate concentration rather than a uniform spread.","role":"label","scope":"column","target":"class","treatment":"Unify empty strings and 'NO_CLASS_SPECIFIED' into a single missing category, then encode as a categorical feature or target depending on modelling objective."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy_ratio","null_rate","top_values"],"model":"anthropic:default","narrative":"This column contains ISO-style two-letter country codes across 93 distinct countries, with zero nulls across 22,043 rows. The distribution is heavily US-dominated: 'US' alone accounts for 50.9% of all records (11,218 rows), while the next largest country 'CA' has only 1,830 \u2014 a roughly 6\u00d7 drop-off. The entropy ratio of 0.50 confirms the pronounced imbalance, meaning models treating this as a uniform categorical feature will be misled by the long tail of 83 countries each with very small counts.","role":"feature","scope":"column","target":"country","treatment":"One-hot encode top countries and bin the long tail into an 'Other' category, or embed via target-encoding given the severe US skew."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_rate","n_unique","null_rate","cardinality"],"model":"anthropic:default","narrative":"This column contains biological family-level taxonomic classifications for fossil or specimen records, with 528 distinct family names across 22,043 rows. 
The most surprising signal is that the top value is an empty string (3,418 occurrences, 15.5% of rows), and the second most frequent value is the sentinel 'NO_FAMILY_SPECIFIED' (1,996 occurrences) \u2014 together these two non-informative values account for roughly 24.6% of the dataset, indicating pervasive missing family assignments. Substantive values include well-known paleontological families such as Hadrosauridae, Grallatoridae, and Palmatolepidae, confirming a paleobiology or fossil-occurrence context.","role":"label","scope":"column","target":"family","treatment":"Unify missing indicators by mapping empty string and 'NO_FAMILY_SPECIFIED' to a single null/unknown category, then encode as a categorical feature or use as a grouping label."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","null_rate","top_values","cardinality"],"model":"anthropic:default","narrative":"This column encodes the geological time interval representing the late (upper) bound of a fossil or stratigraphic occurrence, drawn from standard chronostratigraphic stage names (e.g., 'Tithonian', 'Sinemurian', 'Albian'). The most striking signal is that 83.1% of the 22,043 rows carry an empty string as the top value, meaning the late interval is unspecified for the vast majority of records \u2014 this is a heavily sparse categorical field despite a zero null rate. 
The remaining 138 distinct values span Mesozoic and Cenozoic stages with modest frequency, suggesting the dataset skews toward certain time periods (Tithonian and Sinemurian together account for ~4.4% of all rows).","role":"feature","scope":"column","target":"late_interval","treatment":"Treat empty strings as missing; encode valid stage names ordinally by geological age order or map to numeric Ma midpoints before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","top_values","n_unique","entropy","n","null_rate"],"model":"anthropic:default","narrative":"This column contains biological taxonomic order classifications (e.g., Rodentia, Carnivora, Artiodactyla), likely from a paleontological or natural history specimen dataset. Two sentinel/missing-value patterns dominate: 'NO_ORDER_SPECIFIED' accounts for 32.3% of rows (7,117) and an empty string accounts for a further 3,019 rows (~13.7%), meaning roughly 46% of records lack a meaningful order assignment. Despite 99 unique values and moderate entropy (3.89), the effective signal is skewed toward these two non-informative categories.","role":"label","scope":"column","target":"order","treatment":"Consolidate 'NO_ORDER_SPECIFIED' and empty string into a single null/unknown category, then encode remaining values as a nominal categorical feature (e.g., one-hot or target encoding) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","null_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column represents geological time periods or North American Land Mammal Ages (NALMAs), with values like 'Irvingtonian', 'Torrejonian', 'Tiffanian', and 'Puercan' \u2014 terminology specific to paleontology and fossil occurrence datasets. 
With 298 unique values across 22,043 rows and zero nulls, the distribution is moderately concentrated: the top value 'Irvingtonian' accounts for only ~7.8% of rows, while entropy ratio of 0.78 suggests meaningful but not extreme skew. The mix of formal geological stages (e.g., 'Kimmeridgian', 'Aptian', 'Hettangian') alongside NALMA names signals that multiple classification schemes coexist in this column, which could complicate grouping or ordering without a lookup table.","role":"label","scope":"column","target":"period","treatment":"Map to a standardized chronostratigraphic hierarchy or numeric age range using a geological timescale reference before modelling or time-series analysis."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","null_rate"],"model":"anthropic:default","narrative":"This column contains biological phylum classifications drawn from exactly 4 distinct values across 22,043 records, with no nulls. It is heavily dominated by 'Chordata' at 81.6% (17,993 rows), while 'Mollusca' and 'Arthropoda' each account for exactly 2,000 records \u2014 a suspiciously round number suggesting deliberate sampling or stratification. Notably, 50 records carry an empty string value, which acts as a de-facto null and should be treated as missing rather than a valid category.","role":"label","scope":"column","target":"phylum","treatment":"One-hot encode (phyla are nominal, with no inherent order); replace the 50 empty-string records with NaN and impute or exclude before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","top_values","entropy_ratio"],"model":"anthropic:default","narrative":"This column encodes taxonomic rank in a biological classification dataset, with 18 distinct levels spanning the Linnaean hierarchy from species up through class and beyond. 
The value 'species' dominates at 41.2% (9,082 rows) and 'genus' follows at 7,342 rows, which is the expected shape for a taxonomy tree where leaf nodes vastly outnumber higher groupings. Notably, 'unranked clade' appears 2,828 times\u2014making it the third-largest category\u2014indicating a substantial portion of entries reflect modern phylogenetic classifications that don't fit traditional Linnaean ranks. Entropy ratio of 0.50 signals moderate concentration, not uniform distribution.","role":"label","scope":"column","target":"rank","treatment":"Ordinal-encode with a defined taxonomic hierarchy or one-hot encode for modelling; treat 'unranked clade' as a separate nominal category since it has no fixed position in the hierarchy."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","null_rate","entropy_ratio","n"],"model":"anthropic:default","narrative":"This column represents a geographic state or province field, but its 519 unique values far exceed the 50 US states, revealing a mix of US states, Canadian provinces (Alberta), UK constituent countries (England), and Chinese provinces (Guangxi) \u2014 indicating international scope. The top value is 'Wyoming' at 8.6% of rows, which is disproportionately high for a state with a small population, suggesting dataset bias or a specific collection source. Notably, 1,082 rows (roughly 4.9%) contain an empty string rather than a null, meaning the null_rate of 0.0 understates true missingness. 
The entropy ratio of 0.70 confirms meaningful but imperfect spread across categories.","role":"feature","scope":"column","target":"state","treatment":"Standardize empty strings to null, map to ISO region codes, and consider grouping by country before encoding."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":6323,"prompt_tokens":21019,"total_tokens":27342}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:37:57+00:00","mode":"full","row_count":22043,"sampled_rows":22043,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/fossils.json"},"notes":[],"saturn_version":"0.2.0","schema":{"class":"categorical","collection":"categorical","country":"categorical","early_age_mya":"numeric","family":"categorical","formation":"categorical","genus":"text","lat":"numeric","late_age_mya":"numeric","late_interval":"categorical","lon":"numeric","name":"text","occurrence_no":"text","order":"categorical","paleolat":"numeric","paleolng":"numeric","period":"categorical","phylum":"categorical","rank":"categorical","reference_no":"text","state":"categorical"}}
