{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"81 singleton categories"}],"column":"scientificName","extras":{"singletons":81,"top_values":[["Canella winterana (L.) Gaertn.",174],["Warburgia salutaris (Bertol.fil.) Chiov.",35],["Cinnamodendron dinisii Schwacke",20],["Hylephila phyleus (Drury, 1773)",18],["Cinnamosma Baill.",17],["Cinnamosma fragrans Baill.",14],["Ocybadistes walkeri Heron, 1894",11],["Cinnamodendron occhionianum F.Barros & J.Salazar",10],["Pinellia fujianensis H.Li & G.H.Zhu",10],["Urbanus proteus (Linnaeus, 1758)",8],["Cinnamosma madagascariensis Danguy",8],["Warburgia ugandensis Sprague",8],["Lerodea eufala (Edwards, 1869)",7],["Burnsius albezens Grishin, 2022",7],["Lerema Scudder, 1872",7],["Quasimellana eulogius (Pl\u00f6tz, 1882)",6],["Spicauda procne (Pl\u00f6tz, 1880)",6],["Burnsius oileus (Linnaeus, 1767)",5],["Burnsius orcynoides",5],["Cephrenes augiades (Felder, 1860)",5]]},"kind":"categorical","n":610,"n_null":0,"n_unique":157,"null_rate":0.0,"stats":{"cardinality":157,"entropy":5.516685902166702,"entropy_ratio":0.7562676788926866,"top_rate":0.28524590163934427,"top_value":"Canella winterana (L.) 
Gaertn."}},{"alerts":[],"column":"species","extras":{"singletons":58,"top_values":[["Canella winterana",174],["Droseraceae",38],["Warburgia salutaris",35],["Cinnamodendron dinisii",20],["Hylephila phyleus",19],["Sarraceniaceae",19],["Cinnamosma fragrans",14],["Ocybadistes walkeri",11],["Urbanus dorantes",10],["Cinnamodendron occhionianum",10],["Pinellia fujianensis",10],["Pyrgus oileus",9],["Cinnamosma madagascariensis",9],["Mellana eulogius",8],["Urbanus proteus",8],["Warburgia ugandensis",8],["Urbanus procne",7],["Lerodea eufala",7],["Burnsius albezens",7],["Gorgythion begga",6]]},"kind":"categorical","n":610,"n_null":0,"n_unique":123,"null_rate":0.0,"stats":{"cardinality":123,"entropy":5.143644338788545,"entropy_ratio":0.7408906866284185,"top_rate":0.28524590163934427,"top_value":"Canella winterana"}},{"alerts":[],"column":"genus","extras":{"singletons":30,"top_values":[["Canella",174],["Warburgia",43],["Cinnamosma",40],["Cinnamodendron",34],["Urbanus",25],["Hylephila",19],["Burnsius",16],["Pyrgus",11],["Lerema",11],["Ocybadistes",11],["Pinellia",10],["Mellana",8],["Trapezites",8],["Heliopetes",7],["Lerodea",7],["Toxidia",7],["Pleodendron",7],["Staphylus",6],["Gorgythion",6],["Polites",6]]},"kind":"categorical","n":610,"n_null":0,"n_unique":94,"null_rate":0.0,"stats":{"cardinality":94,"entropy":4.8396700345516415,"entropy_ratio":0.7383636325736792,"top_rate":0.28524590163934427,"top_value":"Canella"}},{"alerts":[],"column":"family","extras":{"singletons":0,"top_values":[["Hesperiidae",300],["Canellaceae",300],["Araceae",10]]},"kind":"categorical","n":610,"n_null":0,"n_unique":3,"null_rate":0.0,"stats":{"cardinality":3,"entropy":1.1042875713905729,"entropy_ratio":0.6967278852894774,"top_rate":0.4918032786885246,"top_value":"Hesperiidae"}},{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[1,7,22,38,65,32,12,12,6,2,1,18,3,17,9,50,98,49,145,20,2,0,0,1],"edges":[-43.245933,-39.4979885,-35.750044,-32.0020995,-28.254154999999997,-24.506210499999998,-20.7
58266,-17.010321499999996,-13.262376999999997,-9.514432499999998,-5.766487999999995,-2.0185434999999927,1.7294010000000029,5.4773455000000055,9.225290000000008,12.973234500000004,16.721179000000006,20.46912350000001,24.217068000000005,27.965012500000007,31.71295700000001,35.46090150000001,39.208846000000015,42.956790500000004,46.704735]},"sample":[26.179819,8.621311,8.640193,8.640193,8.621311,26.179819,26.179819,-31.549801,20.804663,26.179819,26.179819,20.804663,-31.549801,26.179819,20.802768,26.179819,20.804663,20.804663,20.804663,20.802768,26.179819,20.804663,20.804663,-31.549801,26.179819,26.179819,8.845343,-31.549801,8.845343,20.804663,20.802768,20.802768,20.804663,20.804663,8.845343,8.845343,8.664558,8.664558,20.804663,20.804663,20.804663,20.804663,46.704735,30.050126,8.364038,8.364038,8.364038,26.481879,28.074026,24.825869,26.179835,26.177018,24.825869,-28.16653,-27.447433,24.81461,19.147782,28.074026,20.762597,-28.121749,-31.380579,20.649369,-2.755168,27.525813,26.179779,24.825869,26.179665,25.853647,24.826057,-34.96593,26.178625,28.035772,26.177985,-27.377124,-27.211439,-37.487088,-6.193826,27.530875,29.898853,27.859713,-28.220045,-12.712856,24.825869,28.003759,27.83223,-19.874459,26.179684,-25.156586,26.176562,24.816513,-29.714013,20.802166,1.334624,22.00364,26.179835,-33.365438,-34.96595,22.232548,20.800117,-34.937613,26.179835,5.413346,20.802166,24.815698,26.178806,-33.899453,-29.611723,24.825025,12.891503,-31.536625,26.177014,24.825869,-33.903937,25.896378,32.253425,24.779719,24.825869,26.179835,-23.40968,-38.167207,30.281394,-26.678298,24.816376,30.320698,26.307988,24.825869,-28.019146,28.074026,-28.1845,1.334624,-29.613833,24.825869,-28.780412,26.179808,32.756558,26.179835,24.825869,19.147782,-28.04139,29.560374,28.074026,26.176526,26.176539,22.485156,19.421646,28.074026,22.221733,26.178625,27.795537,1.320859,-28.154699,4.242971,26.176577,28.074026,-37.3301,24.825869,25.996579,10.687131,19.421646,24.816758,-28.829093,-37.831795,26.481879,26.179835,24.8
25869,26.179835,26.179835,22.518403,24.825869,-12.713027,-32.885742,-34.907842,-29.71403,-19.606964,19.147782,29.691014,27.0653,20.802166,22.574301,20.800108,26.179835,24.826719,29.075112,-29.596718,-37.664092,24.816517,-29.632672,26.179835,-37.677609,-29.59632,-34.97092,12.959028,-27.083727,26.179835,-28.076625,10.685789,27.162752,28.074026,-28.019145,-34.628714,26.179889,27.2263,26.176813,-27.377205,-27.485914,27.035342,26.177956,1.339805,-33.974328,27.669195,-34.614416,20.675943,29.075102,-27.485856,1.334624,1.334624,24.825869,26.176796,-28.82337,20.820584,-33.912887,26.1792,26.179835,1.334624,26.179835,-31.380563,-28.204655,26.176549,-33.982016,21.513941,26.179835,-27.974187,-33.877167,26.179748,27.861513,1.334624,-28.161847,26.176515,22.00351,-25.156212,-33.912888,5.394318,25.194863,16.242086,25.598732,18.046303,19.720466,25.473693,25.598758,19.403508,19.4036,19.403573,17.936894,-16.318108,17.976625,17.996704,16.472114,25.17987,16.333939,16.339855,25.174378,-29.615876,18.352243,-29.740272,-24.47576,25.176172,-17.29501,-17.315892,-17.294405,-25.381944,-24.30284,25.558868,16.359793,18.31114,25.599225,19.285562,16.243411,16.277479,-22.936497,-20.595523,17.944525,9.998,-26.247665,16.41105,16.399014,16.403318,16.40414,16.244878,16.408485,-26.64,16.32399,-22.951844,-26.187222,16.482446,16.394923,16.253388,25.190633,-17.314728,16.48912,25.182286,25.031644,16.235451,16.253503,16.254015,16.254015,-16.775556,-16.775556,19.721045,18.361462,18.370675,18.370045,-27.223323,17.005077,19.27572,17.005983,17.007669,17.000375,9.48,9.48,16.241928,17.976337,-29.990143,18.041586,-29.891904,16.341862,17.005186,17.007984,18.351419,17.008044,-23.230711,17.976337,17.976337,18.505963,18.341092,-23.189398,-29.818659,18.390567,17.934501,-1.500385,-1.482132,-20.743055,-20.743055,18.043981,19.842773,17.979033,18.329955,18.353547,19.870453,19.842617,19.843152,19.843152,-23.914418,19.778516,-21.404444,-21.404444,7.274151,-29.020732,-23.225633,-23.222212,19.842347,-23.264418,17.158672,17.157071
,-25.432485,16.510281,24.901808,-1.284683,19.328337,-0.758683,16.779631,-12.013812,-21.807222,16.441074,-24.84902,18.358599,18.35348,-24.81212,25.174345,-26.318465,17.116414,-25.46056,25.253984,25.082691,19.735511,25.135936,19.735534,18.372312,16.242251,18.028489,16.414783,-25.655798,-22.879271,8.66,18.042642,25.175473,18.238062,18.379792,24.549805,-14.110778,-14.110778,-14.110778,-28.324044,19.288679,-25.69182,18.129318,18.04002,18.009385,18.048152,-16.278889,-16.278889,-16.278889,-33.966344,17.986613,16.267364,16.255,16.267442,25.117507,-29.770023,-29.853557,-12.36694,-13.0986,-15.572397,19.27524,-24.931442,-22.543889,-22.54625,-22.543889,-22.54625,-22.54625,-22.543889,25.753603,25.753512,25.090486,25.19313,-1.099128,25.063855,17.972928,25.277455,25.144483,25.277478,-25.618266,-27.062548,-27.108571,-27.031285,-30.02945,18.312011,16.231592,25.470203,16.782185,19.662051,16.25469,16.340913,16.782183,16.250417,18.369567,16.511455,-25.350556,-25.350556,-25.350556,-25.350556,-25.350556,-25.350556,-17.068878,-25.461944,-25.46194,21.094488,21.09619,-26.278889,-26.731944,-26.731944,-26.278889,-26.278889,-25.529891,-26.897778,19.31372,16.483017,18.371343,16.32459,16.32459,-29.949774,16.483017,-24.59445,-24.59423,17.974925,17.973438,17.684439,-1.063136,-28.350278,-28.350278,-23.189182,25.455462,25.454141,24.573684,-23.439167,-23.439167,25.777421,26.169487,26.280894,26.268276,26.25502]},"kind":"numeric","n":610,"n_null":0,"n_unique":466,"null_rate":0.0,"stats":{"iqr":47.7480595,"kurtosis":-1.2828894787378404,"max":46.704735,"mean":5.199974558196722,"median":17.008014000000003,"min":-43.245933,"n_outliers":0,"outlier_rate":0.0,"q1":-22.9221905,"q3":24.825869,"skew":-0.6516593385910574,"std":22.7455645654173,"zero_rate":0.0}},{"alerts":[],"column":"longitude","extras":{"histogram":{"counts":[61,91,16,84,128,44,12,0,0,0,0,3,12,38,40,0,0,2,0,14,17,1,6,41],"edges":[-115.044309,-103.85949029166666,-92.67467158333334,-81.489852875,-70.30503416666667,-59.120215458333334,-47.935396749
999995,-36.75057804166667,-25.565759333333332,-14.380940624999994,-3.1961219166666694,7.988696791666669,19.173515500000008,30.358334208333332,41.543152916666656,52.72797162500001,63.912790333333334,75.09760904166666,86.28242775000001,97.46724645833334,108.65206516666666,119.83688387500001,131.02170258333334,142.20652129166666,153.39134]},"sample":[-98.366518,-80.139711,-80.128853,-80.128853,-80.139711,-98.366518,-98.366518,-61.070202,-105.464931,-98.366518,-98.366518,-105.464931,-61.070202,-98.366518,-105.464906,-98.366518,-105.464931,-105.464931,-105.464931,-105.464906,-98.366518,-105.464931,-105.464931,-61.070202,-98.366518,-98.366518,-79.854536,-61.070202,-79.854536,-105.464931,-105.464906,-105.464906,-105.464931,-105.464931,-79.854536,-79.854536,-80.201424,-80.201424,-105.464931,-105.464931,-105.464931,-105.464931,10.521621,-99.148571,-80.273465,-80.273465,-80.273465,-81.178253,-97.041326,-107.386056,-98.366493,-98.365221,-107.386056,153.378417,152.948002,-107.357434,-96.170749,-97.041326,-105.355147,153.296383,-58.030776,-100.407058,-78.746653,-97.883522,-98.366254,-107.386056,-98.366191,-80.194197,-107.385551,138.694108,-98.366464,-82.78318,-98.36571,152.907233,153.068481,145.419561,106.880498,-97.841217,-96.336188,-82.72409,31.948183,143.288607,-107.386056,-97.061342,-97.378772,-43.996013,-98.365843,30.542855,-98.365877,-107.360741,-53.738545,-105.464973,103.817803,-98.778952,-98.366493,151.474595,138.694092,113.861083,-105.465858,138.648178,-98.366493,100.782676,-105.464973,-107.35948,-98.36584,151.240617,-53.69346,-80.815525,77.592306,-61.063094,-98.365315,-107.386056,151.080964,-97.488067,-110.796325,-107.362466,-107.386056,-98.366493,-47.35743,144.344189,-98.068091,28.417985,-107.358348,-89.936305,-98.169937,-107.386056,153.322926,-97.041326,153.293845,103.817803,-53.690817,-107.386056,32.087622,-98.366245,-97.25846,-98.366493,-107.386056,-96.170749,153.2223,-98.226343,-97.041326,-98.36583,-98.36584,114.086229,-96.322208,-97.041326,113.916097,-98.366464,-
115.044309,103.769375,153.256742,101.322601,-98.365753,-97.041326,144.594208,-107.386056,-97.567558,-61.757618,-96.322208,-107.360841,151.93876,145.161942,-81.178253,-98.366493,-107.386056,-98.366493,-98.366493,114.365529,-107.386056,143.29832,151.64688,138.649018,-53.738542,-46.195667,-96.170749,-81.470794,-82.338038,-105.464973,114.287237,-105.46589,-98.366493,-107.38439,-97.653529,-53.711429,148.365892,-107.360307,-53.611348,-98.366493,148.417999,-53.736237,138.70987,77.524142,152.996957,-98.366493,153.39134,-61.756036,-80.440158,-97.041326,153.322772,-58.609075,-98.366101,-82.266607,-98.365844,152.907212,152.954161,-82.416259,-98.365694,103.820587,151.001888,-97.350443,-58.476618,-87.043645,-97.653529,152.954027,103.817803,103.817803,-107.386056,-98.365204,151.984467,-105.464092,151.182322,-98.36631,-98.366493,103.817803,-98.366493,-58.030727,153.256105,-98.365832,22.607134,-87.667068,-98.366493,153.195847,151.222838,-98.366261,-80.448075,103.817803,153.365129,-98.365577,-98.779028,30.542165,151.18232,100.259073,-80.874222,-61.313876,-80.315696,-66.69737,-79.788632,-80.18732,-80.315705,-70.76923,-70.772903,-70.773505,-67.193009,46.809548,-66.857788,-67.190212,-61.419891,-80.362838,-61.369203,-61.374054,-80.923058,31.071189,-64.768574,31.02554,30.703825,-80.904105,45.92816,45.914649,45.928753,-49.011389,31.239641,-80.451903,-61.398204,-66.013028,-80.315226,-69.303125,-61.175858,-61.246743,30.587292,46.564227,-66.964937,-84.0678,-48.981268,-61.402031,-61.405249,-61.405352,-61.40616,-61.176353,-61.402612,-49.5,-61.298364,-43.279646,-49.140833,-61.503205,-61.525695,-61.467537,-80.874157,45.852547,-61.440749,-80.875398,-80.502197,-61.414507,-61.202008,-61.19918,-61.234006,49.663611,49.663611,-79.788063,-64.924142,-65.651642,-65.651458,-50.462287,-61.768082,-81.380538,-61.765823,-61.773903,-61.746994,-84.04,-84.04,-61.38837,-67.210727,31.197147,-63.071664,31.099706,-61.524778,-61.767986,-61.774211,-64.769835,-61.774196,28.90127,-67.210727,-67.210727,-67.139405,-64.783
354,29.033478,30.910679,-65.593413,-67.191656,35.350804,35.12999,45.535278,45.535278,-67.202055,-71.406185,-67.159836,-68.8106,-64.767708,-71.40674,-71.405997,-71.406088,-71.406088,29.629134,-71.266734,47.949722,47.949722,38.691743,31.478708,28.974839,28.836087,-71.40611,28.972834,-61.756172,-61.758021,30.708625,-61.468301,-80.700221,36.811073,-81.103247,36.438018,-62.171217,49.300048,-41.641944,-61.467735,46.77992,-64.927246,-64.767753,46.77883,-80.92308,32.075091,-62.55845,31.541192,-80.313165,-80.440153,-79.737044,-80.397683,-79.737313,-65.639731,-61.325146,-63.092639,-61.421478,28.335765,-43.275835,-83.57,-66.748292,-80.904818,-62.986667,-65.620512,-81.797208,49.964306,49.964306,49.964306,31.936397,-70.924514,27.979859,-66.703806,-66.744995,-66.191952,-66.654481,48.889722,48.889722,48.889722,18.45305,-66.708002,-61.24077,-61.235147,-61.240505,-80.988618,30.984989,30.871719,49.38556,49.2369,46.419437,-81.315193,31.579219,44.227222,44.227556,44.227222,44.227556,44.227556,44.227222,-80.378038,-80.378497,-80.888484,-80.948371,38.023118,-77.311051,-66.865814,-80.296622,-80.392167,-80.297797,27.655584,32.559378,32.507789,32.485324,30.883078,-70.920022,-61.39723,-80.509652,-62.17816,-80.100652,-61.224419,-61.525762,-62.178158,-61.189036,-65.796824,-61.465347,-48.889722,-48.889444,-48.889722,-48.889722,-48.889444,-48.889722,45.982445,-51.659722,-51.66,-75.888797,-75.89149,-50.495555,-51.118889,-51.118889,-50.495556,-50.495555,30.961432,-51.771389,-81.168305,-61.445189,-65.640388,-61.47181,-61.47181,30.895196,-61.445189,46.73744,46.73945,-66.873153,-67.03472,-64.899198,35.220302,-50.221389,-50.221389,-44.979978,-80.191673,-80.195897,-81.749457,47.503611,47.503611,118.964015,118.19049,119.167681,119.070843,119.158688]},"kind":"numeric","n":610,"n_null":0,"n_unique":467,"null_rate":0.0,"stats":{"iqr":120.20801225,"kurtosis":0.08440247754095731,"max":153.39134,"mean":-32.93522615934426,"median":-63.056028,"min":-115.044309,"n_outliers":0,"outlier_rate":0.0,"q1":-89.369055,"
q3":30.83895725,"skew":1.1835228401749196,"std":78.93129196183042,"zero_rate":0.0}},{"alerts":[],"column":"country","extras":{"singletons":8,"top_values":[["United States of America",130],["Mexico",73],["Brazil",51],["Guadeloupe",48],["Australia",47],["South Africa",41],["Madagascar",40],["Puerto Rico",37],["Dominican Republic",16],["Panama",15],["Argentina",14],["Singapore",10],["Cayman Islands",10],["Antigua and Barbuda",10],["China",10],["Virgin Islands (U.S.)",8],["Kenya",8],["Hong Kong",6],["Costa Rica",5],["Sint Maarten (Dutch part)",4]]},"kind":"categorical","n":610,"n_null":0,"n_unique":35,"null_rate":0.0,"stats":{"cardinality":35,"entropy":3.9605893469796447,"entropy_ratio":0.7721526252101014,"top_rate":0.21311475409836064,"top_value":"United States of America"}},{"alerts":[],"column":"stateProvince","extras":{"singletons":45,"top_values":[["Texas",81],["Florida",46],["Pointe-\u00e0-Pitre",35],["Nayarit",33],["",30],["Sinaloa",26],["Queensland",24],["KwaZulu-Natal",17],["Santa Catarina",16],["Other",11],["Rio Grande do Sul",11],["Mpumalanga",10],["Mahajanga",10],["Fujian",10],["Cabo Rojo",9],["Limpopo",9],["Toliara",9],["New South Wales",8],["Fajardo",8],["Paran\u00e1",8]]},"kind":"categorical","n":610,"n_null":0,"n_unique":108,"null_rate":0.0,"stats":{"cardinality":108,"entropy":5.5303567492758905,"entropy_ratio":0.8187192973242882,"top_rate":0.13278688524590163,"top_value":"Texas"}},{"alerts":[{"code":"long_tail","level":"info","message":"17 singleton categories"}],"column":"locality","extras":{"singletons":17,"top_values":[["",563],["District de Soanierana Ivongo, Commune de Manompana, Fokontany de Vohijiny, Village d'Ambohitsara. For\u00eat littorale de Sahavalanina, au Sud-Est d'Ambohitsara.",3],["District Mahabo, Commune Analamisandy,Fokontany Soazato, For\u00eat d'Azohy. Collect\u00e9s avec: Ando, Tefy, C\u00e9cile, Jean Michel. 
\u00c9chantillon pr\u00e9serv\u00e9 en l'alcool.",3],["R\u00e9gion Vatovavy, Kianjavato, Ambodifandramanana, Ankarabo, vestige de for\u00eat au sud du Mt Vatovavy. Echantillons pr\u00e9serv\u00e9s en alcool, r\u00e9colt\u00e9s avec \u00e9quipe polisinala (Auguste, Jean Fr\u00e9d\u00e9ric).",3],["Antsiranana, SAVA, District de Voh\u00e9mar, Commune rurale d'Antsirabe-nord, Fokontany d'Andravinambo, foret d'Antsolatra Marojala Sokitra. Plantes pr\u00e9serv\u00e9es en alcool, r\u00e9colt\u00e9es avec Bezanaka Jean Honor\u00e9.",3],["R\u00e9gion Sofia, District de Mandritsara, commune rurale Marotandrano, fokontany Antsiatsiaka. Foret de Bezavona \u00e0 2 km \u00e0 l'Est du village d'Antsiatsiaka, foret humide sempervirente de moyenne altitude sur lat\u00e9rite. Avec Raharimanana Th\u00e9o, Ranaivoson Ernest, Marojery R\u00e9n\u00e9 chef FKT, Traravola, Rabemalaza Justin, Risy guides locaux.",3],["Distrit Sakaraha Commune Rurale Amboronabo Fokontany Mitia village Belambo Collect\u00e9 avec Mamomjy, Tariha, Rehary",3],["District Sakaraha, Commune rurale Amboronabo, Fokotany Mitia-Est. For\u00eat de Herea, au Nord  d'Analavelona, sur sable. Hameau le plus proche Belambo.",3],["District Vaingaindrano, Commnune Tsianofana, Fokontany Abaronga, localit\u00e9 Andasibe . For\u00eat humide de la nouvelle aire prot\u00e9g\u00e9e d' Agnakatrika. Collect\u00e9 avec Iakily Armand.",3],["Serra da Farinha-seca, encosta do Morro Sete.",2],["Serra da Graciosa. Encosta pr\u00f3xima ao Recanto Bela Vista.",2],["Est\u00e2ncia do Meio.",2],["UTM25_32T_0600_5150",1],["Parque Estadual da Serra da Baitaca, proximidades da Cachoeira do Samambaia.",1],["Parque Estadual da Serra da Baitaca,",1],["Ca. 700 m al sur de San Francisco de San Isidro, costado sur (del parqueo sur) de la escuela Golden Valley. 
Remanentes de bosque muy h\u00famedo, en cafetales, casas, potreros y finca de Hammel y P\u00e9rez por el R\u00edo Tures.",1],["Esta\u00e7\u00e3o de Tratamento de \u00c1gua Pira\u00ed (ETA Pira\u00ed)",1],["Alto Benedito.",1],["Comfloresta.",1],["S\u00edtio Barcelos. \u00c1rea de PRAD. Propriedade de Vilmar de Lima Barcelos.",1]]},"kind":"categorical","n":610,"n_null":0,"n_unique":29,"null_rate":0.0,"stats":{"cardinality":29,"entropy":0.7474767502888109,"entropy_ratio":0.15386572138477086,"top_rate":0.9229508196721311,"top_value":""}},{"alerts":[],"column":"basisOfRecord","extras":{"singletons":0,"top_values":[["HUMAN_OBSERVATION",550],["PRESERVED_SPECIMEN",60]]},"kind":"categorical","n":610,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.4637773498877516,"entropy_ratio":0.4637773498877516,"top_rate":0.9016393442622951,"top_value":"HUMAN_OBSERVATION"}},{"alerts":[],"column":"year","extras":{"histogram":{"counts":[7,0,0,0,70,0,0,0,0,71,0,0,0,0,79,0,0,0,0,75,0,0,0,308],"edges":[2021.0,2021.2083333333333,2021.4166666666667,2021.625,2021.8333333333333,2022.0416666666667,2022.25,2022.4583333333333,2022.6666666666667,2022.875,2023.0833333333333,2023.2916666666667,2023.5,2023.7083333333333,2023.9166666666667,2024.125,2024.3333333333333,2024.5416666666667,2024.75,2024.9583333333333,2025.1666666666667,2025.375,2025.5833333333333,2025.7916666666667,2026.0]},"sample":[2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,202
6.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2023.0,2
023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2021.0,2021.0,2021.0,2021.0,2021.0,2025.0,2024.0,2024.0,2024.0,2024.0]},"kind":"numeric","n":610,"n_null":0,"n_unique":6,"null_rate":0.0,"stats":{"iqr":2.0,"kurtosis":-0.79285948537134,"max":2026.0,"mean":2024.7524590163935,"median":2026.0,"min":2021.0,"n_outliers":0,"outlier_rate":0.0,"q1":2024.0,"q3":2026.0,"skew":-0.7968814306780531,"std":1.502937087000034,"zero_rate":0.0}},{"alerts":[],"column":"month","extras":{"histogram":{"counts":[342,0,18,0,24,0,24,0,21,0,17,0,0,42,0,23,0,15,0,24,0,29,0,30],"edges":[1.0,1.4583333333333333,1.9166666666666665,2.375,2.833333333333333,3.2916666666666665,3.75,4.208333333333333,4.666666666666666,5.125,5.583333333333333,6.041666666666666,6.5,6.958333333333333,7.416666666666666,7.875,8.333333333333332,8.791666666666666,9.25,9.708333333333332,10.166666666666666,10.625,11.083333333333332,11.541666666666666,12.0]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,9.0,9.0,9.0,10.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,6.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,9.0,9.0,9.0,9.0,9.0,9.0,10.0,10.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,9.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,2.0,3.0,3.0,8.0,2.0,4.0,4.0,4.0,5.0]},"kind":"numeric","n":610,"n_null":1,"n_unique":12,"null_rate":0.001639344262295082,"stats":{"iqr":6.0,"kurtosis":-0.5078264138526407,"max":12.0,"mean":3.7520525451559936,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rat
e":0.0,"q1":1.0,"q3":7.0,"skew":1.001634037248469,"std":3.750342075257268,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"22.6% null"},{"code":"high_skew","level":"info","message":"skew=+17.30"},{"code":"outliers","level":"warn","message":"19.3% rows beyond 1.5 IQR"}],"column":"coordinateUncertainty","extras":{"histogram":{"counts":[467,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,36520.80952380953,73040.61904761905,109560.42857142858,146080.2380952381,182600.04761904763,219119.85714285716,255639.6666666667,292159.4761904762,328679.28571428574,365199.09523809527,401718.9047619048,438238.7142857143,474758.52380952385,511278.3333333334,547798.142857143,584317.9523809524,620837.7619047619,657357.5714285715,693877.3809523811,730397.1904761905,766917.0]},"sample":[1.0,1.0,1.0,1.0,7000.0,1.0,1.0,7000.0,1.0,1.0,7000.0,7000.0,7000.0,7000.0,1.0,7000.0,7000.0,1.0,1.0,7000.0,7000.0,7000.0,7000.0,7000.0,7000.0,1.0,7000.0,7000.0,7000.0,7000.0,25.0,46.0,616.0,34.0,26.0,25.0,616.0,18.0,258.0,2.0,260.0,46.0,12.0,29656.0,192.0,79.0,1480.0,13.0,134.0,2.0,616.0,15.0,616.0,4.0,2.0,262.0,5.0,4.0,68.0,4.0,398.0,71.0,262.0,122.0,7.0,8.0,173.0,8.0,616.0,853.0,27.0,2.0,336.0,2.0,15.0,156.0,212.0,39.0,39.0,38.0,2.0,51.0,2.0,2.0,14.0,7.0,38.0,1056.0,156.0,79.0,2.0,3.0,8.0,61.0,234.0,207.0,2.0,444.0,616.0,28902.0,181.0,211.0,4.0,4026.0,616.0,79.0,39.0,17.0,100.0,4.0,276.0,10.0,616.0,4.0,46.0,29656.0,212.0,61.0,616.0,8.0,2.0,616.0,4.0,186.0,616.0,260.0,29656.0,14415.0,46.0,1.0,2.0,30278.0,1942.0,46.0,15.0,262.0,9610.0,2250.0,29656.0,264.0,1.0,203.0,46.0,46.0,5.0,213.0,616.0,211.0,1942.0,15.0,2.0,4.0,299.0,38.0,186.0,616.0,79.0,38.0,30278.0,616.0,31.0,2.0,8.0,11.0,169.0,31.0,31.0,260.0,9.0,156.0,5.0,30278.0,10.0,186.0,186.0,4.0,122.0,1.0,46.0,122.0,5.0,2.0,122.0,79.0,2.0,10.0,8.0,100.0,29775.0,93.0,29656.0,143.0,46.0,4.0,198.0,4.0,2582.0,15.0,4.0,1.0,11.0,8.0,616.0,6.0,28902.0,1.0,199.0,23.0,108.0,1.0,213.0,212.0,616.0,8.0,244.0,40.0,5.0,186.0,61.0,90.0,213.0,
186.0,6.0,7.0,61.0,55.0,1.0,647.0,52.0,30.0,186.0,9.0,157.0,14.0,38.0,15.0,4.0,212.0,29656.0,8.0,448.0,5.0,264.0,9.0,5.0,2.0,4.0,4.0,3.0,4.0,6.0,6.0,4.0,821.0,30545.0,8.0,7.0,3.0,5.0,9.0,3.0,4.0,22.0,15.0,4.0,29458.0,435.0,29458.0,30069.0,6.0,10.0,3.0,6.0,30090.0,13.0,4.0,30666.0,158.0,30580.0,1510.0,227.0,30237.0,3.0,3.0,4.0,5.0,7.0,3.0,3020.0,5.0,3.0,3.0,4.0,30237.0,5.0,9.0,10.0,9.0,30826.0,8.0,7.0,8.0,1000.0,5.0,5.0,5.0,6.0,9.0,6.0,9.0,4.0,3.0,3.0,5.0,4.0,5.0,5.0,5.0,5.0,64.0,1096.0,1096.0,9.0,208.0,200.0,29433.0,4.0,29433.0,11.0,3.0,9.0,400.0,3.0,30196.0,208.0,208.0,3.0,313.0,30217.0,29433.0,3.0,22.0,523.0,28301.0,4.0,6.0,4.0,4.0,3.0,6.0,30133.0,9204.0,31324.0,29533.0,30196.0,30196.0,4.0,30196.0,30196.0,24.0,30.0,29958.0,31236.0,277.0,5.0,9.0,4.0,8.0,115.0,9.0,12.0,13.0,25.0,5.0,3.0,166.0,25.0,32.0,7.0,29868.0,22.0,5.0,29958.0,8.0,5.0,4.0,1485.0,8.0,8.0,4.0,30237.0,1099.0,5.0,8.0,1145.0,4.0,215.0,29631.0,210.0,6.0,30683.0,4.0,30683.0,30683.0,6.0,130.0,7.0,30003.0,9.0,4.0,50.0,50.0,1.0,6.0,8.0,1.0,63.0,41.0,30003.0,30003.0,65.0,10.0,4.0,8.0,4.0,5.0,4.0,5.0,1.0,29775.0,6.0,29775.0,29775.0,8.0,5.0,35.0,9.0,173.0,3.0,9.0,5.0,22.0,30666.0,35.0,4.0,20.0,20.0,9.0,5.0,4.0,30666.0,176.0,10519.0,10519.0,3.0,25.0,25.0,4.0,61.0,82.0,50.0,50.0,123.0,29981.0,56639.0,98400.0,5489.0,222343.0,69815.0,766917.0,202.0,606.0]},"kind":"numeric","n":610,"n_null":138,"n_unique":151,"null_rate":0.2262295081967213,"stats":{"iqr":461.75,"kurtosis":335.68988122973855,"max":766917.0,"mean":6463.375,"median":35.0,"min":1.0,"n_outliers":91,"outlier_rate":0.19279661016949154,"q1":5.0,"q3":466.75,"skew":17.304668453272807,"std":38136.06292733861,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"610 singleton 
categories"}],"column":"gbifID","extras":{"singletons":610,"top_values":[["5937748304",1],["5937748308",1],["5937748309",1],["5937748312",1],["5937748316",1],["5937748322",1],["5937748325",1],["5937748327",1],["5937748329",1],["5937748333",1],["5937748335",1],["5937748336",1],["5937748338",1],["5937748342",1],["5937748344",1],["5937748350",1],["5937748352",1],["5937748353",1],["5937748363",1],["5937748369",1]]},"kind":"categorical","n":610,"n_null":0,"n_unique":610,"null_rate":0.0,"stats":{"cardinality":610,"entropy":9.252665432450247,"entropy_ratio":0.9999999999999998,"top_rate":0.001639344262295082,"top_value":"5937748304"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.family.top_values","columns.country.top_values","columns.basisOfRecord.top_values","columns.scientificName.top_values","columns.coordinateUncertainty.stats","columns.year.stats"],"featured_charts":[{"caption":"Shows the surprising 50/50 split between Hesperiidae and Canellaceae that defines the dataset.","column":"family","kind":"donut"},{"caption":"Top countries reveal an Americas-heavy footprint led by the USA, Mexico, and Brazil.","column":"country","kind":"bar"},{"caption":"Confirms ~90% of records are human observations rather than preserved specimens.","column":"basisOfRecord","kind":"donut"},{"caption":"Extreme right skew and huge max value flag spatial-precision outliers to filter before mapping.","column":"coordinateUncertainty","kind":"histogram"},{"caption":"Highlights seasonal collection bias, with activity concentrated in the first half of the year.","column":"month","kind":"histogram"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset holds 610 GBIF biodiversity occurrence records across 14 columns, mixing taxonomy (family, genus, species), geography (country, stateProvince, latitude/longitude), and observation metadata (basisOfRecord, year, month, coordinateUncertainty). 
Despite the 'carnivorous_plants' filename, the taxonomy is dominated by two unrelated families \u2014 Hesperiidae (skipper butterflies) and Canellaceae \u2014 each with 300 records, plus a small Araceae tail; this taxonomic split is the first thing worth investigating. Geographically, records skew to the Americas (USA 130, Mexico 73, Brazil 51) but span 35 countries, and 90% are HUMAN_OBSERVATION rather than preserved specimens. Watch coordinateUncertainty closely: it is highly skewed (skew 17.3) with a max of 766,917 m and 22.6% nulls, so any spatial analysis needs filtering. Years are tightly clustered in 2021\u20132026, indicating a recent-only snapshot.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic binomials with authorship \u2014 almost certainly biodiversity occurrence records keyed by Linnaean scientific name. The distribution is heavily concentrated: 157 distinct taxa across 610 rows, with Canella winterana alone claiming 28.5% (174 records) and a long tail flagged by the profiler. Notably the names mix plants (Canella, Warburgia, Cinnamodendron, Pinellia) with butterflies (Hylephila, Ocybadistes, Urbanus), so this column spans multiple kingdoms rather than a single clade.","role":"label","scope":"column","target":"scientificName","treatment":"Group rare taxa into an 'other' bucket or join to a taxonomy table before using as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical taxonomic labels \u2014 mostly Linnaean binomials (e.g. 
Canella winterana, Warburgia salutaris) with a few family-level names mixed in (Droseraceae, Sarraceniaceae), suggesting inconsistent taxonomic granularity. One species, Canella winterana, dominates at 28.5% of 610 rows, yet 123 distinct values and an entropy ratio of 0.74 indicate a long tail. The mix of plant genera (Cinnamodendron, Cinnamosma) and butterfly/skipper species (Hylephila phyleus, Ocybadistes walkeri, Urbanus dorantes) is unusual for a single 'species' column.","role":"label","scope":"column","target":"species","treatment":"Normalise to a consistent taxonomic rank before grouping; consider collapsing rare classes or target-encoding given the 123-way cardinality."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical genus name with 94 distinct values across 610 rows and no nulls. The distribution is heavy-tailed: 'Canella' alone accounts for 28.5% (174 records), and the top four values appear to be plant genera (Canella, Warburgia, Cinnamosma, Cinnamodendron) while subsequent entries (Urbanus, Hylephila, Burnsius, Pyrgus) are butterfly/skipper genera, suggesting the column mixes taxa from different kingdoms. Entropy ratio of 0.74 reflects moderate concentration around the dominant genus.","role":"feature","scope":"column","target":"genus","treatment":"Group rare genera into an 'other' bucket and one-hot or target-encode before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical column holding taxonomic family labels across 610 rows with only 3 distinct values and no nulls. 
The distribution is essentially bimodal \u2014 Hesperiidae and Canellaceae each appear 300 times (top_rate 0.492) while Araceae appears just 10 times \u2014 and notably mixes an animal family (Hesperiidae, skipper butterflies) with two plant families, which is an unusual cross-kingdom blend.","role":"label","scope":"column","target":"family","treatment":"One-hot encode; consider merging or stratifying given the rare Araceae class (10/610)."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","null_rate","stats.min","stats.max","stats.median","stats.iqr","stats.kurtosis","stats.skew","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds geographic latitudes in decimal degrees, ranging from -43.245933 to 46.704735 with a median of 17.008014. The wide IQR of 47.748 and bimodal-leaning kurtosis of -1.28 suggest observations are spread across both hemispheres rather than clustered in one region. With 466 unique values across 610 rows and no nulls or outliers, coverage is clean but globally dispersed.","role":"feature","scope":"column","target":"latitude","treatment":"Pair with longitude for geospatial features; avoid treating as a plain scalar in models."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","n","n_unique","null_rate","q1","q3"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic longitude in decimal degrees, spanning -115.04 to 153.39 across 610 rows with no nulls and 467 unique values. The distribution is right-skewed (1.18) with a median of -63.06 sitting well below the mean of -32.94, suggesting a concentration of points in the Western Hemisphere with a long tail reaching into the Eastern Hemisphere. 
No outliers flagged, consistent with valid lon bounds.","role":"feature","scope":"column","target":"longitude","treatment":"Pair with latitude for geospatial features; avoid treating as a standalone scalar in linear models."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Country of origin or observation, with 35 distinct values across 610 complete rows. The distribution is moderately concentrated: United States of America leads at 21.3% (130 rows), followed by Mexico (73) and Brazil (51), and the entropy ratio of 0.77 indicates a fairly diverse but US-tilted mix. Notable is the prominence of small territories like Guadeloupe (48) and Puerto Rico (37) ranking above larger nations, suggesting a tropical/Americas sampling bias rather than a global population sample.","role":"feature","scope":"column","target":"country","treatment":"One-hot encode top values and bucket the long tail into 'Other' before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Holds state or province names for 610 records spanning 108 distinct values across multiple countries (Texas, Florida, Nayarit, Queensland, KwaZulu-Natal). The mix is uneven: Texas alone covers 13.3% of rows, and the categories blend US states, Mexican states, Brazilian states, and a French city ('Pointe-\u00e0-Pitre'), suggesting inconsistent administrative granularity. 
30 rows carry an empty-string value that null_rate=0 does not flag, and an explicit 'Other' bucket appears 11 times.","role":"feature","scope":"column","target":"stateProvince","treatment":"Normalise empty strings to null and group rare levels before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Free-text locality descriptions for specimen records, mostly in French with Malagasy place names (districts, communes, fokontany in Madagascar). 563 of 610 rows (top_rate 0.923) are empty strings, so the field is effectively blank for the vast majority of records, and the remaining 29 unique values are long sentence-length descriptions rather than controlled vocabulary. Entropy ratio of 0.154 confirms the distribution is dominated by the empty value.","role":"free_text","scope":"column","target":"locality","treatment":"Treat empty string as missing and parse remaining entries with NER or regex to extract administrative units before use."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical provenance flag from a biodiversity occurrence record (GBIF-style basisOfRecord), with only two values present out of the wider controlled vocabulary. HUMAN_OBSERVATION dominates at 550/610 (90.2%), with PRESERVED_SPECIMEN making up the remaining 60; no nulls. 
Entropy ratio 0.46 confirms the heavy imbalance.","role":"metadata","scope":"column","target":"basisOfRecord","treatment":"Keep as a binary indicator (e.g., is_specimen) for stratification or filtering."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.median","stats.q1","stats.q3","stats.skew"],"model":"anthropic:claude-opus-4-7","narrative":"Calendar year of the record, spanning only 2021 to 2026 across 610 rows with 6 distinct values. The distribution is left-skewed (skew -0.80) and concentrated at the recent end: median and Q3 both sit at 2026, with Q1 at 2024.","role":"timestamp","scope":"column","target":"year","treatment":"Treat as an ordinal time bucket; consider one-hot or year-since-min rather than raw integer."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","stats.min","stats.max","stats.median","stats.q1","stats.q3","stats.skew","null_rate","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"Integer values bounded between 1 and 12 with 12 unique levels strongly indicate a calendar month index. The distribution is heavily front-loaded: the median is 1.0 and Q3 is only 7.0, so at least half the rows fall in January and the skew of 1.00 confirms a long tail toward year-end months. Nulls are negligible (0.16%) and no outliers are flagged.","role":"feature","scope":"column","target":"month","treatment":"Treat as a cyclical categorical (one-hot or sin/cos encode) rather than a raw numeric."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.median","stats.mean","stats.max","stats.skew","stats.kurtosis","stats.outlier_rate","stats.q1","stats.q3"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric coordinate uncertainty values, almost certainly meters of GPS/locality error attached to occurrence records. 
The distribution is severely right-skewed (skew 17.3, kurtosis 335.7): the median is 35 but the mean is 6463 and the max reaches 766917, with 19.3% of values flagged as outliers. Roughly 22.6% of rows are null, so coverage is partial.","role":"feature","scope":"column","target":"coordinateUncertainty","treatment":"Log-transform and impute missing values before using as a quality filter or feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is the GBIF occurrence identifier: every one of the 610 rows carries a unique numeric ID (n_unique=610, top_rate=0.0016, entropy_ratio\u22481.0) with no nulls. The top values cluster tightly in the 5937748304\u20135937748369 range, suggesting the records were ingested in a single contiguous GBIF batch rather than sampled across time.","role":"identifier","scope":"column","target":"gbifID","treatment":"Keep as a primary key for joins back to GBIF; drop from any model features."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4703,"prompt_tokens":17433,"total_tokens":22136}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:28:58+00:00","mode":"full","row_count":610,"sampled_rows":610,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/carnivorous_plants_real.json"},"notes":[],"saturn_version":"0.2.0","schema":{"basisOfRecord":"categorical","coordinateUncertainty":"numeric","country":"categorical","family":"categorical","gbifID":"categorical","genus":"categorical","latitude":"numeric","locality":"categorical","longitude":"numeric","month":"numeric","scientificName":"categorical","species":"categorical","stateProvince":"categorical","year":"numeric"}}
