{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"81 singleton categories"}],"column":"scientificName","extras":{"singletons":81,"top_values":[["Canella winterana (L.) Gaertn.",174],["Warburgia salutaris (Bertol.fil.) Chiov.",35],["Cinnamodendron dinisii Schwacke",20],["Hylephila phyleus (Drury, 1773)",18],["Cinnamosma Baill.",17],["Cinnamosma fragrans Baill.",14],["Ocybadistes walkeri Heron, 1894",11],["Cinnamodendron occhionianum F.Barros & J.Salazar",10],["Pinellia fujianensis H.Li & G.H.Zhu",10],["Urbanus proteus (Linnaeus, 1758)",8],["Cinnamosma madagascariensis Danguy",8],["Warburgia ugandensis Sprague",8],["Lerodea eufala (Edwards, 1869)",7],["Burnsius albezens Grishin, 2022",7],["Lerema Scudder, 1872",7],["Quasimellana eulogius (Pl\u00f6tz, 1882)",6],["Spicauda procne (Pl\u00f6tz, 1880)",6],["Burnsius oileus (Linnaeus, 1767)",5],["Burnsius orcynoides",5],["Cephrenes augiades (Felder, 1860)",5]]},"kind":"categorical","n":610,"n_null":0,"n_unique":157,"null_rate":0.0,"stats":{"cardinality":157,"entropy":5.516685902166702,"entropy_ratio":0.7562676788926866,"top_rate":0.28524590163934427,"top_value":"Canella winterana (L.) Gaertn."}},{"alerts":[],"column":"species","extras":{"singletons":58,"top_values":[["Canella winterana",174],["Droseraceae",38],["Warburgia salutaris",35],["Cinnamodendron dinisii",20],["Hylephila phyleus",19],["Sarraceniaceae",19],["Cinnamosma fragrans",14],["Ocybadistes walkeri",11],["Urbanus dorantes",10],["Cinnamodendron occhionianum",10],["Pinellia fujianensis",10],["Pyrgus oileus",9],["Cinnamosma madagascariensis",9],["Mellana eulogius",8],["Urbanus proteus",8],["Warburgia ugandensis",8],["Urbanus procne",7],["Lerodea eufala",7],["Burnsius albezens",7],["Gorgythion begga",6]]},"kind":"categorical","n":610,"n_null":0,"n_unique":123,"null_rate":0.0,"stats":{"cardinality":123,"entropy":5.143644338788545,"entropy_ratio":0.7408906866284185,"top_rate":0.28524590163934427,"top_value":"Canella winterana"}},{"alerts":[],"column":"genus","extras":{"singletons":30,"top_values":[["Canella",174],["Warburgia",43],["Cinnamosma",40],["Cinnamodendron",34],["Urbanus",25],["Hylephila",19],["Burnsius",16],["Pyrgus",11],["Lerema",11],["Ocybadistes",11],["Pinellia",10],["Mellana",8],["Trapezites",8],["Heliopetes",7],["Lerodea",7],["Toxidia",7],["Pleodendron",7],["Staphylus",6],["Gorgythion",6],["Polites",6]]},"kind":"categorical","n":610,"n_null":0,"n_unique":94,"null_rate":0.0,"stats":{"cardinality":94,"entropy":4.8396700345516415,"entropy_ratio":0.7383636325736792,"top_rate":0.28524590163934427,"top_value":"Canella"}},{"alerts":[],"column":"family","extras":{"singletons":0,"top_values":[["Hesperiidae",300],["Canellaceae",300],["Araceae",10]]},"kind":"categorical","n":610,"n_null":0,"n_unique":3,"null_rate":0.0,"stats":{"cardinality":3,"entropy":1.1042875713905729,"entropy_ratio":0.6967278852894774,"top_rate":0.4918032786885246,"top_value":"Hesperiidae"}},{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[1,7,22,38,65,32,12,12,6,2,1,18,3,17,9,50,98,49,145,20,2,0,0,1],"edges":[-43.245933,-39.4979885,-35.750044,-32.0020995,-28.254154999999997,-24.506210499999998,-20.758266,-17.010321499999996,-13.262376999999997,-9.514432499999998,-5.766487999999995,-2.0185434999999927,1.7294010000000029,5.4773455000000055,9.225290000000008,12.973234500000004,16.721179000000006,20.46912350000001,24.217068000000005,27.965012500000007,31.71295700000001,35.46090150000001,39.208846000000015,42.956790500000004,46.704735]},"sample":[26.179819,8.621311,8.640193,8.640193,8.621311,26.179819,26.179819,-31.549801,20.804663,26.179819,26.179819,20.804663,-31.549801,26.179819,20.802768,26.179819,20.804663,20.804663,20.804663,20.802768,26.179819,20.804663,20.804663,-31.549801,26.179819,26.179819,8.845343,-31.549801,8.845343,20.804663,20.802768,20.802768,20.804663,20.804663,8.845343,8.845343,8.664558,8.664558,20.804663,20.804663,20.804663,20.804663,46.704735,30.050126,8.364038,8.364038,8.364038,26.481879,28.074026,24.825869,26.179835,26.177018,24.825869,-28.16653,-27.447433,24.81461,19.147782,28.074026,20.762597,-28.121749,-31.380579,20.649369,-2.755168,27.525813,26.179779,24.825869,26.179665,25.853647,24.826057,-34.96593,26.178625,28.035772,26.177985,-27.377124,-27.211439,-37.487088,-6.193826,27.530875,29.898853,27.859713,-28.220045,-12.712856,24.825869,28.003759,27.83223,-19.874459,26.179684,-25.156586,26.176562,24.816513,-29.714013,20.802166,1.334624,22.00364,26.179835,-33.365438,-34.96595,22.232548,20.800117,-34.937613,26.179835,5.413346,20.802166,24.815698,26.178806,-33.899453,-29.611723,24.825025,12.891503,-31.536625,26.177014,24.825869,-33.903937,25.896378,32.253425,24.779719,24.825869,26.179835,-23.40968,-38.167207,30.281394,-26.678298,24.816376,30.320698,26.307988,24.825869,-28.019146,28.074026,-28.1845,1.334624,-29.613833,24.825869,-28.780412,26.179808,32.756558,26.179835,24.825869,19.147782,-28.04139,29.560374,28.074026,26.176526,26.176539,22.485156,19.421646,28.074026,22.221733,26.178625,27.795537,1.320859,-28.154699,4.242971,26.176577,28.074026,-37.3301,24.825869,25.996579,10.687131,19.421646,24.816758,-28.829093,-37.831795,26.481879,26.179835,24.825869,26.179835,26.179835,22.518403,24.825869,-12.713027,-32.885742,-34.907842,-29.71403,-19.606964,19.147782,29.691014,27.0653,20.802166,22.574301,20.800108,26.179835,24.826719,29.075112,-29.596718,-37.664092,24.816517,-29.632672,26.179835,-37.677609,-29.59632,-34.97092,12.959028,-27.083727,26.179835,-28.076625,10.685789,27.162752,28.074026,-28.019145,-34.628714,26.179889,27.2263,26.176813,-27.377205,-27.485914,27.035342,26.177956,1.339805,-33.974328,27.669195,-34.614416,20.675943,29.075102,-27.485856,1.334624,1.334624,24.825869,26.176796,-28.82337,20.820584,-33.912887,26.1792,26.179835,1.334624,26.179835,-31.380563,-28.204655,26.176549,-33.982016,21.513941,26.179835,-27.974187,-33.877167,26.179748,27.861513,1.334624,-28.161847,26.176515,22.00351,-25.156212,-33.912888,5.394318,25.194863,16.242086,25.598732,18.046303,19.720466,25.473693,25.598758,19.403508,19.4036,19.403573,17.936894,-16.318108,17.976625,17.996704,16.472114,25.17987,16.333939,16.339855,25.174378,-29.615876,18.352243,-29.740272,-24.47576,25.176172,-17.29501,-17.315892,-17.294405,-25.381944,-24.30284,25.558868,16.359793,18.31114,25.599225,19.285562,16.243411,16.277479,-22.936497,-20.595523,17.944525,9.998,-26.247665,16.41105,16.399014,16.403318,16.40414,16.244878,16.408485,-26.64,16.32399,-22.951844,-26.187222,16.482446,16.394923,16.253388,25.190633,-17.314728,16.48912,25.182286,25.031644,16.235451,16.253503,16.254015,16.254015,-16.775556,-16.775556,19.721045,18.361462,18.370675,18.370045,-27.223323,17.005077,19.27572,17.005983,17.007669,17.000375,9.48,9.48,16.241928,17.976337,-29.990143,18.041586,-29.891904,16.341862,17.005186,17.007984,18.351419,17.008044,-23.230711,17.976337,17.976337,18.505963,18.341092,-23.189398,-29.818659,18.390567,17.934501,-1.500385,-1.482132,-20.743055,-20.743055,18.043981,19.842773,17.979033,18.329955,18.353547,19.870453,19.842617,19.843152,19.843152,-23.914418,19.778516,-21.404444,-21.404444,7.274151,-29.020732,-23.225633,-23.222212,19.842347,-23.264418,17.158672,17.157071,-25.432485,16.510281,24.901808,-1.284683,19.328337,-0.758683,16.779631,-12.013812,-21.807222,16.441074,-24.84902,18.358599,18.35348,-24.81212,25.174345,-26.318465,17.116414,-25.46056,25.253984,25.082691,19.735511,25.135936,19.735534,18.372312,16.242251,18.028489,16.414783,-25.655798,-22.879271,8.66,18.042642,25.175473,18.238062,18.379792,24.549805,-14.110778,-14.110778,-14.110778,-28.324044,19.288679,-25.69182,18.129318,18.04002,18.009385,18.048152,-16.278889,-16.278889,-16.278889,-33.966344,17.986613,16.267364,16.255,16.267442,25.117507,-29.770023,-29.853557,-12.36694,-13.0986,-15.572397,19.27524,-24.931442,-22.543889,-22.54625,-22.543889,-22.54625,-22.54625,-22.543889,25.753603,25.753512,25.090486,25.19313,-1.099128,25.063855,17.972928,25.277455,25.144483,25.277478,-25.618266,-27.062548,-27.108571,-27.031285,-30.02945,18.312011,16.231592,25.470203,16.782185,19.662051,16.25469,16.340913,16.782183,16.250417,18.369567,16.511455,-25.350556,-25.350556,-25.350556,-25.350556,-25.350556,-25.350556,-17.068878,-25.461944,-25.46194,21.094488,21.09619,-26.278889,-26.731944,-26.731944,-26.278889,-26.278889,-25.529891,-26.897778,19.31372,16.483017,18.371343,16.32459,16.32459,-29.949774,16.483017,-24.59445,-24.59423,17.974925,17.973438,17.684439,-1.063136,-28.350278,-28.350278,-23.189182,25.455462,25.454141,24.573684,-23.439167,-23.439167,25.777421,26.169487,26.280894,26.268276,26.25502]},"kind":"numeric","n":610,"n_null":0,"n_unique":466,"null_rate":0.0,"stats":{"iqr":47.7480595,"kurtosis":-1.2828894787378404,"max":46.704735,"mean":5.199974558196722,"median":17.008014000000003,"min":-43.245933,"n_outliers":0,"outlier_rate":0.0,"q1":-22.9221905,"q3":24.825869,"skew":-0.6516593385910574,"std":22.7455645654173,"zero_rate":0.0}},{"alerts":[],"column":"longitude","extras":{"histogram":{"counts":[61,91,16,84,128,44,12,0,0,0,0,3,12,38,40,0,0,2,0,14,17,1,6,41],"edges":[-115.044309,-103.85949029166666,-92.67467158333334,-81.489852875,-70.30503416666667,-59.120215458333334,-47.935396749999995,-36.75057804166667,-25.565759333333332,-14.380940624999994,-3.1961219166666694,7.988696791666669,19.173515500000008,30.358334208333332,41.543152916666656,52.72797162500001,63.912790333333334,75.09760904166666,86.28242775000001,97.46724645833334,108.65206516666666,119.83688387500001,131.02170258333334,142.20652129166666,153.39134]},"sample":[-98.366518,-80.139711,-80.128853,-80.128853,-80.139711,-98.366518,-98.366518,-61.070202,-105.464931,-98.366518,-98.366518,-105.464931,-61.070202,-98.366518,-105.464906,-98.366518,-105.464931,-105.464931,-105.464931,-105.464906,-98.366518,-105.464931,-105.464931,-61.070202,-98.366518,-98.366518,-79.854536,-61.070202,-79.854536,-105.464931,-105.464906,-105.464906,-105.464931,-105.464931,-79.854536,-79.854536,-80.201424,-80.201424,-105.464931,-105.464931,-105.464931,-105.464931,10.521621,-99.148571,-80.273465,-80.273465,-80.273465,-81.178253,-97.041326,-107.386056,-98.366493,-98.365221,-107.386056,153.378417,152.948002,-107.357434,-96.170749,-97.041326,-105.355147,153.296383,-58.030776,-100.407058,-78.746653,-97.883522,-98.366254,-107.386056,-98.366191,-80.194197,-107.385551,138.694108,-98.366464,-82.78318,-98.36571,152.907233,153.068481,145.419561,106.880498,-97.841217,-96.336188,-82.72409,31.948183,143.288607,-107.386056,-97.061342,-97.378772,-43.996013,-98.365843,30.542855,-98.365877,-107.360741,-53.738545,-105.464973,103.817803,-98.778952,-98.366493,151.474595,138.694092,113.861083,-105.465858,138.648178,-98.366493,100.782676,-105.464973,-107.35948,-98.36584,151.240617,-53.69346,-80.815525,77.592306,-61.063094,-98.365315,-107.386056,151.080964,-97.488067,-110.796325,-107.362466,-107.386056,-98.366493,-47.35743,144.344189,-98.068091,28.417985,-107.358348,-89.936305,-98.169937,-107.386056,153.322926,-97.041326,153.293845,103.817803,-53.690817,-107.386056,32.087622,-98.366245,-97.25846,-98.366493,-107.386056,-96.170749,153.2223,-98.226343,-97.041326,-98.36583,-98.36584,114.086229,-96.322208,-97.041326,113.916097,-98.366464,-115.044309,103.769375,153.256742,101.322601,-98.365753,-97.041326,144.594208,-107.386056,-97.567558,-61.757618,-96.322208,-107.360841,151.93876,145.161942,-81.178253,-98.366493,-107.386056,-98.366493,-98.366493,114.365529,-107.386056,143.29832,151.64688,138.649018,-53.738542,-46.195667,-96.170749,-81.470794,-82.338038,-105.464973,114.287237,-105.46589,-98.366493,-107.38439,-97.653529,-53.711429,148.365892,-107.360307,-53.611348,-98.366493,148.417999,-53.736237,138.70987,77.524142,152.996957,-98.366493,153.39134,-61.756036,-80.440158,-97.041326,153.322772,-58.609075,-98.366101,-82.266607,-98.365844,152.907212,152.954161,-82.416259,-98.365694,103.820587,151.001888,-97.350443,-58.476618,-87.043645,-97.653529,152.954027,103.817803,103.817803,-107.386056,-98.365204,151.984467,-105.464092,151.182322,-98.36631,-98.366493,103.817803,-98.366493,-58.030727,153.256105,-98.365832,22.607134,-87.667068,-98.366493,153.195847,151.222838,-98.366261,-80.448075,103.817803,153.365129,-98.365577,-98.779028,30.542165,151.18232,100.259073,-80.874222,-61.313876,-80.315696,-66.69737,-79.788632,-80.18732,-80.315705,-70.76923,-70.772903,-70.773505,-67.193009,46.809548,-66.857788,-67.190212,-61.419891,-80.362838,-61.369203,-61.374054,-80.923058,31.071189,-64.768574,31.02554,30.703825,-80.904105,45.92816,45.914649,45.928753,-49.011389,31.239641,-80.451903,-61.398204,-66.013028,-80.315226,-69.303125,-61.175858,-61.246743,30.587292,46.564227,-66.964937,-84.0678,-48.981268,-61.402031,-61.405249,-61.405352,-61.40616,-61.176353,-61.402612,-49.5,-61.298364,-43.279646,-49.140833,-61.503205,-61.525695,-61.467537,-80.874157,45.852547,-61.440749,-80.875398,-80.502197,-61.414507,-61.202008,-61.19918,-61.234006,49.663611,49.663611,-79.788063,-64.924142,-65.651642,-65.651458,-50.462287,-61.768082,-81.380538,-61.765823,-61.773903,-61.746994,-84.04,-84.04,-61.38837,-67.210727,31.197147,-63.071664,31.099706,-61.524778,-61.767986,-61.774211,-64.769835,-61.774196,28.90127,-67.210727,-67.210727,-67.139405,-64.783354,29.033478,30.910679,-65.593413,-67.191656,35.350804,35.12999,45.535278,45.535278,-67.202055,-71.406185,-67.159836,-68.8106,-64.767708,-71.40674,-71.405997,-71.406088,-71.406088,29.629134,-71.266734,47.949722,47.949722,38.691743,31.478708,28.974839,28.836087,-71.40611,28.972834,-61.756172,-61.758021,30.708625,-61.468301,-80.700221,36.811073,-81.103247,36.438018,-62.171217,49.300048,-41.641944,-61.467735,46.77992,-64.927246,-64.767753,46.77883,-80.92308,32.075091,-62.55845,31.541192,-80.313165,-80.440153,-79.737044,-80.397683,-79.737313,-65.639731,-61.325146,-63.092639,-61.421478,28.335765,-43.275835,-83.57,-66.748292,-80.904818,-62.986667,-65.620512,-81.797208,49.964306,49.964306,49.964306,31.936397,-70.924514,27.979859,-66.703806,-66.744995,-66.191952,-66.654481,48.889722,48.889722,48.889722,18.45305,-66.708002,-61.24077,-61.235147,-61.240505,-80.988618,30.984989,30.871719,49.38556,49.2369,46.419437,-81.315193,31.579219,44.227222,44.227556,44.227222,44.227556,44.227556,44.227222,-80.378038,-80.378497,-80.888484,-80.948371,38.023118,-77.311051,-66.865814,-80.296622,-80.392167,-80.297797,27.655584,32.559378,32.507789,32.485324,30.883078,-70.920022,-61.39723,-80.509652,-62.17816,-80.100652,-61.224419,-61.525762,-62.178158,-61.189036,-65.796824,-61.465347,-48.889722,-48.889444,-48.889722,-48.889722,-48.889444,-48.889722,45.982445,-51.659722,-51.66,-75.888797,-75.89149,-50.495555,-51.118889,-51.118889,-50.495556,-50.495555,30.961432,-51.771389,-81.168305,-61.445189,-65.640388,-61.47181,-61.47181,30.895196,-61.445189,46.73744,46.73945,-66.873153,-67.03472,-64.899198,35.220302,-50.221389,-50.221389,-44.979978,-80.191673,-80.195897,-81.749457,47.503611,47.503611,118.964015,118.19049,119.167681,119.070843,119.158688]},"kind":"numeric","n":610,"n_null":0,"n_unique":467,"null_rate":0.0,"stats":{"iqr":120.20801225,"kurtosis":0.08440247754095731,"max":153.39134,"mean":-32.93522615934426,"median":-63.056028,"min":-115.044309,"n_outliers":0,"outlier_rate":0.0,"q1":-89.369055,"q3":30.83895725,"skew":1.1835228401749196,"std":78.93129196183042,"zero_rate":0.0}},{"alerts":[],"column":"country","extras":{"singletons":8,"top_values":[["United States of America",130],["Mexico",73],["Brazil",51],["Guadeloupe",48],["Australia",47],["South Africa",41],["Madagascar",40],["Puerto Rico",37],["Dominican Republic",16],["Panama",15],["Argentina",14],["Singapore",10],["Cayman Islands",10],["Antigua and Barbuda",10],["China",10],["Virgin Islands (U.S.)",8],["Kenya",8],["Hong Kong",6],["Costa Rica",5],["Sint Maarten (Dutch part)",4]]},"kind":"categorical","n":610,"n_null":0,"n_unique":35,"null_rate":0.0,"stats":{"cardinality":35,"entropy":3.9605893469796447,"entropy_ratio":0.7721526252101014,"top_rate":0.21311475409836064,"top_value":"United States of America"}},{"alerts":[],"column":"stateProvince","extras":{"singletons":45,"top_values":[["Texas",81],["Florida",46],["Pointe-\u00e0-Pitre",35],["Nayarit",33],["",30],["Sinaloa",26],["Queensland",24],["KwaZulu-Natal",17],["Santa Catarina",16],["Other",11],["Rio Grande do Sul",11],["Mpumalanga",10],["Mahajanga",10],["Fujian",10],["Cabo Rojo",9],["Limpopo",9],["Toliara",9],["New South Wales",8],["Fajardo",8],["Paran\u00e1",8]]},"kind":"categorical","n":610,"n_null":0,"n_unique":108,"null_rate":0.0,"stats":{"cardinality":108,"entropy":5.5303567492758905,"entropy_ratio":0.8187192973242882,"top_rate":0.13278688524590163,"top_value":"Texas"}},{"alerts":[{"code":"long_tail","level":"info","message":"17 singleton categories"}],"column":"locality","extras":{"singletons":17,"top_values":[["",563],["District de Soanierana Ivongo, Commune de Manompana, Fokontany de Vohijiny, Village d'Ambohitsara. For\u00eat littorale de Sahavalanina, au Sud-Est d'Ambohitsara.",3],["District Mahabo, Commune Analamisandy,Fokontany Soazato, For\u00eat d'Azohy. Collect\u00e9s avec: Ando, Tefy, C\u00e9cile, Jean Michel. \u00c9chantillon pr\u00e9serv\u00e9 en l'alcool.",3],["R\u00e9gion Vatovavy, Kianjavato, Ambodifandramanana, Ankarabo, vestige de for\u00eat au sud du Mt Vatovavy. Echantillons pr\u00e9serv\u00e9s en alcool, r\u00e9colt\u00e9s avec \u00e9quipe polisinala (Auguste, Jean Fr\u00e9d\u00e9ric).",3],["Antsiranana, SAVA, District de Voh\u00e9mar, Commune rurale d'Antsirabe-nord, Fokontany d'Andravinambo, foret d'Antsolatra Marojala Sokitra. Plantes pr\u00e9serv\u00e9es en alcool, r\u00e9colt\u00e9es avec Bezanaka Jean Honor\u00e9.",3],["R\u00e9gion Sofia, District de Mandritsara, commune rurale Marotandrano, fokontany Antsiatsiaka. Foret de Bezavona \u00e0 2 km \u00e0 l'Est du village d'Antsiatsiaka, foret humide sempervirente de moyenne altitude sur lat\u00e9rite. Avec Raharimanana Th\u00e9o, Ranaivoson Ernest, Marojery R\u00e9n\u00e9 chef FKT, Traravola, Rabemalaza Justin, Risy guides locaux.",3],["Distrit Sakaraha Commune Rurale Amboronabo Fokontany Mitia village Belambo Collect\u00e9 avec Mamomjy, Tariha, Rehary",3],["District Sakaraha, Commune rurale Amboronabo, Fokotany Mitia-Est. For\u00eat de Herea, au Nord  d'Analavelona, sur sable. Hameau le plus proche Belambo.",3],["District Vaingaindrano, Commnune Tsianofana, Fokontany Abaronga, localit\u00e9 Andasibe . For\u00eat humide de la nouvelle aire prot\u00e9g\u00e9e d' Agnakatrika. Collect\u00e9 avec Iakily Armand.",3],["Serra da Farinha-seca, encosta do Morro Sete.",2],["Serra da Graciosa. Encosta pr\u00f3xima ao Recanto Bela Vista.",2],["Est\u00e2ncia do Meio.",2],["UTM25_32T_0600_5150",1],["Parque Estadual da Serra da Baitaca, proximidades da Cachoeira do Samambaia.",1],["Parque Estadual da Serra da Baitaca,",1],["Ca. 700 m al sur de San Francisco de San Isidro, costado sur (del parqueo sur) de la escuela Golden Valley. Remanentes de bosque muy h\u00famedo, en cafetales, casas, potreros y finca de Hammel y P\u00e9rez por el R\u00edo Tures.",1],["Esta\u00e7\u00e3o de Tratamento de \u00c1gua Pira\u00ed (ETA Pira\u00ed)",1],["Alto Benedito.",1],["Comfloresta.",1],["S\u00edtio Barcelos. \u00c1rea de PRAD. Propriedade de Vilmar de Lima Barcelos.",1]]},"kind":"categorical","n":610,"n_null":0,"n_unique":29,"null_rate":0.0,"stats":{"cardinality":29,"entropy":0.7474767502888109,"entropy_ratio":0.15386572138477086,"top_rate":0.9229508196721311,"top_value":""}},{"alerts":[],"column":"basisOfRecord","extras":{"singletons":0,"top_values":[["HUMAN_OBSERVATION",550],["PRESERVED_SPECIMEN",60]]},"kind":"categorical","n":610,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.4637773498877516,"entropy_ratio":0.4637773498877516,"top_rate":0.9016393442622951,"top_value":"HUMAN_OBSERVATION"}},{"alerts":[],"column":"year","extras":{"histogram":{"counts":[7,0,0,0,70,0,0,0,0,71,0,0,0,0,79,0,0,0,0,75,0,0,0,308],"edges":[2021.0,2021.2083333333333,2021.4166666666667,2021.625,2021.8333333333333,2022.0416666666667,2022.25,2022.4583333333333,2022.6666666666667,2022.875,2023.0833333333333,2023.2916666666667,2023.5,2023.7083333333333,2023.9166666666667,2024.125,2024.3333333333333,2024.5416666666667,2024.75,2024.9583333333333,2025.1666666666667,2025.375,2025.5833333333333,2025.7916666666667,2026.0]},"sample":[2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2026.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2025.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2024.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2023.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2022.0,2021.0,2021.0,2021.0,2021.0,2021.0,2025.0,2024.0,2024.0,2024.0,2024.0]},"kind":"numeric","n":610,"n_null":0,"n_unique":6,"null_rate":0.0,"stats":{"iqr":2.0,"kurtosis":-0.79285948537134,"max":2026.0,"mean":2024.7524590163935,"median":2026.0,"min":2021.0,"n_outliers":0,"outlier_rate":0.0,"q1":2024.0,"q3":2026.0,"skew":-0.7968814306780531,"std":1.502937087000034,"zero_rate":0.0}},{"alerts":[],"column":"month","extras":{"histogram":{"counts":[342,0,18,0,24,0,24,0,21,0,17,0,0,42,0,23,0,15,0,24,0,29,0,30],"edges":[1.0,1.4583333333333333,1.9166666666666665,2.375,2.833333333333333,3.2916666666666665,3.75,4.208333333333333,4.666666666666666,5.125,5.583333333333333,6.041666666666666,6.5,6.958333333333333,7.416666666666666,7.875,8.333333333333332,8.791666666666666,9.25,9.708333333333332,10.166666666666666,10.625,11.083333333333332,11.541666666666666,12.0]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,9.0,9.0,9.0,10.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,6.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,9.0,9.0,9.0,9.0,9.0,9.0,10.0,10.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,3.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,3.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,6.0,6.0,7.0,7.0,7.0,7.0,7.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,9.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,12.0,12.0,12.0,12.0,12.0,12.0,1.0,1.0,2.0,3.0,3.0,8.0,2.0,4.0,4.0,4.0,5.0]},"kind":"numeric","n":610,"n_null":1,"n_unique":12,"null_rate":0.001639344262295082,"stats":{"iqr":6.0,"kurtosis":-0.5078264138526407,"max":12.0,"mean":3.7520525451559936,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":1.0,"q3":7.0,"skew":1.001634037248469,"std":3.750342075257268,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"22.6% null"},{"code":"high_skew","level":"info","message":"skew=+17.30"},{"code":"outliers","level":"warn","message":"19.3% rows beyond 1.5 IQR"}],"column":"coordinateUncertainty","extras":{"histogram":{"counts":[467,2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,36520.80952380953,73040.61904761905,109560.42857142858,146080.2380952381,182600.04761904763,219119.85714285716,255639.6666666667,292159.4761904762,328679.28571428574,365199.09523809527,401718.9047619048,438238.7142857143,474758.52380952385,511278.3333333334,547798.142857143,584317.9523809524,620837.7619047619,657357.5714285715,693877.3809523811,730397.1904761905,766917.0]},"sample":[1.0,1.0,1.0,1.0,7000.0,1.0,1.0,7000.0,1.0,1.0,7000.0,7000.0,7000.0,7000.0,1.0,7000.0,7000.0,1.0,1.0,7000.0,7000.0,7000.0,7000.0,7000.0,7000.0,1.0,7000.0,7000.0,7000.0,7000.0,25.0,46.0,616.0,34.0,26.0,25.0,616.0,18.0,258.0,2.0,260.0,46.0,12.0,29656.0,192.0,79.0,1480.0,13.0,134.0,2.0,616.0,15.0,616.0,4.0,2.0,262.0,5.0,4.0,68.0,4.0,398.0,71.0,262.0,122.0,7.0,8.0,173.0,8.0,616.0,853.0,27.0,2.0,336.0,2.0,15.0,156.0,212.0,39.0,39.0,38.0,2.0,51.0,2.0,2.0,14.0,7.0,38.0,1056.0,156.0,79.0,2.0,3.0,8.0,61.0,234.0,207.0,2.0,444.0,616.0,28902.0,181.0,211.0,4.0,4026.0,616.0,79.0,39.0,17.0,100.0,4.0,276.0,10.0,616.0,4.0,46.0,29656.0,212.0,61.0,616.0,8.0,2.0,616.0,4.0,186.0,616.0,260.0,29656.0,14415.0,46.0,1.0,2.0,30278.0,1942.0,46.0,15.0,262.0,9610.0,2250.0,29656.0,264.0,1.0,203.0,46.0,46.0,5.0,213.0,616.0,211.0,1942.0,15.0,2.0,4.0,299.0,38.0,186.0,616.0,79.0,38.0,30278.0,616.0,31.0,2.0,8.0,11.0,169.0,31.0,31.0,260.0,9.0,156.0,5.0,30278.0,10.0,186.0,186.0,4.0,122.0,1.0,46.0,122.0,5.0,2.0,122.0,79.0,2.0,10.0,8.0,100.0,29775.0,93.0,29656.0,143.0,46.0,4.0,198.0,4.0,2582.0,15.0,4.0,1.0,11.0,8.0,616.0,6.0,28902.0,1.0,199.0,23.0,108.0,1.0,213.0,212.0,616.0,8.0,244.0,40.0,5.0,186.0,61.0,90.0,213.0,186.0,6.0,7.0,61.0,55.0,1.0,647.0,52.0,30.0,186.0,9.0,157.0,14.0,38.0,15.0,4.0,212.0,29656.0,8.0,448.0,5.0,264.0,9.0,5.0,2.0,4.0,4.0,3.0,4.0,6.0,6.0,4.0,821.0,30545.0,8.0,7.0,3.0,5.0,9.0,3.0,4.0,22.0,15.0,4.0,29458.0,435.0,29458.0,30069.0,6.0,10.0,3.0,6.0,30090.0,13.0,4.0,30666.0,158.0,30580.0,1510.0,227.0,30237.0,3.0,3.0,4.0,5.0,7.0,3.0,3020.0,5.0,3.0,3.0,4.0,30237.0,5.0,9.0,10.0,9.0,30826.0,8.0,7.0,8.0,1000.0,5.0,5.0,5.0,6.0,9.0,6.0,9.0,4.0,3.0,3.0,5.0,4.0,5.0,5.0,5.0,5.0,64.0,1096.0,1096.0,9.0,208.0,200.0,29433.0,4.0,29433.0,11.0,3.0,9.0,400.0,3.0,30196.0,208.0,208.0,3.0,313.0,30217.0,29433.0,3.0,22.0,523.0,28301.0,4.0,6.0,4.0,4.0,3.0,6.0,30133.0,9204.0,31324.0,29533.0,30196.0,30196.0,4.0,30196.0,30196.0,24.0,30.0,29958.0,31236.0,277.0,5.0,9.0,4.0,8.0,115.0,9.0,12.0,13.0,25.0,5.0,3.0,166.0,25.0,32.0,7.0,29868.0,22.0,5.0,29958.0,8.0,5.0,4.0,1485.0,8.0,8.0,4.0,30237.0,1099.0,5.0,8.0,1145.0,4.0,215.0,29631.0,210.0,6.0,30683.0,4.0,30683.0,30683.0,6.0,130.0,7.0,30003.0,9.0,4.0,50.0,50.0,1.0,6.0,8.0,1.0,63.0,41.0,30003.0,30003.0,65.0,10.0,4.0,8.0,4.0,5.0,4.0,5.0,1.0,29775.0,6.0,29775.0,29775.0,8.0,5.0,35.0,9.0,173.0,3.0,9.0,5.0,22.0,30666.0,35.0,4.0,20.0,20.0,9.0,5.0,4.0,30666.0,176.0,10519.0,10519.0,3.0,25.0,25.0,4.0,61.0,82.0,50.0,50.0,123.0,29981.0,56639.0,98400.0,5489.0,222343.0,69815.0,766917.0,202.0,606.0]},"kind":"numeric","n":610,"n_null":138,"n_unique":151,"null_rate":0.2262295081967213,"stats":{"iqr":461.75,"kurtosis":335.68988122973855,"max":766917.0,"mean":6463.375,"median":35.0,"min":1.0,"n_outliers":91,"outlier_rate":0.19279661016949154,"q1":5.0,"q3":466.75,"skew":17.304668453272807,"std":38136.06292733861,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"610 singleton categories"}],"column":"gbifID","extras":{"singletons":610,"top_values":[["5937748304",1],["5937748308",1],["5937748309",1],["5937748312",1],["5937748316",1],["5937748322",1],["5937748325",1],["5937748327",1],["5937748329",1],["5937748333",1],["5937748335",1],["5937748336",1],["5937748338",1],["5937748342",1],["5937748344",1],["5937748350",1],["5937748352",1],["5937748353",1],["5937748363",1],["5937748369",1]]},"kind":"categorical","n":610,"n_null":0,"n_unique":610,"null_rate":0.0,"stats":{"cardinality":610,"entropy":9.252665432450247,"entropy_ratio":0.9999999999999998,"top_rate":0.001639344262295082,"top_value":"5937748304"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","family.top_values","scientificName.top_value","scientificName.top_rate","country.top_values","coordinateUncertainty.stats.median","coordinateUncertainty.stats.max","coordinateUncertainty.n_outliers","coordinateUncertainty.null_rate","year.stats.min","year.stats.max","basisOfRecord.top_value","basisOfRecord.top_rate"],"featured_charts":[{"caption":"Look for the near-even split between Hesperiidae and Canellaceae, and the tiny Araceae slice \u2014 this mixed taxonomy is the dataset's biggest anomaly.","column":"family","kind":"donut"},{"caption":"The United States leads with 130 records, followed by Mexico, Brazil, and Guadeloupe \u2014 check whether geographic coverage aligns with the supposed carnivorous-plant theme.","column":"country","kind":"bar"},{"caption":"Canella winterana dominates at 28.5% of all records; look for the long tail of 157 unique species to assess how concentrated the data really is.","column":"scientificName","kind":"bar"},{"caption":"Extreme right skew with a max of 766,917 metres versus a median of 35 metres reveals that a small number of records have essentially unusable location precision.","column":"coordinateUncertainty","kind":"histogram"},{"caption":"Collection activity appears front-loaded toward early months \u2014 check whether this reflects genuine seasonality or a recording bias in the dataset.","column":"month","kind":"bar"}],"model":"anthropic:default","narrative":"This is a GBIF biodiversity occurrence dataset with 610 records spanning 14 columns, covering observations and preserved specimens of organisms across 35 countries, primarily recorded between 2021 and 2026. Despite the filename suggesting carnivorous plants, the dataset actually mixes three distinct taxonomic families \u2014 Hesperiidae (skippers/butterflies), Canellaceae (spice plants), and Araceae \u2014 each contributing roughly 300, 300, and 10 records respectively, which is a notable data-quality curiosity worth investigating. The dominant species is Canella winterana with 174 records (28.5%), and the US, Mexico, Brazil, and Guadeloupe together account for nearly half of all country-level records. Coordinate uncertainty is severely skewed and problematic: the median is just 35 metres but the max reaches 766,917 metres, with 91 outliers and a 23% null rate, meaning spatial analyses should treat location precision with caution.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["column","stats.median","stats.mean","stats.max","stats.skew","stats.kurtosis","n_outliers","outlier_rate","null_rate"],"model":"anthropic:default","narrative":"This column represents geographic coordinate uncertainty (likely in meters), a standard biodiversity/occurrence record field indicating the spatial precision of a location fix. Two signals are striking: the distribution is extremely right-skewed (skew=17.3, kurtosis=335.7) with a median of just 35m but a max of 766,917m (~767km), and 91 outliers (19.3% of rows) drive a mean of 6,463m versus a median of 35m \u2014 suggesting a mix of GPS-precise records and very coarse or placeholder uncertainty values. Additionally, 22.6% of values are null, meaning nearly a quarter of records carry no uncertainty estimate at all.","role":"feature","scope":"column","target":"coordinateUncertainty","treatment":"Log-transform or winsorize before modelling; flag nulls and extreme values (>10,000m) as a separate binary indicator for spatial reliability filtering."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","entropy_ratio","top_rate","top_values"],"model":"anthropic:default","narrative":"This column is the GBIF (Global Biodiversity Information Facility) occurrence identifier, a globally unique numeric key assigned to biodiversity occurrence records. With 610 rows, 610 unique values, a null rate of 0.0, and an entropy ratio of essentially 1.0, every record has a distinct ID \u2014 perfect identifier behaviour. The top frequency of 0.001639 (1 occurrence each) confirms zero duplication. Values appear to be sequential large integers in the 5937748xxx range, consistent with GBIF's ID scheme.","role":"identifier","scope":"column","target":"gbifID","treatment":"Retain as a primary key for joins or provenance tracking; drop from any model feature set."},{"confidence":"high","critiques":[],"evidence_keys":["top_rate","top_value","n_unique","n","alerts","top_values","null_rate"],"model":"anthropic:default","narrative":"This column records detailed collection locality descriptions for what appears to be a biological specimen or herbarium dataset, likely from Madagascar (French-language administrative hierarchy: R\u00e9gion, District, Commune, Fokontany) with at least one Brazilian entry ('Serra da Farinha-seca'). The dominant signal is that 92.3% of the 610 rows (563 records) have an empty string, rendering the column nearly uninformative for most records. Among the 29 distinct non-empty values, each of the populated entries appears only 2\u20133 times, confirming the long-tail alert and suggesting these are free-text verbatim field notes rather than a controlled vocabulary.","role":"free_text","scope":"column","target":"locality","treatment":"Treat empty strings as missing; for populated rows, parse administrative hierarchy tokens (Region, District, Commune, Fokontany) via regex or NLP before use in spatial analysis."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","alerts","entropy_ratio"],"model":"anthropic:default","narrative":"This column contains Linnaean binomial scientific names for biological taxa, including both plant species (e.g., 'Canella winterana (L.) Gaertn.') and insect species (e.g., 'Hylephila phyleus (Drury, 1773)'), suggesting a mixed-taxon dataset. The dominant value 'Canella winterana (L.) Gaertn.' accounts for 28.5% of all 610 rows, which is strikingly disproportionate given 157 unique names, and the long-tail alert confirms the remaining mass is spread thinly across many species. Entropy ratio of 0.756 indicates moderate but uneven spread\u2014far from uniform distribution.","role":"label","scope":"column","target":"scientificName","treatment":"Use as a grouping/stratification key; address class imbalance before any species-level modelling, and consider taxonomic hierarchy encoding."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","kurtosis","skew","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column contains geographic latitude values, ranging from -43.25\u00b0 (southern hemisphere, around southern South America or South Africa) to 46.70\u00b0 (northern hemisphere, around central Europe or northern US). The distribution is notably platykurtic (kurtosis -1.28) with a wide IQR of 47.75 degrees, indicating near-uniform spread across a broad swath of the globe rather than clustering around any particular region. The mean (5.20\u00b0) and median (17.01\u00b0) diverge by ~12 degrees, and the slight negative skew suggests a modest tail toward southern latitudes. With 466 unique values out of 610 rows and zero nulls, this is a real-valued geographic coordinate with moderate but not near-unique cardinality.","role":"feature","scope":"column","target":"latitude","treatment":"Use as-is or pair with longitude for spatial modelling; consider binning into latitude bands or projecting to cartesian coordinates if distance metrics are needed."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","skew","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column contains geographic longitude coordinates, with values spanning from -115.04 to 153.39 degrees \u2014 a range that covers locations across the Americas, Europe/Africa, and into the Pacific/Oceania region. The mean (-32.94) and median (-63.06) diverge substantially, and the IQR of 120.21 degrees is very wide, indicating the dataset captures globally dispersed locations rather than a single region. A skew of 1.18 suggests a tail toward positive (eastern) longitudes, meaning most records cluster in western longitudes but some pull toward Oceania (e.g., the max of 153.39 is consistent with eastern Australia). With 467 unique values out of 610 rows and zero nulls, coordinates appear mostly distinct but not fully unique, suggesting some location reuse.","role":"feature","scope":"column","target":"longitude","treatment":"Pair with latitude for spatial modelling; consider geographic clustering or projection before use as a numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","min","max","median","skew","mean","iqr","n"],"model":"anthropic:default","narrative":"This column represents a calendar year dimension spanning 2021\u20132026, with only 6 distinct values across 610 rows \u2014 confirming it is a low-cardinality temporal grouping field rather than a continuous numeric. The distribution is left-skewed (skew = \u20130.80) with both median and max at 2026, meaning the majority of records are concentrated in the most recent year. The presence of 2026 data (a future or current year at time of writing) alongside 2021 suggests multi-year longitudinal coverage, with recent years \u2014 especially 2026 \u2014 heavily over-represented.","role":"feature","scope":"column","target":"year","treatment":"Treat as an ordinal categorical or integer time dimension; consider encoding as a period indicator or time-fixed-effect in modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","min","max","median","mean","q1","q3","skew","null_rate"],"model":"anthropic:default","narrative":"This column represents calendar month, encoded as an integer from 1 to 12 with exactly 12 unique values and near-zero nulls (0.16%). The distribution is notably right-skewed (skew \u2248 1.0) with a median of 1.0 and Q1 also at 1.0, meaning at least 25% of rows are coded as January \u2014 suggesting strong concentration in early months rather than a uniform seasonal spread. The mean of 3.75 versus median of 1.0 confirms this heavy front-loading, which would surprise an analyst expecting roughly uniform monthly representation.","role":"feature","scope":"column","target":"month","treatment":"Treat as a cyclical feature \u2014 apply sin/cos encoding (2\u03c0\u00b7month/12) before modelling to capture periodicity; investigate heavy January concentration before use."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","top_values","n","n_unique","null_rate","entropy"],"model":"anthropic:default","narrative":"This column is a Darwin Core 'basisOfRecord' field classifying how each biodiversity occurrence was recorded, with only two valid categories present. The distribution is heavily skewed: HUMAN_OBSERVATION dominates at 90.2% (550 of 610 records), while PRESERVED_SPECIMEN accounts for just 9.8% (60 records). The extreme imbalance between field observations and museum/herbarium specimens may affect any model trained on this feature, as PRESERVED_SPECIMEN is a near-minority class. No nulls are present, and the low entropy (0.464) confirms the near-uniform dominance of one category.","role":"label","scope":"column","target":"basisOfRecord","treatment":"One-hot encode with awareness of class imbalance; consider stratifying splits to preserve PRESERVED_SPECIMEN representation."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","null_rate","n"],"model":"anthropic:default","narrative":"This column captures country of origin or location for 610 records, spanning 35 distinct countries with no nulls. The United States dominates at 21.3% (130 records), followed by Mexico (73) and Brazil (51), but the presence of Guadeloupe (48) and Madagascar (40) as top-tier entries is surprising for a dataset this size, suggesting a specific thematic or biological focus (e.g., species occurrences, field surveys) rather than a general population sample. Entropy ratio of 0.77 indicates reasonably broad distribution across countries, though the top-heavy US share introduces mild imbalance.","role":"feature","scope":"column","target":"country","treatment":"One-hot encode for low-cardinality modelling or group into regions to reduce 35-level dimensionality."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_values","null_rate","n","top_rate"],"model":"anthropic:default","narrative":"This column contains biological family-level taxonomic names, with exactly 3 distinct values across 610 non-null rows. Notably, two families \u2014 Hesperiidae (butterflies) and Canellaceae (flowering plants) \u2014 each appear exactly 300 times, suggesting a deliberately balanced dataset pairing two taxonomic groups, while Araceae (another plant family) appears only 10 times, likely as a minor addition or control group. The near-perfect split between an animal family and a plant family in equal counts is unusual and warrants investigation into dataset construction intent.","role":"label","scope":"column","target":"family","treatment":"One-hot encode or ordinal-encode for modelling; verify whether the 10-row Araceae class should be included or treated as out-of-distribution."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","n","null_rate"],"model":"anthropic:default","narrative":"This column contains biological genus names, functioning as a taxonomic label for 610 specimens spanning 94 distinct genera. The dominant genus 'Canella' accounts for 28.5% of all rows (174 occurrences), creating notable imbalance \u2014 the top four genera alone (Canella, Warburgia, Cinnamosma, Cinnamodendron) appear to be plant genera (order Canellales), while lower-ranked entries like Urbanus, Hylephila, Burnsius, and Pyrgus are butterfly genera (Hesperiidae), suggesting the dataset may mix taxonomic kingdoms or represent a cross-taxon study. Entropy ratio of 0.74 indicates moderate concentration despite 94 unique values.","role":"label","scope":"column","target":"genus","treatment":"Use as a grouping/stratification variable; address class imbalance (Canella = 28.5%) before any genus-level classification task, and investigate mixed-kingdom composition."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column records biological taxonomic names, appearing to be species-level identifiers (binomial nomenclature) though it also contains family-level names such as 'Droseraceae' and 'Sarraceniaceae', indicating inconsistent taxonomic rank across rows. The dominant value 'Canella winterana' accounts for 28.5% of all 610 rows (174 occurrences), which is a striking concentration given 123 unique values and an entropy ratio of 0.74 suggesting moderate-to-high diversity otherwise. The mix of species binomials (plants and insects, e.g. butterflies like 'Hylephila phyleus') alongside family names is a data quality concern that should be flagged before modelling.","role":"label","scope":"column","target":"species","treatment":"Standardise taxonomic rank (species vs. family) before use; encode as categorical or join to a taxonomy reference table."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","top_values","n_unique","null_rate","entropy_ratio","n"],"model":"anthropic:default","narrative":"This column captures state or province-level geographic designations, with 108 distinct values across 610 rows and no nulls. The top value is 'Texas' (13.3% of rows), but the mix of US states, Mexican states (Nayarit, Sinaloa), Australian states (Queensland), South African provinces (KwaZulu-Natal), Brazilian states (Santa Catarina), and a French Caribbean municipality (Pointe-\u00e0-Pitre) confirms this is an international dataset \u2014 not US-only. Notably, 30 rows (\u22484.9%) contain an empty string rather than a true null, and 11 rows are coded as the catch-all 'Other', both of which will need handling. The high entropy ratio (0.82) reflects genuine geographic spread rather than a dominated distribution.","role":"feature","scope":"column","target":"stateProvince","treatment":"Normalize empty strings and 'Other' to null, then encode geographically (e.g., region groupings or target-encode) before modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4388,"prompt_tokens":14020,"total_tokens":18408}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:47:38+00:00","mode":"full","row_count":610,"sampled_rows":610,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/carnivorous_plants_real.json"},"notes":[],"saturn_version":"0.2.0","schema":{"basisOfRecord":"categorical","coordinateUncertainty":"numeric","country":"categorical","family":"categorical","gbifID":"categorical","genus":"categorical","latitude":"numeric","locality":"categorical","longitude":"numeric","month":"numeric","scientificName":"categorical","species":"categorical","stateProvince":"categorical","year":"numeric"}}
