{"columns":[{"alerts":[],"column":"scientificName","extras":{"singletons":0,"top_values":[["Alphaproteobacteria",17640],["Bacteria",12760],["Nitrosopumilaceae",10840],["Syndiniales",7280],["Amperima rosea",4520],["Porifera",2400],["Thermoplasmata",2360],["Keratoisididae",2320],["Xiphias gladius",2000],["Pseudomonadales",1920],["Gammaproteobacteria",1920],["Monothalamea",1640],["Rhodospirillales",1640],["Scomber scombrus",1520],["Retaria",1520],["Dinophyceae",1320],["Rickettsiales",1200],["Chrysogorgia",1160],["Hexactinellida",1080],["Prionace glauca",1080]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":1478,"null_rate":0.0,"stats":{"cardinality":1478,"entropy":8.377598740606556,"entropy_ratio":0.7956364494286662,"top_rate":0.0882,"top_value":"Alphaproteobacteria"}},{"alerts":[],"column":"species","extras":{"singletons":0,"top_values":[["",146400],["Amperima rosea",4520],["Xiphias gladius",2000],["Scomber scombrus",1520],["Prionace glauca",1080],["Oneirophanta mutabilis",840],["Thunnus albacares",760],["Farrea occa",680],["Trissopathes pseudotristicha",640],["Hoplostethus atlanticus",520],["Trachurus trachurus",480],["Florometra serratissima",440],["Heteropolypus ritteri",400],["Desmophyllum dianthus",400],["Psychropotes longicauda",360],["Thunnus obesus",320],["Solenosmilia variabilis",320],["Etmopterus granulosus",320],["Molpadiodemas villosus",280],["Paragorgia 
arborea",280]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":678,"null_rate":0.0,"stats":{"cardinality":678,"entropy":3.077328935481046,"entropy_ratio":0.32719645393348984,"top_rate":0.732,"top_value":""}},{"alerts":[],"column":"genus","extras":{"singletons":0,"top_values":[["",109800],["Amperima",4520],["Xiphias",2000],["Scomber",1520],["Retaria",1520],["Farrea",1400],["Thunnus",1360],["Chrysogorgia",1360],["Coryphaenoides",1240],["Prionace",1080],["Hemicorallium",1000],["Alteromonas",960],["Paragorgia",880],["Oneirophanta",840],["Lepidisis",800],["Trissopathes",800],["Alepisaurus",760],["Keratoisis",720],["Pennatula",600],["Hoplostethus",600]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":841,"null_rate":0.0,"stats":{"cardinality":841,"entropy":4.895537670189287,"entropy_ratio":0.5038654612996,"top_rate":0.549,"top_value":""}},{"alerts":[],"column":"family","extras":{"singletons":0,"top_values":[["",80360],["Nitrosopumilaceae",10840],["Elpidiidae",4920],["Keratoisididae",4360],["Coralliidae",4040],["Macrouridae",3120],["Scombridae",3040],["Primnoidae",2440],["Chrysogorgiidae",2320],["Xiphiidae",2000],["Retariidae",1520],["Farreidae",1520],["Alteromonadaceae",1440],["Euplectellidae",1360],["Flavobacteriaceae",1320],["Schizopathidae",1280],["Caryophylliidae",1080],["Carcharhinidae",1080],["Acanthogorgiidae",1000],["Nitrospinaceae",1000]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":606,"null_rate":0.0,"stats":{"cardinality":606,"entropy":5.49965916938821,"entropy_ratio":0.5949968245996182,"top_rate":0.4018,"top_value":""}},{"alerts":[],"column":"order","extras":{"singletons":0,"top_values":[["",56320],["Scleralcyonacea",15720],["Nitrosopumilales",10840],["Syndiniales",7960],["Elasipodida",5680],["Gadiformes",4040],["Scombriformes",3360],["Carangiformes",3240],["Calanoida",2840],["Alteromonadales",2720],["Decapoda",2720],["Antipatharia",2560],["Sceptrulophora",2480],["Pseudomonadales",2400],["Flavobacteriales",2320],["Scleractinia",2240],[
"Malacalcyonacea",2000],["Lyssacinosida",2000],["Rotaliida",2000],["Amphipoda",1880]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":310,"null_rate":0.0,"stats":{"cardinality":310,"entropy":5.450207419179946,"entropy_ratio":0.6585458545918689,"top_rate":0.2816,"top_value":""}},{"alerts":[],"column":"class","extras":{"singletons":0,"top_values":[["Alphaproteobacteria",22840],["",21920],["Teleostei",19120],["Octocorallia",17880],["Thaumarchaeota incertae sedis",10840],["Dinophyceae",10600],["Gammaproteobacteria",9440],["Holothuroidea",7760],["Malacostraca",7440],["Hexactinellida",6320],["Hexacorallia",5640],["Copepoda",4080],["Ophiuroidea",3320],["Polychaeta",3320],["Polycystina",2800],["Elasmobranchii",2640],["Globothalamea",2520],["Deltaproteobacteria",2440],["Thermoplasmata",2360],["Flavobacteria",2320]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":138,"null_rate":0.0,"stats":{"cardinality":138,"entropy":4.891619971812342,"entropy_ratio":0.6881343662183016,"top_rate":0.1142,"top_value":"Alphaproteobacteria"}},{"alerts":[],"column":"phylum","extras":{"singletons":0,"top_values":[["Proteobacteria",35480],["Cnidaria",25520],["Chordata",23920],["Echinodermata",14000],["",13200],["Arthropoda",13200],["Myzozoa",11280],["Thaumarchaeota",10920],["Porifera",10360],["Foraminifera",4720],["Annelida",4240],["Radiozoa",4000],["Mollusca",3840],["Bacteroidetes",3440],["Euryarchaeota",2440],["Planctomycetes",2320],["Heterokontophyta",1680],["Verrucomicrobia",1520],["Brachiopoda",1520],["Nematoda",1160]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":65,"null_rate":0.0,"stats":{"cardinality":65,"entropy":4.0945217856631,"entropy_ratio":0.6798857048892365,"top_rate":0.1774,"top_value":"Proteobacteria"}},{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[640,440,6000,5160,4960,4360,4360,8560,5560,16440,13320,10400,3480,2520,4280,4280,4440,6080,2160,1080,5280,5680,1960,5960,9160,7680,7480,12840,7000,5880,11360,2480,920,1160,1200,920,920,1600,12
00,800],"edges":[-75.0,-70.898500061035,-66.79700012207,-62.695500183105,-58.59400024414,-54.492500305175,-50.391000366209994,-46.289500427244995,-42.18800048828,-38.086500549315,-33.98500061035,-29.883500671384994,-25.782000732419995,-21.680500793454996,-17.57900085448999,-13.477500915524992,-9.376000976559993,-5.2745010375949875,-1.173001098629996,2.92849884033501,7.029998779300001,11.131498718265007,15.232998657230013,19.334498596195004,23.43599853516001,27.537498474125016,31.638998413090007,35.74049835205501,39.84199829102002,43.94349822998501,48.044998168950016,52.14649810791501,56.247998046880014,60.349497985845005,64.45099792481003,68.55249786377502,72.65399780274001,76.75549774170503,80.85699768067002,84.95849761963501,89.0599975586]},"sample":[26.85439,-39.51513,36.9836845398,-36.8789,17.7700004578,-44.328417,40.4367752075,26.97448,-34.3206,47.7333,40.3606376648,-32.4697,83.7667,-36.87545,39.35935,53.0391998291,18.15631,-36.88003,-42.7506,23.01483,36.5753440857,-28.00095,35.7940788269,-32.466,44.78695,14.87248,28.9721,-12.49798,-43.136,48.42300703,35.64251,10.5,-36.8789,-41.5113333333,36.9027252197,39.93269,71.66667,35.7181434631,-36.88208,65.0,-30.6224,-32.56103,12.02745,-36.88208,37.38996,-0.9992,28.96551,39.02341,-71.8557,43.37,48.80417269,56.5,-35.68073,-27.92278,30.69546,21.2848,48.83237276,-54.4985,-52.503917,25.97161,2.64166665077,13.28,-34.4018,-32.4697,35.72038,-44.77,28.6722,-44.274,3.664,-36.87975,-4.99815,-18.50122,36.7001533508,29.10948,9.1,-31.00057,-0.49677,-65.6622,-64.72935,-17.9986,27.6499996185,63.312,-27.92278,-36.88202,58.48850017,-50.00695,30.68655,-34.50183,-44.834983,-3.9948,37.36276,-27.547083,-33.9845,36.9773864746,-69.733333,27.98989,25.2083330154,31.2124,-31.640967,-64.471,35.70197,-28.00505,-7.49845,-48.5025,29.32823,-63.129667,-42.3332,-52.002417,-27.92278,-30.62238,-64.72935,10.97609091,5.10163,38.85734,-33.62,42.70496,-64.310583,-27.92278,26.875,-36.2512,-62.99995,-16.0017,-30.62238,26.959262,-47.499267,38.29948,-13.8176,36.9
8663,-3.9948,48.91168654,42.6500015259,55.75048333,29.4725894928,-42.6577,-36.88145,-22.99507,-64.995883,-30.5042,-44.328417,-32.46597,-31.83217,56.15,30.68655,-42.6678,35.7877235413,51.86922,-67.50813,-44.22,-64.0,25.8166666666667,-32.00123,-13.8176,29.11195,26.59578,72.0113333333,47.0,12.144878,35.13673,51.6958007812,-32.49512,80.5599975586,-37.53316667,-60.1,-36.87993,-64.5387,35.77122,-2.00113,-38.3800010681,-36.88035,-36.87938,-41.2933,35.71663,22.937431666666665,31.21908,-52.002417,48.80585974,-0.49677,-24.4992,48.87577096,33.233001709,47.45,48.83679268,-32.57998,-36.24837,-13.8176,52.9792,-59.3435,27.57751,34.00061,83.3499984741,21.2848,40.78026,10.35972933,-45.849998,28.13488,-37.69166,-27.92278,-52.002417,-13.50338,-47.112133,-7.00215,21.0417,71.68917,-4.99815,48.9350107,-30.13316,-53.003483,-37.69166,-7.00215,-28.5013,-55.5058,-27.92278,-51.491767,-24.6,47.0,78.816,-32.74958,-63.95155,36.584651947,-41.3193,-44.834983,47.0,-36.45077,-37.767,-45.176767,-11.16787,-1.03704,-34.50183,12.192167,56.3035,-13.8176,13.88,11.862434,19.44499,36.5917434692,-57.502917,48.91919972,-8.99877,37.37217,27.1292877197,-31.00057,14.02243,-34.4497,-2.00113,-13.8176,-28.00095,80.6832962036,-31.492,-36.98872,-7.00215,-64.50175,79.739,35.16833,28.67217,-13.5034,-53.992383,48.83002737,-13.9691,13.265601,-64.310583,38.2975,29.70129,40.37424,36.5848083496,28.6722,-27.92278,-7.0,-63.990333,-62.4989,-28.00165,-3.50138,-36.87897,-32.74958,38.29945933,-35.3,-0.49677,-54.498483,-25.49173,48.8658558,-9.65,46.7333,-36.88003,14.53575,81.822024,-64.471,-17.9986,11.7906,-42.8068,23.057735,8.88,39.86572,-35.33387,39.38557,37.36929,26.221,-17.00137,36.9918632507,23.30751,47.75,12.566733,-13.5034,-13.50338,-27.92278,35.82497,-46.14833333,10.383,30.68624,-31.63537,-39.8873,-46.9037,36.9805603027,-53.0035,48.7333,-39.9166666666667,-52.503917,-41.2933,-55.5058,-27.2661,-11.99465,40.73369,-46.9037,9.555317,-39.96945,-27.92278,37.37251,-24.6,30.6788,79.2416992188,-30.80827,-27.92278,35.80318,11.772433,
-44.522333,-27.92278,-60.1329994202,-32.47087,36.9842453003,-36.87897,27.3672,-39.96945,-56.000533,-55.5058,-19.4953,-12.9974,12.01611111,-66.331733,64.3653,-4.5005,24.57702,-28.00165,34.1166667938,49.849998,-4.99815,67.06812501388889,-42.953,30.69546,28.6722,-29.9979991913,40.7885932922,-64.345,12.11786967,41.03102,-31.00057,40.3499984741,42.25,-32.5958333,45.3116666667,53.7487,-43.83068,-0.37554,-7.49845,47.7539,35.7277,7.58978,24.201499939,48.9350107,-57.0067,40.5,-36.2338,35.78004,-3.9948,27.97798,-34.84612,-36.87912,-22.0032,-41.12,-26.0515,28.96551,-24.99308,36.6843566895,-36.87993,-28.50127,26.20099,-16.00165,-63.9903,27.5249996185,-5.9993,30.69581,6.508,-4.99815,18.87338,54.2446,14.3797,27.93695,22.99905167,44.8347,48.81810744,35.632,-36.88133,-36.2338,-36.0667,-35.33387,23.088,-32.99448,-44.295,22.0,-52.503917,-31.492,44.778333,-65.1605,-27.92278,5.90078,40.37355,39.88367,81.822024,71.01787,48.87420486,-36.87545,-58.00085,-48.9954,-2.99973,-28.00505,-36.98872,-36.88145,25.5081,-1.50055,54.7295,-32.00123,-64.72935,-17.0014,61.75,-36.98872,80.6832962036,63.49964523333333,40.0,-36.9988,-27.9228,36.73154,-46.328533,-40.475,-32.00123,28.701,42.3797149658,-36.87975,-40.3974,30.68912,21.37731,42.63666667,41.03062,11.00144918,9.5177,-36.88145,40.6984672546,-51.491767,-64.471,-28.00165,-34.70815,-12.49798,-36.2512,5.17865,37.0833,-41.22977,-7.49845,-36.88145,10.97473917,36.7148857117,36.56848526,-36.8821,-10.9976,-64.995883,30.6788,65.0,22.999165,-42.169,-44.6165,35.72204,40.7869529724,-44.32458,-32.00123,-51.491767,50.2483,-30.5042,13.95505,-36.87545,30.69546,47.25,51.86922,63.3875,-9.92345,29.7025]},"kind":"numeric","n":200000,"n_null":0,"n_unique":2617,"null_rate":0.0,"stats":{"iqr":71.9735057355,"kurtosis":-1.2226338646729928,"max":89.0599975586,"mean":-1.5806687265120904,"median":-4.99815,"min":-75.0,"n_outliers":0,"outlier_rate":0.0,"q1":-36.25052,"q3":35.722985735500004,"skew":0.11822792793970917,"std":39.476864430621845,"zero_rate":0.0}},{"alerts":[],"column
":"longitude","extras":{"histogram":{"counts":[17200,53840,2320,680,520,3240,17080,4360,320,3760,4040,4120,5680,1840,3280,2240,1280,2360,12680,2440,2760,640,560,680,560,520,880,920,480,3320,680,160,1160,1200,1080,1000,14840,19120,1520,4640],"edges":[-179.9872,-170.9875575,-161.987915,-152.9882725,-143.98863,-134.9889875,-125.989345,-116.98970249999999,-107.99006,-98.9904175,-89.990775,-80.99113249999999,-71.99149,-62.991847500000006,-53.992205,-44.99256249999999,-35.99292,-26.993277500000005,-17.993635000000012,-8.99399249999999,0.005650000000002819,9.005292499999996,18.004935000000017,27.00457750000001,36.004220000000004,45.0038625,54.00350499999999,63.00314750000001,72.00279,81.00243250000003,90.00207500000002,99.00171750000001,108.00136,117.0010025,126.00064499999999,135.00028749999998,143.99992999999998,152.99957250000003,161.99921500000002,170.9988575,179.9985]},"sample":[-168.0684,-172.4141,-123.362541199,152.5661,-25.4350004196,-173.74595,-126.414466858,-168.85732,152.06015,-8.2333,-125.211997986,154.10308,-155.2167,-170.59835,-67.20591,-20.0166501999,-65.05737,152.57005,-174.6475,154.0076,-122.305511475,154.7829,-122.655403137,153.3989,-130.36875,-73.53915,-170.92645,-169.99903,173.9645,-15.62183031,-122.76729,-43.2000007629,152.5661,175.7198333333,-122.404510498,-69.22839,8.11667,-122.723999023,152.55653,0.0,153.6351,153.11935,-117.350067,152.55653,-123.42307,-168.7492,-170.92561,-72.44906,174.033,-52.08,-16.51334518,-57.64,-170.00705,155.1438,-174.60594,38.0484,-16.60860368,-170.0029,-169.996817,-164.74452,-40.5333328247,80.73,151.80088,154.10308,-122.72343,-176.84,-88.4765,177.3572,63.756,152.56255,-168.75025,-170.00235,-122.11907196,-178.10763,58.259,-169.99582,-168.74373,-170.0328,118.69735,-170.00035,-87.3999977112,5.094,155.14157,152.5582,-9.727504142,-169.99245,-174.61931,-170.00213,-173.1415,-168.75178,-123.39408,-46.433483,-169.99478,-123.364280701,-126.7,-170.95193,-93.1916656494,-179.63601,42.832517,115.623,-122.74405,154.35052,-168.75027,-170.00
04,-178.83216,87.259917,174.3825,-170.076017,155.1421,153.63512,118.69735,-116.158062,-141.88441,-63.74344,16.92,29.49043,89.803133,155.14233,-90.0416641235,154.24928,-170.030533,-170.0016,153.63512,-85.189096,-169.989033,-62.51603,-169.999,-123.35343,-168.75178,-16.57333131,-52.125,-10.7221,-87.1689300537,-176.7562,152.55905,-170.00023,-170.009767,-169.99368,-173.74595,153.39892,153.3184,10.32,-174.61931,175.913,-122.660453796,-178.32357,-22.52148,147.37,93.55,-54.9666666666667,-169.9937,-169.999,-178.10697,-160.67294,-154.6221666667,-8.0,-117.3254597,-122.94958,-13.006649971,-169.99072,-179.323303223,179.8331667,-22.26667,152.5619,116.6403,-122.69264,-168.7501,149.309997559,152.56053,152.56903,176.4667,-122.72412,153.931905,-179.62101,-170.076017,-16.63275563,-168.74373,-170.00232,-15.59824052,-118.5,-8.36,-16.56549874,153.07755,151.92822,-169.999,-132.756616,81.071167,-174.98835,-121.11772,-179.648300171,38.0484,-66.5866,-117.1622087,164.533005,-176.90194,139.21667,155.14362,-170.076017,-170.00263,-170.468833,-168.75268,-17.0822,0.516,-168.75025,-16.80249849,-71.8695,-170.014317,139.21667,-168.75268,-169.9972,-170.002633,155.14132,-170.01745,70.4,-9.0,-2.729,152.99235,83.140217,-122.521057129,145.0833,-173.1415,-43.0,-170.2935,49.517,-172.73455,-169.89206,-176.15658,-170.00213,-117.325467,-10.275,-169.999,-116.47,-117.0528928,165.79832,-122.236869812,-170.006333,-16.85498721,-168.87517,-123.40324,-96.6004257202,-169.99582,144.64108,129.512,-168.7501,-169.999,154.7829,68.6999969482,153.32997,153.88842,-168.75268,-170.003333,4.508,-123.01845,-88.47649,-170.0026,-169.957917,-16.54500336,-169.99773,-73.062007,89.803133,-62.51863,-174.01773,-67.40372,-122.521690369,-88.4765,155.1421,11.0,-170.04835,-169.9932,155.0341,-168.75058,152.56713,152.99235,-62.51625717,-19.0167,-168.74373,-170.002917,-170.00217,-16.63282891,-25.0167,-6.7333,152.57005,-169.9222,11.538371,115.623,-170.00035,-117.536883,173.8305,153.88675,53.15,-69.59031,-170.00248,-67.1368,-123.39246,-85.665,-17
0.00125,-123.353630066,-158.35707,-9.25,-116.708233,-170.0026,-170.00263,155.14132,-122.60916,177.1883333,-76.317,-174.62126,153.32147,177.972,165.44,-123.364067078,-170.0143,-12.3333,-26.0333333333333,-169.996817,176.4667,-170.002633,-169.9975,-170.00237,-66.65718,165.44,-76.966321,-172.70148,155.14293,-123.40476,70.4,-174.62802,61.5349998474,153.45375,155.1435,-122.6502,-116.850133,-173.502283,155.14312,170.716995239,153.17248,-123.361366272,152.56713,-90.5779,-172.70148,-170.004417,-170.002633,-170.0026,-169.99823,-117.57722222,-170.010233,6.44672,-168.7502,-173.97436,155.0341,-75.5833320618,-49.5,-168.75025,7.010967669444445,-174.0052,-174.60594,-88.4765,-73.2789993286,-127.520950317,83.536,-117.34347917,-66.32953,-169.99582,-66.7080001831,-9.74,-179.6078333,-126.5733333333,-14.2157,150.3028,-176.1319,-168.75027,-127.7643,-122.71289,133.8003,58.6301498413,-16.80249849,-169.999167,-66.8083305359,151.2642,-122.66442,-168.7518,-170.95512,151.3434,152.56818,-39.8231,176.78,-156.8941,-170.92561,-170.00397,-122.118186951,152.5619,-169.99722,-173.3253,-170.00158,-170.0484,-88.4499969482,-168.7513,-174.60574,60.226,-168.75025,-155.25128,-12.7366,-72.6628,-170.92008,154.4091583,137.2352,-16.46868641,-122.82703,152.55955,151.26423,173.0283,-170.00248,-86.273,152.92047,147.37417,-92.917,-169.996817,153.32997,-125.696667,91.637833,155.14312,-162.13826,-67.40515,-67.42617,11.538371,-146.1322,-16.69415238,-170.59835,-170.01025,-170.0036,-168.75135,154.35052,153.88842,152.55905,-173.5221,-168.74968,-11.7855,-169.9937,118.69735,-170.0013,-2.75,153.88842,68.6999969482,4.998265294444445,-9.0,153.99932,155.1429,-122.00829,-171.373367,177.75,-169.9937,-87.76,-127.635673523,152.56255,-173.022,-174.61558,-158.23686,-10.04166667,-66.32836,-116.1510258,134.75164,152.55905,-127.518119812,-170.01745,115.623,155.0341,129.721,-169.99903,154.2493,-162.05634,-9.25,148.76,-168.75027,152.55905,-116.1600608,-122.052452087,-122.577819824,152.5565,-169.9994,-170.009767,-174.62802,0.0,154.3426383,
-174.25463,-178.0207,-122.72321,-127.524452209,147.27278,-169.9937,-170.01745,-12.2273,-169.99368,-116.51406667,-170.59835,-174.60594,-41.2166671753,-178.32357,-2.0378,-169.63012,-174.01496]},"kind":"numeric","n":200000,"n_null":0,"n_unique":2654,"null_rate":0.0,"stats":{"iqr":233.751,"kurtosis":-1.1234210179498667,"max":179.9985,"mean":-51.57663763133836,"median":-94.29,"min":-179.9872,"n_outliers":0,"outlier_rate":0.0,"q1":-169.995,"q3":63.756,"skew":0.6619478858305774,"std":127.08723895239815,"zero_rate":0.0004}},{"alerts":[],"column":"depth","extras":{"histogram":{"counts":[58400,15280,10920,20200,23560,5720,5320,4360,5080,3520,3520,3360,8000,4680,1880,11400,3240,3360,5520,1760,280,40,40,0,0,80,80,0,0,0,80,80,40,0,40,0,120,0,0,40],"edges":[1000.0,1250.0,1500.0,1750.0,2000.0,2250.0,2500.0,2750.0,3000.0,3250.0,3500.0,3750.0,4000.0,4250.0,4500.0,4750.0,5000.0,5250.0,5500.0,5750.0,6000.0,6250.0,6500.0,6750.0,7000.0,7250.0,7500.0,7750.0,8000.0,8250.0,8500.0,8750.0,9000.0,9250.0,9500.0,9750.0,10000.0,10250.0,10500.0,10750.0,11000.0]},"sample":[2149.0,2000.8,1933.14,3800.0,1100.0,1035.7,2724.04,2336.0,1000.0,1245.0,1565.72,1000.0,1000.0,1001.6,2860.0,1750.0,1107.0,4600.0,1585.0,5599.0,2225.37,1000.0,1811.5,1000.0,2347.0,2471.7,2029.0,5077.4,1057.5,4844.0,2124.0,4292.0,3800.0,1076.0,1255.88,1115.0,2850.0,1343.86,1800.0,2913.0,1000.0,1000.0,4093.0,1800.0,1001.0,2150.0,2086.0,1511.0,1972.0,1337.5,4842.0,1320.0,2000.7,1400.0,1301.0,2180.0,4843.5,4902.0,1067.2,2346.0,1400.0,1000.0,1000.0,1000.0,1325.0,1207.5,1370.0,1072.5,3400.0,3200.0,2085.2,1935.9,1716.31,1874.0,2540.0,1037.0,2085.6,3200.0,3361.1,1002.1,1605.0,1059.79,4200.0,2000.0,1801.0,2085.3,1539.0,5589.4,1062.5,2001.2,1351.0,1667.0,2087.1,1917.85,1262.0,1931.0,1960.0,1306.0,1308.0,2161.0,1494.0,1000.0,1935.5,5390.0,2085.0,3840.0,1433.0,1001.8,3800.0,1000.0,3361.1,4191.325,4859.0,1977.0,1828.0,2054.0,3626.0,3600.0,1250.0,1000.0,1966.0,2085.0,1000.0,1772.0,2000.3,1867.0,1035.2,1899.0,2001.2,4844.5,1100.0,1200.0,1289.5,
1396.0,2200.0,2065.8,1035.0,5642.4,2065.9,1000.0,1000.0,1055.0,1539.0,1063.0,1660.03,1812.0,4893.0,1306.0,3260.0,2000.0,1064.4,2086.0,1896.0,1899.0,1099.0,2147.0,4041.0,3965.0,1185.0,1035.5,8700.0,3142.0,3596.0,3000.0,2100.0,1670.0,2066.0,1121.0,2600.0,4400.0,1039.5,1398.0,5581.0,2026.0,1001.8,4839.0,1035.9,2064.8,4832.5,1240.0,2210.0,4840.5,1000.0,1000.0,4382.4,1288.5,1943.0,1239.0,1197.0,1505.0,2180.0,1267.0,4275.95,4400.0,1852.0,1042.5,2000.0,1999.7,4949.8,1036.7,1900.3,1229.5,1500.0,1035.9,4834.5,2733.0,1136.6,1042.5,1900.3,5551.0,1936.3,4400.0,4805.6,3366.0,4277.0,2604.7,1000.0,3707.0,1186.1,1200.0,1931.1,1000.0,2085.6,4000.0,2084.2,2031.0,1924.0,1002.2,4063.0,2000.0,1035.0,1000.0,4133.0,2093.0,2060.11,1035.6,4839.5,4954.6,1024.0,1000.0,1037.0,1252.0,3292.5,2066.0,1035.0,1000.0,3250.0,1000.0,1000.0,1900.3,2356.3,2708.2,3953.0,1370.0,4949.0,5200.0,4841.5,1001.9,2976.0,3626.0,1843.0,1896.0,1084.0,3008.7,1370.0,3800.0,1354.0,1001.8,1065.0,1000.0,2085.7,4000.0,1000.0,2041.355,1000.0,1035.9,4902.0,1900.8,4839.5,2130.0,2187.0,4600.0,2232.0,1546.7,2161.0,1002.1,4351.5,1107.0,5663.0,4743.0,1237.0,2086.4,2802.0,1233.0,3241.0,1935.8,1767.5,1789.0,1672.0,4201.0,4949.0,4949.8,4400.0,2634.0,2424.0,2357.0,1580.0,1000.0,1271.0,1049.0,1998.43,1136.0,1200.0,2000.0,1935.4,1039.5,1936.3,5546.0,1936.5,1316.0,1049.0,2842.0,1065.1,3000.0,1069.0,3366.0,1758.0,1500.0,1000.0,2400.0,1982.0,2070.0,1999.4,2800.0,3660.0,1000.0,1880.63,4000.0,1178.0,4759.7,1035.7,1936.3,2000.0,1001.1,4340.0,1065.0,2600.0,5637.0,1799.0,1000.0,1300.0,1000.0,1035.9,1277.0,1378.5,1301.0,1370.0,2578.0,3269.2,3632.0,4181.0,1614.0,1037.0,2406.5,1106.5,1213.5,2750.0,1302.0,1571.0,1145.0,1935.5,2667.0,1581.0,2746.0,1305.0,4834.5,1935.5,1750.0,1000.0,1696.0,5323.0,1433.0,1000.0,4200.0,1350.0,1375.5,5127.0,2086.0,1974.8,1537.14,3000.0,5551.2,1600.0,2085.8,2000.0,1235.0,5765.0,1302.0,2009.0,2085.2,1681.0,2787.0,3404.0,1240.0,5552.0,1355.0,4842.0,2691.0,2400.0,1000.0,1024.0,1035.8,1220.0,1000.0,1510.5,2862.0,1935.4,1000
.0,2800.0,1226.0,2800.0,1073.0,1074.0,1396.0,1546.7,1000.0,4844.5,5380.4,1999.3,1999.2,2065.7,1000.0,1000.0,2200.0,2103.0,2061.1,1400.0,1064.4,3361.1,5071.0,1595.0,1000.0,3250.0,1180.0,1170.0,1000.0,3000.0,1062.0,2001.1,1920.0,1064.4,2245.0,2915.68,3200.0,1035.0,1470.0,1016.0,2270.0,1660.0,4186.087,2183.0,2200.0,3258.62,1035.8,2161.0,1000.0,3714.0,5077.4,1000.0,1506.0,1500.0,1005.0,1935.5,2200.0,4206.19,1000.07,3057.72,1800.0,1035.9,1035.0,1758.0,2913.0,5565.0,1001.0,1137.5,1307.0,3269.2,1397.0,5787.7,1035.8,1127.5,5642.4,4152.0,5380.4,1301.0,2176.5,1812.0,1721.5,2000.5,1849.0]},"kind":"numeric","n":200000,"n_null":0,"n_unique":1938,"null_rate":0.0,"stats":{"iqr":2174.25,"kurtosis":0.502188962740926,"max":11000.0,"mean":2405.742162760078,"median":1961.6950000000002,"min":1000.0,"n_outliers":560,"outlier_rate":0.0028,"q1":1149.0,"q3":3323.25,"skew":1.0905346340353783,"std":1477.2018060347082,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-3.57"},{"code":"outliers","level":"warn","message":"6.3% rows beyond 1.5 
IQR"}],"column":"year","extras":{"histogram":{"counts":[40,120,320,80,120,0,0,80,40,120,0,0,0,160,400,240,80,0,0,40,80,40,120,1680,840,1520,1840,1360,960,2160,2880,5200,8520,13760,10040,7360,7280,98920,14120,12240],"edges":[1875.0,1878.725,1882.45,1886.175,1889.9,1893.625,1897.35,1901.075,1904.8,1908.525,1912.25,1915.975,1919.7,1923.425,1927.15,1930.875,1934.6,1938.325,1942.05,1945.775,1949.5,1953.225,1956.95,1960.675,1964.4,1968.125,1971.85,1975.575,1979.3,1983.025,1986.75,1990.475,1994.2,1997.925,2001.65,2005.375,2009.1,2012.825,2016.55,2020.275,2024.0]},"sample":[2023.0,1997.0,2000.0,2024.0,1984.0,2015.0,1985.0,1999.0,2016.0,2012.0,2000.0,2016.0,1958.0,2016.0,2016.0,2016.0,2000.0,1998.0,2016.0,2003.0,2016.0,2016.0,2004.0,2014.0,1993.0,2016.0,2016.0,2016.0,2004.0,1992.0,2016.0,1997.0,2016.0,1998.0,2012.0,1997.0,1996.0,2013.0,2016.0,1975.0,1971.0,2016.0,2016.0,2004.0,2016.0,2016.0,1978.0,1993.0,1969.0,2016.0,1992.0,2000.0,2016.0,1972.0,2013.0,2016.0,2016.0,2011.0,1998.0,2018.0,2017.0,2016.0,2016.0,1997.0,2016.0,2016.0,2016.0,2016.0,2012.0,2016.0,2016.0,2016.0,1987.0,1964.0,2016.0,2022.0,2016.0,2013.0,2016.0,2007.0,2016.0,2016.0,2016.0,1962.0,1976.0,2015.0,2015.0,2016.0,2016.0,2016.0,1980.0,2016.0,2003.0,2016.0,2000.0,1999.0,2004.0,2016.0,2016.0,2016.0,1985.0,2016.0,1998.0,2018.0,2016.0,2016.0,2002.0,2016.0,2016.0,2000.0,2016.0,2011.0,1996.0,1997.0,2016.0,2016.0,2001.0,2018.0,2016.0,2024.0,2016.0,2016.0,2016.0,2016.0,2020.0,1990.0,2020.0,2019.0,2016.0,2016.0,2003.0,2016.0,2019.0,2006.0,2017.0,1997.0,1969.0,2016.0,2016.0,2016.0,2016.0,2016.0,2003.0,1994.0,1998.0,2004.0,1990.0,2016.0,1984.0,1999.0,2017.0,2022.0,2021.0,2016.0,2016.0,2022.0,1993.0,2016.0,1981.0,2016.0,2002.0,2022.0,2008.0,1980.0,1986.0,2016.0,2024.0,2016.0,2016.0,2016.0,2016.0,2015.0,1996.0,1974.0,2000.0,2002.0,2000.0,1981.0,2016.0,2007.0,2016.0,2016.0,2016.0,2016.0,2004.0,2017.0,2016.0,2002.0,1983.0,1995.0,2014.0,2004.0,2016.0,2016.0,1999.0,2000.0,2016.0,2016.0,2009.0,2023.0,2022.0,2015.0,2021.0,2004.0
,2001.0,1997.0,1995.0,2016.0,2016.0,2016.0,2022.0,2016.0,2009.0,2022.0,2016.0,2014.0,2016.0,1981.0,2000.0,2021.0,2016.0,2016.0,2016.0,2016.0,2006.0,2023.0,2016.0,2011.0,2016.0,2017.0,2014.0,2021.0,2021.0,2016.0,1969.0,2016.0,2016.0,2016.0,2016.0,1996.0,2000.0,2019.0,1994.0,2016.0,1997.0,2011.0,2016.0,2013.0,2016.0,2016.0,2016.0,1998.0,2016.0,2015.0,1997.0,2017.0,2016.0,2016.0,1974.0,2014.0,2000.0,2016.0,1992.0,2006.0,1998.0,2010.0,2016.0,2016.0,2023.0,2011.0,2016.0,2002.0,2024.0,1971.0,2000.0,2016.0,2000.0,2016.0,2016.0,1985.0,2016.0,2013.0,2019.0,2016.0,2009.0,1992.0,2016.0,2016.0,2016.0,2019.0,2017.0,1998.0,2016.0,2015.0,1988.0,2003.0,2020.0,2021.0,1996.0,2016.0,2015.0,2014.0,2021.0,2016.0,2017.0,2017.0,2016.0,1988.0,2016.0,2016.0,1909.0,2000.0,2016.0,2016.0,2022.0,2014.0,2016.0,2015.0,1994.0,2020.0,2016.0,2016.0,1987.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2014.0,2006.0,1997.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2018.0,2016.0,2016.0,2016.0,2016.0,2000.0,2000.0,2016.0,2017.0,2016.0,2011.0,2016.0,2016.0,2016.0,2016.0,2017.0,2016.0,2001.0,1990.0,1997.0,2016.0,2000.0,2016.0,1976.0,1989.0,2012.0,2000.0,2000.0,1985.0,2010.0,2021.0,2000.0,2016.0,2022.0,2004.0,2016.0,2010.0,2016.0,2015.0,2016.0,2016.0,2015.0,2021.0,2022.0,2003.0,1996.0,2013.0,1888.0,2017.0,2016.0,2006.0,1999.0,2016.0,2000.0,2016.0,2004.0,1982.0,2000.0,1984.0,2006.0,2016.0,2016.0,2013.0,2016.0,2016.0,2016.0,1997.0,2016.0,1992.0,2016.0,2010.0,2016.0,2008.0,2000.0,2012.0,2016.0,2000.0,2003.0,2001.0,2016.0,2015.0,1999.0,2016.0,1995.0,2017.0,2016.0,2016.0,2016.0,2016.0,1994.0,2016.0,2016.0,1998.0,2014.0,2023.0,2014.0,2007.0,2017.0,2002.0,2016.0,2019.0,2016.0,2011.0,2020.0,2013.0,2005.0,1948.0,2016.0,2016.0,2016.0,2019.0,2016.0,2016.0,2004.0,2015.0,2016.0,2002.0,2000.0,2016.0,2016.0,2022.0,2021.0,2016.0,2017.0,2016.0,1994.0,2016.0,2020.0,2016.0,1981.0,2016.0,2016.0,1978.0,2016.0,2000.0,1989.0,2020.0,2016.0,2022.0,2009.0,1999.0,2013.0,2016.0,2016.0,2022.0,2020.0,2004.0,2023.0,2011.0,2016.0,2016
.0,1995.0,2013.0,2008.0,2023.0,2016.0,2017.0,2020.0,2004.0,2011.0,2016.0]},"kind":"numeric","n":200000,"n_null":7240,"n_unique":98,"null_rate":0.0362,"stats":{"iqr":12.0,"kurtosis":19.645304978799555,"max":2024.0,"mean":2008.877152936294,"median":2016.0,"min":1875.0,"n_outliers":12080,"outlier_rate":0.06266860344469807,"q1":2004.0,"q3":2016.0,"skew":-3.573952335668651,"std":15.425050724093095,"zero_rate":0.0}},{"alerts":[],"column":"country","extras":{"singletons":0,"top_values":[["",103840],["Australia",79320],["United States",8160],["New Zealand",1320],["USA",680],["Antarctica",680],["Colombia",640],["Chile",520],["Bermuda",400],["Portugal",320],["UNITED STATES",320],["Ross Dependency",240],["Russia",240],["United States of America",240],["GREAT BRITAIN",200],["Ecuador",160],["Bahamas",160],["Italy",160],["CO",160],["Discovery Deep, Red Sea",160]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":57,"null_rate":0.0,"stats":{"cardinality":57,"entropy":1.6158929939879074,"entropy_ratio":0.27703128124545995,"top_rate":0.5192,"top_value":""}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["phylum.top_values","species.stats.top_rate","genus.stats.top_rate","family.stats.top_rate","country.top_values","depth.stats.mean","depth.stats.max","depth.stats.min","year.stats.median","year.stats.min","year.stats.n_outliers","year.alerts"],"featured_charts":[{"caption":"Look for the dominance of Proteobacteria and which marine phyla (Cnidaria, Chordata, Echinodermata) make up the bulk of identified records.","column":"phylum","kind":"bar"},{"caption":"Look for the right-skewed spread of sampling depths, with most records between 1,000\u20133,300 m and a long tail extending to 11,000 m.","column":"depth","kind":"histogram"},{"caption":"Look for the sharp concentration of records after 2004 and the small but notable cluster of historical outlier observations dating back to 1875.","column":"year","kind":"histogram"},{"caption":"Look 
for Australia's overwhelming share of named-country records versus all other nations combined.","column":"country","kind":"bar"},{"caption":"Look for Alphaproteobacteria and Teleostei at the top, and note the large blank category indicating records unclassified at class level.","column":"class","kind":"bar"}],"model":"anthropic:default","narrative":"This dataset contains 200,000 deep-sea biodiversity occurrence records spanning taxonomic classification, geographic coordinates, ocean depth, and collection year. The most striking feature is the dominance of blank values across taxonomy columns \u2014 55% of genus, 40% of family, and 73% of species entries are empty strings, suggesting many records are identified only at higher taxonomic levels. Proteobacteria, Cnidaria, and Chordata are the best-represented phyla, while Australia accounts for the vast majority of records with a named country (~79k of ~96k non-blank entries). Depth ranges from 1,000 to 11,000 metres with a mean around 2,400 m, and the year column is heavily left-skewed with over 12,000 outlier records dating back as far as 1875, versus a median of 2016.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","q1","q3","iqr","skew","kurtosis","n_outliers","outlier_rate","null_rate","n_unique"],"model":"anthropic:default","narrative":"This column represents a calendar year, spanning from 1875 to 2024 with 98 distinct values across 200,000 rows. The bulk of records cluster tightly between 2004 and 2016 (IQR of 12 years, median 2016), but the distribution is heavily left-skewed (skew = -3.57, kurtosis = 19.65), driven by a long tail of historically old entries stretching back to 1875. 
Roughly 6.3% of non-null rows (12,080) are flagged as outliers; with Q1 = 2004 and an IQR of 12 the lower 1.5 IQR fence sits at 1986, so these are all records dated before 1986 \u2014 mostly from the 1960s to the mid-1980s, with a sparse tail reaching back to 1875 \u2014 rather than only pre-20th/early-20th century entries; analysts should verify whether the oldest years are legitimate data or encoding errors.","role":"feature","scope":"column","target":"year","treatment":"Inspect and potentially cap or bin pre-1950 outlier years; use as an ordinal/numeric feature or derive 'age relative to reference year' before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","iqr","n_unique","n","null_rate","zero_rate","n_outliers","outlier_rate"],"model":"anthropic:default","narrative":"This column almost certainly represents depth measurements (e.g., ocean depth, well depth, or seismic depth) in meters, ranging from 1,000 m to 11,000 m with a mean of ~2,406 m and median of ~1,962 m. The distribution is right-skewed (skew \u2248 1.09) with a wide IQR of 2,174 m, indicating most observations cluster at shallower depths while a tail extends toward very deep values \u2014 the maximum of 11,000 m is consistent with ocean trench depths. Only 1,938 unique values occur across 200,000 rows, but sampled values such as 1933.14 and 4191.325 keep sub-meter precision, so the repetition more likely reflects many records sharing the same depth reading than rounding or binning, which is worth noting for precision-sensitive analyses. Outlier count is modest (560, 0.28%) and no nulls or zeros are present.","role":"feature","scope":"column","target":"depth","treatment":"Apply log-transform or quantile transformation before regression/modelling to reduce right skew; verify why only 1,938 unique values occur (e.g., repeated sampling events or duplicated records) before treating depth as continuous."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","n_unique","n","skew","kurtosis","iqr","null_rate","outlier_rate","zero_rate"],"model":"anthropic:default","narrative":"This column contains geographic latitude values, ranging from -75.0 to 89.06 degrees, consistent with a near-global spatial dataset. 
With only 2,617 unique values across 200,000 rows, each distinct latitude is reused on average ~76 times, suggesting coordinates are discretized or snapped to a coarse grid rather than recorded at full precision. The distribution is nearly symmetric (skew 0.12) with a platykurtic shape (kurtosis -1.22) and an IQR of ~72 degrees, indicating broad global coverage with no strong concentration in any hemisphere. No nulls, no outliers, and zero_rate of 0.0 are all clean signals.","role":"feature","scope":"column","target":"latitude","treatment":"Pair with longitude for spatial joins or clustering; investigate grid resolution given only 2,617 unique values across 200,000 rows before treating as continuous."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","kurtosis","n_unique","n","zero_rate","null_rate"],"model":"anthropic:default","narrative":"This column represents geographic longitude, with values spanning the full valid range from approximately -180 to +180 degrees and a mean of -51.58, suggesting a dataset skewed toward the Western Hemisphere (median -94.29, well west of the prime meridian). The IQR of 233.75 and near-flat kurtosis (-1.12) indicate values are broadly spread across the globe with no sharp central peak \u2014 consistent with global or multi-continental coverage. Surprisingly, only 2,654 unique values exist across 200,000 rows, implying heavy coordinate quantization or snapping to a fixed grid rather than continuous GPS precision. 
The zero_rate of 0.0004 is negligible but worth monitoring as a potential null-proxy sentinel.","role":"feature","scope":"column","target":"longitude","treatment":"Use as-is for spatial joins or map projections; investigate coordinate quantization (2654 unique values over 200000 rows) before using as a continuous feature in regression."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","n_unique","cardinality","entropy","entropy_ratio","top_rate","top_value","null_rate","n"],"model":"anthropic:default","narrative":"This column contains biological taxonomic class-level classifications (e.g., Alphaproteobacteria, Teleostei, Octocorallia), consistent with a marine or environmental biodiversity dataset. With 138 unique values across 200,000 rows, cardinality is moderate and entropy is reasonably high (4.89, ratio 0.69), indicating a fairly broad but not flat distribution. The top value 'Alphaproteobacteria' accounts for 11.42% of records, suggesting mild concentration at the top. A notable concern is the second-most-frequent entry being an empty string ('') with 21,920 occurrences (\u224811% of rows), which likely represents missing or unclassified taxa rather than a true category and should be treated as null.","role":"label","scope":"column","target":"class","treatment":"Replace empty-string entries with null, then encode as a nominal categorical feature (e.g., target-encode or embed) for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","null_rate","top_values","n"],"model":"anthropic:default","narrative":"This column represents the country of origin or registration for records in the dataset, with 57 distinct values across 200,000 rows. The dominant signal is that 51.92% of rows (103,840) have an empty string \u2014 effectively a missing value masked as a non-null entry, which would go undetected by null checks. 
Among populated values, 'Australia' accounts for 79,320 rows (~39.7% of all rows, or ~82% of the 96,160 non-blank rows), making this a heavily Australia-centric dataset; a further data quality concern is the presence of both 'United States' (8,160) and 'USA' (680) as separate values, indicating inconsistent country name standardisation.","role":"feature","scope":"column","target":"country","treatment":"Replace empty strings with null, then standardise country name variants (e.g. 'USA' \u2192 'United States') before encoding or aggregating."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n","n_unique","null_rate","entropy","top_values"],"model":"anthropic:default","narrative":"This column contains biological family-level taxonomic names (e.g., Nitrosopumilaceae, Elpidiidae, Scombridae), consistent with a marine species or biodiversity dataset. The most striking issue is that the top value is an empty string, accounting for 40.18% of all 200,000 rows (80,360 records) \u2014 a substantial proportion of missing taxonomy that is masked by a null_rate of 0.0, meaning blanks were stored as empty strings rather than true nulls. With 606 unique values and moderate entropy (5.50), the remaining distribution is fairly spread but dominated by a handful of families.","role":"label","scope":"column","target":"family","treatment":"Replace empty strings with NaN, then use as a categorical grouping variable or encode (e.g., target/ordinal encoding) for modelling; investigate whether blank family records can be imputed from genus/species columns."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n","n_unique","null_rate","entropy","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column contains biological genus names, likely from a marine species observation or biodiversity dataset given genera such as Xiphias (swordfish), Thunnus (tuna), Prionace (blue shark), and deep-sea taxa like Amperima and Chrysogorgia. 
The most striking issue is that 54.9% of rows (109,800 of 200,000) carry an empty string rather than a null, meaning missingness is systematically masked and will not be caught by standard null checks. The remaining 840 distinct genus values show moderate entropy (4.90, entropy_ratio 0.50), indicating a reasonably spread but skewed distribution.","role":"label","scope":"column","target":"genus","treatment":"Replace empty-string values with NaN to expose true missingness (~55%), then use as a categorical label or grouping key; consider hierarchical encoding with higher taxonomic ranks if available."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","entropy_ratio","n"],"model":"anthropic:default","narrative":"This column represents the biological taxonomic rank 'Order' for marine organisms, containing 310 distinct taxonomic order names drawn from bacteria, archaea, protists, invertebrates, and fish. The most striking signal is that the top value is an empty string, accounting for 28.16% of all 200,000 rows (56,320 records) \u2014 suggesting a substantial proportion of specimens could not be classified at this rank, which is common in metagenomic or environmental sampling datasets. The entropy ratio of 0.659 indicates moderate diversity across the 310 categories, with a long tail of rarer orders beneath the dominant few.","role":"label","scope":"column","target":"order","treatment":"Treat empty string as a distinct 'unclassified' category or null before encoding; encode remaining values as nominal categories (one-hot or target encoding depending on model)."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","null_rate","top_values","entropy_ratio"],"model":"anthropic:default","narrative":"This column contains biological phylum classifications, covering 65 distinct phyla across 200,000 rows with no nulls. 
The dominant value is Proteobacteria (17.74%), followed by Cnidaria and Chordata, suggesting a marine or mixed ecological dataset spanning bacteria, invertebrates, and vertebrates. Notably, empty strings appear as the 5th most frequent 'value' with 13,200 occurrences (6.6%), which are functionally missing values masked as non-null entries. The entropy ratio of 0.68 indicates moderate concentration \u2014 a few phyla dominate while the long tail of 65 categories is spread unevenly.","role":"label","scope":"column","target":"phylum","treatment":"Replace empty-string entries (13,200 rows) with explicit nulls, then encode as a categorical feature (e.g., ordinal or one-hot for low-cardinality models)."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","top_value","top_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column contains biological taxonomic names (scientific names) spanning multiple ranks \u2014 from broad groups like 'Bacteria' and 'Alphaproteobacteria' down to species-level binomials like 'Amperima rosea' and 'Xiphias gladius'. With 1,478 unique values across 200,000 rows and zero nulls, it is a well-populated label field, though the top value 'Alphaproteobacteria' accounts for 8.82% of all rows, indicating moderate concentration at a handful of higher-rank taxa. 
The high entropy ratio of 0.796 confirms substantial spread across many taxa, and the mix of ranks (class, order, family, genus, species) within a single column is a structural issue that may require rank-disambiguation before analysis.","role":"label","scope":"column","target":"scientificName","treatment":"Normalise taxonomic rank before grouping or modelling; consider splitting into rank-specific columns or encoding hierarchy via a taxonomy backbone."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","entropy_ratio","top_values","n"],"model":"anthropic:default","narrative":"This column contains biological species names (binomial Latin nomenclature), likely from a marine/oceanographic observation dataset given taxa such as swordfish (Xiphias gladius), Atlantic mackerel (Scomber scombrus), and deep-sea holothurians (Amperima rosea). The dominant 'value' is an empty string, accounting for 73.2% of all 200,000 rows (146,400 records), which is a critical data quality issue \u2014 species was not recorded for nearly three-quarters of observations. Across all 678 distinct values (677 species plus the empty string), the entropy ratio is only 0.33, indicating heavy concentration \u2014 driven chiefly by the blank category rather than by any single species.","role":"label","scope":"column","target":"species","treatment":"Treat empty string as missing/unknown; impute or filter before modelling, then encode remaining 677 species (e.g. 
target-encode or embed taxonomic hierarchy) given high cardinality."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4010,"prompt_tokens":11343,"total_tokens":15353}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:40:42+00:00","mode":"full","row_count":200000,"sampled_rows":200000,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/deep_sea.json"},"notes":[],"saturn_version":"0.2.0","schema":{"class":"categorical","country":"categorical","depth":"numeric","family":"categorical","genus":"categorical","latitude":"numeric","longitude":"numeric","order":"categorical","phylum":"categorical","scientificName":"categorical","species":"categorical","year":"numeric"}}
