{"columns":[{"alerts":[],"column":"scientificName","extras":{"singletons":0,"top_values":[["Alphaproteobacteria",17640],["Bacteria",12760],["Nitrosopumilaceae",10840],["Syndiniales",7280],["Amperima rosea",4520],["Porifera",2400],["Thermoplasmata",2360],["Keratoisididae",2320],["Xiphias gladius",2000],["Pseudomonadales",1920],["Gammaproteobacteria",1920],["Monothalamea",1640],["Rhodospirillales",1640],["Scomber scombrus",1520],["Retaria",1520],["Dinophyceae",1320],["Rickettsiales",1200],["Chrysogorgia",1160],["Hexactinellida",1080],["Prionace glauca",1080]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":1478,"null_rate":0.0,"stats":{"cardinality":1478,"entropy":8.377598740606556,"entropy_ratio":0.7956364494286662,"top_rate":0.0882,"top_value":"Alphaproteobacteria"}},{"alerts":[],"column":"species","extras":{"singletons":0,"top_values":[["",146400],["Amperima rosea",4520],["Xiphias gladius",2000],["Scomber scombrus",1520],["Prionace glauca",1080],["Oneirophanta mutabilis",840],["Thunnus albacares",760],["Farrea occa",680],["Trissopathes pseudotristicha",640],["Hoplostethus atlanticus",520],["Trachurus trachurus",480],["Florometra serratissima",440],["Heteropolypus ritteri",400],["Desmophyllum dianthus",400],["Psychropotes longicauda",360],["Thunnus obesus",320],["Solenosmilia variabilis",320],["Etmopterus granulosus",320],["Molpadiodemas villosus",280],["Paragorgia 
arborea",280]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":678,"null_rate":0.0,"stats":{"cardinality":678,"entropy":3.077328935481046,"entropy_ratio":0.32719645393348984,"top_rate":0.732,"top_value":""}},{"alerts":[],"column":"genus","extras":{"singletons":0,"top_values":[["",109800],["Amperima",4520],["Xiphias",2000],["Scomber",1520],["Retaria",1520],["Farrea",1400],["Thunnus",1360],["Chrysogorgia",1360],["Coryphaenoides",1240],["Prionace",1080],["Hemicorallium",1000],["Alteromonas",960],["Paragorgia",880],["Oneirophanta",840],["Lepidisis",800],["Trissopathes",800],["Alepisaurus",760],["Keratoisis",720],["Pennatula",600],["Hoplostethus",600]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":841,"null_rate":0.0,"stats":{"cardinality":841,"entropy":4.895537670189287,"entropy_ratio":0.5038654612996,"top_rate":0.549,"top_value":""}},{"alerts":[],"column":"family","extras":{"singletons":0,"top_values":[["",80360],["Nitrosopumilaceae",10840],["Elpidiidae",4920],["Keratoisididae",4360],["Coralliidae",4040],["Macrouridae",3120],["Scombridae",3040],["Primnoidae",2440],["Chrysogorgiidae",2320],["Xiphiidae",2000],["Retariidae",1520],["Farreidae",1520],["Alteromonadaceae",1440],["Euplectellidae",1360],["Flavobacteriaceae",1320],["Schizopathidae",1280],["Caryophylliidae",1080],["Carcharhinidae",1080],["Acanthogorgiidae",1000],["Nitrospinaceae",1000]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":606,"null_rate":0.0,"stats":{"cardinality":606,"entropy":5.49965916938821,"entropy_ratio":0.5949968245996182,"top_rate":0.4018,"top_value":""}},{"alerts":[],"column":"order","extras":{"singletons":0,"top_values":[["",56320],["Scleralcyonacea",15720],["Nitrosopumilales",10840],["Syndiniales",7960],["Elasipodida",5680],["Gadiformes",4040],["Scombriformes",3360],["Carangiformes",3240],["Calanoida",2840],["Alteromonadales",2720],["Decapoda",2720],["Antipatharia",2560],["Sceptrulophora",2480],["Pseudomonadales",2400],["Flavobacteriales",2320],["Scleractinia",2240],[
"Malacalcyonacea",2000],["Lyssacinosida",2000],["Rotaliida",2000],["Amphipoda",1880]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":310,"null_rate":0.0,"stats":{"cardinality":310,"entropy":5.450207419179946,"entropy_ratio":0.6585458545918689,"top_rate":0.2816,"top_value":""}},{"alerts":[],"column":"class","extras":{"singletons":0,"top_values":[["Alphaproteobacteria",22840],["",21920],["Teleostei",19120],["Octocorallia",17880],["Thaumarchaeota incertae sedis",10840],["Dinophyceae",10600],["Gammaproteobacteria",9440],["Holothuroidea",7760],["Malacostraca",7440],["Hexactinellida",6320],["Hexacorallia",5640],["Copepoda",4080],["Ophiuroidea",3320],["Polychaeta",3320],["Polycystina",2800],["Elasmobranchii",2640],["Globothalamea",2520],["Deltaproteobacteria",2440],["Thermoplasmata",2360],["Flavobacteria",2320]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":138,"null_rate":0.0,"stats":{"cardinality":138,"entropy":4.891619971812342,"entropy_ratio":0.6881343662183016,"top_rate":0.1142,"top_value":"Alphaproteobacteria"}},{"alerts":[],"column":"phylum","extras":{"singletons":0,"top_values":[["Proteobacteria",35480],["Cnidaria",25520],["Chordata",23920],["Echinodermata",14000],["",13200],["Arthropoda",13200],["Myzozoa",11280],["Thaumarchaeota",10920],["Porifera",10360],["Foraminifera",4720],["Annelida",4240],["Radiozoa",4000],["Mollusca",3840],["Bacteroidetes",3440],["Euryarchaeota",2440],["Planctomycetes",2320],["Heterokontophyta",1680],["Verrucomicrobia",1520],["Brachiopoda",1520],["Nematoda",1160]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":65,"null_rate":0.0,"stats":{"cardinality":65,"entropy":4.0945217856631,"entropy_ratio":0.6798857048892365,"top_rate":0.1774,"top_value":"Proteobacteria"}},{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[640,440,6000,5160,4960,4360,4360,8560,5560,16440,13320,10400,3480,2520,4280,4280,4440,6080,2160,1080,5280,5680,1960,5960,9160,7680,7480,12840,7000,5880,11360,2480,920,1160,1200,920,920,1600,12
00,800],"edges":[-75.0,-70.898500061035,-66.79700012207,-62.695500183105,-58.59400024414,-54.492500305175,-50.391000366209994,-46.289500427244995,-42.18800048828,-38.086500549315,-33.98500061035,-29.883500671384994,-25.782000732419995,-21.680500793454996,-17.57900085448999,-13.477500915524992,-9.376000976559993,-5.2745010375949875,-1.173001098629996,2.92849884033501,7.029998779300001,11.131498718265007,15.232998657230013,19.334498596195004,23.43599853516001,27.537498474125016,31.638998413090007,35.74049835205501,39.84199829102002,43.94349822998501,48.044998168950016,52.14649810791501,56.247998046880014,60.349497985845005,64.45099792481003,68.55249786377502,72.65399780274001,76.75549774170503,80.85699768067002,84.95849761963501,89.0599975586]},"sample":[26.85439,-39.51513,36.9836845398,-36.8789,17.7700004578,-44.328417,40.4367752075,26.97448,-34.3206,47.7333,40.3606376648,-32.4697,83.7667,-36.87545,39.35935,53.0391998291,18.15631,-36.88003,-42.7506,23.01483,36.5753440857,-28.00095,35.7940788269,-32.466,44.78695,14.87248,28.9721,-12.49798,-43.136,48.42300703,35.64251,10.5,-36.8789,-41.5113333333,36.9027252197,39.93269,71.66667,35.7181434631,-36.88208,65.0,-30.6224,-32.56103,12.02745,-36.88208,37.38996,-0.9992,28.96551,39.02341,-71.8557,43.37,48.80417269,56.5,-35.68073,-27.92278,30.69546,21.2848,48.83237276,-54.4985,-52.503917,25.97161,2.64166665077,13.28,-34.4018,-32.4697,35.72038,-44.77,28.6722,-44.274,3.664,-36.87975,-4.99815,-18.50122,36.7001533508,29.10948,9.1,-31.00057,-0.49677,-65.6622,-64.72935,-17.9986,27.6499996185,63.312,-27.92278,-36.88202,58.48850017,-50.00695,30.68655,-34.50183,-44.834983,-3.9948,37.36276,-27.547083,-33.9845,36.9773864746,-69.733333,27.98989,25.2083330154,31.2124,-31.640967,-64.471,35.70197,-28.00505,-7.49845,-48.5025,29.32823,-63.129667,-42.3332,-52.002417,-27.92278,-30.62238,-64.72935,10.97609091,5.10163,38.85734,-33.62,42.70496,-64.310583,-27.92278,26.875,-36.2512,-62.99995,-16.0017,-30.62238,26.959262,-47.499267,38.29948,-13.8176,36.9
8663,-3.9948,48.91168654,42.6500015259,55.75048333,29.4725894928,-42.6577,-36.88145,-22.99507,-64.995883,-30.5042,-44.328417,-32.46597,-31.83217,56.15,30.68655,-42.6678,35.7877235413,51.86922,-67.50813,-44.22,-64.0,25.8166666666667,-32.00123,-13.8176,29.11195,26.59578,72.0113333333,47.0,12.144878,35.13673,51.6958007812,-32.49512,80.5599975586,-37.53316667,-60.1,-36.87993,-64.5387,35.77122,-2.00113,-38.3800010681,-36.88035,-36.87938,-41.2933,35.71663,22.937431666666665,31.21908,-52.002417,48.80585974,-0.49677,-24.4992,48.87577096,33.233001709,47.45,48.83679268,-32.57998,-36.24837,-13.8176,52.9792,-59.3435,27.57751,34.00061,83.3499984741,21.2848,40.78026,10.35972933,-45.849998,28.13488,-37.69166,-27.92278,-52.002417,-13.50338,-47.112133,-7.00215,21.0417,71.68917,-4.99815,48.9350107,-30.13316,-53.003483,-37.69166,-7.00215,-28.5013,-55.5058,-27.92278,-51.491767,-24.6,47.0,78.816,-32.74958,-63.95155,36.584651947,-41.3193,-44.834983,47.0,-36.45077,-37.767,-45.176767,-11.16787,-1.03704,-34.50183,12.192167,56.3035,-13.8176,13.88,11.862434,19.44499,36.5917434692,-57.502917,48.91919972,-8.99877,37.37217,27.1292877197,-31.00057,14.02243,-34.4497,-2.00113,-13.8176,-28.00095,80.6832962036,-31.492,-36.98872,-7.00215,-64.50175,79.739,35.16833,28.67217,-13.5034,-53.992383,48.83002737,-13.9691,13.265601,-64.310583,38.2975,29.70129,40.37424,36.5848083496,28.6722,-27.92278,-7.0,-63.990333,-62.4989,-28.00165,-3.50138,-36.87897,-32.74958,38.29945933,-35.3,-0.49677,-54.498483,-25.49173,48.8658558,-9.65,46.7333,-36.88003,14.53575,81.822024,-64.471,-17.9986,11.7906,-42.8068,23.057735,8.88,39.86572,-35.33387,39.38557,37.36929,26.221,-17.00137,36.9918632507,23.30751,47.75,12.566733,-13.5034,-13.50338,-27.92278,35.82497,-46.14833333,10.383,30.68624,-31.63537,-39.8873,-46.9037,36.9805603027,-53.0035,48.7333,-39.9166666666667,-52.503917,-41.2933,-55.5058,-27.2661,-11.99465,40.73369,-46.9037,9.555317,-39.96945,-27.92278,37.37251,-24.6,30.6788,79.2416992188,-30.80827,-27.92278,35.80318,11.772433,
-44.522333,-27.92278,-60.1329994202,-32.47087,36.9842453003,-36.87897,27.3672,-39.96945,-56.000533,-55.5058,-19.4953,-12.9974,12.01611111,-66.331733,64.3653,-4.5005,24.57702,-28.00165,34.1166667938,49.849998,-4.99815,67.06812501388889,-42.953,30.69546,28.6722,-29.9979991913,40.7885932922,-64.345,12.11786967,41.03102,-31.00057,40.3499984741,42.25,-32.5958333,45.3116666667,53.7487,-43.83068,-0.37554,-7.49845,47.7539,35.7277,7.58978,24.201499939,48.9350107,-57.0067,40.5,-36.2338,35.78004,-3.9948,27.97798,-34.84612,-36.87912,-22.0032,-41.12,-26.0515,28.96551,-24.99308,36.6843566895,-36.87993,-28.50127,26.20099,-16.00165,-63.9903,27.5249996185,-5.9993,30.69581,6.508,-4.99815,18.87338,54.2446,14.3797,27.93695,22.99905167,44.8347,48.81810744,35.632,-36.88133,-36.2338,-36.0667,-35.33387,23.088,-32.99448,-44.295,22.0,-52.503917,-31.492,44.778333,-65.1605,-27.92278,5.90078,40.37355,39.88367,81.822024,71.01787,48.87420486,-36.87545,-58.00085,-48.9954,-2.99973,-28.00505,-36.98872,-36.88145,25.5081,-1.50055,54.7295,-32.00123,-64.72935,-17.0014,61.75,-36.98872,80.6832962036,63.49964523333333,40.0,-36.9988,-27.9228,36.73154,-46.328533,-40.475,-32.00123,28.701,42.3797149658,-36.87975,-40.3974,30.68912,21.37731,42.63666667,41.03062,11.00144918,9.5177,-36.88145,40.6984672546,-51.491767,-64.471,-28.00165,-34.70815,-12.49798,-36.2512,5.17865,37.0833,-41.22977,-7.49845,-36.88145,10.97473917,36.7148857117,36.56848526,-36.8821,-10.9976,-64.995883,30.6788,65.0,22.999165,-42.169,-44.6165,35.72204,40.7869529724,-44.32458,-32.00123,-51.491767,50.2483,-30.5042,13.95505,-36.87545,30.69546,47.25,51.86922,63.3875,-9.92345,29.7025]},"kind":"numeric","n":200000,"n_null":0,"n_unique":2617,"null_rate":0.0,"stats":{"iqr":71.9735057355,"kurtosis":-1.2226338646729928,"max":89.0599975586,"mean":-1.5806687265120904,"median":-4.99815,"min":-75.0,"n_outliers":0,"outlier_rate":0.0,"q1":-36.25052,"q3":35.722985735500004,"skew":0.11822792793970917,"std":39.476864430621845,"zero_rate":0.0}},{"alerts":[],"column
":"longitude","extras":{"histogram":{"counts":[17200,53840,2320,680,520,3240,17080,4360,320,3760,4040,4120,5680,1840,3280,2240,1280,2360,12680,2440,2760,640,560,680,560,520,880,920,480,3320,680,160,1160,1200,1080,1000,14840,19120,1520,4640],"edges":[-179.9872,-170.9875575,-161.987915,-152.9882725,-143.98863,-134.9889875,-125.989345,-116.98970249999999,-107.99006,-98.9904175,-89.990775,-80.99113249999999,-71.99149,-62.991847500000006,-53.992205,-44.99256249999999,-35.99292,-26.993277500000005,-17.993635000000012,-8.99399249999999,0.005650000000002819,9.005292499999996,18.004935000000017,27.00457750000001,36.004220000000004,45.0038625,54.00350499999999,63.00314750000001,72.00279,81.00243250000003,90.00207500000002,99.00171750000001,108.00136,117.0010025,126.00064499999999,135.00028749999998,143.99992999999998,152.99957250000003,161.99921500000002,170.9988575,179.9985]},"sample":[-168.0684,-172.4141,-123.362541199,152.5661,-25.4350004196,-173.74595,-126.414466858,-168.85732,152.06015,-8.2333,-125.211997986,154.10308,-155.2167,-170.59835,-67.20591,-20.0166501999,-65.05737,152.57005,-174.6475,154.0076,-122.305511475,154.7829,-122.655403137,153.3989,-130.36875,-73.53915,-170.92645,-169.99903,173.9645,-15.62183031,-122.76729,-43.2000007629,152.5661,175.7198333333,-122.404510498,-69.22839,8.11667,-122.723999023,152.55653,0.0,153.6351,153.11935,-117.350067,152.55653,-123.42307,-168.7492,-170.92561,-72.44906,174.033,-52.08,-16.51334518,-57.64,-170.00705,155.1438,-174.60594,38.0484,-16.60860368,-170.0029,-169.996817,-164.74452,-40.5333328247,80.73,151.80088,154.10308,-122.72343,-176.84,-88.4765,177.3572,63.756,152.56255,-168.75025,-170.00235,-122.11907196,-178.10763,58.259,-169.99582,-168.74373,-170.0328,118.69735,-170.00035,-87.3999977112,5.094,155.14157,152.5582,-9.727504142,-169.99245,-174.61931,-170.00213,-173.1415,-168.75178,-123.39408,-46.433483,-169.99478,-123.364280701,-126.7,-170.95193,-93.1916656494,-179.63601,42.832517,115.623,-122.74405,154.35052,-168.75027,-170.00
04,-178.83216,87.259917,174.3825,-170.076017,155.1421,153.63512,118.69735,-116.158062,-141.88441,-63.74344,16.92,29.49043,89.803133,155.14233,-90.0416641235,154.24928,-170.030533,-170.0016,153.63512,-85.189096,-169.989033,-62.51603,-169.999,-123.35343,-168.75178,-16.57333131,-52.125,-10.7221,-87.1689300537,-176.7562,152.55905,-170.00023,-170.009767,-169.99368,-173.74595,153.39892,153.3184,10.32,-174.61931,175.913,-122.660453796,-178.32357,-22.52148,147.37,93.55,-54.9666666666667,-169.9937,-169.999,-178.10697,-160.67294,-154.6221666667,-8.0,-117.3254597,-122.94958,-13.006649971,-169.99072,-179.323303223,179.8331667,-22.26667,152.5619,116.6403,-122.69264,-168.7501,149.309997559,152.56053,152.56903,176.4667,-122.72412,153.931905,-179.62101,-170.076017,-16.63275563,-168.74373,-170.00232,-15.59824052,-118.5,-8.36,-16.56549874,153.07755,151.92822,-169.999,-132.756616,81.071167,-174.98835,-121.11772,-179.648300171,38.0484,-66.5866,-117.1622087,164.533005,-176.90194,139.21667,155.14362,-170.076017,-170.00263,-170.468833,-168.75268,-17.0822,0.516,-168.75025,-16.80249849,-71.8695,-170.014317,139.21667,-168.75268,-169.9972,-170.002633,155.14132,-170.01745,70.4,-9.0,-2.729,152.99235,83.140217,-122.521057129,145.0833,-173.1415,-43.0,-170.2935,49.517,-172.73455,-169.89206,-176.15658,-170.00213,-117.325467,-10.275,-169.999,-116.47,-117.0528928,165.79832,-122.236869812,-170.006333,-16.85498721,-168.87517,-123.40324,-96.6004257202,-169.99582,144.64108,129.512,-168.7501,-169.999,154.7829,68.6999969482,153.32997,153.88842,-168.75268,-170.003333,4.508,-123.01845,-88.47649,-170.0026,-169.957917,-16.54500336,-169.99773,-73.062007,89.803133,-62.51863,-174.01773,-67.40372,-122.521690369,-88.4765,155.1421,11.0,-170.04835,-169.9932,155.0341,-168.75058,152.56713,152.99235,-62.51625717,-19.0167,-168.74373,-170.002917,-170.00217,-16.63282891,-25.0167,-6.7333,152.57005,-169.9222,11.538371,115.623,-170.00035,-117.536883,173.8305,153.88675,53.15,-69.59031,-170.00248,-67.1368,-123.39246,-85.665,-17
0.00125,-123.353630066,-158.35707,-9.25,-116.708233,-170.0026,-170.00263,155.14132,-122.60916,177.1883333,-76.317,-174.62126,153.32147,177.972,165.44,-123.364067078,-170.0143,-12.3333,-26.0333333333333,-169.996817,176.4667,-170.002633,-169.9975,-170.00237,-66.65718,165.44,-76.966321,-172.70148,155.14293,-123.40476,70.4,-174.62802,61.5349998474,153.45375,155.1435,-122.6502,-116.850133,-173.502283,155.14312,170.716995239,153.17248,-123.361366272,152.56713,-90.5779,-172.70148,-170.004417,-170.002633,-170.0026,-169.99823,-117.57722222,-170.010233,6.44672,-168.7502,-173.97436,155.0341,-75.5833320618,-49.5,-168.75025,7.010967669444445,-174.0052,-174.60594,-88.4765,-73.2789993286,-127.520950317,83.536,-117.34347917,-66.32953,-169.99582,-66.7080001831,-9.74,-179.6078333,-126.5733333333,-14.2157,150.3028,-176.1319,-168.75027,-127.7643,-122.71289,133.8003,58.6301498413,-16.80249849,-169.999167,-66.8083305359,151.2642,-122.66442,-168.7518,-170.95512,151.3434,152.56818,-39.8231,176.78,-156.8941,-170.92561,-170.00397,-122.118186951,152.5619,-169.99722,-173.3253,-170.00158,-170.0484,-88.4499969482,-168.7513,-174.60574,60.226,-168.75025,-155.25128,-12.7366,-72.6628,-170.92008,154.4091583,137.2352,-16.46868641,-122.82703,152.55955,151.26423,173.0283,-170.00248,-86.273,152.92047,147.37417,-92.917,-169.996817,153.32997,-125.696667,91.637833,155.14312,-162.13826,-67.40515,-67.42617,11.538371,-146.1322,-16.69415238,-170.59835,-170.01025,-170.0036,-168.75135,154.35052,153.88842,152.55905,-173.5221,-168.74968,-11.7855,-169.9937,118.69735,-170.0013,-2.75,153.88842,68.6999969482,4.998265294444445,-9.0,153.99932,155.1429,-122.00829,-171.373367,177.75,-169.9937,-87.76,-127.635673523,152.56255,-173.022,-174.61558,-158.23686,-10.04166667,-66.32836,-116.1510258,134.75164,152.55905,-127.518119812,-170.01745,115.623,155.0341,129.721,-169.99903,154.2493,-162.05634,-9.25,148.76,-168.75027,152.55905,-116.1600608,-122.052452087,-122.577819824,152.5565,-169.9994,-170.009767,-174.62802,0.0,154.3426383,
-174.25463,-178.0207,-122.72321,-127.524452209,147.27278,-169.9937,-170.01745,-12.2273,-169.99368,-116.51406667,-170.59835,-174.60594,-41.2166671753,-178.32357,-2.0378,-169.63012,-174.01496]},"kind":"numeric","n":200000,"n_null":0,"n_unique":2654,"null_rate":0.0,"stats":{"iqr":233.751,"kurtosis":-1.1234210179498667,"max":179.9985,"mean":-51.57663763133836,"median":-94.29,"min":-179.9872,"n_outliers":0,"outlier_rate":0.0,"q1":-169.995,"q3":63.756,"skew":0.6619478858305774,"std":127.08723895239815,"zero_rate":0.0004}},{"alerts":[],"column":"depth","extras":{"histogram":{"counts":[58400,15280,10920,20200,23560,5720,5320,4360,5080,3520,3520,3360,8000,4680,1880,11400,3240,3360,5520,1760,280,40,40,0,0,80,80,0,0,0,80,80,40,0,40,0,120,0,0,40],"edges":[1000.0,1250.0,1500.0,1750.0,2000.0,2250.0,2500.0,2750.0,3000.0,3250.0,3500.0,3750.0,4000.0,4250.0,4500.0,4750.0,5000.0,5250.0,5500.0,5750.0,6000.0,6250.0,6500.0,6750.0,7000.0,7250.0,7500.0,7750.0,8000.0,8250.0,8500.0,8750.0,9000.0,9250.0,9500.0,9750.0,10000.0,10250.0,10500.0,10750.0,11000.0]},"sample":[2149.0,2000.8,1933.14,3800.0,1100.0,1035.7,2724.04,2336.0,1000.0,1245.0,1565.72,1000.0,1000.0,1001.6,2860.0,1750.0,1107.0,4600.0,1585.0,5599.0,2225.37,1000.0,1811.5,1000.0,2347.0,2471.7,2029.0,5077.4,1057.5,4844.0,2124.0,4292.0,3800.0,1076.0,1255.88,1115.0,2850.0,1343.86,1800.0,2913.0,1000.0,1000.0,4093.0,1800.0,1001.0,2150.0,2086.0,1511.0,1972.0,1337.5,4842.0,1320.0,2000.7,1400.0,1301.0,2180.0,4843.5,4902.0,1067.2,2346.0,1400.0,1000.0,1000.0,1000.0,1325.0,1207.5,1370.0,1072.5,3400.0,3200.0,2085.2,1935.9,1716.31,1874.0,2540.0,1037.0,2085.6,3200.0,3361.1,1002.1,1605.0,1059.79,4200.0,2000.0,1801.0,2085.3,1539.0,5589.4,1062.5,2001.2,1351.0,1667.0,2087.1,1917.85,1262.0,1931.0,1960.0,1306.0,1308.0,2161.0,1494.0,1000.0,1935.5,5390.0,2085.0,3840.0,1433.0,1001.8,3800.0,1000.0,3361.1,4191.325,4859.0,1977.0,1828.0,2054.0,3626.0,3600.0,1250.0,1000.0,1966.0,2085.0,1000.0,1772.0,2000.3,1867.0,1035.2,1899.0,2001.2,4844.5,1100.0,1200.0,1289.5,
1396.0,2200.0,2065.8,1035.0,5642.4,2065.9,1000.0,1000.0,1055.0,1539.0,1063.0,1660.03,1812.0,4893.0,1306.0,3260.0,2000.0,1064.4,2086.0,1896.0,1899.0,1099.0,2147.0,4041.0,3965.0,1185.0,1035.5,8700.0,3142.0,3596.0,3000.0,2100.0,1670.0,2066.0,1121.0,2600.0,4400.0,1039.5,1398.0,5581.0,2026.0,1001.8,4839.0,1035.9,2064.8,4832.5,1240.0,2210.0,4840.5,1000.0,1000.0,4382.4,1288.5,1943.0,1239.0,1197.0,1505.0,2180.0,1267.0,4275.95,4400.0,1852.0,1042.5,2000.0,1999.7,4949.8,1036.7,1900.3,1229.5,1500.0,1035.9,4834.5,2733.0,1136.6,1042.5,1900.3,5551.0,1936.3,4400.0,4805.6,3366.0,4277.0,2604.7,1000.0,3707.0,1186.1,1200.0,1931.1,1000.0,2085.6,4000.0,2084.2,2031.0,1924.0,1002.2,4063.0,2000.0,1035.0,1000.0,4133.0,2093.0,2060.11,1035.6,4839.5,4954.6,1024.0,1000.0,1037.0,1252.0,3292.5,2066.0,1035.0,1000.0,3250.0,1000.0,1000.0,1900.3,2356.3,2708.2,3953.0,1370.0,4949.0,5200.0,4841.5,1001.9,2976.0,3626.0,1843.0,1896.0,1084.0,3008.7,1370.0,3800.0,1354.0,1001.8,1065.0,1000.0,2085.7,4000.0,1000.0,2041.355,1000.0,1035.9,4902.0,1900.8,4839.5,2130.0,2187.0,4600.0,2232.0,1546.7,2161.0,1002.1,4351.5,1107.0,5663.0,4743.0,1237.0,2086.4,2802.0,1233.0,3241.0,1935.8,1767.5,1789.0,1672.0,4201.0,4949.0,4949.8,4400.0,2634.0,2424.0,2357.0,1580.0,1000.0,1271.0,1049.0,1998.43,1136.0,1200.0,2000.0,1935.4,1039.5,1936.3,5546.0,1936.5,1316.0,1049.0,2842.0,1065.1,3000.0,1069.0,3366.0,1758.0,1500.0,1000.0,2400.0,1982.0,2070.0,1999.4,2800.0,3660.0,1000.0,1880.63,4000.0,1178.0,4759.7,1035.7,1936.3,2000.0,1001.1,4340.0,1065.0,2600.0,5637.0,1799.0,1000.0,1300.0,1000.0,1035.9,1277.0,1378.5,1301.0,1370.0,2578.0,3269.2,3632.0,4181.0,1614.0,1037.0,2406.5,1106.5,1213.5,2750.0,1302.0,1571.0,1145.0,1935.5,2667.0,1581.0,2746.0,1305.0,4834.5,1935.5,1750.0,1000.0,1696.0,5323.0,1433.0,1000.0,4200.0,1350.0,1375.5,5127.0,2086.0,1974.8,1537.14,3000.0,5551.2,1600.0,2085.8,2000.0,1235.0,5765.0,1302.0,2009.0,2085.2,1681.0,2787.0,3404.0,1240.0,5552.0,1355.0,4842.0,2691.0,2400.0,1000.0,1024.0,1035.8,1220.0,1000.0,1510.5,2862.0,1935.4,1000
.0,2800.0,1226.0,2800.0,1073.0,1074.0,1396.0,1546.7,1000.0,4844.5,5380.4,1999.3,1999.2,2065.7,1000.0,1000.0,2200.0,2103.0,2061.1,1400.0,1064.4,3361.1,5071.0,1595.0,1000.0,3250.0,1180.0,1170.0,1000.0,3000.0,1062.0,2001.1,1920.0,1064.4,2245.0,2915.68,3200.0,1035.0,1470.0,1016.0,2270.0,1660.0,4186.087,2183.0,2200.0,3258.62,1035.8,2161.0,1000.0,3714.0,5077.4,1000.0,1506.0,1500.0,1005.0,1935.5,2200.0,4206.19,1000.07,3057.72,1800.0,1035.9,1035.0,1758.0,2913.0,5565.0,1001.0,1137.5,1307.0,3269.2,1397.0,5787.7,1035.8,1127.5,5642.4,4152.0,5380.4,1301.0,2176.5,1812.0,1721.5,2000.5,1849.0]},"kind":"numeric","n":200000,"n_null":0,"n_unique":1938,"null_rate":0.0,"stats":{"iqr":2174.25,"kurtosis":0.502188962740926,"max":11000.0,"mean":2405.742162760078,"median":1961.6950000000002,"min":1000.0,"n_outliers":560,"outlier_rate":0.0028,"q1":1149.0,"q3":3323.25,"skew":1.0905346340353783,"std":1477.2018060347082,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-3.57"},{"code":"outliers","level":"warn","message":"6.3% rows beyond 1.5 
IQR"}],"column":"year","extras":{"histogram":{"counts":[40,120,320,80,120,0,0,80,40,120,0,0,0,160,400,240,80,0,0,40,80,40,120,1680,840,1520,1840,1360,960,2160,2880,5200,8520,13760,10040,7360,7280,98920,14120,12240],"edges":[1875.0,1878.725,1882.45,1886.175,1889.9,1893.625,1897.35,1901.075,1904.8,1908.525,1912.25,1915.975,1919.7,1923.425,1927.15,1930.875,1934.6,1938.325,1942.05,1945.775,1949.5,1953.225,1956.95,1960.675,1964.4,1968.125,1971.85,1975.575,1979.3,1983.025,1986.75,1990.475,1994.2,1997.925,2001.65,2005.375,2009.1,2012.825,2016.55,2020.275,2024.0]},"sample":[2023.0,1997.0,2000.0,2024.0,1984.0,2015.0,1985.0,1999.0,2016.0,2012.0,2000.0,2016.0,1958.0,2016.0,2016.0,2016.0,2000.0,1998.0,2016.0,2003.0,2016.0,2016.0,2004.0,2014.0,1993.0,2016.0,2016.0,2016.0,2004.0,1992.0,2016.0,1997.0,2016.0,1998.0,2012.0,1997.0,1996.0,2013.0,2016.0,1975.0,1971.0,2016.0,2016.0,2004.0,2016.0,2016.0,1978.0,1993.0,1969.0,2016.0,1992.0,2000.0,2016.0,1972.0,2013.0,2016.0,2016.0,2011.0,1998.0,2018.0,2017.0,2016.0,2016.0,1997.0,2016.0,2016.0,2016.0,2016.0,2012.0,2016.0,2016.0,2016.0,1987.0,1964.0,2016.0,2022.0,2016.0,2013.0,2016.0,2007.0,2016.0,2016.0,2016.0,1962.0,1976.0,2015.0,2015.0,2016.0,2016.0,2016.0,1980.0,2016.0,2003.0,2016.0,2000.0,1999.0,2004.0,2016.0,2016.0,2016.0,1985.0,2016.0,1998.0,2018.0,2016.0,2016.0,2002.0,2016.0,2016.0,2000.0,2016.0,2011.0,1996.0,1997.0,2016.0,2016.0,2001.0,2018.0,2016.0,2024.0,2016.0,2016.0,2016.0,2016.0,2020.0,1990.0,2020.0,2019.0,2016.0,2016.0,2003.0,2016.0,2019.0,2006.0,2017.0,1997.0,1969.0,2016.0,2016.0,2016.0,2016.0,2016.0,2003.0,1994.0,1998.0,2004.0,1990.0,2016.0,1984.0,1999.0,2017.0,2022.0,2021.0,2016.0,2016.0,2022.0,1993.0,2016.0,1981.0,2016.0,2002.0,2022.0,2008.0,1980.0,1986.0,2016.0,2024.0,2016.0,2016.0,2016.0,2016.0,2015.0,1996.0,1974.0,2000.0,2002.0,2000.0,1981.0,2016.0,2007.0,2016.0,2016.0,2016.0,2016.0,2004.0,2017.0,2016.0,2002.0,1983.0,1995.0,2014.0,2004.0,2016.0,2016.0,1999.0,2000.0,2016.0,2016.0,2009.0,2023.0,2022.0,2015.0,2021.0,2004.0
,2001.0,1997.0,1995.0,2016.0,2016.0,2016.0,2022.0,2016.0,2009.0,2022.0,2016.0,2014.0,2016.0,1981.0,2000.0,2021.0,2016.0,2016.0,2016.0,2016.0,2006.0,2023.0,2016.0,2011.0,2016.0,2017.0,2014.0,2021.0,2021.0,2016.0,1969.0,2016.0,2016.0,2016.0,2016.0,1996.0,2000.0,2019.0,1994.0,2016.0,1997.0,2011.0,2016.0,2013.0,2016.0,2016.0,2016.0,1998.0,2016.0,2015.0,1997.0,2017.0,2016.0,2016.0,1974.0,2014.0,2000.0,2016.0,1992.0,2006.0,1998.0,2010.0,2016.0,2016.0,2023.0,2011.0,2016.0,2002.0,2024.0,1971.0,2000.0,2016.0,2000.0,2016.0,2016.0,1985.0,2016.0,2013.0,2019.0,2016.0,2009.0,1992.0,2016.0,2016.0,2016.0,2019.0,2017.0,1998.0,2016.0,2015.0,1988.0,2003.0,2020.0,2021.0,1996.0,2016.0,2015.0,2014.0,2021.0,2016.0,2017.0,2017.0,2016.0,1988.0,2016.0,2016.0,1909.0,2000.0,2016.0,2016.0,2022.0,2014.0,2016.0,2015.0,1994.0,2020.0,2016.0,2016.0,1987.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2014.0,2006.0,1997.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2018.0,2016.0,2016.0,2016.0,2016.0,2000.0,2000.0,2016.0,2017.0,2016.0,2011.0,2016.0,2016.0,2016.0,2016.0,2017.0,2016.0,2001.0,1990.0,1997.0,2016.0,2000.0,2016.0,1976.0,1989.0,2012.0,2000.0,2000.0,1985.0,2010.0,2021.0,2000.0,2016.0,2022.0,2004.0,2016.0,2010.0,2016.0,2015.0,2016.0,2016.0,2015.0,2021.0,2022.0,2003.0,1996.0,2013.0,1888.0,2017.0,2016.0,2006.0,1999.0,2016.0,2000.0,2016.0,2004.0,1982.0,2000.0,1984.0,2006.0,2016.0,2016.0,2013.0,2016.0,2016.0,2016.0,1997.0,2016.0,1992.0,2016.0,2010.0,2016.0,2008.0,2000.0,2012.0,2016.0,2000.0,2003.0,2001.0,2016.0,2015.0,1999.0,2016.0,1995.0,2017.0,2016.0,2016.0,2016.0,2016.0,1994.0,2016.0,2016.0,1998.0,2014.0,2023.0,2014.0,2007.0,2017.0,2002.0,2016.0,2019.0,2016.0,2011.0,2020.0,2013.0,2005.0,1948.0,2016.0,2016.0,2016.0,2019.0,2016.0,2016.0,2004.0,2015.0,2016.0,2002.0,2000.0,2016.0,2016.0,2022.0,2021.0,2016.0,2017.0,2016.0,1994.0,2016.0,2020.0,2016.0,1981.0,2016.0,2016.0,1978.0,2016.0,2000.0,1989.0,2020.0,2016.0,2022.0,2009.0,1999.0,2013.0,2016.0,2016.0,2022.0,2020.0,2004.0,2023.0,2011.0,2016.0,2016
.0,1995.0,2013.0,2008.0,2023.0,2016.0,2017.0,2020.0,2004.0,2011.0,2016.0]},"kind":"numeric","n":200000,"n_null":7240,"n_unique":98,"null_rate":0.0362,"stats":{"iqr":12.0,"kurtosis":19.645304978799555,"max":2024.0,"mean":2008.877152936294,"median":2016.0,"min":1875.0,"n_outliers":12080,"outlier_rate":0.06266860344469807,"q1":2004.0,"q3":2016.0,"skew":-3.573952335668651,"std":15.425050724093095,"zero_rate":0.0}},{"alerts":[],"column":"country","extras":{"singletons":0,"top_values":[["",103840],["Australia",79320],["United States",8160],["New Zealand",1320],["USA",680],["Antarctica",680],["Colombia",640],["Chile",520],["Bermuda",400],["Portugal",320],["UNITED STATES",320],["Ross Dependency",240],["Russia",240],["United States of America",240],["GREAT BRITAIN",200],["Ecuador",160],["Bahamas",160],["Italy",160],["CO",160],["Discovery Deep, Red Sea",160]]},"kind":"categorical","n":200000,"n_null":0,"n_unique":57,"null_rate":0.0,"stats":{"cardinality":57,"entropy":1.6158929939879074,"entropy_ratio":0.27703128124545995,"top_rate":0.5192,"top_value":""}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","species.top_rate","genus.top_rate","country.top_rate","country.top_values","phylum.top_value","phylum.top_rate","class.top_value","class.top_rate","year.skew","year.min","year.max","depth.min","depth.max","depth.median"],"featured_charts":[{"caption":"Shows the dominant taxonomic groups, led by Proteobacteria, Cnidaria, and Chordata.","column":"phylum","kind":"bar"},{"caption":"Reveals the distribution of sampling depths from 1,000 m down to the 11,000 m hadal zone.","column":"depth","kind":"histogram"},{"caption":"Highlights the strong left skew toward recent years with a long historical tail back to 1875.","column":"year","kind":"histogram"},{"caption":"Exposes the heavy Australia bias and the large share of records with no country recorded.","column":"country","kind":"bar"},{"caption":"Illustrates how 73% 
of rows have no species assignment, with only a handful of named species dominating the rest.","column":"species","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This is a 200,000-row deep-sea biodiversity dataset with 12 columns covering taxonomy (phylum, class, order, family, genus, species, scientificName), geography (country, latitude, longitude), depth, and observation year. Two things stand out: the taxonomic hierarchy is heavily incomplete at lower ranks \u2014 species is blank in 73.2% of rows and genus in 54.9% \u2014 so most records can only be analyzed at higher ranks like phylum (top: Proteobacteria at 17.7%) or class (top: Alphaproteobacteria at 11.4%). Country is also mostly missing (51.9% blank) with Australia dominating the populated entries at 79,320 records, suggesting a strong sampling bias. Year is left-skewed (skew -3.57) toward recent records with a long tail back to 1875, while depth ranges from 1,000 to 11,000 m with a median near 1,962 m. Start by checking the missingness in species/country and the geographic concentration before any biodiversity analysis.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic name field mixing ranks from class (Alphaproteobacteria, 8.82% of rows) and domain (Bacteria) down to species (Xiphias gladius, Amperima rosea), across 1,478 distinct values with no nulls. The rank inconsistency is the headline issue: aggregating or joining on this column will conflate broad clades with individual species. 
Entropy ratio of 0.80 shows the distribution is fairly diffuse despite the dominant top value.","role":"label","scope":"column","target":"scientificName","treatment":"Normalise to a consistent taxonomic rank (or join to a taxonomy table) before grouping or modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical species identifier with 677 distinct binomial names (e.g., Amperima rosea, Xiphias gladius, Prionace glauca) covering marine taxa, plus an empty-string placeholder that brings n_unique to 678. The dominant value is an empty string at 73.2% of 200,000 rows, meaning species is unrecorded for nearly three quarters of observations despite a reported null_rate of 0.0. Among labelled rows, distribution is long-tailed with no single species exceeding 4,520 occurrences, and overall entropy_ratio is 0.327.","role":"label","scope":"column","target":"species","treatment":"Treat empty strings as missing, then group rare categories or target-encode before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic genus label for what appears to be a marine biology dataset (Amperima, Xiphias, Scomber, Thunnus, Prionace). The dominant signal is missingness encoded as empty string: 109,800 of 200,000 rows (54.9%) have no genus assigned, despite a stated null_rate of 0.0. 
Across the remaining records, 840 distinct genera are spread fairly thin, with Amperima the largest non-empty bucket at 4,520.","role":"label","scope":"column","target":"genus","treatment":"Recode empty strings as nulls, then group rare genera or roll up to a higher taxonomic rank before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic family classification, with 606 distinct values (605 named families plus the empty string) across 200000 records and zero nulls. The dominant value is an empty string at 40.18% (80360 rows), effectively a hidden missing-data category, while the next-largest real family Nitrosopumilaceae covers only 5.42% (10840). The remaining tail spans marine taxa (Elpidiidae, Keratoisididae, Coralliidae, Macrouridae, Scombridae), consistent with a deep-sea or oceanographic biodiversity dataset.","role":"feature","scope":"column","target":"family","treatment":"Recode empty strings as missing, then group rare families into an 'other' bucket before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic order assignments for biological records, with 310 distinct values (309 named orders plus the empty string) spanning corals (Scleralcyonacea), archaea (Nitrosopumilales), dinoflagellates (Syndiniales), and various fish and crustacean groups. The dominant value is an empty string at 28.16% (56,320 rows), indicating a large block of unassigned/missing orders rather than true nulls. 
Entropy ratio of 0.66 shows moderate concentration, but the ten largest named orders cover only about 41% of labeled records (59,120 of 143,680), leaving a long tail of rarer orders.","role":"feature","scope":"column","target":"order","treatment":"Recode empty strings as missing, then group rare orders before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic class assignments across 200,000 records, spanning 138 distinct values with moderate concentration (entropy ratio 0.69, top class 'Alphaproteobacteria' at 11.4%). The mix is biologically heterogeneous, blending bacteria, archaea, fish, corals, and sponges, suggesting a marine biodiversity catalogue rather than a single-domain dataset. Notably, the second most common value is an empty string at 21,920 rows (~11%), which null_rate=0.0 misses because blanks are encoded as strings rather than nulls.","role":"label","scope":"column","target":"class","treatment":"Recode empty strings to nulls, then group rare classes before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Taxonomic phylum labels across 200,000 records spanning 65 distinct values, led by Proteobacteria at 17.74% and followed by Cnidaria, Chordata, and Echinodermata. The mix of bacterial, archaeal, and animal phyla (e.g., Thaumarchaeota alongside Chordata) suggests a broad biodiversity or environmental-sequencing dataset rather than a single kingdom. 
Notably, 13,200 rows carry an empty-string phylum \u2014 a hidden null channel despite the reported null_rate of 0.0.","role":"feature","scope":"column","target":"phylum","treatment":"Recode empty strings to null and group-encode (target or frequency encoding) given 65 categories."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.std","stats.skew","stats.kurtosis","stats.iqr","stats.q1","stats.q3"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic latitude in degrees, ranging from -75.0 to 89.06 across 200000 rows with no nulls and only 2617 unique values, suggesting coordinates snapped to a coarse grid. The distribution is nearly symmetric (skew 0.12) but platykurtic (kurtosis -1.22) with a wide IQR of 71.97, indicating fairly uniform global coverage rather than a concentration near populated bands. Mean (-1.58) and median (-4.998) sit just south of the equator.","role":"feature","scope":"column","target":"latitude","treatment":"Pair with longitude for geospatial features; consider binning or projecting rather than using raw degrees in linear models."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","q1","q3","std","skew","kurtosis","n_unique","null_rate","n"],"model":"anthropic:claude-opus-4-7","narrative":"This is a geographic longitude feature spanning the full valid range from -179.9872 to 179.9985 across 200000 rows with no nulls. The distribution is wide (std 127.09) and platykurtic (kurtosis -1.12) with a median of -94.29 and Q1 at -169.995, suggesting a heavy concentration in the western hemisphere and Pacific quadrant rather than a uniform global spread. 
Only 2654 unique values indicate the coordinates are quantised rather than raw floats.","role":"feature","scope":"column","target":"longitude","treatment":"Pair with latitude for geospatial features; avoid treating as a plain scalar in models."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","iqr","n_unique","n","null_rate","zero_rate","outlier_rate","q1","q3","std"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric measurement of depth ranging from 1000.0 to 11000.0 with mean 2405.74 and median 1961.70, most plausibly the ocean sampling depth of each deep-sea occurrence record (likely in metres, though the unit is not stated in the profile). The distribution is right-skewed (skew 1.09) with IQR 2174.25 and a tight 0.28% outlier rate, and only 1938 unique values across 200000 rows point to discretized or rounded measurements. No nulls or zeros are present.","role":"feature","scope":"column","target":"depth","treatment":"Consider a log or sqrt transform before regression to tame the right skew."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.skew","stats.kurtosis","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is a year column spanning 1875 to 2024 with 98 distinct values, most likely the observation or collection year of each occurrence record. The distribution is heavily left-skewed (skew -3.57, kurtosis 19.6) with a median of 2016 but mean pulled down to 2008.9, and 6.27% of values flagged as outliers \u2014 i.e. a long tail of older entries against a modern-heavy mass. 
About 3.6% of rows are null.","role":"feature","scope":"column","target":"year","treatment":"Impute the 3.6% nulls and consider bucketing into decades or capping the pre-1980 tail before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Country of origin as a free-form categorical with 57 distinct values, dominated by an empty string at 51.92% (103,840 rows) which functions as an undeclared missing token rather than a true null (null_rate is 0.0). Australia accounts for the bulk of populated values (79,320), making the dataset overwhelmingly Australia-centric, and the entropy_ratio of 0.277 confirms heavy concentration. Note also the inconsistent encoding of the United States as both 'United States' (8,160) and 'USA' (680).","role":"feature","scope":"column","target":"country","treatment":"Recode empty strings as missing, canonicalise duplicates like 'USA'/'United States', then group rare countries before one-hot encoding."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4312,"prompt_tokens":13846,"total_tokens":18158}},"language_counts":{},"meta":{"generated_at":"2026-05-01T23:35:12+00:00","mode":"full","row_count":200000,"sampled_rows":200000,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/deep_sea.json"},"notes":[],"saturn_version":"0.2.0","schema":{"class":"categorical","country":"categorical","depth":"numeric","family":"categorical","genus":"categorical","latitude":"numeric","longitude":"numeric","order":"categorical","phylum":"categorical","scientificName":"categorical","species":"categorical","year":"numeric"}}
