{"columns":[{"alerts":[],"column":"scientificName","extras":{"singletons":1,"top_values":[["Mnemiopsis leidyi",2000],["Lingulodinium",1976],["Meganyctiphanes norvegica",1928],["Photobacterium",1842],["Periphylla periphylla",1802],["Pelagia noctiluca",1768],["Noctiluca scintillans",1728],["Vibrio",1584],["Vargula norvegica",1482],["Cypridina dentata",1320],["Euphausia superba",1298],["Chaetopterus variopedatus",1222],["Beroe",1202],["Oplophorus spinosus",1170],["Histioteuthis",952],["Alexandrium",944],["Metridia lucens",872],["Aequorea",798],["Atolla wyvillei",756],["Pyrocystis pseudonoctiluca",742]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":245,"null_rate":0.0,"stats":{"cardinality":245,"entropy":5.928473918399067,"entropy_ratio":0.7469754780251602,"top_rate":0.046446818392940084,"top_value":"Mnemiopsis leidyi"}},{"alerts":[],"column":"genus","extras":{"singletons":0,"top_values":[["Noctiluca",2000],["Pyrocystis",2000],["Lingulodinium",2000],["Alexandrium",2000],["Aequorea",2000],["Pelagia",2000],["Mnemiopsis",2000],["Atolla",2000],["Periphylla",2000],["Beroe",2000],["Euphausia",2000],["Meganyctiphanes",2000],["Metridia",2000],["Oplophorus",2000],["Vargula",2000],["Cypridina",2000],["Histioteuthis",2000],["Vibrio",2000],["Photobacterium",2000],["Chaetopterus",2000]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":27,"null_rate":0.0,"stats":{"cardinality":27,"entropy":4.558171819894509,"entropy_ratio":0.9586287410208015,"top_rate":0.046446818392940084,"top_value":"Noctiluca"}},{"alerts":[],"column":"family","extras":{"singletons":0,"top_values":[["Pyrocystaceae",4000],["Euphausiidae",4000],["Cypridinidae",4000],["Vibrionaceae",4000],["Metridinidae",2297],["Noctilucaceae",2000],["Lingulodiniaceae",2000],["Aequoreidae",2000],["Pelagiidae",2000],["Bolinopsidae",2000],["Atollidae",2000],["Periphyllidae",2000],["Beroidae",2000],["Oplophoridae",2000],["Histioteuthidae",2000],["Chaetopteridae",2000],["Pholadidae",928],["Renillidae",874],["Vampyroteuthidae"
,484],["Thysanoteuthidae",209]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":22,"null_rate":0.0,"stats":{"cardinality":22,"entropy":4.156963903695181,"entropy_ratio":0.9321734828990283,"top_rate":0.09289363678588017,"top_value":"Pyrocystaceae"}},{"alerts":[],"column":"phylum","extras":{"singletons":0,"top_values":[["Arthropoda",12297],["Cnidaria",8874],["Myzozoa",8000],["Ctenophora",4168],["Proteobacteria",4000],["Mollusca",3721],["Annelida",2000]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":7,"null_rate":0.0,"stats":{"cardinality":7,"entropy":2.5925891501842373,"entropy_ratio":0.9234988885139048,"top_rate":0.28557826288899213,"top_value":"Arthropoda"}},{"alerts":[],"column":"class","extras":{"singletons":0,"top_values":[["Dinophyceae",8000],["Scyphozoa",6000],["Malacostraca",6000],["Ostracoda",4000],["Gammaproteobacteria",4000],["Cephalopoda",2793],["Copepoda",2297],["Tentaculata",2168],["Hydrozoa",2000],["Nuda",2000],["Polychaeta",2000],["Bivalvia",928],["Octocorallia",874]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":13,"null_rate":0.0,"stats":{"cardinality":13,"entropy":3.429565028070785,"entropy_ratio":0.9267993236743279,"top_rate":0.18578727357176034,"top_value":"Dinophyceae"}},{"alerts":[],"column":"order","extras":{"singletons":0,"top_values":[["Gonyaulacales",6000],["Coronatae",4000],["Euphausiacea",4000],["Myodocopida",4000],["Vibrionales",4000],["Oegopsida",2309],["Calanoida",2297],["Lobata",2168],["Noctilucales",2000],["Leptothecata",2000],["Semaeostomeae",2000],["Beroida",2000],["Decapoda",2000],["",2000],["Myida",928],["Scleralcyonacea",874],["Vampyromorpha",484]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":17,"null_rate":0.0,"stats":{"cardinality":17,"entropy":3.879347040998362,"entropy_ratio":0.9490843566449854,"top_rate":0.13934045517882024,"top_value":"Gonyaulacales"}},{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[34,134,770,872,500,309,279,377,1598,900,2736,1218,589,615,671,768,598,504,319
,199,628,953,793,744,566,783,1840,2424,2931,3500,4244,3508,2052,1070,764,1560,532,94,52,32],"edges":[-76.619,-72.4962749771125,-68.373549954225,-64.2508249313375,-60.12809990845,-56.0053748855625,-51.882649862675,-47.7599248397875,-43.6371998169,-39.5144747940125,-35.391749771125,-31.2690247482375,-27.146299725350005,-23.023574702462504,-18.900849679575003,-14.778124656687503,-10.655399633800002,-6.532674610912508,-2.4099495880250004,1.7127754348624933,5.835500457750001,9.958225480637495,14.080950503525003,18.203675526412496,22.32640054929999,26.449125572187498,30.57185059507499,34.6945756179625,38.81730064084999,42.9400256637375,47.062750686624995,51.1854757095125,55.308200732399996,59.43092575528749,63.55365077817498,67.6763758010625,71.79910082395,75.92182584683749,80.04455086972499,84.16727589261251,88.2900009155]},"sample":[44.75,25.25,-35.7051,45.625,55.03,-34.1176,-8.555,-8.19,53.0517,-37.0912,53.582,53.953890084846,-38.10097,43.8333,-15.831,34.682718,27.5831,47.53333,49.7833,-32.7954,51.5744987,-28.2137,-8.88,-6.01,42.487,-11.73,-12.807,-16.28,14.0015667,43.6858,16.594,6.3378,-7.563,-5.062,32.7883,54.61527,-9.702,-14.27,-5.0,-6.368,-18.0402,-4.3833,-8.8946,-8.9111,-1.37,-36.4846,22.7267,33.02,-30.2163,-8.6996,43.5214005,58.2598,47.5088692,-38.4254,6.3199,50.2207832,-23.073,-8.9068,31.6667,33.0007,42.487,-32.5038,-0.0283,14.52,50.2333336,-41.9998,40.6769,35.66,36.3385,50.7531319,42.53,43.6100006104,54.31601667,47.6667404,55.03,40.60905,40.61729997,48.6070099,22.75,48.2561607,47.5347633,51.70306,40.60905,45.7998047,74.3386,58.075,-42.5967,49.23778,54.31546667,-10.80632,42.25,53.79111,55.03,42.8100013733,40.62213331,47.8334045,-32.0012,40.61729997,43.5699996948,50.1258,43.522156,51.464826480845,5.9833,51.5055,48.3758333333333,34.1,47.3562,9.166667,51.274,50.7078,47.14032,51.9605,48.06646,48.940267,34.566666667,38.1979,38.581461,38.719564,53.566,41.086708,44.135833,41.9567,5.51351351,41.8600006104,44.9403,43.402972,38.895059,40.290253,39.2639,36.7438,44.171626,3
8.052472,50.3326,31.92452,44.61,52.78,45.72504513,52.9975,52.23,53.03,52.9975,45.0,52.9975,32.05574,45.72518509,44.25,53.03,45.0,44.026081,45.614142,52.78,43.5,44.62,44.49,44.8506993,44.75,42.6713123,58.2598,43.5,45.396774,32.35544,58.2598,44.75,44.10660934,52.23,36.7113418579,-0.7667,-5.555560112,-4.58333015442,36.7043952942,-59.1,85.1299972534,-59.9949989319,47.0,85.3700027466,-67.1288,-36.88255,-28.00147,-33.55,41.948348999,-60.02783333,44.0558490753,-53.5009,-66.331733,-27.92278,53.893666,-56.7333,51.800566,-64.0670013428,47.84566667,-58.94,52.2416687012,-65.0,55.8167,-57.4,49.84033333,56.666666666666664,-64.04567,56.439998626709006,50.33716667,50.08816667,49.849998,36.7900505066,44.942933,80.1667022705,44.62,68.803,-57.125,42.2999992371,69.55,36.7084655762,69.43,44.62,68.88,69.62,58.77996,36.8421020508,53.72038333,38.85,68.57,36.7096786499,68.37,43.1666667,66.4000015259,34.416666667,36.8381118774,26.9666690826,43.1666667,44.719166,69.267,44.62,49.83,49.666,72.69804,51.15629959106445,69.28,36.7077598572,68.172,36.7038841248,53.47901667,49.25,-61.43333333,-47.75,-62.66666667,-64.46666667,-61.5,-61.9353,50.666666666666664,-65.96666667,-32.4333,44.0683498383,-65.03333333,-60.61666667,48.25673,-65.8,-62.81666667,-64.63333333,-52.9501,25.3833,-62.33333333,-65.91666667,-67.56666667,-60.28333333,48.06646,60.625,41.25,61.685,43.88,43.93283,65.098,49.8508300781,36.75,64.393,49.6002311707,44.80611,43.86,60.25,60.859665,43.2299995422,43.2299995422,47.0900001526,60.694668,42.78,39.9416999817,41.4900016785,51.26667,42.924333,44.0683498383,-37.2000007629,-40.2579994202,53.132,65.3167,51.4281666666667,49.198,55.3233,43.1174,59.589,56.64,47.66,58.8333,44.018,62.8333,50.4683,41.983,39.017,47.65,54.5,-34.983001709,36.65,39.516998291,42.72,11.1669998169,-38.5022,34.0,28.5830001831,12.6330003738,-38.1516685486,-19.40115,-17.501667,-20.6487,-38.5267,28.0650005341,-22.3949,-32.81666,-19.40115,23.216999054,-22.01678333,36.65,-44.66,-43.11166,71.17192397222222,71.88451026111112,32.7190
0177,-14.666,-37.8317,71.2864953888889,-34.44233,-37.8183,64.9431406361111,63.22788948611111,66.7879996888889,61.36115133333333,-43.12266,64.37221787,72.0910743361111,71.47470947222222,71.65595493888888,-38.5,66.7879996888889,71.75015730277778,71.2737897361111,71.38537970833333,9.15,17.8500003815,7.2,71.79871797222222,17.5200004578,13.6,23.95,17.0,16.6,16.032,11.0,71.58670197222222,21.0837,-0.0500000007451,72.01905447222222,20.7792,15.220000267,2.5,15.6000003815,22.1800003052,20.0,16.75,11.029999733,18.6499996185,23.7199993134,18.5200004578,9.909925,36.7085990906,31.3332996368,-33.066,-9.19,17.6833,-12.7833,14.0830001831,43.5,-41.2683,-41.8727,-26.426,36.6,-40.01,36.4,-42.77,-39.8717,-41.57,-44.2038,-42.2667,12.6000003815,-33.23,-5.61167,-25.5833,-54.016998291,41.1669998169,-40.7663,-32.0299987793,39.8038,-43.0867,-39.8967,-40.1333,-42.78,42.9166984558,-39.75,52.3947,43.5917,24.625834,9.83300018311,-32.0,-42.59667,-34.11923,-27.345,-42.59667,-27.345,6.402,-33.0031,-34.11923,-32.0,-34.11923,-27.345,-32.0,-27.345,-19.30847,-35.83217,-27.345,-35.83217,-34.84293,-32.0,-27.345,-34.84263,-33.8258,-34.1192,-34.11923,-32.0,-19.30847,37.27,-42.59667,-24.4992,-32.5,48.6,-42.3962,-32.5,-32.0,-34.11923,26.164,-46.98,34.2916717529,34.9500007629,10.6278,-23.415,34.0250015259,-27.74722222,8.3117833,10.6277799606,-23.36444444,-23.2,52.69908,48.95633,43.4223022,-27.2,57.41067,53.41584,58.84127,50.21112,54.01872,58.44741,34.73493611,59.64015,73.86639877777777,58.9912,56.10724,61.35318549,13.2415800095,50.1655,56.09404,56.8634227199997,60.5562,-38.27183,54.26594,50.338329,58.23704,55.21433,55.96625,51.3926,45.64861,50.6342607929676,50.78626,53.3491009484189,50.61333,50.65895,31.6687]},"kind":"numeric","n":43060,"n_null":0,"n_unique":14146,"null_rate":0.0,"stats":{"iqr":69.61156749999999,"kurtosis":-0.9355107722263885,"max":88.2900009155,"mean":19.104855700014824,"median":36.710105896,"min":-76.619,"n_outliers":0,"outlier_rate":0.0,"q1":-19.3084775,"q3":50.30309,"skew":-0.6613857405110
835,"std":40.26627883931429,"zero_rate":0.00046446818392940084}},{"alerts":[],"column":"longitude","extras":{"histogram":{"counts":[405,653,381,284,177,485,1914,117,151,289,785,1676,2314,2269,643,680,556,530,1463,3887,4520,2373,1671,2361,834,313,501,696,561,360,288,70,753,480,964,761,3177,1422,582,714],"edges":[-179.9986667,-170.9989500325,-161.999233365,-152.9995166975,-143.99980003000002,-135.0000833625,-126.000366695,-117.0006500275,-108.00093336,-99.00121669250001,-90.00150002500001,-81.0017833575,-72.00206669,-63.00235002250001,-54.002633355,-45.002916687500004,-36.00320002000001,-27.003483352500012,-18.003766685000016,-9.00405001750002,-0.0043333500000244385,8.9953833175,17.995099984999996,26.994816652499992,35.99453331999999,44.994249987499984,53.99396665499998,62.993683322499976,71.99339999,80.99311665749997,89.99283332499999,98.99254999249996,107.99226665999998,116.99198332750001,125.99169999499998,134.9914166625,143.99113332999997,152.9908499975,161.99056666499996,170.99028333249998,179.99]},"sample":[33.333333333333336,122.68,150.141,-3.281800031662,8.46,151.2182,-79.2033,-79.558,4.435,149.914,-4.775,6.310003936961,144.39873,30.5,-33.403,139.444779,-49.633,-122.4333,-124.4917,-87.0839,2.7896102,153.504,-79.75,-81.368,3.169,-78.1,45.2248,-76.125,-80.09975,-16.8474,42.429,-102.9538,-82.18,-81.443,-123.8,-8.41263,-79.37,-76.782,-81.38,-80.8272,-71.3067,-81.7333,-142.5944,-142.5571,99.69,13.276,-157.9955,-121.7406,-43.275,-17.9926,3.909704,11.436,-2.6378701,10.0888,-102.96,1.538673,-135.018,-140.283,-64.1667,-121.8701,3.169,-156.0107,-84.5862,-26.0,1.441667,-102.9998,2.8662,24.99,-72.7366,1.5189909,-70.19,-69.2600021362,11.53901667,-3.428251,8.46,0.656149965,0.597816696,-3.8553889,-158.0,-4.5567255,-3.0937481,-8.46194,0.656149965,-1.2052701,-85.6208,11.493,148.2333,-65.72639,11.54886667,138.43402,-70.54,-9.64611,8.46,-70.3399963379,0.658183331,-3.9499221,-169.9937,0.597816696,-69.3099975586,-4.4452,10.309964,2.704457650448,92.4833,-8.884,-129.513333333333,35.
433333333,-123.0249,92.78333,-9.23,-0.0545,-122.6377,1.5895,-122.3956,-123.501676,35.583333333,12.747989,14.839258,13.153978,-11.57,13.727556,-58.289,3.2367,-87.10569152,-65.3499984741,-2.2215,10.419027,8.811023,14.923235,3.0525,-3.6032,8.35943,14.035439,-10.8315,34.68645,33.53,4.67,13.69218986,4.77667,4.41,4.71,4.77667,31.2,4.77667,34.75475,13.6920068,38.0,4.71,29.9167,10.067364,13.771829,4.67,37.0,33.44,37.92,13.8349822,33.26,14.019673,11.4333,37.0,12.453741,34.79095,11.4333,29.6667,12.51640606,4.41,-122.053413391,5.8208,120.786109924,121.385002136,-122.060180664,20.1,172.410003662,140.600006104,-46.8333,155.399993896,171.0905,152.5549,154.78133,15.2167,-16.5966997147,141.31666667,-12.7849998474,-169.9911,-170.010233,155.14233,-133.860266,-2.3667,-128.574816,-41.266998291,-59.76883333,28.97,174.216674805,66.0215,-170.8167,149.38333,-65.65683333,-143.0,125.81267,-56.6100006103516,-58.46083333,-59.09316667,-49.5,-121.903717041,-66.8464,37.0833015442,33.45,36.3,-55.307,-66.9000015259,35.2167,-122.062110901,36.43,33.45,38.867,34.685,-94.1976,-121.967697144,8.06213333,142.5,38.665,-122.041671753,39.93,29.1666667,34.3499984741,35.533333333,-121.968849182,-84.1912689209,28.1666667,-66.4694,35.167,33.51,-60.5,-124.271,-77.97591,2.605299949645996,36.27,-122.061523438,39.8,-122.053390503,6.9176,-123.755,-57.416668,69.0,72.98333,131.86667,-56.5,-56.7883,-140.00016666666667,146.65,83.7,-12.7725000381,57.166668,-55.916668,-122.5442,138.48334,63.333332,59.75,109.9981,52.85,-60.816666,145.43333,-85.933334,-43.566666,-122.3956,-63.113,-9.25,-61.853,-62.89,-62.7835,-59.652,-59.4485015869,-8.0,-60.074,-58.9644508362,-66.4239,-62.89,2.5,-36.813,-65.0699996948,-65.0699996948,-59.9799995422,5.6835,-63.42,-20.2716493607,-68.9499969482,-4.06667,-64.095333,-12.7725000381,70.1699981689,-144.733001709,-40.915,36.85,-146.016166666667,-123.436,-8.4317,-66.3541,-5.915,-56.64,-4.948,-1.4133,-63.367,4.1833,-8.8317,141.817,144.983,-7.779,-24.898,14.5329999924,-7.2333,-43.5499992371,-30.215,-59.5
33000946,149.259,-21.0,-67.4000015259,-74.1829986572,127.987503052,160.37675,-149.740005,161.5061,149.397,0.0,166.0787167,153.7,160.37675,-48.9500007629,164.00155,-7.2333,175.8,148.0575,20.90120688888889,19.971863569444444,-117.220001221,145.45,148.5,22.289253916666667,150.96633,148.653,6.95840613611111,5.993200513888889,10.9841806277778,4.0124275,148.22916,7.792610114,18.445003238888887,20.431904361111112,21.06088517222222,172.4,10.9841806277778,21.998493591666666,22.1148497888889,20.814959319444444,74.58,59.3300018311,78.57,21.124078472222223,83.6800003052,70.83,60.97,84.8199996948,64.0,122.046,87.0,24.09869225,-157.249,76.9300003052,20.070669916666667,-156.4906,84.0,53.0,64.9800033569,67.6800003052,71.75,60.52,81.0199966431,58.5200004578,66.3499984741,63.1300010681,-154.33001833,-122.061767578,-35.1166992188,153.133,-5.29,-60.9667,147.667,-23.2000007629,-59.0333,169.0867,148.2999,167.189,-3.55,178.0967,-4.44,-179.7,168.22,174.8,-174.5025,170.325,-72.1330032349,153.308,152.637,6.15,-145.033004761,7.117000103,145.2894,16.0499992371,2.3462,168.9983,168.195,168.4167,-179.83,-60.7167015076,-21.8169994354,-13.2623,139.575,126.398333,-157.632995605,115.41667,148.23333,151.22667,153.562,148.23333,153.562,61.3622,152.90832,151.22667,115.41667,151.22667,153.562,115.41667,153.562,147.61842,136.44733,153.562,136.44733,151.34408,115.41667,153.562,151.34415,151.2633,151.2267,151.22667,115.41667,147.61842,126.4427778,148.23333,-170.0023,-176.0,-123.5,-174.4089,-170.0,115.4167,151.22667,-80.077,168.11,-76.5583267212,-75.7249984741,-61.7111,-44.85861111,-120.025001526,-48.50277778,-78.7574833,-61.7111091614,-44.84888889,-44.00194444,-4.369412,-9.3152917071,6.8685002,153.1,-0.9236789027,-4.310661,-2.963323,-3.811394,4.91529,-5.067216,128.6293917,-1.0876498665,24.54626875,-4.9445989775,-1.0919339199,1.759683047,144.702819824,-5.027301,-4.875153,-6.853536265464715,-0.438434,144.97167,4.04859,-4.1356391,-6.791333,-6.6286,-5.659448,1.419716,13.78,-2.3740475348147885,-0.01189616,-3.727
803322273063,-2.122904,-0.89085,-81.1598]},"kind":"numeric","n":43060,"n_null":0,"n_unique":14637,"null_rate":0.0,"stats":{"iqr":124.11930000000001,"kurtosis":-0.6464121362462945,"max":179.99,"mean":9.640325067518438,"median":3.05735505,"min":-179.9986667,"n_outliers":0,"outlier_rate":0.0,"q1":-60.186,"q3":63.9333,"skew":0.13761981257556744,"std":88.60871571032065,"zero_rate":0.001114723641430562}},{"alerts":[{"code":"null_rate","level":"warn","message":"24.8% null"},{"code":"high_skew","level":"info","message":"skew=+4.72"},{"code":"outliers","level":"warn","message":"10.6% rows beyond 1.5 IQR"}],"column":"depth","extras":{"histogram":{"counts":[21893,4443,1966,1504,1070,303,226,182,255,95,111,57,55,56,42,24,31,20,6,14,12,14,6,2,4,2,0,0,0,0,3,0,0,0,2,0,0,0,0,4],"edges":[-53.0,198.325,449.65,700.9749999999999,952.3,1203.625,1454.9499999999998,1706.2749999999999,1957.6,2208.9249999999997,2460.25,2711.575,2962.8999999999996,3214.225,3465.5499999999997,3716.875,3968.2,4219.525,4470.849999999999,4722.175,4973.5,5224.825,5476.15,5727.474999999999,5978.799999999999,6230.125,6481.45,6732.775,6984.099999999999,7235.424999999999,7486.75,7738.075,7989.4,8240.725,8492.05,8743.375,8994.699999999999,9246.025,9497.35,9748.675,10000.0]},"sample":[20.0,0.0,0.0,5.0,25.0,7.405,3.0,20.0,5.175,5.0,0.0,20.0,10.0,7.5,20.0,7.5,0.0,23.0,50.0,6.0,1.0,7.5,17.0,25.0,0.0,100.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,70.0,0.0,175.0,30.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,375.0,15.0,0.0,0.0,5.0,1741.0,50.0,5.0,5.0,50.0,488.0,20.0,5.0,3.0,3.0,45.0,5.0,5.0,0.0,5.0,16.0,5.0,5.0,0.0,35.0,115.0,100.0,3.0,5.0,50.0,89.0,5.0,5.0,10.0,5.0,71.0,0.0,5.0,7.5,0.0,0.0,5.0,0.5,0.0,0.5,0.0,0.0,2.0,0.0,0.0,15.0,5.0,1.0,18.63472222,10.0,26.0,100.0,0.0,13.62,0.0,33.0,40.0,100.0,92.0,50.0,13.62,16.0,50.0,1.0,110.0,0.0,0.0,0.0,0.0,0.0,149.0,0.0,0.0,0.0,1000.0,755.0,0.0,240.0,0.0,0.0,0.0,70.0,500.0,0.0,687.84,96.5,0.0,0.0,0.0,5.0,0.0,6.0,0.0,6.0,21.0,32.5,0.0,6.0,5.0,7.5,41.5,17.5,5.0,5.0,0.0,36.5,6.5,5.0,0.0,0.0,0.0,0.0,7.5,5.0,187.5
,618.0,100.0,853.15,1005.0,910.0,479.28,859.65,900.0,750.0,1000.0,1103.0,375.0,301.0,1000.0,1000.0,479.31,800.0,1976.2,950.0,61.2,521.0,1007.5,2100.0,1043.5,680.0,3609.0,813.33,520.0,750.0,1865.0,411.0,200.0,745.0,235.0,282.0,861.0,460.0,299.0,205.75,128.4,100.0,338.0,318.0,70.5,175.0,500.0,342.0,357.0,390.0,300.0,260.0,333.0,2363.1,2866.0,3.37,50.0,125.0,7.0,1.0,408.94,122.5,120.0,37.0,25.0,1000.98,18.0,50.5,5.0,79.0,10.0,59.0,16.5,125.0,125.0,292.11,379.4,5.0,5.0,1250.0,0.0,7.5,165.985,37.5,25.0,15.0,387.0,0.0,999.51,104.5,7.5,0.0,5.0,46.0,197.5,50.0,300.03,400.0,12.5,10.0,355.5,125.0,100.0,17.5,236.0,450.0,541.0,129.5,190.0,241.0,55.0,262.86,56.5,8.0,68.0,17.5,630.0,14.0,154.0,400.0,240.0,130.0,25.5,439.0,42.0,168.0,85.0,125.0,965.0,5.0,57.5,152.0,37.5,105.0,7.5,51.5,62.5,17.5,166.0,7.5,348.0,91.75,5700.0,121.0,75.0,7.5,12.5,217.5,7.5,7.5,24.0,7.5,7.5,17.5,390.0,57.5,125.0,70.0,87.5,2000.0,260.0,470.0,227.5,106.0,0.0,85.0,0.0,1280.0,85.0,175.0,512.5,105.0,539.0,101.5,500.0,187.5,467.5,300.0,147.5,327.0,376.0,296.0,298.9,343.0,323.0,273.0,223.0,360.0,230.0,431.645,270.0,348.0,304.0,381.0,190.0,416.0,348.0,160.0,360.0,173.0,572.5,333.0,383.0,194.0,5.0,2240.0,320.0,358.0,714.0,5.0,0.0,2500.0,317.0,2200.0,348.0,98.0,3780.0,2305.0,1280.16,733.91,839.83,800.0,565.0,2907.5,822.96,935.0,443.5,733.58,1030.0,2000.0,4525.5,718.0,7.5,1000.0,847.5,1213.5,610.0,686.7797,470.0,555.0,834.5,432.0,199.5,700.0,851.5,661.0,705.5,1401.5,648.5,634.0,727.5,713.5,585.0,555.0,296.0,926.5,550.0,55.0,123.0,50.0,85.0,1.0,3.0,4.2,10.0,46.0,10.0,20.0,100.0,100.0,50.0,46.0,100.0,30.0,0.0,19.0,50.0,30.0,50.0,26.0,2.0,217.0,18.0,75.0,20.0,10.0,30.0,75.0,20.0,3.0,3.0,75.0,0.0,40.0,100.0,6.0,46.0,100.0,3.0,1.0,51.0,0.0,82.0,50.0,1000.0,20.0,2.0,19.0,14.0,60.0,59.0,20.0,11.5,22.0,77.0,6.25,130.0,111.0,155.100006,18.0,11.5,39.2,6.19999981,0.0,20.0,19.5,18.5,36.0,7.5,116.0,10.0,10.0,11.6000004,26.3500004,13.5500002,12.25,5.69999981]},"kind":"numeric","n":43060,"n_null":10658,"n_unique":3283,"null_rat
e":0.2475150952159777,"stats":{"iqr":313.5,"kurtosis":35.887405589291085,"max":10000.0,"mean":281.20921408976113,"median":52.5,"min":-53.0,"n_outliers":3444,"outlier_rate":0.10628973520153077,"q1":7.5,"q3":321.0,"skew":4.724373885977974,"std":570.177750269519,"zero_rate":0.11919017344608357}},{"alerts":[{"code":"one_word","level":"warn","message":"97.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"duplicates","level":"warn","message":"67.4% duplicate strings"}],"column":"date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[276,11,1316,78,573,13982,0,4,0,1820,691,49,3297,11038,392,858,102,993,0,15,100,126,12,0,224,0,0,1,0,1632,2,226,0,0,18,0,0,0,0,42],"edges":[4.0,5.175,6.35,7.525,8.7,9.875,11.05,12.225,13.4,14.575000000000001,15.75,16.925,18.1,19.275,20.45,21.625,22.8,23.975,25.150000000000002,26.325,27.5,28.675,29.85,31.025000000000002,32.2,33.375,34.55,35.725,36.9,38.075,39.25,40.425000000000004,41.6,42.775,43.95,45.125,46.300000000000004,47.475,48.65,49.825,51.0]},"near_unique":false,"sample":["2016-05-20","1996-07-04","2016-09-20T10:18:00Z","2015-01-18T14:35:00Z","2017-08-19","2017-04-29T00:01:00","2018-08-30","2020-11-05T16:06:00Z","2013-08-05T14:15:00Z","2012-03-24","1996-03-03T14:45:00Z","1999-01-16T12:38:29Z","2005-07-01","2019-07-28T14:03:00Z","2015-08","2015-06-21T15:15:00","1954-01-01T00:00:00Z","2014-06-12T13:20:00Z","1993-03-07","2019-08-23","2017-10-03","2012-03-12","1991-06-11T00:00:00","2002-10-06T17:45:00Z","2008-05/2008-06","2010-06-26T03:24/2010-06-26T03:52","1904-11-01T00:00:00","2019-08-05","2011-09-10","2014-11-07","2011-05/2011-06","2017-05/2017-06","2016-05/2016-06","1968-12-07T12:00:00Z","2016-05-13T13:31:00Z","2015-08-19","2015-07-17","1979-05-26T11:34:48Z","2022-08-22T12:42:21/2022-08-22T13:14:26","1978-01-20","2012-07-16T14:00:00Z","1977-08-28T12:00:00Z","2024-11-16T12:45:00","1977-02-07","2013-05-05","2019-01-06T21:50:00Z","2025-01-13T06
:36","1974-04-15T12:00:00Z","2010-11-10","2017-03-15T01:57:00Z"],"top_values":[["1962/1964",372],["2010-05/2010-06",276],["2008-05/2008-06",246],["2011-05/2011-06",236],["2013-08",142],["2012-05/2012-06",138],["2016-05/2016-06",124],["2013-05/2013-06",110],["2011-08",106],["2017-05-30",74],["2001-05/2001-06",70],["2006-05/2006-06",68],["2007-05/2007-06",68],["2018-05/2018-06",66],["2017-05/2017-06",64],["2011-07",60],["2014-08-01",58],["2013-07",58],["1978-05-17T12:00:00Z",58],["2015-08",56]],"top_words":[["1962/1964",192],["2010-05/2010-06",143],["2008-05/2008-06",124],["2011-05/2011-06",118],["2013-08",78],["2012-05/2012-06",70],["2016-05/2016-06",66],["2013-05/2013-06",48],["2011-08",46],["2006-05/2006-06",42],["2017-05-30",42],["2011-07",41],["2017-05/2017-06",37],["2001-05/2001-06",33],["2013-09",32],["2018-05/2018-06",32],["2007-05/2007-06",31],["2014-08-01",30],["2012-08",28],["2003-05/2003-06",28],["1978-05-17t12:00:00z",28],["2010-08",28],["2013-07",26],["2015-08",26],["2014-09-01",26]],"vocab_skipped":null,"word_histogram":{"counts":[36760,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1108,0,0,0,0,0,0,0,0,0,0,0,0,0,10],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":43060,"n_null":5182,"n_unique":12338,"null_rate":0.12034370645610776,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.6742700248165162,"emoji_rate":0.0,"len_max":51,"len_mean":16.4484133269972,"len_median":19.0,"len_min":4,"len_p95":39.0,"n_duplicates":25540,"n_empty":0,"one_word_rate":0.9704841860710702,"readability_flesch_mean":121.19970000000004,"url_rate":0.0,"vocab_size":10135,"word_mean":1.0
297798194202439,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"42.2% null"}],"column":"year","extras":{"singletons":4,"top_values":[["2000",1287],["2001",703],["2016",691],["2008",688],["2010",651],["2002",579],["2013",556],["2011",554],["1979",524],["2014",519],["2003",514],["2004",511],["2015",504],["2012",493],["2007",459],["2006",442],["2005",438],["1998",437],["2020",437],["2019",436]]},"kind":"categorical","n":43060,"n_null":18164,"n_unique":137,"null_rate":0.42183000464468184,"stats":{"cardinality":137,"entropy":6.141881053174018,"entropy_ratio":0.8652935040851906,"top_rate":0.05169505141388175,"top_value":"2000"}},{"alerts":[],"column":"country","extras":{"singletons":9,"top_values":[["",27422],["Australia",4573],["United States",1416],["PERU",1098],["Canada",976],["SOVIET UNION",634],["Israel",550],["GB",465],["Spain",370],["Sweden",340],["USA",323],["Ukraine",316],["Romania",310],["Antarctica",242],["Republic of Korea",225],["Colombia",214],["Italy",213],["New Zealand",212],["FR",210],["Brazil",179]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":130,"null_rate":0.0,"stats":{"cardinality":130,"entropy":2.5689026628298843,"entropy_ratio":0.3658171618501458,"top_rate":0.6368323269856014,"top_value":""}},{"alerts":[],"column":"dataset","extras":{"singletons":9,"top_values":[["",26317],["Environmental Monitoring database (MOD) DNV",1760],["Jellyfish sightings along the Italian coastline from 2009 to 2017",1024],["QUADRIGE - Coastal monitoring database and products, 1974 onwards. 
(6064)",978],["MBIS research trawl surveys",714],["Groundfish Survey Invertebrate Data",674],["DFO Quebec Region Ecosystemic bottom trawl surveys",650],["Marine Recorder Snapshot extract of surveys entered by SeaSearch",643],["CPR",604],["DATRAS: ICES Database of trawl surveys",591],["Citizen Science based jellyfish observations along the Israeli Mediterranean coast in 2011-2025",546],["BioChem: Sameoto zooplankton collection",516],["Marine Recorder Snapshot extract of surveys entered by JNCC",396],["Atlantic Reference Centre",383],["DFO Central and Arctic Multi-species Stock Assessment Surveys",364],["MEDITS-Spain: Demersal and mega-benthic species from the MEDITS (Mediterranean International Trawl Survey) project on the Spanish continental shelf between 1994 and 2010",277],["NIWA Invertebrate Collection",267],["ANEMOON Beach washup monitoring (SMP) data along the Dutch coastline collected through citizen science",240],["Phytoplankton abundance and composition  in the Ebro delta embayments (Alfacs Bay and Fangar Bay, North Western Mediterranean) during 1990-2019",198],["Romanian Black Sea Zooplankton data from 1981 to 2000",196]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":214,"null_rate":0.0,"stats":{"cardinality":214,"entropy":3.189902498762941,"entropy_ratio":0.41205400789881336,"top_rate":0.611170459823502,"top_value":""}},{"alerts":[],"column":"bioluminescence_group","extras":{"singletons":0,"top_values":[["Dinoflagellate",4000],["Sea sparkle dinoflagellate",2000],["Bioluminescent dinoflagellate",2000],["Crystal jelly (source of GFP)",2000],["Mauve stinger jellyfish",2000],["Warty comb jelly",2000],["Crown jellyfish (alarm jelly)",2000],["Helmet jellyfish",2000],["Comb jelly",2000],["Krill (many species bioluminescent)",2000],["Northern krill",2000],["Copepod (secretes luminous fluid)",2000],["Deep-sea shrimp (NanoLuc source)",2000],["Sea firefly ostracod",2000],["Bioluminescent ostracod",2000],["Cock-eyed squid",2000],["Bioluminescent marine 
bacteria",2000],["Marine luminous bacteria",2000],["Parchment tube worm",2000],["Boring clam (piddock)",928]]},"kind":"categorical","n":43060,"n_null":0,"n_unique":26,"null_rate":0.0,"stats":{"cardinality":26,"entropy":4.465278183108629,"entropy_ratio":0.9499703114742927,"top_rate":0.09289363678588017,"top_value":"Dinoflagellate"}}],"insights":{"errors":[],"insights":[{"confidence":"medium","critiques":[],"evidence_keys":["depth.null_rate","depth.stats.outlier_rate","depth.stats.median","depth.stats.max","depth.stats.skew","country.stats.top_rate","country.top_values","year.null_rate","bioluminescence_group.stats.cardinality","phylum.top_values","row_count"],"featured_charts":[{"caption":"Look for which bioluminescent groups dominate the record count \u2014 Dinoflagellates lead but many other groups cluster near 2,000 records each, suggesting structured sampling.","column":"bioluminescence_group","kind":"bar"},{"caption":"Arthropoda and Cnidaria together account for nearly half of all records; check whether this reflects true ecological abundance or sampling bias.","column":"phylum","kind":"donut"},{"caption":"The distribution is heavily right-skewed with a median of 52.5 m but values reaching 10,000 m \u2014 watch for the long tail of deep-water outliers that may distort analyses.","column":"depth","kind":"histogram"},{"caption":"Year 2000 has by far the most records; look for uneven temporal coverage and note the 42% null rate means a large share of observations have no year assigned.","column":"year","kind":"bar"},{"caption":"Over 63% of records have no country value, and among those that do, Australia dominates \u2014 flag this geographic gap before drawing any regional conclusions.","column":"country","kind":"bar"}],"model":"anthropic:default","narrative":"This dataset contains 43,060 occurrence records of bioluminescent marine organisms, covering 26 named groups across 7 phyla \u2014 from dinoflagellates and jellyfish to krill and bacteria \u2014 with 
geographic coordinates, taxonomy, and sampling depth. The most notable issue is that depth has a 24.75% null rate, extreme skew (max 10,000 m vs. median 52.5 m), and over 10% outliers, meaning depth-based analysis needs careful filtering before any conclusions are drawn. A second area to investigate is geographic bias: over 63% of country values are blank, yet Australia, the United States, Peru, and Canada dominate the named entries, suggesting strong regional over-representation in the sourced datasets. The year column also carries a 42% null rate, which limits time-trend analysis despite sampled dates spanning from at least 1904 to 2025.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["alerts","n","n_unique","null_rate","duplicate_rate","n_duplicates","len_min","len_max","top_values","one_word_rate","allcaps_rate"],"model":"anthropic:default","narrative":"This column contains observation or sampling date strings stored as text, using multiple formats including year ranges ('1962/1964'), year-month ranges ('2010-05/2010-06'), year-month ('2013-08'), full ISO dates ('2017-05-30'), and ISO datetimes with or without a trailing 'Z' ('1978-05-17T12:00:00Z', '2017-04-29T00:01:00'). The heterogeneous format mix and wide length range (min 4, max 51 chars) will require normalization before any temporal analysis. A 67.4% duplicate rate (25,540 of the 37,878 non-null rows, with only 12,338 unique values) indicates many records share the same date, consistent with many occurrences being logged from the same survey or sampling event. 
The 12.0% null rate is also notable and should be investigated for systematic missingness.","role":"timestamp","scope":"column","target":"date","treatment":"Parse and normalize the mixed date formats (range, year-month, ISO) into structured start/end date fields before use in temporal analysis or modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","skew","kurtosis","min","max","median","q1","q3","iqr","std","mean","n_outliers","outlier_rate","zero_rate"],"model":"anthropic:default","narrative":"This column represents the sampling depth at which each occurrence was recorded (apparently in metres below the sea surface), ranging from -53.0 to 10,000.0 with a median of only 52.5 \u2014 meaning most records are shallow, but a long tail of deep measurements drives extreme skew (4.72) and very high kurtosis (35.89). The null rate of 24.75% and outlier rate of 10.63% (3,444 rows) are both flagged as alerts, and 11.92% of values are exactly zero, suggesting possible default-fill or surface-level records that may need special handling. The IQR of 313.5 against a std of 570.18 confirms the heavy-tailed distribution driven by a minority of extreme deep values up to 10,000.0.","role":"feature","scope":"column","target":"depth","treatment":"Impute nulls carefully (median preferred over mean given skew), investigate zero and negative values for validity, then log-transform or apply a signed-log transform before modelling to reduce the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","top_values","n"],"model":"anthropic:default","narrative":"This column represents a calendar year associated with each record, stored as a categorical type despite being numeric in nature. The top value is '2000' with 1,287 occurrences (~5.2% of non-null values), and the 137 unique values span a range that includes at least 1979 through 2020. 
Two signals stand out: the null rate is 42.18%, which is severe enough to warrant an alert, and the year 2000 is notably over-represented relative to adjacent years (e.g., 2001 has only 703), suggesting either a data collection artifact or a large batch of undated records defaulted to that year.","role":"feature","scope":"column","target":"year","treatment":"Investigate the 2000 spike for default-value contamination, impute or flag the 42.18% nulls, then cast to integer for temporal modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","skew","n_unique","n","zero_rate","outlier_rate"],"model":"anthropic:default","narrative":"This column contains geographic latitude values, spanning from -76.619\u00b0 (deep Southern Hemisphere) to 88.29\u00b0 (near the North Pole), with 14,146 unique values across 43,060 rows suggesting many repeated locations. The mean (19.1\u00b0) is notably lower than the median (36.7\u00b0), and the IQR of 69.6\u00b0 spans nearly the full usable latitude range, indicating records are spread across both hemispheres rather than clustered in any one region. The slight negative skew (-0.66) and near-zero outlier rate confirm a broadly spread but reasonably uniform distribution, which is unusual \u2014 most real-world geo datasets cluster in populated mid-latitude bands. 
The 0.046% zero-rate (\u224820 rows) warrants inspection as 0.0\u00b0 latitude may represent missing/default values.","role":"feature","scope":"column","target":"latitude","treatment":"Verify ~20 zero-latitude rows for data quality; use as-is or pair with longitude for spatial joins/clustering; consider binning into latitude bands if used as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","skew","kurtosis","zero_rate","null_rate","n_unique"],"model":"anthropic:default","narrative":"This column contains geographic longitude values, spanning the full valid range from -179.9987 to 179.99 degrees, confirming global coverage. The distribution is remarkably flat and near-symmetric (skew 0.138, kurtosis -0.646) with an IQR of 124.12 degrees, indicating records are spread broadly across both hemispheres rather than clustered in any region. The mean (9.64) is notably higher than the median (3.06), hinting at a slight eastward bias in the dataset. A zero_rate of 0.11% warrants a check for null-substituted zeros masquerading as the Prime Meridian.","role":"feature","scope":"column","target":"longitude","treatment":"Use as-is for geospatial joins or pair with latitude for coordinate-based features; verify ~48 zero-value rows are genuine Prime Meridian locations and not null substitutions."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy_ratio","null_rate","top_values"],"model":"anthropic:default","narrative":"This column classifies observations into one of 26 named bioluminescent organism groups (e.g., 'Dinoflagellate', 'Crystal jelly (source of GFP)', 'Krill (many species bioluminescent)'), covering marine taxa from dinoflagellates to jellyfish and crustaceans. The distribution is remarkably uniform: the top category 'Dinoflagellate' holds only 9.3% of rows, and the entropy ratio of 0.95 (near-maximum for 26 categories) indicates near-flat class balance. 
With no nulls across 43,060 rows and exactly 2,000 records for all visible non-top categories, the dataset appears deliberately balanced or synthetically constructed.","role":"label","scope":"column","target":"bioluminescence_group","treatment":"Use as classification target or stratification variable; near-perfect class balance means no resampling required."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","top_value","top_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column contains biological taxonomic class labels for marine/aquatic organisms, with 13 distinct classes across 43,060 records and zero nulls. The entropy ratio of 0.927 indicates a near-uniform distribution across classes, though mild imbalance exists: 'Dinophyceae' is the most frequent at 18.6% (8,000 records), while several classes like 'Scyphozoa' and 'Malacostraca' each hold exactly 6,000 records, suggesting some classes may have been deliberately sampled to round numbers. The mix spans protists (Dinophyceae), bacteria (Gammaproteobacteria), crustaceans (Malacostraca, Ostracoda, Copepoda), molluscs (Cephalopoda), and cnidarians/ctenophores (Scyphozoa, Hydrozoa, Nuda, Tentaculata), pointing to a plankton or marine biodiversity dataset.","role":"label","scope":"column","target":"class","treatment":"Use as classification target or stratification key; check class imbalance before modelling and consider stratified splits given the ~2\u00d7 spread between largest and smallest classes."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","top_values","null_rate"],"model":"anthropic:default","narrative":"This column captures the country associated with each record, with 130 distinct values across 43,060 rows. The dominant 'value' is an empty string, accounting for 63.7% of all records (27,422 rows) \u2014 a critical data quality issue that functionally resembles a very high null rate. 
The remaining values show inconsistent normalisation: mixed casing ('PERU', 'SOVIET UNION' vs. 'Australia', 'Canada'), abbreviations ('GB' instead of 'United Kingdom'), and anachronistic entities ('SOVIET UNION'), suggesting data was collected from heterogeneous or historical sources without standardisation.","role":"feature","scope":"column","target":"country","treatment":"Treat empty strings as nulls, then standardise to ISO 3166-1 country codes before modelling or aggregation."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","top_values","cardinality"],"model":"anthropic:default","narrative":"This column identifies the source dataset or survey program for each observation, with 214 distinct named sources across 43,060 rows. The dominant value is an empty string, accounting for 61.1% of all records (26,317 rows), meaning the majority of observations carry no dataset attribution \u2014 a significant data quality concern. The remaining records span named marine/environmental monitoring programs (trawl surveys, coastal monitoring, jellyfish sightings, etc.), with the largest named source ('Environmental Monitoring database (MOD) DNV') covering only ~4% of rows.","role":"metadata","scope":"column","target":"dataset","treatment":"Treat empty string as missing/unknown; encode named values as a categorical feature or use as a grouping/stratification variable, but investigate whether the 61.1% blank rate reflects a systematic data gap before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","cardinality","top_values","entropy_ratio","null_rate","top_rate"],"model":"anthropic:default","narrative":"This column contains biological family-level taxonomic classifications, covering 22 distinct families across 43,060 records with no nulls. 
The distribution is notably near-uniform for the top entries: four families (Pyrocystaceae, Euphausiidae, Cypridinidae, Vibrionaceae) each have exactly 4,000 records, strongly suggesting deliberate stratified sampling or synthetic balancing rather than natural occurrence frequencies. The high entropy ratio of 0.932 (close to maximum for 22 categories) confirms an unusually flat distribution. Families represented include bioluminescent marine organisms (dinoflagellates, krill, ostracods, bacteria), hinting this is a marine bioluminescence or plankton dataset.","role":"label","scope":"column","target":"family","treatment":"Use as a categorical grouping variable or classification target; encode with integer or one-hot encoding, noting the artificially balanced class distribution may not reflect real-world priors."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","cardinality","entropy_ratio","top_values","top_rate"],"model":"anthropic:default","narrative":"This column contains biological genus names for marine organisms \u2014 including bioluminescent dinoflagellates (Noctiluca, Pyrocystis, Lingulodinium, Alexandrium), jellyfish (Pelagia, Atolla, Periphylla), and ctenophores (Mnemiopsis, Beroe) \u2014 suggesting a marine biology or bioluminescence dataset. With 27 unique genera across 43,060 rows and no nulls, the distribution is remarkably flat: every visible top value appears exactly 2,000 times, implying a deliberately balanced or stratified dataset. The entropy ratio of 0.9586 is very high for only 27 categories, confirming near-uniform representation across genera. 
No skew or imbalance alerts were triggered.","role":"label","scope":"column","target":"genus","treatment":"Use as a classification target or stratification key; one-hot encode or ordinal encode for modelling given 27 balanced classes."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","null_rate"],"model":"anthropic:default","narrative":"This column contains biological taxonomic order classifications for marine organisms, with 17 distinct orders spanning bacteria (Vibrionales), dinoflagellates (Gonyaulacales, Noctilucales), jellyfish (Coronatae, Leptothecata), crustaceans (Euphausiacea, Calanoida), and cephalopods (Oegopsida) among others. The distribution is moderately uneven \u2014 Gonyaulacales dominates at 13.9% (6,000 rows) while several orders share exactly 4,000 rows, suggesting possible stratified or quota-based sampling rather than natural observation frequencies. Entropy ratio of 0.949 indicates near-uniform spread across the 17 classes, which is unusually high for a taxonomic label and reinforces the structured-sampling hypothesis. No nulls are present.","role":"label","scope":"column","target":"order","treatment":"Use as a categorical target or stratification variable; one-hot encode or ordinal-encode for modelling, and verify whether the equal-count groupings (4,000 each) reflect intentional sampling design."},{"confidence":"high","critiques":[],"evidence_keys":["cardinality","n_unique","null_rate","top_value","top_rate","top_values","entropy_ratio"],"model":"anthropic:default","narrative":"This column encodes biological phylum classifications across 43,060 records with exactly 7 distinct values and no nulls, making it a clean taxonomic label field. The distribution spans both animal (Arthropoda, Cnidaria, Ctenophora, Mollusca, Annelida) and non-animal (Myzozoa, Proteobacteria) kingdoms, suggesting the dataset covers a broad range of marine or environmental organisms. 
Arthropoda dominates at 28.6% (12,297 records), while the entropy ratio of 0.923 indicates a fairly well-spread distribution across categories. The presence of Proteobacteria (bacteria) and Myzozoa (protists) alongside metazoans may surprise analysts expecting a purely animal-focused dataset.","role":"label","scope":"column","target":"phylum","treatment":"One-hot encode or use as a stratification/grouping variable; verify whether cross-kingdom mixing is intentional before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["column","n_unique","null_rate","top_value","top_rate","top_values","entropy_ratio","cardinality"],"model":"anthropic:default","narrative":"This column contains scientific (Latin) names of marine organisms, covering 245 distinct taxa across 43,060 records with no nulls. The values span both genus-only entries (e.g., 'Lingulodinium', 'Photobacterium', 'Vibrio') and full binomial species names, suggesting inconsistent taxonomic resolution across records. The top value 'Mnemiopsis leidyi' appears 2,000 times (~4.6% of rows), and the top 10 values together account for a substantial share of records, indicating the dataset is dominated by a relatively small set of species. Entropy ratio of 0.747 confirms moderate-to-high concentration for a 245-cardinality field.","role":"label","scope":"column","target":"scientificName","treatment":"Standardise taxonomic resolution (genus vs. 
species) before grouping; one-hot or target-encode for modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4703,"prompt_tokens":14525,"total_tokens":19228}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:42:25+00:00","mode":"full","row_count":43060,"sampled_rows":43060,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/bioluminescence.json"},"notes":[],"saturn_version":"0.2.0","schema":{"bioluminescence_group":"categorical","class":"categorical","country":"categorical","dataset":"categorical","date":"text","depth":"numeric","family":"categorical","genus":"categorical","latitude":"numeric","longitude":"numeric","order":"categorical","phylum":"categorical","scientificName":"categorical","year":"categorical"}}
