{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"98.9% of rows are unique strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[173,481,493,305,3911,1393,71,35,16,12,10,5,3,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[2.0,5.775,9.55,13.325,17.1,20.875,24.65,28.425,32.2,35.975,39.75,43.525,47.3,51.074999999999996,54.85,58.625,62.4,66.175,69.95,73.725,77.5,81.27499999999999,85.05,88.825,92.6,96.375,100.14999999999999,103.925,107.7,111.475,115.25,119.02499999999999,122.8,126.575,130.35,134.125,137.9,141.67499999999998,145.45,149.225,153.0]},"near_unique":true,"sample":["Falke","Shipwreck 1389423810","Shipwreck 10053641911","Shipwreck 9160141348","Shipwreck 9456262933","Shipwreck 11826210406","Shipwreck 9980843157","Shipwreck 11826207717","Shipwreck 9164313171","City of Waterford","Shipwreck 886882038","Suffield Point Wreckage","Bootswrack","The Kingstone","Burnt out car","Shipwreck 3145391521","Shipwreck 1934285525","Shipwreck 9164315327","Shipwreck 11826210555","Shipwreck 9220189258","Shipwreck 5849369401","SMS Kronprinz Wilhelm","Shipwreck 2919675063","Shipwreck 1973522835","Shipwreck 1318884596","Shipwreck 1391993117","Tank","Shipwreck 3145391649","Shipwreck 1156298992","Shipwreck 11826210418","ANDEN","Shipwreck 11827115839","Shipwreck 1181415907","Shipwreck 8828291182","Shipwrecks","Spanish galleon San Jos\u00e9","Shipwreck 11826208013","Ainemman","Shipwreck 10538093141","O'Brians plane wreck","Shipwreck 8985237575","Shipwreck 11826207938","Ingar\u00f6 102","Shipwreck 11826207931","Shipwreck 11826210299","Shipwreck 11826210468","Eala Bh\u00e0n","Shipwreck 8862890954","Shipwreck 2912221024","Shipwreck 
11826190328"],"top_values":[],"top_words":[["shipwreck",5032],["wreck",96],["ss",90],["(wrack)",57],["of",54],["uss",42],["the",39],["barge",36],["maru",33],["hms",31],["de",24],["mv",23],["wrack",21],["ii",19],["le",19],["concrete",18],["boat",17],["s.s.",16],["(wreck)",16],["city",16],["lcm",16],["port",15],["ship",15],["relitto",14],["werburgh",14]],"vocab_skipped":null,"word_histogram":{"counts":[587,5729,0,353,0,174,38,0,19,0,8,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.6,2.2,2.8,3.4,4.0,4.6,5.2,5.8,6.3999999999999995,7.0,7.6,8.2,8.8,9.4,10.0,10.6,11.2,11.799999999999999,12.4,13.0,13.6,14.2,14.799999999999999,15.399999999999999,16.0,16.6,17.2,17.8,18.4,19.0]}},"kind":"text","n":6914,"n_null":0,"n_unique":6841,"null_rate":0.0,"stats":{"allcaps_rate":0.014029505351460804,"boilerplate_rate":0.0,"duplicate_rate":0.010558287532542667,"emoji_rate":0.0,"len_max":153,"len_mean":18.353630315302286,"len_median":20.0,"len_min":2,"len_p95":21.0,"n_duplicates":73,"n_empty":0,"one_word_rate":0.0849002024877061,"readability_flesch_mean":73.369325,"url_rate":0.0,"vocab_size":7602,"word_mean":2.0579982643910903,"word_median":2.0}},{"alerts":[{"code":"outliers","level":"warn","message":"12.5% rows beyond 1.5 
IQR"}],"column":"lat","extras":{"histogram":{"counts":[1,0,0,1,1,20,39,41,50,107,235,110,56,118,67,28,30,72,73,67,40,180,105,85,108,84,75,149,529,748,608,494,1302,846,212,85,103,39,5,1],"edges":[-77.4249898,-73.43508664500001,-69.44518349,-65.455280335,-61.465377180000004,-57.475474025000004,-53.485570870000004,-49.495667715,-45.50576456,-41.515861405,-37.52595825,-33.536055095,-29.54615194,-25.556248785,-21.56634563,-17.576442475,-13.58653932,-9.596636164999993,-5.606733009999999,-1.616829855000006,2.3730733000000015,6.362976455000009,10.352879610000002,14.342782764999995,18.332685920000003,22.32258907500001,26.312492230000004,30.302395384999997,34.292298540000004,38.28220169500001,42.272104850000005,46.262008005,50.251911160000006,54.24181431500001,58.23171747000002,62.221620625,66.21152378000001,70.20142693500001,74.19133009,78.181233245,82.1711364]},"sample":[12.3201688,43.3983072,54.521617,-7.2760612,18.5368335,30.0438888,30.1782667,42.8654333,42.6557194,42.6266667,42.4252694,34.5616667,34.27805,45.2678,51.433333,50.6727777,50.7425,58.898052,23.6186333,26.3156833,3.820782,3.972345,43.9466613,48.166123,-39.4521323,-36.6183364,54.5514672,54.6243869,55.8826659,49.1114188,54.5415484,57.70105,57.7125,49.2693179,41.394015,47.7359326,54.3829135,-50.78706,-50.533221,-50.508895,44.9783149,-22.8672363,-22.8367789,-22.8589768,-22.961575,-22.0401102,-1.3821792,-1.4588097,-4.6999956,-4.8301505,-4.8750572,-14.799156,-16.6822565,49.5473846,-30.383335,-33.3448493,48.1155849,47.5717797,33.919298,6.3951968,6.3970123,6.4005948,6.4224358,6.4384519,53.2097477,43.2309333,15.6743519,61.4355519,54.606407,33.9805038,-32.7529117,-32.8800099,-34.3029273,-33.8026213,-34.2646954,-34.2480165,-34.1196992,-23.3852646,-25.2705509,-32.0284144,-26.8008527,-26.1841651,-19.7491649,-16.3447657,-7.9469289,-5.3781067,-5.7071966,-2.7610967,53.6623822,-32.2920345,37.041553,37.1004375,37.1015746,37.3615008,37.5396263,37.9084072,37.9148021,37.9486288,38.098266,38.124513,38.1497279,38.3127555,38.4514372,3
9.1424913,39.2234389,39.8561052,39.9235074,40.4125,41.1280651,41.2406877,41.8752694,42.2444614,42.4917615,42.7589575,42.8812625,43.1228039,43.3999388,43.5239173,43.5730868,43.950694,43.9799853,45.3261655,45.4690863,52.8075418,62.9711944,46.9343491,47.0826803,59.7318151,46.3004177,55.0685841,58.662318,7.3402859,49.2954932,36.3880293,59.3481556,52.1565647,-77.4249898,59.802931,40.2154973,37.9014624,-52.5674297,-21.2290095,11.5813889,46.2813576,39.4502112,47.9657728,47.9657675,-34.4222363,36.863314,24.5587734,24.5611426,36.7237203,-31.9864,59.3018956,44.6262796,44.6232845,44.6057437,60.2486822,52.3245202,-44.8183333,59.383333,48.2916054,-36.8894764,-25.5074387,-5.6822002,-20.7935992,30.5243902,30.5257464,30.5287546,60.769444,59.891667,59.397222,36.6367598,45.0250339,45.2982676,-21.9106537,-17.6507171,48.3534637,69.2908135,57.6529185,38.9729838,51.4809297,51.4785047,43.7346558,44.7769379,37.1388716,43.7012262,12.46442,72.7948209,54.6925765,49.2269594,51.475805,51.2836272,50.7525372,-43.8118495,-19.9979421,-41.3911035,-41.2785981,-44.41225,-41.1663694,-35.7711831,-36.6853675,-36.8236655,-36.4401652,-36.4367445,-36.4250038,-36.4102964,40.773035,54.717637,-38.5695891,0.3493036,40.68,55.930429,60.1935842,45.0312014,43.5085972,47.5994572,7.3382625,37.5916667,47.3927535,38.146661,68.3900962,-32.0955562,65.3269975,25.0866628,-46.7333336,-18.0657234,43.0293346,48.7555628,42.7583025,43.0015449,44.1636524,30.4604586,42.2713882,-37.9695957,25.072444,16.8830944,41.1092697,47.89618,42.5216304,53.2982485,44.6147809,51.4480805,32.0402111,51.9037685,36.9230624,50.4897166,49.6539799,40.572291,7.5226477,44.1508209,51.4998165,50.8861434,-5.5162403,38.8107491,39.13476,18.316006,43.5618317,8.80781,8.74406,51.4805059,37.6997985,37.9477273,8.72583,52.9551223,64.1199159,53.3546237,-17.1683705,24.6160174,-39.7730948,12.4438643,-48.7459528,41.087868,59.0124149,38.0171865,-27.5003519,54.5815733,46.3110486,53.5597812,18.7303783,54.3533429,37.0655902,16.7296851,40.7789092,40.5269564,-15.0297001,34.
8882803,48.1610179,-22.2412218,12.990338,51.8150016,44.782547,-16.5496003,28.4209984,-20.3262213,5.5505032,5.5228277,51.6856524,51.7332559,44.4155287,-41.1853508,45.3483717,44.8955067,-36.9995445,45.4364188,50.9133006,39.2087743,11.8884075,37.785568,37.7856759,-21.138499,-46.5760348,47.7726667,47.7738755,47.7739587,47.7739505,47.7740914,47.7738556,47.7740159,40.6450884,-8.6421436,-8.6742483,53.3491047,64.578168,64.5748934,64.5266219,64.8135958,34.5871495,47.6565406,18.7985369,-46.5772068,48.4367213,39.9222386,41.9191265,41.9213023,46.0658695,18.1161589,50.1717984,37.7854293,22.9602688,45.8640834,-38.9268514,-40.286113,69.3909936,69.4112597,38.6114519,41.8149143,38.9443389,40.4895079,-33.8151646,48.0872802,17.7496035,-41.3807773,-41.3808419,-41.3806292,8.7348621,-33.0865449,48.6224086,48.0855289,38.0555781,38.0255508,38.0208438,38.0124981,38.0120271,47.8920649,47.8923271,48.4594524,40.6445784,40.64259,40.6439852,40.645023,40.6451133,40.6445272,40.644521,40.6447598,40.6447838,40.6443471,40.6432729,40.6425051,40.6438076,40.6431067,40.6430483,40.6382871,40.6389973,40.6383654,40.6383996,45.191363,38.6687436,38.6623802,38.6623582,43.5231398,43.5227068,51.4021999,51.4056781,51.4061758,51.406252,51.3889232,51.411205,51.4051707,51.4115545,43.1039263,-9.1372713,-9.096573,15.548551,51.4121017,51.4129844,51.4920013,52.8766164,51.7351885,51.734321,51.7738355,51.7334464,32.3800863,42.288918,54.6205783,51.402565,51.439403,54.598517,54.7661667,40.4116667,54.028167,53.8564502,53.867262,53.862573,53.862917,53.95013,53.999687,47.6455,33.9156667,71.4145,55.146,37.9378611,43.288,55.0091667,58.0233333,54.5961667,57.2083333,59.4907719,38.2191667,3.7608333,54.642633,47.8534687,47.8167922,54.012933,54.182002,53.763742,53.787847,54.642907,54.8039,53.56835,54.55695,54.4732,54.473383,54.5153,54.490778,54.518683,54.514447,54.680917,54.681117,54.649918,54.639623,54.838705,53.6622,54.436745,54.548317,54.608817,54.764617,54.745433,54.76505,54.75955,54.695517,53.876018,53.861532,53.8172,53.837597,5
3.835682,54.113622,53.97,53.972367,54.222099,54.167916,54.413147,54.591991,54.541483,53.713998,53.790358,53.820705,53.858132,53.843015,53.965448,53.858762,54.045238,54.027592,54.178648,54.512172,55.050097,54.009712,54.3855,54.362235,47.8790793]},"kind":"numeric","n":6914,"n_null":0,"n_unique":6902,"null_rate":0.0,"stats":{"iqr":27.285215874999995,"kurtosis":0.8666182288242585,"max":82.1711364,"mean":33.14774564704947,"median":43.8517503,"min":-77.4249898,"n_outliers":864,"outlier_rate":0.12496384148105294,"q1":26.582012875,"q3":53.867228749999995,"skew":-1.4171772700889849,"std":29.884607865118472,"zero_rate":0.0}},{"alerts":[{"code":"outliers","level":"warn","message":"11.7% rows beyond 1.5 IQR"}],"column":"lon","extras":{"histogram":{"counts":[54,17,7,5,4,15,266,15,3,39,161,521,174,212,147,158,39,31,111,631,1144,1435,411,272,92,101,61,8,31,7,7,23,34,44,48,44,109,69,163,201],"edges":[-179.2833333,-170.315068185,-161.34680307000002,-152.378537955,-143.41027284,-134.442007725,-125.47374261000002,-116.50547749500001,-107.53721238000001,-98.56894726500002,-89.60068215000001,-80.63241703500002,-71.66415192000002,-62.695886805000015,-53.72762169000002,-44.75935657500003,-35.79109146000002,-26.82282634500001,-17.85456123000003,-8.886296115000022,0.08196899999998664,9.050234114999967,18.018499229999975,26.986764344999983,35.95502945999996,44.92329457499997,53.89155968999998,62.85982480499999,71.82808991999997,80.79635503499995,89.76462014999996,98.73288526499996,107.70115037999997,116.66941549499998,125.63768060999999,134.605945725,143.57421083999995,152.54247595499996,161.51074106999997,170.47900618499997,179.4472713]},"sample":[122.5033517,-124.3121202,13.699233,-34.8562498,-72.3632701,-87.0069444,-87.2438833,-79.1542361,-79.4765472,-80.0983333,-80.2009417,-76.8516667,-76.64417,-81.7149972,-9.383333,-0.7941666,-0.0138888,-3.145833,36.2037666,34.84145,73.405026,73.490725,-59.8410619,-16.2,176.8732982,174.8155259,-0.6212566,-0.7394021,-6.1201021,-2.8042998,8.2854329,-2.178
2167,-2.2227667,-0.2277776,129.755444,-3.3754429,12.4165252,166.06055,166.057717,166.171997,14.0656279,-43.1262747,-43.1051592,-43.1729661,-43.0597584,-40.9521793,-48.5315725,-48.5073744,-36.7213946,-37.1052493,-37.1231367,-39.0121718,-38.9388083,0.0814528,-50.2781,-52.5986873,-88.3081486,-88.4482376,10.8905229,3.3842093,3.3736218,3.1377584,3.5547122,3.8342324,159.9546173,5.31975,-96.5955289,30.3146232,18.7938316,-120.2332333,-60.7183377,-60.6786104,-58.5105387,-59.2450488,-58.6619263,-58.7211601,-58.3327919,-57.4929707,-57.6356144,-52.080882,-48.517447,-48.4871619,-39.981128,-38.9799139,-34.7671444,-35.2543068,-35.182571,-57.7605946,11.4157707,-52.2598635,14.2452629,15.4000808,15.299857,15.1361085,12.6950698,15.9571653,12.3712437,16.1377493,14.4564801,13.3950692,12.60438,16.4502646,16.6151476,9.3910273,8.2310947,8.4365237,17.8010325,14.9483333,13.6981317,13.1740494,11.7597952,11.5538377,11.1202905,13.9923552,14.4628226,10.5317005,10.4107971,10.2793227,13.5931366,9.9013834,9.8877006,12.9994802,12.5836292,-9.5023958,17.8141477,-2.4803849,-2.4544963,5.6990503,59.4801937,49.0982154,17.1372071,134.4392897,-0.0954697,25.4313772,18.2443417,10.2990731,167.4581583,18.9939506,18.4614012,24.8824088,-70.0688267,13.6616895,165.4983334,60.1558749,20.0033325,-4.1020136,-4.1020967,-58.5598729,53.4318792,-81.6853241,-81.682206,3.3378152,115.5582667,18.4305481,-63.5356978,-63.5260193,-63.5277222,19.5429128,14.1617268,-59.1016667,21.666667,-4.5224046,174.6680903,152.9877063,72.3223583,24.728703,47.8393543,47.8438872,47.8418756,4.620833,-2.383889,18.441667,-76.0595997,35.4021308,59.3344331,165.7552256,178.8278661,-4.5620042,34.3874689,-3.6219478,23.0158507,10.8085128,10.8132506,12.0296367,-75.4002209,-76.6227469,4.612801,-69.9718622,-56.1080355,18.5520599,-122.3573498,10.8186129,30.2248728,-1.5138029,-176.7012383,-167.7418108,174.7899563,174.8433609,171.437515,174.2196778,174.7187861,174.917674,174.7246823,174.7815954,174.7861618,174.8326034,174.8290414,-73.8492678,-4.3559164,-58.6488
427,6.7297426,23.735,-4.4863969,18.7787786,14.1605334,16.447831,-52.9716262,134.4340249,15.1816667,-2.9415404,-76.0530643,14.4122495,115.7586465,-142.3868728,-77.3482589,-60.1333351,-140.974016,1.4481337,-86.5380342,10.3937946,-87.885377,0.6204944,49.0872518,-80.7998227,-57.5399524,34.9373952,-24.9955248,1.2673438,-92.0257088,3.1229691,12.7644321,-67.4922833,3.6433982,34.7428777,105.0750533,3.8905843,9.3329082,-124.9481017,8.300181,93.5406072,28.6789506,30.5864843,-1.4143903,154.7292674,-90.1126957,-94.5476103,-64.9573664,5.1307413,167.72911,167.72493,10.80941,24.2816378,23.5658986,167.72198,158.6851454,-21.9087523,-6.241354,177.1799586,-81.9819498,174.4878496,-61.4972025,-74.4287643,-73.9142296,22.7544166,-121.8051306,153.3738988,10.0259376,30.6779781,9.786292,-68.4538206,13.6522087,14.2277697,-22.8951391,-73.8676818,-74.2456754,40.7357511,128.6946238,37.7552711,14.353552,-61.289359,143.1478499,29.5009556,11.6710448,-81.5799616,-40.2944287,5.1625478,5.1674327,-4.3354026,-4.3749012,-75.8496765,173.9721049,32.4854014,-86.07366,174.7961275,16.4530476,-1.4780721,-76.5739826,-15.5319701,-122.2532154,-122.2528678,-175.1618689,168.3096174,-3.2989095,-3.2972883,-3.2957439,-3.2953414,-3.2951695,-3.2935005,-3.2960847,18.0218525,13.4120315,13.4155387,-60.3281892,40.4740396,40.5504636,34.7842865,36.5584965,-77.3671437,-3.2112026,-95.7747601,168.3093144,-4.2852267,-75.1376556,-73.9774264,-73.9710398,141.9317038,-62.9811334,155.5817362,-122.2511584,-83.1729778,149.7928037,-62.0664967,148.3298917,33.1558918,33.1317615,-90.1880636,-71.3928049,-74.9720644,-79.8579645,151.2246976,-4.3336254,-64.6976691,147.078863,147.0775752,147.0801383,167.7357216,151.6453746,-68.4093781,-4.3332723,-121.8391351,-121.7955803,-121.7291397,-121.7290255,-121.7296168,-122.3288264,-122.3288881,-122.5141785,-74.1758361,-74.169842,-74.1692724,-74.1638161,-74.1635368,-74.1633356,-74.16321,-74.1635818,-74.1623464,-74.1628246,-74.1625129,-74.1614077,-74.1639885,-74.1577191,-74.1574534,-74.1529999,-74.1491736,
-74.1501493,-74.1499065,-66.2664465,-9.0206563,-9.0138394,-9.0137265,-1.4964384,-1.4961399,0.580861,0.580718,0.580303,0.5800705,0.6993964,0.7274103,0.5501353,0.5684666,6.0056705,160.2493063,160.1547388,-61.4674161,0.5700414,0.5712998,31.3145643,0.183333,0.6902578,0.6898956,0.8995867,0.6858175,-64.6711364,-71.0333132,13.7142817,3.465707,3.561618,8.371767,19.2333333,14.9383333,8.315633,7.7237749,7.997088,8.087232,8.11542,8.123465,7.928763,-3.7461667,35.5141667,34.9028333,12.60785,23.5901667,3.428,17.9916667,23.801,19.1,24.1866667,2.5565527,15.3361667,8.7797222,13.201217,-3.9290407,-4.0468251,10.9551,13.816295,8.280428,8.098185,12.428682,13.899983,9.672233,14.029783,13.7506,13.63295,13.753217,10.298462,10.519517,10.091672,13.3251,12.7428,12.531673,10.101843,9.509533,9.50817,11.564893,11.340403,10.9562,14.121567,13.623467,13.32945,13.566333,13.576167,9.108708,8.999327,8.408467,8.207675,7.754017,8.79984,8.54875,8.45015,8.650138,7.823731,8.647457,8.364734,8.285883,8.374197,7.763057,9.355228,8.743782,8.801355,8.536367,7.252352,8.630407,8.349703,8.627982,8.114985,8.295613,7.666148,10.184167,10.168802,-4.1233373]},"kind":"numeric","n":6914,"n_null":0,"n_unique":6910,"null_rate":0.0,"stats":{"iqr":58.7436294,"kurtosis":0.9210873148234016,"max":179.4472713,"mean":3.06695853247035,"median":8.321783100000001,"min":-179.2833333,"n_outliers":806,"outlier_rate":0.1165750650853341,"q1":-40.75332395,"q3":17.99030545,"skew":0.509256437777553,"std":69.12497111664672,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"35 singleton categories"},{"code":"null_rate","level":"warn","message":"99.5% null"}],"column":"year_sunk","extras":{"singletons":35,"top_values":[["1942",2],["30 June 1890",1],["1854",1],["1971",1],["1937-09-02",1],["1963-02",1],["1643..1663",1],["1982",1],["June 7, 1928",1],["1435",1],["1920-12-16",1],["1490s",1],["~1700",1],["20 April 1943",1],["25 May 
1963",1],["1710",1],["1915",1],["1909",1],["1951",1],["1952",1]]},"kind":"categorical","n":6914,"n_null":6877,"n_unique":36,"null_rate":0.9946485391958345,"stats":{"cardinality":36,"entropy":5.155399311574898,"entropy_ratio":0.9971903480488863,"top_rate":0.05405405405405406,"top_value":"1942"}},{"alerts":[{"code":"long_tail","level":"info","message":"17 singleton categories"}],"column":"type","extras":{"singletons":17,"top_values":[["shipwreck",5081],["wreck",1345],["ship",381],["barge",27],["submarine",18],["aircraft",17],["plane",10],["boat",4],["vehicle",3],["motor_vehicle",3],["schooner",2],["car",2],["sailboat",2],["battleship",2],["steamer",1],["airplane",1],["freightcar",1],["train",1],["paddle steamer",1],["motorbike",1]]},"kind":"categorical","n":6914,"n_null":0,"n_unique":31,"null_rate":0.0,"stats":{"cardinality":31,"entropy":1.1659220908691208,"entropy_ratio":0.23534030906782408,"top_rate":0.7348857390801273,"top_value":"shipwreck"}},{"alerts":[{"code":"long_tail","level":"info","message":"303 singleton categories"},{"code":"null_rate","level":"warn","message":"95.5% null"}],"column":"wikipedia","extras":{"singletons":303,"top_values":[["en:SS Edmund Fitzgerald",4],["fr:Armorique (navire)",2],["en:Curtiss C-46 Commando",2],["en:USS Amesbury",2],["en:SS America (1939)",1],["ar:\u0633\u0641\u064a\u0646\u0629 \u0632\u064a\u0633\u062a\u0644 \u062c\u0648\u0631\u0645",1],["en:BOS 400",1],["en:New Carissa",1],["en:MV Cita",1],["en:SS Richard Montgomery",1],["en:Kroombit Tops National Park#Crash site",1],["en:Astron (ship)",1],["en:SS Yongala",1],["et:Raketa (laev, 1949)",1],["en:USNS General Hoyt S. 
Vandenberg (T-AGM-10)",1],["en:USS Oriskany (CV-34)",1],["en:USS Massachusetts (BB-2)",1],["en:Water Witch (schooner)",1],["en:Burlington Bay Horse Ferry",1],["en:Champlain II",1]]},"kind":"categorical","n":6914,"n_null":6601,"n_unique":307,"null_rate":0.954729534278276,"stats":{"cardinality":307,"entropy":8.245290412427826,"entropy_ratio":0.9979660808479136,"top_rate":0.012779552715654952,"top_value":"en:SS Edmund Fitzgerald"}},{"alerts":[{"code":"long_tail","level":"info","message":"347 singleton categories"},{"code":"null_rate","level":"warn","message":"94.8% null"}],"column":"wikidata","extras":{"singletons":347,"top_values":[["Q1286267",4],["Q959696",2],["Q215692",2],["Q2862787",2],["Q1145708",2],["Q11675753",2],["Q463091",1],["Q32276",1],["Q115709756",1],["Q14213801",1],["Q2877353",1],["Q7006376",1],["Q6719379",1],["Q41771616",1],["Q7394285",1],["Q7420193",1],["Q1359321",1],["Q4811601",1],["Q1424289",1],["Q1618842",1]]},"kind":"categorical","n":6914,"n_null":6553,"n_unique":353,"null_rate":0.9477870986404396,"stats":{"cardinality":353,"entropy":8.445993531042296,"entropy_ratio":0.9979286593320096,"top_rate":0.0110803324099723,"top_value":"Q1286267"}},{"alerts":[{"code":"long_tail","level":"info","message":"282 singleton categories"},{"code":"null_rate","level":"warn","message":"94.9% null"}],"column":"description","extras":{"singletons":282,"top_values":[["WWII era concrete fuel barge converted into breakwater",14],["Wrecks",7],["WWII concrete barge sunk as part of jetty, partially covered by jetty and fill",5],["Location is based on divers hand drawn maps. 
Due to the wreak breaking up and salvage, the wreak is scattered over a large area.",4],["Partially sunken ships",4],["Concrete petrol barge sunk as part of breakwater",4],["Wrecks of Zulu fishing boats",3],["Chaloupe abandonn\u00e9e \u00e0 terre",3],["WWII era concrete fuel barge sunk as part of jetty foundation",3],["Armada Ship",2],["remains of sunken wooden boats",2],["Hundido el 3 de julio de 1898 durante la batalla naval de Santiago de Cuba en la Guerra Hispano-Cubana-Norteamericana.",2],["09/09/2006 : Epave en bois, longue de 20 m\u00e8tres, large de 4 m\u00e8tres et haute de 3 m\u00e8tres.",2],["Steamer",2],["Iron-hulled barque",2],["On shore wreck of a small abandoned wooden ship.",2],["\u00c9pave",2],["Doj\u015bcie do wrak\u00f3w w zasadzie wolne. Jednak mog\u0105 wyst\u0105pi\u0107 sytuacje gdy b\u0119dzie to utrudnione lub niemo\u017cliwe.",2],["Wrecked sealing vessel",2],["Staten Island boat graveyard",2]]},"kind":"categorical","n":6914,"n_null":6559,"n_unique":304,"null_rate":0.9486549030951692,"stats":{"cardinality":304,"entropy":8.052439281028278,"entropy_ratio":0.9762985026121199,"top_rate":0.03943661971830986,"top_value":"WWII era concrete fuel barge converted into breakwater"}},{"alerts":[{"code":"long_tail","level":"info","message":"3 singleton categories"},{"code":"null_rate","level":"warn","message":"99.8% null"}],"column":"heritage","extras":{"singletons":3,"top_values":[["2",10],["no",1],["yes",1],["1",1]]},"kind":"categorical","n":6914,"n_null":6901,"n_unique":4,"null_rate":0.9981197570147526,"stats":{"cardinality":4,"entropy":1.1451104143815827,"entropy_ratio":0.5725552071907913,"top_rate":0.7692307692307693,"top_value":"2"}},{"alerts":[{"code":"null_rate","level":"warn","message":"92.6% 
null"}],"column":"access","extras":{"singletons":1,"top_values":[["yes",341],["no",73],["permit",27],["private",27],["unknown",20],["permissive",17],["customers",3],["foot",1]]},"kind":"categorical","n":6914,"n_null":6405,"n_unique":8,"null_rate":0.9263812554237778,"stats":{"cardinality":8,"entropy":1.6470256586228902,"entropy_ratio":0.5490085528742967,"top_rate":0.6699410609037328,"top_value":"yes"}},{"alerts":[{"code":"null_rate","level":"warn","message":"77.4% null"}],"column":"depth","extras":{"singletons":186,"top_values":[["12.4",11],["16",11],["18",11],["15.5",11],["19.2",11],["1.1",10],["17.4",10],["15.6",10],["7",10],["14",10],["5",10],["15.1",10],["6.4",9],["9",9],["8",9],["19",9],["15.2",9],["20",9],["16.4",9],["18.5",9]]},"kind":"categorical","n":6914,"n_null":5349,"n_unique":502,"null_rate":0.7736476713913798,"stats":{"cardinality":502,"entropy":8.57917632187813,"entropy_ratio":0.9562653594988282,"top_rate":0.007028753993610224,"top_value":"12.4"}},{"alerts":[],"column":"seamark_type","extras":{"singletons":5,"top_values":[["wreck",5055],["dangerous",598],["non-dangerous",358],["distributed_remains",306],["hulk",56],["hull_showing",46],["shoreline_construction",14],["mast_showing",8],["obstruction",7],["harbour",2],["restricted_area",1],["plane",1],["beacon_special_purpose",1],["landmark",1],["no",1]]},"kind":"categorical","n":6914,"n_null":459,"n_unique":15,"null_rate":0.06638704078680938,"stats":{"cardinality":15,"entropy":1.199603543177135,"entropy_ratio":0.3070481534664757,"top_rate":0.7831138652207591,"top_value":"wreck"}},{"alerts":[],"column":"osm_id","extras":{"histogram":{"counts":[262,451,454,614,699,184,137,46,213,623,54,37,58,44,48,40,49,90,140,94,67,40,40,64,43,59,374,135,125,131,20,45,73,37,928,85,88,141,54,28],"edges":[13059633.0,355475766.325,697891899.65,1040308032.9749999,1382724166.3,1725140299.625,2067556432.9499998,2409972566.275,2752388699.6,3094804832.9249997,3437220966.25,3779637099.575,4122053232.8999996,4464469366.224999,480688
5499.55,5149301632.875,5491717766.2,5834133899.525,6176550032.849999,6518966166.175,6861382299.5,7203798432.825,7546214566.15,7888630699.474999,8231046832.799999,8573462966.125,8915879099.449999,9258295232.775,9600711366.1,9943127499.425,10285543632.75,10627959766.074999,10970375899.4,11312792032.725,11655208166.05,11997624299.375,12340040432.699999,12682456566.025,13024872699.35,13367288832.675,13709704966.0]},"sample":[285876338.0,318331282.0,330610844.0,416654262.0,614128497.0,663869446.0,663869455.0,663869472.0,663869476.0,663869477.0,663869479.0,663869498.0,663869499.0,663869539.0,663869554.0,663869568.0,663869593.0,663869630.0,663869750.0,663869779.0,663869900.0,663869945.0,802957846.0,1033930279.0,1036801908.0,1036801935.0,1156298917.0,1156299263.0,1310611778.0,1359357764.0,1615537180.0,1618106610.0,1618106612.0,1707488894.0,1800854276.0,1806342789.0,1816832352.0,1866606317.0,1866606318.0,1866606319.0,1918925708.0,1934610284.0,1971472609.0,1971473023.0,1971502194.0,1973522462.0,1990673437.0,1990676551.0,1998246279.0,1998246782.0,1998246873.0,2069788166.0,2069828346.0,2113577632.0,2118486823.0,2118509054.0,2171779322.0,2171849465.0,2235593333.0,2247300106.0,2247300110.0,2247300119.0,2247300144.0,2247300146.0,2495246208.0,2549678025.0,2641053316.0,2648219912.0,2661492065.0,2780229546.0,2804934622.0,2804943008.0,2806463487.0,2806463488.0,2806465170.0,2806465807.0,2818717704.0,2899969713.0,2899973534.0,2901729585.0,2905071233.0,2906316273.0,2913679540.0,2915314395.0,2919672797.0,2921310587.0,2921310752.0,2926852506.0,3062490840.0,3127010155.0,3145391443.0,3145391448.0,3145391449.0,3145391463.0,3145391470.0,3145391494.0,3145391496.0,3145391502.0,3145391523.0,3145391528.0,3145391632.0,3145391669.0,3145391677.0,3145391715.0,3145391720.0,3145391844.0,3145391848.0,3145391885.0,3145392343.0,3145392352.0,3145392393.0,3145392417.0,3145392433.0,3145392441.0,3145392449.0,3145392458.0,3145392475.0,3145392485.0,3145392487.0,3145392521.0,3145392524.0,3145392612.0,3145392615.0
,3216012637.0,3371668477.0,3517594234.0,3517684772.0,3623011541.0,3730868933.0,3737744354.0,3770180432.0,4031041874.0,4033373997.0,4263340998.0,4309763925.0,4424290675.0,4506135109.0,4515497138.0,4700789946.0,4902039096.0,4930733364.0,4959040652.0,5057746331.0,5095458299.0,5109946855.0,5241743234.0,5241743235.0,5341790082.0,5385359259.0,5412702406.0,5412702408.0,5745861522.0,5792018085.0,5849369406.0,5896532314.0,5896532320.0,5896570166.0,5924115081.0,6057435136.0,6068383788.0,6077645941.0,6122110226.0,6124014656.0,6325222208.0,6420479579.0,6440363917.0,6516228507.0,6516228525.0,6516228548.0,6516967676.0,6516967677.0,6516992725.0,6592331761.0,6610582183.0,6727106625.0,6801427799.0,6897142679.0,6906742448.0,7013337185.0,7079569734.0,7169799137.0,7671628425.0,7671628428.0,7786998632.0,7815068453.0,7977872983.0,8190643083.0,8230560868.0,8269538921.0,8796320090.0,8836420680.0,8909579856.0,8948540360.0,9156038418.0,9220189260.0,9222907402.0,9231686990.0,9231686995.0,9236468453.0,9236801063.0,9236894298.0,9239185872.0,9239185899.0,9239185907.0,9239185908.0,9239185912.0,9239187017.0,9243708083.0,9312936055.0,9352814315.0,9353792499.0,9456262934.0,9469558404.0,9709341165.0,9720658080.0,9799938783.0,9802778108.0,9835653086.0,9851590948.0,9862767347.0,9864254501.0,9885211699.0,9939223415.0,9989421298.0,10106188447.0,10142949062.0,10164697267.0,10259232998.0,10262729483.0,10739106131.0,10771853782.0,10778526212.0,10839143373.0,10964242505.0,11041485164.0,11331356919.0,11474156302.0,11648728361.0,11729736557.0,11736882875.0,11813807106.0,11879596657.0,11983461063.0,12043924933.0,12044884991.0,12059551182.0,12077393926.0,12079625209.0,12104977842.0,12108058777.0,12109694665.0,12147375501.0,12292590701.0,12480170251.0,12569314800.0,12572483312.0,12623852811.0,12824166409.0,12845245615.0,12845245621.0,12852595385.0,12961027315.0,12973869298.0,12981590912.0,13018403201.0,13056264560.0,13260437635.0,13289920766.0,13382701660.0,13384401429.0,13456923901.0,13059633.0,32108807.0,326869
66.0,35836546.0,42444702.0,113964909.0,141480345.0,150679354.0,170000345.0,176634548.0,217558472.0,236852632.0,309911662.0,315833777.0,330565434.0,398859991.0,413246598.0,414558721.0,522759114.0,569137534.0,668996467.0,736926283.0,784520988.0,794610755.0,807599440.0,807599441.0,822192980.0,822192981.0,852472582.0,860591507.0,863826191.0,879058566.0,887784527.0,891514161.0,905562834.0,911811342.0,913583303.0,934451215.0,934451216.0,998958973.0,1000657633.0,1035843189.0,1035843199.0,1035843211.0,1035843212.0,1035843219.0,1035843231.0,1035843233.0,1044510971.0,1062946102.0,1062946108.0,1065371658.0,1075467831.0,1078714643.0,1080250214.0,1081716390.0,1114710275.0,1116008162.0,1155194734.0,1157853677.0,1157864052.0,1158226815.0,1181415899.0,1181415903.0,1190070430.0,1213406210.0,1216319973.0,1219328060.0,1222494819.0,1227803680.0,1228746618.0,1266682468.0,1282734136.0,1282734139.0,1304187527.0,1304349259.0,1304351823.0,1307839711.0,1311003542.0,1318884598.0,1319841641.0,1373543823.0,1373543830.0,1373543832.0,1375971087.0,1377160702.0,1383474518.0,1385762404.0,1387382568.0,1387382575.0,1387385487.0,1387385496.0,1387385498.0,1387731078.0,1387731079.0,1387921484.0,1387937280.0,1388101931.0,1388101936.0,1388120546.0,1388120547.0,1388120556.0,1388120557.0,1388120558.0,1388120563.0,1388120570.0,1388382823.0,1388382842.0,1388382846.0,1388436247.0,1388436248.0,1388897594.0,1389423805.0,1389423814.0,1389423816.0,1389695462.0,1391747348.0,1391747350.0,1391747351.0,1391875019.0,1391875021.0,1392192952.0,1392192967.0,1392192969.0,1392192971.0,1392258639.0,1392258659.0,1392436461.0,1392845450.0,1393608460.0,1396497840.0,1396680086.0,1408658504.0,1408932057.0,1408932068.0,1411474960.0,1433566308.0,1453992601.0,1454027320.0,1454274265.0,1454276605.0,1458391814.0,1458669726.0,6325205449.0,6383969836.0,6383969837.0,8489721676.0,8992973288.0,9014338869.0,9164313171.0,9164313185.0,9164315241.0,9164315248.0,9164315255.0,9164315316.0,9164315323.0,9369861811.0,9407025737.0,9456088335.0,988405
5312.0,9915143465.0,10079459665.0,10117567922.0,10121902866.0,10171464595.0,10209178681.0,10237827451.0,10268732043.0,10276588952.0,11101145419.0,11334853974.0,11334853976.0,11826170837.0,11826170849.0,11826190334.0,11826190349.0,11826207705.0,11826207710.0,11826207719.0,11826207809.0,11826207811.0,11826207812.0,11826207825.0,11826207862.0,11826207871.0,11826207881.0,11826207884.0,11826207891.0,11826207897.0,11826207903.0,11826207908.0,11826207917.0,11826207940.0,11826207948.0,11826207954.0,11826207961.0,11826207966.0,11826207979.0,11826207985.0,11826207987.0,11826210191.0,11826210199.0,11826210245.0,11826210254.0,11826210258.0,11826210269.0,11826210273.0,11826210291.0,11826210317.0,11826210330.0,11826210334.0,11826210350.0,11826210359.0,11826210387.0,11826210400.0,11826210420.0,11826210438.0,11826210444.0,11826210451.0,11826210479.0,11826210487.0,11826210516.0,11826210521.0,11826210556.0,11826210565.0,11826210582.0,11827115827.0,11827115834.0,1222121328.0]},"kind":"numeric","n":6914,"n_null":0,"n_unique":6914,"null_rate":0.0,"stats":{"iqr":8440492924.25,"kurtosis":-1.4532113110451803,"max":13709704966.0,"mean":5365308386.894562,"median":3145392419.5,"min":13059633.0,"n_outliers":0,"outlier_rate":0.0,"q1":1347916860.75,"q3":9788409785.0,"skew":0.43554970599310067,"std":4463893575.9306755,"zero_rate":0.0}},{"alerts":[],"column":"osm_type","extras":{"singletons":0,"top_values":[["node",5000],["way",1914]]},"kind":"categorical","n":6914,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.8510948988389824,"entropy_ratio":0.8510948988389824,"top_rate":0.7231703789412786,"top_value":"node"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["type.top_value","type.top_rate","seamark_type.top_value","seamark_type.top_rate","heritage.null_rate","year_sunk.null_rate","wikipedia.null_rate","access.null_rate","access.top_value","access.top_rate","row_count","lat.min","lat.max"],"featured_charts":[{"caption":"Look 
for the dominance of 'shipwreck' and 'wreck' and how thin the long tail of aircraft, submarines, and barges really is.","column":"type","kind":"bar"},{"caption":"Shows the navigational hazard classification \u2014 note how many wrecks are marked 'dangerous' or have 'distributed_remains' versus a clean hull.","column":"seamark_type","kind":"bar"},{"caption":"Among the minority of wrecks with access data, check how many are freely accessible versus permit-only or private.","column":"access","kind":"donut"},{"caption":"Distribution of wreck latitudes reveals geographic clustering \u2014 look for the concentration in northern hemisphere waters and the smaller secondary cluster in southern mid-latitudes; the bins nearest the poles are almost empty.","column":"lat","kind":"histogram"},{"caption":"Depth values (where recorded) cluster around 7\u201320 metres, suggesting the dataset skews toward shallow, diveable wrecks rather than deep-sea losses.","column":"depth","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset is an OpenStreetMap-derived catalogue of 6,914 shipwrecks and related maritime hazards mapped globally. The most important thing to explore first is the `type` and `seamark_type` columns, which reveal that the overwhelming majority (~73-78%) of entries are labelled simply 'shipwreck' or 'wreck', with a long tail of submarines, aircraft, barges, and other vessels worth examining. A secondary point of interest is the high null rates across many descriptive fields \u2014 `heritage` (99.8% null), `year_sunk` (99.5% null), and `wikipedia` (95.5% null) \u2014 meaning rich contextual data exists for only a tiny fraction of wrecks, and the dataset is far more useful as a spatial inventory than a historical record. 
The `access` column, where populated, shows most accessible wrecks are open ('yes'), but a meaningful share require permits or are private, which could interest dive-site analysts.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["top_words","n_unique","n","duplicate_rate","n_duplicates","null_rate","len_median","len_p95","len_max"],"model":"anthropic:default","narrative":"This column contains the names of individual shipwrecks, as confirmed by dominant top words: 'shipwreck' (5032 occurrences across 6914 rows), 'wreck', 'ss', 'uss', and 'hms'. With 6841 unique values out of 6914 rows and a near-zero null rate, it is essentially a name/label field \u2014 but the 73 duplicates (1.06% duplicate rate) are mildly surprising and may indicate the same wreck is referenced under the same name in multiple records. Lengths cluster tightly (median 20, p95 21 characters) with a long tail reaching 153, suggesting most names are concise vessel names while a minority carry extended descriptions.","role":"label","scope":"column","target":"name","treatment":"Use as a display label; investigate 73 duplicates for deduplication or record linkage before treating as a unique identifier."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","n_outliers","outlier_rate","n_unique","null_rate"],"model":"anthropic:default","narrative":"This column represents geographic latitude values, spanning from -77.42\u00b0 (near Antarctica) to 82.17\u00b0 (high Arctic), with 6,902 unique values across 6,914 rows. The mean (33.15\u00b0) sits notably below the median (43.85\u00b0), driven by a left skew of -1.42 \u2014 indicating a cluster of records in mid-to-high northern latitudes with a pull from southern hemisphere or equatorial observations. 
Roughly 12.5% of values (864 rows) are flagged as outliers, likely corresponding to polar or deep southern hemisphere coordinates that deviate from the dominant northern mid-latitude band.","role":"feature","scope":"column","target":"lat","treatment":"Retain as-is for geospatial modelling; consider pairing with longitude and binning into geographic regions to handle the skewed distribution and outlier polar values."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","iqr","std","n_outliers","outlier_rate","skew","n_unique"],"model":"anthropic:default","narrative":"This column contains geographic longitude values, spanning nearly the full valid range from -179.28\u00b0 to 179.45\u00b0 and covering both hemispheres. The mean (3.07\u00b0) and median (8.32\u00b0) are both modestly east of the Prime Meridian, suggesting a concentration of records in Europe/Africa, while the wide IQR of 58.74\u00b0 and std of 69.12\u00b0 confirm global scatter. Notably, 806 rows (11.66%) are flagged as outliers, likely corresponding to locations in the Americas or Pacific \u2014 not erroneous values, but genuine geographic extremes relative to the modal cluster.","role":"feature","scope":"column","target":"lon","treatment":"Use as-is or pair with latitude for spatial modelling; consider converting to radians or embedding via geohash for ML pipelines."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","top_values","cardinality"],"model":"anthropic:default","narrative":"This column records the year (or date) a vessel was sunk, but it is almost entirely empty \u2014 99.46% of the 6,914 rows are null, leaving only about 38 non-null values. Among those, the formats are wildly inconsistent: bare years ('1942', '1854'), full dates in multiple formats ('30 June 1890', 'June 7, 1928', '1937-09-02'), partial dates ('1963-02'), and even a range ('1643..1663'), making normalisation non-trivial. 
With 36 unique values across ~38 populated rows the column is near-unique relative to its populated set, and the top value '1942' appears only twice.","role":"metadata","scope":"column","target":"year_sunk","treatment":"Parse and normalise to a standard year integer after regex-based format detection; treat as sparse metadata and do not use as a primary feature without imputation strategy given 99.46% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","n_unique","entropy_ratio","top_values","alerts"],"model":"anthropic:default","narrative":"This column classifies underwater or maritime wreck sites by vessel/object type, with 31 distinct categories across 6,914 records and no nulls. The distribution is heavily dominated by 'shipwreck' (73.5% of records) and 'wreck' (19.5%), together accounting for over 93% of all entries \u2014 the remaining 29 categories share just ~6.5%, confirming the long-tail alert. The near-redundancy between 'shipwreck', 'wreck', and 'ship' (plus 'boat', 'barge') suggests inconsistent taxonomy that may need consolidation before modelling.","role":"label","scope":"column","target":"type","treatment":"Consolidate overlapping categories (e.g. 'shipwreck'/'wreck'/'ship') into a canonical taxonomy, then one-hot or target-encode for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","top_values","alerts"],"model":"anthropic:default","narrative":"This column stores Wikipedia article links associated with dataset entities (ships and aircraft), formatted as language-prefixed slugs (e.g., 'en:SS Edmund Fitzgerald', 'fr:Armorique (navire)'). The null rate is extremely high at 95.47%, meaning only ~313 of 6,914 rows have any Wikipedia reference. 
Among populated values, cardinality is very high (307 unique values across ~313 non-null rows), with the top value appearing only 4 times \u2014 indicating near-unique coverage and a long-tail distribution. A language mix is present (English 'en:', French 'fr:', Arabic 'ar:'), which could complicate any downstream lookup or joining logic.","role":"metadata","scope":"column","target":"wikipedia","treatment":"Use as an optional enrichment link; do not use in modelling due to 95.47% nulls and near-unique cardinality; parse language prefix if language-specific resolution is needed."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","entropy_ratio","alerts","n"],"model":"anthropic:default","narrative":"This column stores Wikidata entity identifiers (Q-codes), linking dataset records to Wikidata knowledge graph entries. The most striking signal is the extreme null rate of 94.78%, meaning only ~360 of 6,914 rows carry a Wikidata link at all. Among the 353 unique Q-codes present, the distribution is nearly flat \u2014 the top value 'Q1286267' appears only 4 times, entropy ratio is 0.998, and the long-tail alert confirms almost no repeated values \u2014 suggesting each populated row points to a distinct entity with minimal reuse.","role":"foreign_key","scope":"column","target":"wikidata","treatment":"Use as an optional foreign key to enrich records via Wikidata API lookup; do not use as a feature directly given 94.78% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","entropy","entropy_ratio","top_value","top_rate","top_values"],"model":"anthropic:default","narrative":"This column contains free-text descriptions of maritime wrecks or nautical features, with entries referencing WWII-era vessels, jetties, fishing boats, and abandoned craft. 
The most striking signal is the 94.87% null rate \u2014 nearly the entire dataset lacks a description \u2014 making this column nearly unusable at scale. Among the 304 unique values across 6,914 rows, entropy is very high (8.05, ratio 0.976), indicating wide diversity in phrasing, and a language mix is evident (e.g., French 'Chaloupe abandonn\u00e9e \u00e0 terre' alongside English entries).","role":"free_text","scope":"column","target":"description","treatment":"Exclude from modelling due to 94.87% null rate; if used, tokenize and embed the 5.13% populated values, and flag language mixing before NLP processing."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","top_values","cardinality"],"model":"anthropic:default","narrative":"This column appears to encode a 'heritage' flag or classification with only 4 distinct values ('1', '2', 'no', 'yes'), suggesting a binary or ordinal attribute that may have been inconsistently encoded across sources. The critical finding is a null rate of 99.81%, meaning only 13 of 6,914 rows have any value at all \u2014 rendering this column nearly useless for modelling. Among those 13 non-null values, '2' dominates at 76.9%, while 'no', 'yes', and '1' each appear only once, indicating a mixed encoding scheme (numeric vs. boolean strings) on an already negligible sample.","role":"feature","scope":"column","target":"heritage","treatment":"Drop this column; 99.81% null rate and only 13 non-null observations make it statistically unusable."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","top_values"],"model":"anthropic:default","narrative":"This column appears to encode access permission or restriction tags for geographic features (likely OpenStreetMap-style data), with values such as 'yes', 'no', 'permit', 'private', 'permissive', and 'customers'. 
The striking finding is a 92.64% null rate \u2014 only 509 of 6,914 rows carry a value \u2014 meaning this attribute is almost entirely absent from the dataset. Among the non-null values, 'yes' dominates heavily at 66.99% of populated rows, suggesting most tagged features have open access.","role":"feature","scope":"column","target":"access","treatment":"Flag extreme sparsity (92.64% nulls); treat nulls as a distinct 'untagged' category or drop column if missingness renders it uninformative for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","cardinality","entropy_ratio","top_value","top_values"],"model":"anthropic:default","narrative":"This column represents a numeric depth measurement (likely in meters or similar units) stored as a categorical string, with decimal values such as '1.1' and '19.2'. The most striking signal is a null rate of 77.36%, meaning only ~1,565 of 6,914 rows carry a value \u2014 a severe missingness that warrants investigation into whether it is structurally absent (e.g., not applicable to certain record types) or a data quality issue. Among populated rows, cardinality is very high (502 unique values) with an entropy ratio of 0.956, indicating nearly uniform spread and essentially no dominant depth value \u2014 the top value '12.4' appears only 11 times. The column should be cast to numeric before any modelling use.","role":"feature","scope":"column","target":"depth","treatment":"Cast to float, investigate structural vs. random missingness before imputing or dropping nulls, then use as a numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","entropy_ratio","null_rate","n","top_values"],"model":"anthropic:default","narrative":"This column contains a nautical/maritime classification type for seamarks, most likely drawn from an OpenStreetMap or similar marine charting schema. 
The distribution is severely dominated by 'wreck' at 78.3% of 6,914 rows, with the next largest category 'dangerous' at only 8.6%, giving a low entropy ratio of 0.307. The mix of subtypes (hull_showing, mast_showing, distributed_remains, hulk) suggests these are sub-classifications of wrecks that could have been normalized into a hierarchy rather than a flat taxonomy. The 6.6% null rate warrants attention if completeness matters for navigation safety contexts.","role":"label","scope":"column","target":"seamark_type","treatment":"One-hot or ordinal encode; consider grouping wreck subtypes (hull_showing, mast_showing, hulk, distributed_remains) into a parent 'wreck' hierarchy before modelling, and impute or flag the 6.6% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","min","max","iqr","kurtosis","skew","n_outliers","zero_rate"],"model":"anthropic:default","narrative":"This column is an OpenStreetMap (OSM) object identifier \u2014 a large integer surrogate key assigned by the OSM platform to geographic features. Every one of the 6,914 rows has a distinct value with zero nulls, confirming it functions purely as a unique identifier. The value range (13 M to ~13.7 B) and flat distribution (kurtosis \u22121.45, near-uniform spread across a ~8.4 B IQR) are consistent with OSM's incrementally assigned ID space across different data vintages. No outliers are flagged and the mild positive skew (0.44) suggests a slight concentration of older, lower-numbered IDs.","role":"identifier","scope":"column","target":"osm_id","treatment":"Retain as a join/lookup key to OSM data; drop from any model feature set as it carries no predictive signal."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","null_rate","top_values"],"model":"anthropic:default","narrative":"This column encodes the OpenStreetMap geometry type, distinguishing between point features ('node') and linear/polygonal features ('way'). 
With only 2 distinct values across 6,914 rows and zero nulls, it is a clean binary categorical. The distribution is moderately skewed: 'node' accounts for 72.3% (5,000 rows) versus 'way' at 27.7% (1,914 rows), which is consistent with OSM datasets where point POIs outnumber way geometries.","role":"feature","scope":"column","target":"osm_type","treatment":"One-hot encode or map to binary flag (node=1, way=0) before modelling; consider whether geometry type meaningfully differs from other features in the pipeline."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":4372,"prompt_tokens":13549,"total_tokens":17921}},"language_counts":{},"meta":{"generated_at":"2026-06-21T22:43:45+00:00","mode":"full","row_count":6914,"sampled_rows":6914,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/shipwrecks.json"},"notes":[],"saturn_version":"0.2.0","schema":{"access":"categorical","depth":"categorical","description":"categorical","heritage":"categorical","lat":"numeric","lon":"numeric","name":"text","osm_id":"numeric","osm_type":"categorical","seamark_type":"categorical","type":"categorical","wikidata":"categorical","wikipedia":"categorical","year_sunk":"categorical"}}
