{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"97.6% of rows are unique strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[301,525,304,528,827,705,859,906,7858,560,414,287,151,155,52,42,42,37,5,8,4,3,2,2,2,1,1,1,0,0,0,2,0,0,0,0,0,0,0,1],"edges":[2.0,4.225,6.45,8.675,10.9,13.125,15.350000000000001,17.575000000000003,19.8,22.025000000000002,24.25,26.475,28.700000000000003,30.925,33.150000000000006,35.375,37.6,39.825,42.050000000000004,44.275,46.5,48.725,50.95,53.175000000000004,55.400000000000006,57.625,59.85,62.075,64.30000000000001,66.525,68.75,70.97500000000001,73.2,75.425,77.65,79.875,82.10000000000001,84.325,86.55,88.775,91.0]},"near_unique":true,"sample":["Far de Capdepera","Lighthouse 12736917711","\u51fa\u96f2\u65e5\u5fa1\u7895\u706f\u53f0","Pilsumer Leuchtturm","Lighthouse 3592895556","H\u1ea3i \u0111\u0103ng V\u1ea1n Ca","Lighthouse 3954003290","Lighthouse 666429022","Farol do Bugio","Vejsn\u00e6s Nakke","Lighthouse 8302234386","Lighthouse 6367102379","Lighthouse 1572675044","Punta Ballena","Lighthouse 1576209851","Lighthouse 1293082078","\u0422\u0435\u0440\u0438\u0431\u0435\u0440\u0441\u043a\u0438\u0439","Lighthouse 303765533","Mercusuar Cirebon","Lighthouse 3073675847","Lighthouse 1579311833","San Fernando Point Lighthouse","Lighthouse 1216465472","Tvikobben","Faros","Lighthouse 13074696188","Lighthouse 6816680116","Lighthouse 1295700390","K\u0101ingaroa automatic light","H\u1ea3i \u0111\u0103ng S\u01a1n Ch\u00e0","S\u00f8ndre Katland fyr","Lighthouse 9848807466","Lighthouse 9848807460","H\u1ea3i \u0111\u0103ng Cao Trang","Lighthouse 2266550487","Lighthouse 3790789305","Danushkodi Lighthouse","Lighthouse 5587474511","Lighthouse 4168011445","Lighthouse 6274650580","North Ronaldsay Lighthouse","\u8d64\u7901\u5d0e\u706f\u53f0","Lighthouse 1572630476","St. 
Abbs Lighthouse","Lighthouse 1124285588","\u5b89\u57fa\u706f\u5854","Rose Blanche Lighthouse","Faro de San Felipe","Homlungen fyr","Baileys harbor Upper Range Light"],"top_values":[],"top_words":[["lighthouse",8487],["faro",780],["de",739],["light",622],["point",395],["fyr",354],["phare",341],["island",274],["farol",217],["\u043c\u0430\u044f\u043a",193],["punta",167],["do",128],["la",125],["di",121],["tulepaak",121],["cape",118],["da",118],["h\u1ea3i",96],["\u0111\u0103ng",95],["head",92],["range",89],["du",88],["mercusuar",85],["\u03c6\u03ac\u03c1\u03bf\u03c2",79],["leuchtturm",75]],"vocab_skipped":null,"word_histogram":{"counts":[1557,0,9038,0,0,2381,0,0,1118,0,340,0,0,106,0,0,26,0,0,10,0,4,0,0,2,0,0,0,0,3],"edges":[1.0,1.3666666666666667,1.7333333333333334,2.0999999999999996,2.466666666666667,2.833333333333333,3.1999999999999997,3.5666666666666664,3.933333333333333,4.3,4.666666666666666,5.033333333333333,5.3999999999999995,5.766666666666667,6.133333333333333,6.5,6.866666666666666,7.2333333333333325,7.6,7.966666666666666,8.333333333333332,8.7,9.066666666666666,9.433333333333334,9.799999999999999,10.166666666666666,10.533333333333333,10.899999999999999,11.266666666666666,11.633333333333333,12.0]}},"kind":"text","n":14585,"n_null":0,"n_unique":14239,"null_rate":0.0,"stats":{"allcaps_rate":0.057936235858759,"boilerplate_rate":0.0,"duplicate_rate":0.023723003085361672,"emoji_rate":0.0,"len_max":91,"len_mean":18.947685978745287,"len_median":21.0,"len_min":2,"len_p95":27.0,"n_duplicates":346,"n_empty":0,"one_word_rate":0.10675351388412753,"readability_flesch_mean":75.78385,"url_rate":0.0,"vocab_size":14670,"word_mean":2.326911210147412,"word_median":2.0}},{"alerts":[{"code":"outliers","level":"warn","message":"8.9% rows beyond 1.5 
IQR"}],"column":"lat","extras":{"histogram":{"counts":[3,2,38,44,29,92,108,112,147,101,93,113,58,92,58,130,129,168,121,239,549,274,241,383,273,226,1062,1554,1300,1976,1257,625,900,1039,410,357,179,73,22,8],"edges":[-63.3959402,-59.76603041,-56.13612062,-52.50621083,-48.87630104,-45.24639125,-41.616481459999996,-37.98657167,-34.35666188,-30.726752089999998,-27.0968423,-23.46693251,-19.837022719999993,-16.207112929999994,-12.577203139999995,-8.947293349999995,-5.317383559999996,-1.6874737699999969,1.9424360200000024,5.572345810000002,9.2022556,12.83216539,16.46207518,20.09198497,23.721894760000012,27.35180455000001,30.98171434000001,34.61162413000001,38.24153392000001,41.87144371000001,45.50135350000001,49.13126329000001,52.761173080000006,56.391082870000005,60.020992660000005,63.650902450000004,67.28081224,70.91072203,74.54063182,78.17054161,81.8004514]},"sample":[41.6714041,50.7783104,43.8510395,17.9184021,8.2693766,51.9928567,-22.9379526,-52.4558419,66.0765851,44.1344915,33.9542667,57.8568937,54.4403358,-55.9635495,53.9264838,22.3288382,49.7737356,44.0863076,44.203861,55.9543672,44.1531001,45.0564222,43.8735333,59.2113333,44.0664852,44.2076771,35.676386,63.1144881,53.6215619,44.0052476,43.3903332,49.8594791,43.9222227,49.3912747,57.1396436,10.383199,43.947995,12.1896131,44.3121629,48.7709607,69.0444998,53.5463116,13.3085045,59.8828953,59.7554518,59.4532248,59.3664653,58.8249234,55.6604444,-45.7736721,-36.4450733,-45.3918842,-40.9235928,61.2691607,62.4307248,11.9521043,54.2271605,59.9011496,43.6555624,42.359108,38.0204086,-2.8038383,-3.8450354,-16.0286572,-22.9343148,-7.465,-0.9102729,-12.9287235,-22.8997339,-33.2025318,-19.8340168,-31.8853476,-12.24,-3.8748564,-23.0440671,-20.3187463,-12.5774542,47.12506,47.2169187,69.7184425,64.7687067,45.0388572,32.6953636,34.6762224,34.6578291,33.9617482,49.4483171,60.2744235,39.0784249,22.6209167,13.6343902,52.514957,44.7296902,42.893183,34.6196273,48.2721074,9.998408,-15.7280687,46.9043906,45.8577344,65.5280773,61.7311028,30.9
297448,45.5374127,51.9813825,33.7535841,45.0235419,43.4294953,39.0135117,38.47445,38.2016785,38.21195,47.9265763,36.3577023,60.7270144,60.6863539,38.6194269,45.4789943,57.8553362,57.9987531,57.6345776,49.4349667,50.4055851,59.7276904,65.5004707,60.1063694,38.5879695,41.1845688,42.1245803,42.394113,43.357353,43.4714252,43.4738022,43.7203898,43.3919342,37.9346685,37.9364881,37.6378303,39.3493811,38.7595469,34.938957,35.465775,36.6323003,36.9305418,37.1425328,33.0596166,39.4162752,42.550668,35.4179909,35.8634532,36.4150167,37.2953845,37.5782333,38.1956957,35.1468737,11.7899394,-33.9073861,40.3560885,45.3942365,46.1335147,48.6847062,64.5607156,-46.8033235,60.8469866,45.3874997,20.4022974,44.2490013,44.8607453,60.0002322,46.5935166,19.2037991,38.1535519,44.9145512,8.8806411,13.5988205,-16.9261124,45.820269,45.259679,45.1969648,45.2082582,-33.5196236,46.6660625,45.559044,40.2360972,40.2375601,52.9246731,34.1753669,34.2152204,30.7346622,33.8730162,18.4642652,31.2027887,38.2035779,-34.7265294,9.823124,51.4486063,-31.9055328,6.9656508,42.148593,58.934,33.9450281,43.8926067,-53.6872882,50.1832557,58.8779332,61.2614787,52.0608629,50.8604775,-16.1600776,12.2372512,23.1933206,32.7730951,-4.5029704,34.4685373,34.5124177,55.9093,35.0242558,-14.9005878,52.9091437,49.4011798,30.3649874,25.131638,43.3508242,43.3507116,37.8903097,36.219807,35.5500307,-51.7617727,32.6449455,41.2342217,35.536717,32.9452837,35.8993295,-16.0557479,-11.3079027,9.4580799,22.4636148,16.3255138,33.8719945,8.8676992,39.5907755,35.0808024,40.9030654,19.6590195,20.1979851,50.8723949,45.5043487,9.8790874,9.724893,48.1817569,38.6221144,26.3430178,18.263524,59.3741733,18.2790147,44.3213525,40.3532652,7.3709738,44.0948804,33.9391423,36.9713473,39.6216954,42.4221825,-12.3210412,44.1478159,59.9970763,-39.3848285,-49.739583,44.4499615,51.4604395,22.2272222,22.1793056,25.7815901,46.0438814,6.1683577,-50.4754085,-3.7945841,33.3849165,13.0090044,32.6570056,15.6584425,50.4421201,34.5676969,36.1109763,7.5791491,10.0560392,3
2.7615713,33.255417,39.4535883,60.9116918,12.1033402,35.6630848,-39.6923322,30.2941182,33.6927468,36.339269,21.4275556,38.9632635,58.9140792,47.0821609,31.0708579,33.9199765,40.8963461,35.3519633,32.1837374,-20.0615561,17.6140628,32.7752227,72.5635571,11.6032424,64.0028306,16.9559952,9.426569,25.1148052,46.3720012,46.4898188,7.2979655,42.5614804,24.8488922,10.6391719,9.464487,66.8223187,34.4900904,30.5897531,37.1187761,24.8135763,27.2243136,40.0052366,68.243444,73.4814403,68.5016646,42.7346097,69.7112185,73.2429683,73.3332395,-26.8016505,35.9909355,52.4392446,32.6426611,18.6825091,33.9678996,25.4579908,43.587194,50.0327973,41.0375734,44.1778388,19.0218697,46.8450033,43.2933615,-16.9049701,33.5377305,55.7073792,11.9955841,54.9609737,37.5004848,34.4941006,34.46536,34.4310853,54.6048019,34.2319396,7.819389,33.5919086,42.0984113,46.0050231,42.0593258,56.4892628,33.5510418,21.2845641,12.399333,-20.7179242,36.0265494,38.5994861,35.7285556,-1.7191211,17.0540543,54.2426369,39.0098228,69.4279549,7.9433388,41.3629619,52.9351267,-40.3116955,33.8514004,43.2009832,47.2336398,43.9711673,67.5313774,40.3276119,36.6603108,40.02071,44.8332861,30.4350857,36.2141194,45.1483441,45.6491728,43.2948854,48.4235201,44.9906157,41.7156241,47.8748384,54.4062779,39.1405673,43.3938733,48.8011173,39.3485816,43.3967018,68.2575179,60.1199849,43.1249348,43.3598816,46.4898263,64.8637148,27.864073,54.3997301,54.6251133,39.3663321,54.6986234,66.5361791,36.3063263,57.6830563,41.9526422,51.5046019,54.2873154,41.3687815,38.209523,43.5518912,43.6231105,-13.216804,61.1446267,10.8080865,44.375308,49.3303925,41.1478258,60.4116695,33.2472249,6.4462109,8.2254425,15.2929235,54.3946082,41.711158,40.8370692,-46.6120495,40.7782797,31.7903589,42.6398123,50.2142405,68.1176868,54.4949656,51.6857553,-15.0533519,57.5258668,-12.9636452,45.6562085,57.2703937,41.4702633,42.2697065,41.4621479,37.1992415,51.0488813,31.8695808,-43.0511105,43.6358687,55.7211233,50.4930988,58.0722835,43.6135848,42.7341089,19.1339671,44.6845359,6
3.8016937,19.3686016,51.1546685,41.3250717,-17.1573719,56.1548942,42.133275,44.0143648,-5.7027759,44.5256391,33.8598229,37.0831661,66.5083215,46.5158036,56.8419842,33.2566829,16.1222128,19.3397646,24.7939694,-1.6611695,34.7372649,31.3912039,43.3852924,58.4114173,43.9536174,-2.8825512,34.3651228,0.9355287,29.5525157,47.6743867,33.0388856]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14572,"null_rate":0.0,"stats":{"iqr":20.8711248,"kurtosis":2.028331985362966,"max":81.8004514,"mean":34.520122738196775,"median":40.8121914,"min":-63.3959402,"n_outliers":1295,"outlier_rate":0.08878985258827563,"q1":28.1114396,"q3":48.9825644,"skew":-1.4577018596013198,"std":24.643138078763677,"zero_rate":0.0}},{"alerts":[],"column":"lon","extras":{"histogram":{"counts":[39,3,20,6,8,179,235,78,38,132,544,805,835,379,272,178,73,128,277,1272,770,1383,1441,659,258,141,77,38,131,73,57,245,388,955,1230,818,199,66,49,106],"edges":[-179.1974645,-170.2340229375,-161.270581375,-152.3071398125,-143.34369825,-134.38025668749998,-125.416815125,-116.45337356249999,-107.489932,-98.5264904375,-89.56304887499999,-80.5996073125,-71.63616575,-62.672724187499995,-53.709282625,-44.745841062500006,-35.7823995,-26.818957937499988,-17.855516375000008,-8.892074812499999,0.07136675000000992,9.03480831249999,17.998249875,26.961691437500008,35.92513299999999,44.8885745625,53.852016125000006,62.815457687500015,71.77889925,80.74234081249998,89.70578237499998,98.66922393749999,107.6326655,116.59610706250001,125.55954862500002,134.52299018750003,143.48643174999998,152.4498733125,161.413314875,170.3767564375,179.340198]},"sample":[-69.9498826,-1.0890453,13.0154223,-76.1843645,73.0260912,-7.5864951,-43.1343108,-69.5448795,-18.6472571,-68.4476379,134.6990296,-6.6419659,16.3785579,-67.2208155,-3.007515,120.3665068,31.4377407,39.0660575,-76.3096098,15.7054122,-87.5624102,-87.4928913,-88.3626168,23.501,-76.54982,-76.6377608,10.8944317,7.6630584,9.530418,-77.6829057,-65.6214909,-126.674116,135.5198919,-63.5939486,-2.0749
327,124.0185041,-76.7986737,-69.9985805,-64.1683878,-123.3670634,33.0428068,8.5700998,-59.6483749,19.0848224,19.1029125,18.3874517,18.3105313,17.7288133,21.1541334,170.7287703,174.9246366,170.8662094,173.8335696,4.8111752,5.7590512,-66.6766568,13.3881068,30.0965885,-65.098764,14.4085758,12.3341334,-41.7293394,-32.421383,-38.9219871,-43.1463773,-34.7933333,-46.1861887,-38.5195887,-43.1677034,-52.7060307,-40.063374,-52.1511314,-37.78,-32.461515,-44.112767,-40.2722127,-38.001642,-70.7019282,-70.2747805,170.4553553,-23.61616,142.5153648,-79.8836242,135.2958859,134.5832303,130.9539036,-2.5329503,26.4390762,17.1359364,120.2568333,121.0366264,141.2054224,136.3370365,133.8825541,128.5479852,-70.2023251,76.221506,46.3045128,-60.4519598,-61.5035073,23.5570631,-114.5826109,74.6220759,13.6541445,4.0814067,11.0124443,-74.653645,3.7775268,20.9135191,20.805,20.4422071,22.9545333,-53.3629774,25.3571183,28.6777687,28.7240539,15.8285398,-66.0826168,11.5530275,11.5154079,18.2733346,-2.7023667,-3.5129427,21.4006353,24.842823,-1.3474847,-0.0548546,-8.7041572,-8.8426715,-8.8187621,-8.3785539,-8.2328734,-8.2401715,-7.8140825,-1.7919056,23.678821,23.6619747,23.1581604,22.9865987,22.8606866,23.9906774,25.2165366,24.8602098,24.7332277,24.5161367,-16.3151756,3.2680785,27.6613658,26.932672,26.2322965,27.4027,26.7630352,26.3979833,26.7826964,35.9213244,-15.6521382,18.4337014,27.9575949,-75.8584586,-63.7771959,-123.2935602,39.8111337,-75.6384872,33.7621977,-63.6312015,121.9588101,-66.3924711,-62.4492247,20.0013211,-63.8663732,-96.1204052,128.6109343,-75.1470456,76.5659814,121.2624826,146.0020551,-61.0391791,-61.2854805,-61.1360903,-61.158725,27.1059243,32.0126994,-80.5039532,23.2806677,23.2788394,158.4173824,132.8003406,132.3948726,130.9906443,131.0101796,-69.876351,129.4751994,128.6031604,135.9938315,125.4404834,0.4373143,-71.5268547,79.8309252,139.5212143,23.4828333,132.4403696,-64.7082431,-67.8475931,-61.2631686,17.9283203,-117.574564,-127.948127,-127.6123145,49.7527424,124.37429,119.4170214,
132.6288878,129.8884365,133.9586698,134.0158009,12.7248167,138.8972873,-148.6296635,158.4525394,154.8277267,-89.08967,121.8230444,-8.2172131,-8.2167432,-76.2359672,136.13142,134.1914655,-72.5088736,-16.9111231,29.1121447,134.0074417,132.457999,14.5229825,-69.1330542,131.7651988,-13.8336425,120.4386261,-98.5668733,132.0172548,112.8309975,-9.0765107,24.7378857,27.4683067,-80.1090654,92.5378732,156.6662327,32.644781,115.5158155,114.2918068,-123.110316,24.125767,126.7480187,109.7287014,144.7265006,109.7254184,28.6946009,26.6905908,149.169791,-87.6482945,126.3295155,137.586988,16.7518099,18.5469158,13.5955564,28.6722779,-1.1577004,-62.01455,-67.7222532,-73.2761076,-56.8584513,91.8001944,91.8211667,119.6591391,141.9190877,102.3453107,-75.2779636,-38.5944772,133.2832766,123.3203624,14.268834,-96.5004153,-125.5071383,134.1042033,133.131882,123.1673671,123.8990986,128.7432234,129.134007,141.9627407,30.4107946,-68.9335089,135.3026333,-73.3971905,120.2747747,135.3267071,133.2820277,111.2534167,-9.4234396,13.9208159,39.1285765,130.6547706,118.3040172,12.9644329,129.344998,129.9678943,148.9640267,-101.5567589,132.9341633,129.5584363,43.1434406,-51.6985306,120.4357409,126.4465763,62.3304717,6.4657751,-1.8047658,124.0204659,-83.3376968,50.853767,122.725376,123.2291162,13.3420768,135.3697908,111.3929468,-8.6179512,118.7684351,33.8430185,-85.7442261,44.2324831,80.4208008,73.58153,133.0965113,60.4885874,56.2325167,70.0639637,153.1374485,129.567418,-127.2737694,131.7653279,108.6931639,136.2189835,119.8482041,-5.9203866,-117.415853,40.4880288,28.6620736,-104.3343311,-71.1317683,-2.1322736,35.1673335,129.966993,52.305161,121.3836117,20.2636993,137.3512781,-114.362625,-114.3478737,-114.3156722,-130.536998,134.6486367,110.500944,134.3649065,142.9842489,-1.1187028,11.8298266,-2.7873036,129.8950817,-89.7030118,-81.474333,-45.6924731,129.5019046,-90.1014564,120.0026389,108.6930938,-25.3572454,-130.3439966,26.6068193,-93.8899428,-76.755327,141.3404229,12.4525195,147.8013914,118.3524898,27.921
6834,-2.1827221,47.6392499,-64.027771,50.6115621,51.509885,52.986681,50.180515,122.5269029,29.9868326,29.7591901,13.7559746,17.0086784,-4.7789355,-81.2481508,2.9340098,-4.1128429,18.6611996,20.2485174,4.9857623,-3.3954685,-123.8261727,3.7017247,54.211904,-1.1214739,17.1964422,-8.8445136,-1.8047692,-24.039024,-15.3846326,41.9937956,8.3909609,-74.4141541,-5.5135479,-16.0261614,-121.9016965,-4.036471,19.1497344,3.4735211,8.6520818,-72.3762975,-0.5136679,10.289594,-70.2078665,12.7145274,21.3027473,124.908975,-73.2313355,-123.264586,16.8482414,19.5926832,134.1756524,3.3976309,81.4119494,-23.7711208,-164.7446054,140.9643155,140.7429602,168.3599905,14.0890313,129.7825466,18.1132715,-5.4766405,13.5832711,11.2382099,39.2516451,40.7866203,-7.694643,40.481187,-84.4655214,-133.6315528,-70.5674599,-70.7591384,-71.3629064,15.1840912,2.3644202,129.9378626,147.3427436,-79.404905,-4.9669837,5.8625121,8.0528155,-79.3434132,-87.7782198,72.781459,-66.6931392,-16.636568,105.9310834,2.7299857,2.1522518,-39.5097167,21.0232826,41.6611112,-68.7264429,53.6494304,14.4483442,-78.6371462,136.723132,-16.5439325,142.3749087,-5.1240102,-8.495484,108.21388,105.825075,66.9775751,127.4088618,119.5203616,119.7075536,-87.8596764,-134.9550234,-86.4694213,107.3397178,136.9084789,104.9225825,119.1126251,-124.4869054,-16.4060836]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14565,"null_rate":0.0,"stats":{"iqr":152.9449913,"kurtosis":-0.95366208364038,"max":179.340198,"mean":23.038825234981143,"median":14.9745494,"min":-179.1974645,"n_outliers":0,"outlier_rate":0.0,"q1":-40.4996302,"q3":112.4453611,"skew":0.052602242829746154,"std":79.39557268151874,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"11 singleton categories"},{"code":"null_rate","level":"warn","message":"99.6% 
null"}],"column":"country","extras":{"singletons":11,"top_values":[["LV",18],["DE",8],["EE",7],["HT",6],["US",5],["GB",2],["PT",2],["IM",2],["JP",1],["KY",1],["PH",1],["IN",1],["FI",1],["PL",1],["IT",1],["RU",1],["UZ",1],["MX",1],["TW",1]]},"kind":"categorical","n":14585,"n_null":14524,"n_unique":19,"null_rate":0.9958176208433321,"stats":{"cardinality":19,"entropy":3.441728321839467,"entropy_ratio":0.8102135243474123,"top_rate":0.29508196721311475,"top_value":"LV"}},{"alerts":[{"code":"long_tail","level":"info","message":"197 singleton categories"},{"code":"null_rate","level":"warn","message":"90.2% null"}],"column":"height","extras":{"singletons":197,"top_values":[["15",59],["12",55],["14",54],["10",51],["8",45],["18",44],["20",43],["13",38],["6",37],["17",33],["25",31],["11",28],["30",28],["16",27],["23",26],["21",21],["19",20],["28",19],["9",19],["26",18]]},"kind":"categorical","n":14585,"n_null":13153,"n_unique":316,"null_rate":0.9018169352074049,"stats":{"cardinality":316,"entropy":6.837224026149949,"entropy_ratio":0.8233868684033955,"top_rate":0.04120111731843575,"top_value":"15"}},{"alerts":[{"code":"long_tail","level":"info","message":"271 singleton categories"},{"code":"null_rate","level":"warn","message":"93.2% null"}],"column":"year_built","extras":{"singletons":271,"top_values":[["C19",31],["1875",16],["1872",15],["1881",12],["C20",12],["1906",11],["1871",11],["1877",11],["1882",11],["1874",11],["1873",11],["1939",10],["1898",10],["1897",9],["1870",9],["1909",8],["1884",8],["1890",8],["1911",7],["1950",7]]},"kind":"categorical","n":14585,"n_null":13593,"n_unique":429,"null_rate":0.9319849160095989,"stats":{"cardinality":429,"entropy":8.142359110821328,"entropy_ratio":0.9311050686755546,"top_rate":0.03125,"top_value":"C19"}},{"alerts":[{"code":"long_tail","level":"info","message":"167 singleton categories"},{"code":"null_rate","level":"warn","message":"92.7% null"}],"column":"operator","extras":{"singletons":167,"top_values":[["U.S. 
Coast Guard",82],["Plovput",49],["INEA",42],["Tagbilaran Station, Philippine Coast Guard",27],["Cebu Station, Philippine Coast Guard",26],["Catbalogan Station, Philippine Coast Guard",18],["Surigao Station, Philippine Coast Guard",18],["Masbate Station, Philippine Coast Guard",17],["Maasin Station, Philippine Coast Guard",17],["\u6d77\u4e0a\u4fdd\u5b89\u5e81",16],["Directorate General of Lighthouses and Lightships",15],["Romblon Station, Philippine Coast Guard",15],["Iloilo Station, Philippine Coast Guard",14],["Puerto Princesa Station, Philippine Coast Guard",14],["Bacolod Station, Philippine Coast Guard",13],["Sorsogon Station, Philippine Coast Guard",13],["Cagayan de Oro Station, Philippine Coast Guard",13],["Marine Department of Sabah",13],["Dumaguete Station, Philippine Coast Guard",12],["Appari Station, Philippine Coast Guard",12]]},"kind":"categorical","n":14585,"n_null":13520,"n_unique":283,"null_rate":0.926979773740144,"stats":{"cardinality":283,"entropy":7.013278518406073,"entropy_ratio":0.8610893556618491,"top_rate":0.07699530516431925,"top_value":"U.S. 
Coast Guard"}},{"alerts":[{"code":"null_rate","level":"warn","message":"48.0% null"}],"column":"seamark_type","extras":{"singletons":7,"top_values":[["light_minor",3496],["light_major",3051],["landmark",716],["beacon_special_purpose",146],["beacon_lateral",102],["beacon_cardinal",19],["light",9],["beacon",5],["beacon_isolated_danger",5],["building",4],["daymark",4],["radio_station",3],["pile",3],["signal_station_traffic",3],["signal_station_warning",3],["buoy_lateral",3],["navigation_line",2],["fishing_facility",2],["light_vessel",1],["cone",1]]},"kind":"categorical","n":14585,"n_null":7002,"n_unique":25,"null_rate":0.4800822763112787,"stats":{"cardinality":25,"entropy":1.6574304304948864,"entropy_ratio":0.3569082165258199,"top_rate":0.4610312541210603,"top_value":"light_minor"}},{"alerts":[{"code":"null_rate","level":"warn","message":"71.3% null"}],"column":"light_character","extras":{"singletons":4,"top_values":[["Fl",3126],["Iso",249],["F",245],["Q",182],["Oc",151],["LFl",148],["VQ",24],["Al.Fl",24],["Mo",14],["FFl",4],["Al",4],["IQ",3],["Al.LFl",2],["Al.Oc",2],["Q+LFl",2],["IVQ",1],["Fl(2)",1],["LFl W 10s",1],["Al.Iso",1]]},"kind":"categorical","n":14585,"n_null":10401,"n_unique":19,"null_rate":0.7131299280082276,"stats":{"cardinality":19,"entropy":1.5033610368706383,"entropy_ratio":0.35390458808745956,"top_rate":0.747131931166348,"top_value":"Fl"}},{"alerts":[{"code":"null_rate","level":"warn","message":"96.9% null"}],"column":"heritage","extras":{"singletons":1,"top_values":[["2",343],["3",52],["4",26],["yes",25],["1",4],["regional",2],["no",1]]},"kind":"categorical","n":14585,"n_null":14132,"n_unique":7,"null_rate":0.9689406924922866,"stats":{"cardinality":7,"entropy":1.2438994308704119,"entropy_ratio":0.4430859173156191,"top_rate":0.7571743929359823,"top_value":"2"}},{"alerts":[{"code":"near_unique","level":"info","message":"97.9% of rows are unique strings"},{"code":"null_rate","level":"warn","message":"86.2% 
null"}],"column":"wikipedia","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[29,74,20,9,17,22,24,45,107,93,132,139,178,307,147,129,103,168,56,38,38,22,38,11,13,5,20,5,2,2,2,9,0,0,0,0,1,0,1,2],"edges":[6.0,7.225,8.45,9.675,10.9,12.125,13.350000000000001,14.575000000000001,15.8,17.025,18.25,19.475,20.700000000000003,21.925,23.150000000000002,24.375,25.6,26.825000000000003,28.05,29.275000000000002,30.5,31.725,32.95,34.175,35.400000000000006,36.625,37.85,39.075,40.300000000000004,41.525000000000006,42.75,43.975,45.2,46.425000000000004,47.650000000000006,48.875,50.1,51.325,52.550000000000004,53.775000000000006,55.0]},"near_unique":true,"sample":["de:Neuer Leuchtturm Borkum","hr:Svjetionik Hrid Blitvenica","en:Gay Head Light","ja:\u845b\u767b\u652f\u5cac\u706f\u53f0","en:Sentinel Island Light","it:Faro di Scilla","es:Faro Punta Bajos","en:Grassy Island Range Lights","fr:Phare de K\u00e9r\u00e9on","fr:Phare de Tuskar Rock","gl:Faro das Illas Sisargas","ca:Far de Tossa","fr:Phare de Ho\u015fk\u00f6y","fr:Phare de Pen-Men","en:Cape Beale Light","ja:\u5e73\u4e45\u4fdd\u57fc\u706f\u53f0","it:Faro di Capo Testa","fr:Phare de la Gacholle","en:Boulder Bank Lighthouse","en:Cape Kumukahi Light","fr:Phare d'Isla de Aves","ca:Far del Cap de Sant Antoni","pt:Farol da Ba\u00eda da Trai\u00e7\u00e3o","en:Nab Tower","it:Faro dismesso della Diga Curvilinea","ja:\u7f85\u81fc\u706f\u53f0","pl:Latarnia Volty","af:Vo\u00ebleiland-vuurtoring","en:Wadjemup Lighthouse","af:Donkin Hill-vuurtoring","de:Pilsumer Leuchtturm","de:Leuchtturm Greifswalder Oie","it:Faro di Capo Carbonara","tr:\u0130\u011fneada Feneri","en:Reedy Island Range Rear Light","es:Faro Santa Cruz","zh:\u6771\u5409\u5dbc\u71c8\u5854","de:Leuchtturm Staberhuk","fr:Phare de Point-Amour","en:Au Sable Light","en:Mukilteo Light","sr:\u0421\u0432\u0435\u0442\u0438\u043e\u043d\u0438\u0446\u0438 \u043d\u0430 \u0443\u0448\u045b\u0443 \u0422\u0430\u043c\u0438\u0448\u0430 \u0443 
\u0414\u0443\u043d\u0430\u0432","en:South Stack Lighthouse","en:Craighill Channel Lower Range Front Light","en:Pond Island Light","en:Bradleys Head Light","en:Ruhnu Lighthouse","ja:\u9678\u4e2d\u9ed2\u57fc\u706f\u53f0","da:Ryvarden fyr","en:Queen's Wharf Lighthouse"],"top_values":[],"top_words":[["light",477],["de",357],["lighthouse",335],["fr:phare",195],["es:faro",181],["island",128],["point",121],["de:leuchtturm",89],["punta",70],["pt:farol",65],["fyr",64],["it:faro",56],["en:cape",56],["di",52],["la",46],["du",44],["head",43],["da",42],["cabo",41],["hr:svjetionik",41],["harbor",35],["range",30],["del",28],["en:point",28],["gl:faro",27]],"vocab_skipped":null,"word_histogram":{"counts":[153,0,0,461,0,0,0,877,0,0,0,346,0,0,0,121,0,0,38,0,0,0,6,0,0,0,4,0,0,2],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":14585,"n_null":12577,"n_unique":1965,"null_rate":0.8623243057936236,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.02141434262948207,"emoji_rate":0.0,"len_max":55,"len_mean":22.296314741035857,"len_median":22.0,"len_min":6,"len_p95":33.0,"n_duplicates":43,"n_empty":0,"one_word_rate":0.07619521912350598,"readability_flesch_mean":48.91560000000001,"url_rate":0.0,"vocab_size":2370,"word_mean":2.9955179282868527,"word_median":3.0}},{"alerts":[],"column":"osm_id","extras":{"histogram":{"counts":[1272,1457,998,2358,1852,348,245,266,212,223,234,178,187,216,220,241,180,113,143,248,351,94,185,147,150,131,163,210,199,422,102,91,107,97,198,162,158,161,132,134],"edges":[13391742.0,351194351.875,688996961.75,1026799571.625,1364602181.5,1702404791.375,2040207401.25,23780
10011.125,2715812621.0,3053615230.875,3391417840.75,3729220450.625,4067023060.5,4404825670.375,4742628280.25,5080430890.125,5418233500.0,5756036109.875,6093838719.75,6431641329.625,6769443939.5,7107246549.375,7445049159.25,7782851769.125,8120654379.0,8458456988.875,8796259598.75,9134062208.625,9471864818.5,9809667428.375,10147470038.25,10485272648.125,10823075258.0,11160877867.875,11498680477.75,11836483087.625,12174285697.5,12512088307.375,12849890917.25,13187693527.125,13525496137.0]},"sample":[257496069.0,269331345.0,277063575.0,290802041.0,316624604.0,322520727.0,322767085.0,345518842.0,367452790.0,367795300.0,387744952.0,388215533.0,414678633.0,418621659.0,475564875.0,476662470.0,498342239.0,527372298.0,529166256.0,553326916.0,582707558.0,582711628.0,582715740.0,677529069.0,687193062.0,687193063.0,705194180.0,733230569.0,856616788.0,867798786.0,870425741.0,870822946.0,891114739.0,896638055.0,898145504.0,927670772.0,937255146.0,967990618.0,970996565.0,973813067.0,993454873.0,1015989176.0,1017716538.0,1030530369.0,1030699139.0,1033001753.0,1033048198.0,1035157745.0,1038134988.0,1052816798.0,1052817308.0,1052817376.0,1052818017.0,1115686209.0,1115686597.0,1124747028.0,1140833417.0,1141447490.0,1147492562.0,1163031158.0,1170470340.0,1181448690.0,1181450779.0,1181454393.0,1181456363.0,1181456551.0,1181456927.0,1181459419.0,1181487370.0,1181489879.0,1181491434.0,1181491592.0,1181492375.0,1181492599.0,1181494151.0,1181497206.0,1181499916.0,1206538071.0,1207646801.0,1208590623.0,1208592533.0,1211610673.0,1219931322.0,1223938651.0,1223938763.0,1223939632.0,1251978042.0,1255497399.0,1258031782.0,1271039565.0,1271040202.0,1271041687.0,1271041814.0,1271041846.0,1271043104.0,1274845171.0,1293790323.0,1295700444.0,1316325490.0,1325500623.0,1336828827.0,1342633746.0,1346126598.0,1347582375.0,1352210001.0,1396142254.0,1419264827.0,1420666397.0,1427393582.0,1431246153.0,1431273924.0,1431277845.0,1435090056.0,1446175116.0,1464343749.0,1471742488.0,1511519712.0,1518364509.0,15311
16614.0,1531129767.0,1533280278.0,1543875388.0,1546187887.0,1550164884.0,1551816219.0,1552943834.0,1566344543.0,1568411704.0,1572564061.0,1572630476.0,1572649626.0,1572671377.0,1572674715.0,1572676426.0,1572817975.0,1574284874.0,1574285300.0,1574289106.0,1575974635.0,1576184962.0,1576208850.0,1576210131.0,1576226091.0,1576239261.0,1576239703.0,1577313045.0,1578665290.0,1578792158.0,1579308033.0,1579308061.0,1579308856.0,1579314776.0,1579315215.0,1579316275.0,1581598638.0,1581753513.0,1581821895.0,1587867548.0,1607156650.0,1635832883.0,1676341025.0,1708934514.0,1745142248.0,1747283270.0,1749004607.0,1779228447.0,1802194462.0,1820917891.0,1851202396.0,1918057029.0,1922148046.0,1948384294.0,2173695042.0,2222721291.0,2279145540.0,2282340081.0,2332459660.0,2336557282.0,2362575442.0,2362576251.0,2394067787.0,2414429522.0,2450039128.0,2474402398.0,2474402405.0,2548274918.0,2819877848.0,2819878132.0,3073675953.0,3074371583.0,3085625775.0,3090584881.0,3194528389.0,3213913862.0,3218331761.0,3264209317.0,3355313876.0,3365506089.0,3377941302.0,3382406087.0,3412041316.0,3523929335.0,3607789181.0,3621592518.0,3666245109.0,3673886182.0,3772706530.0,3772706535.0,3864988145.0,3867189595.0,3869478973.0,3928356747.0,3958654205.0,4041431688.0,4041518908.0,4097748289.0,4110476524.0,4182942012.0,4198431679.0,4212014963.0,4282019929.0,4322578745.0,4335168799.0,4335168800.0,4345307906.0,4370568053.0,4394710881.0,4440025799.0,4455877089.0,4472474312.0,4491583362.0,4499197712.0,4564081941.0,4643267859.0,4677140819.0,4730639098.0,4741525333.0,4756528571.0,4855633587.0,4856233148.0,4874639411.0,4959779581.0,5037921138.0,5118328681.0,5157009043.0,5165791360.0,5179731552.0,5206098615.0,5206187367.0,5237331900.0,5247496381.0,5267412818.0,5313721639.0,5343906622.0,5428863284.0,5429066874.0,5506911318.0,5526253460.0,5591060389.0,5619644612.0,5697555021.0,5840150856.0,5866016356.0,6148579245.0,6335111208.0,6518138123.0,6527917890.0,6527952717.0,6548110059.0,6548305562.0,6614816932.0,6614816933.0,665
5955004.0,6665521201.0,6812359181.0,6828735107.0,6845958332.0,6853394171.0,6883900742.0,6898065786.0,7017083015.0,7023497023.0,7026412555.0,7057956266.0,7074204106.0,7106447484.0,7108534304.0,7111623411.0,7291797818.0,7457144284.0,7472086539.0,7499857816.0,7555099928.0,7595568326.0,7617368642.0,7705093769.0,8022046757.0,8115173938.0,8221607979.0,8231127670.0,8347693964.0,8606611310.0,8619096499.0,8764167716.0,8780330749.0,8858648535.0,8947736422.0,9043868605.0,9119257342.0,9141815449.0,9214278912.0,9427782660.0,9454129933.0,9456834639.0,9457793876.0,9487144663.0,9503896862.0,9570033891.0,9587237790.0,9626161277.0,9635285467.0,9668568332.0,9678566108.0,9776555283.0,9814816440.0,9816669244.0,9824706387.0,9858514837.0,9894237212.0,9903473593.0,9904059792.0,9980993519.0,9982446570.0,9982446583.0,9982516560.0,9990849240.0,10052865693.0,10165146320.0,10173471339.0,10200444430.0,10314577161.0,10584104648.0,10651832163.0,10703399054.0,10731968951.0,10804813653.0,10840142187.0,11112724664.0,11185215932.0,11227946992.0,11303039533.0,11367132320.0,11496715033.0,11520843452.0,11524440137.0,11575677696.0,11575790659.0,11577750630.0,11747031180.0,11795094666.0,11824244928.0,11834603179.0,11868104716.0,11918045878.0,12082533804.0,12230550831.0,12271416571.0,12323979912.0,12403509943.0,12405712541.0,12460930452.0,12580272635.0,12633335813.0,12659675630.0,12743351027.0,12745896980.0,12777880729.0,12807479812.0,12826431860.0,12848821265.0,12944958311.0,12968529221.0,13128312541.0,13141631671.0,13236941803.0,13418912420.0,13431866297.0,13441897419.0,13442767715.0,13446883933.0,13449573832.0,13500905352.0,13524254168.0,32537305.0,36428916.0,39575805.0,42960770.0,66794294.0,80909785.0,92538232.0,95569655.0,97595085.0,99128901.0,118638646.0,134243077.0,135812154.0,147934784.0,151365977.0,159236340.0,179215196.0,179499177.0,184505000.0,195715600.0,200827110.0,222880922.0,230036850.0,245105046.0,248116909.0,255390082.0,255838543.0,258806147.0,277713167.0,295622226.0,304156195.0,305074608.0
,305790645.0,329793373.0,340980091.0,358509216.0,361836251.0,372184185.0,406020994.0,417996498.0,446265458.0,478815009.0,482032593.0,490008774.0,498318575.0,540625695.0,568504506.0,584353658.0,611120060.0,614055102.0,616626229.0,627267808.0,651660980.0,658171880.0,658625167.0,659098092.0,680560861.0,684552685.0,686941830.0,689668921.0,694787512.0,713406713.0,713423319.0,713487173.0,737203438.0,751717753.0,792556099.0,798995900.0,804641654.0,842463364.0,859462115.0,877965875.0,878933152.0,929199010.0,932435071.0,963211868.0,966485291.0,967471567.0,1016510991.0,1022084360.0,1027037562.0,1034573800.0,1044468047.0,1054398239.0,1062126247.0,1073103740.0,1106996678.0,1127602302.0,1223829884.0,1265788955.0,1268442078.0,1271710463.0,1273256840.0,1273268450.0,1280095702.0,1283855695.0,1288957672.0,1288957689.0,1320730392.0,1339186128.0,1339212907.0,1339816348.0,1350348321.0,1358094430.0,1362459642.0,1383615096.0,1425362173.0]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14584,"null_rate":0.0,"stats":{"iqr":5401041935.0,"kurtosis":-0.1990653461721621,"max":13525496137.0,"mean":3722756855.24107,"median":1574285300.0,"min":13391742.0,"n_outliers":0,"outlier_rate":0.0,"q1":1000644758.0,"q3":6401686693.0,"skew":1.0734905451396348,"std":3828250123.608008,"zero_rate":0.0}},{"alerts":[],"column":"osm_type","extras":{"singletons":0,"top_values":[["node",11358],["way",3227]]},"kind":"categorical","n":14585,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.762451964535013,"entropy_ratio":0.762451964535013,"top_rate":0.7787452862529997,"top_value":"node"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns","kinds"],"featured_charts":[{"caption":"Shows the dataset is dominated by light_minor and light_major entries, confirming its lighthouse focus.","column":"seamark_type","kind":"bar"},{"caption":"Reveals that flashing lights ('Fl') account for roughly three-quarters of recorded 
light characters.","column":"light_character","kind":"donut"},{"caption":"Indicates most lighthouses are mapped as point nodes (78%) rather than ways/polygons.","column":"osm_type","kind":"donut"},{"caption":"Highlights a strong northern-hemisphere skew with a long tail of southern outliers.","column":"lat","kind":"histogram"},{"caption":"Among the tiny 0.4% with country tags, Latvia, Germany and Estonia lead \u2014 illustrating how sparse this field is.","column":"country","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogues 14,585 lighthouses and related navigational landmarks sourced from OpenStreetMap, with 13 columns covering location (lat/lon), OSM identifiers, names, operators, build years, light characteristics, and heritage status. Coverage is very uneven: descriptive fields like country (99.6% null), heritage (96.9% null), year_built (93.2% null) and operator (92.7% null) are mostly empty, so any analysis on those needs to acknowledge the small annotated subsample. The most reliable signals are geographic (lat/lon, fully populated) and the OSM-derived fields osm_type and seamark_type \u2014 the latter shows the dataset is dominated by light_minor (3,496) and light_major (3,051), confirming its lighthouse focus. Light_character is also worth examining: where recorded, 'Fl' (flashing) overwhelmingly dominates at 75% of entries. Latitude is heavily skewed toward the northern hemisphere (median 40.8\u00b0) with 1,295 outliers flagged in the southern extremes.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","alerts","stats.duplicate_rate","stats.n_duplicates","stats.word_mean","stats.len_mean","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds proper names of lighthouses, with 14,239 unique values across 14,585 rows (near_unique alert) and a mean of 2.33 words per entry. 
The vocabulary is multilingual: 'lighthouse' dominates at 8,487 occurrences but 'faro', 'fyr', 'phare', 'farol' and Cyrillic '\u043c\u0430\u044f\u043a' all appear, signalling Spanish/Italian, Scandinavian, French, Portuguese and Russian sources mixed together. Despite the near-uniqueness, 346 duplicate names (2.4%) exist, which is worth checking before treating this as a key.","role":"identifier","scope":"column","target":"name","treatment":"Use as a display label; do not treat as a unique key without disambiguating the 346 duplicates."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","n_outliers","outlier_rate","n","n_unique","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is a latitude coordinate in decimal degrees, ranging from -63.40 to 81.80 with a median of 40.81, suggesting a Northern-Hemisphere-skewed global distribution. The strong negative skew (-1.46) and 1295 flagged outliers (8.88%) reflect a long tail into the southern hemisphere rather than data errors. Near-unique values (14572 of 14585) indicate per-record geocoordinates with no nulls.","role":"feature","scope":"column","target":"lat","treatment":"Pair with longitude for geospatial features; avoid treating southern-hemisphere points as outliers when modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","skew","kurtosis","iqr","n","n_unique","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is a longitude coordinate, with values spanning -179.20 to 179.34 and a near-symmetric distribution (skew 0.05). The 14565 unique values across 14585 rows suggest each record is a distinct geographic point. 
The wide IQR of 152.94 and negative kurtosis (-0.95) indicate global coverage rather than concentration in any single region.","role":"feature","scope":"column","target":"lon","treatment":"Pair with latitude for geospatial analysis; avoid treating as a standalone scalar feature."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Two-letter ISO country codes (LV, DE, EE, HT, US\u2026) identifying record origin, but the column is effectively empty: 99.58% of the 14,585 rows are null, leaving only ~61 populated values spread across 19 codes. Among the few present, Latvia leads at 29.5% (18 records) with high entropy ratio 0.81, so no single country dominates the observed slice.","role":"metadata","scope":"column","target":"country","treatment":"Drop or treat as missing-indicator only; insufficient coverage for modelling or segmentation."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as a categorical, this column appears to record a height as a small integer (top values are '15', '12', '14', '10', '8'), with 316 distinct values and high entropy ratio 0.8234. The dominant signal is missingness: null_rate is 0.9018, so roughly 90% of the 14585 rows have no value, and even the most common value '15' covers only 4.12% of non-nulls. 
The numeric-looking strings suggest it should be a numeric feature rather than a category.","role":"feature","scope":"column","target":"height","treatment":"Cast to numeric and impute or add a missingness indicator before modelling, given ~90% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","top_values","entropy_ratio","n"],"model":"anthropic:claude-opus-4-7","narrative":"Construction year of the asset, stored as free-text strings that mix four-digit years (e.g. '1875', '1872') with century shorthand like 'C19' and 'C20' \u2014 the latter is actually the most frequent value at 31 occurrences. The column is 93.2% null, so only about 6.8% of the 14585 rows are populated, with 429 distinct values and very high entropy ratio (0.93), so the populated portion is a long, flat tail dominated by 19th-century dates. The mixed encoding (numeric years vs. century codes) means this cannot be treated as a clean numeric field without parsing.","role":"feature","scope":"column","target":"year_built","treatment":"Parse century codes (e.g. 'C19' \u2192 1850) and cast to numeric year; given 93% nulls, add a missing-indicator and consider dropping if coverage stays this low."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is the operating authority responsible for each entry, likely a lighthouse or maritime navigation aid given the prevalence of coast guards and Plovput. The column is 92.7% null, leaving only ~1,065 populated rows spread across 283 distinct operators with high entropy (ratio 0.861) and the top value (U.S. Coast Guard) covering just 7.7%. 
Notable signals include a long tail of regional Philippine Coast Guard stations and at least one non-Latin entry (\u6d77\u4e0a\u4fdd\u5b89\u5e81, Japan Coast Guard), suggesting multilingual free-form values rather than a controlled vocabulary.","role":"metadata","scope":"column","target":"operator","treatment":"Normalize/translate operator strings and group long-tail values; treat missingness as its own category before any encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column classifies maritime seamark features, with 25 distinct types dominated by navigational lights \u2014 'light_minor' covers 46.1% of non-null rows and 'light_major' is the runner-up, together swamping the long tail. Nearly half the rows (48.0%) are null, which triggered the null_rate alert and means this attribute is absent for almost half of the records. Entropy ratio of 0.36 confirms heavy concentration in the top two categories.","role":"feature","scope":"column","target":"seamark_type","treatment":"Treat nulls as a distinct 'unknown' level and group rare categories before one-hot encoding."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","stats.cardinality","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column captures the light character (flash pattern) of navigational lights, with codes like 'Fl' (flashing), 'Iso' (isophase), 'Q' (quick), and 'Oc' (occulting) drawn from standard maritime chart notation. It is overwhelmingly dominated by 'Fl' at 74.7% of the 4,188 non-null rows, yielding a low entropy ratio of 0.354 across 19 distinct codes. 
The headline concern is a 71.31% null rate, meaning the field is populated for fewer than 30% of records.","role":"feature","scope":"column","target":"light_character","treatment":"Treat nulls as a distinct 'unknown' category and one-hot encode, collapsing rare codes into 'other'."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_values","stats.top_rate","stats.top_value","stats.entropy_ratio"],"model":"anthropic:claude-opus-4-7","narrative":"`heritage` is a sparsely populated categorical flag with only 7 distinct values across 14585 rows and a 96.89% null rate. The non-null entries are a messy mix of numeric codes (\"2\", \"3\", \"4\", \"1\") and free-text tokens (\"yes\", \"no\", \"regional\"), with \"2\" alone covering 75.7% of populated cells. The coding inconsistency plus the extreme nullity suggest this field was populated ad hoc rather than against a controlled vocabulary.","role":"metadata","scope":"column","target":"heritage","treatment":"Normalise the mixed numeric/text codes to a single scheme and treat absence as its own category, or drop given the 96.89% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.len_mean","stats.word_mean","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short Wikipedia article titles or links, almost certainly for lighthouses given the dominance of tokens like 'light' (477), 'lighthouse' (335), and multilingual equivalents 'fr:phare' (195), 'es:faro' (181), 'de:leuchtturm' (89), 'pt:farol' (65). Strings are short (mean 22.3 chars, ~3 words) and the column is 86.23% null with only 1,965 unique values across 14,585 rows. 
The interwiki-style prefixes ('fr:', 'es:', 'de:', 'pt:') indicate a language mix encoded in the values themselves rather than free prose.","role":"metadata","scope":"column","target":"wikipedia","treatment":"Parse the 'lang:title' prefix into a language code and title, and treat as an optional cross-reference link rather than a model feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew"],"model":"anthropic:claude-opus-4-7","narrative":"This is almost certainly the OpenStreetMap object id: 14584 unique values across 14585 rows (effectively one per record), no nulls, and a numeric range from 13,391,742 up to 13,525,496,137 that matches OSM's monotonically growing identifier space. The distribution is right-skewed (skew 1.07) with median 1,574,285,300 well below mean 3,722,756,855, reflecting the mix of older low-numbered and newer high-numbered OSM entities rather than any analytic signal.","role":"identifier","scope":"column","target":"osm_id","treatment":"Keep as a key for joins to OSM data; exclude from modelling features."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary categorical flag indicating the OpenStreetMap geometry type, taking only the values 'node' (11358 rows, 77.9%) and 'way' (3227 rows). 
No nulls across 14585 rows, and entropy ratio of 0.76 reflects the moderate imbalance toward nodes.","role":"feature","scope":"column","target":"osm_type","treatment":"One-hot or boolean-encode (node vs way) before modelling."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4440,"prompt_tokens":15397,"total_tokens":19837}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:36:44+00:00","mode":"full","row_count":14585,"sampled_rows":14585,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/lighthouses.json"},"notes":[],"saturn_version":"0.2.0","schema":{"country":"categorical","height":"categorical","heritage":"categorical","lat":"numeric","light_character":"categorical","lon":"numeric","name":"text","operator":"categorical","osm_id":"numeric","osm_type":"categorical","seamark_type":"categorical","wikipedia":"text","year_built":"categorical"}}
