{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"97.6% of rows are unique strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[301,525,304,528,827,705,859,906,7858,560,414,287,151,155,52,42,42,37,5,8,4,3,2,2,2,1,1,1,0,0,0,2,0,0,0,0,0,0,0,1],"edges":[2.0,4.225,6.45,8.675,10.9,13.125,15.350000000000001,17.575000000000003,19.8,22.025000000000002,24.25,26.475,28.700000000000003,30.925,33.150000000000006,35.375,37.6,39.825,42.050000000000004,44.275,46.5,48.725,50.95,53.175000000000004,55.400000000000006,57.625,59.85,62.075,64.30000000000001,66.525,68.75,70.97500000000001,73.2,75.425,77.65,79.875,82.10000000000001,84.325,86.55,88.775,91.0]},"near_unique":true,"sample":["Far de Capdepera","Lighthouse 12736917711","\u51fa\u96f2\u65e5\u5fa1\u7895\u706f\u53f0","Pilsumer Leuchtturm","Lighthouse 3592895556","H\u1ea3i \u0111\u0103ng V\u1ea1n Ca","Lighthouse 3954003290","Lighthouse 666429022","Farol do Bugio","Vejsn\u00e6s Nakke","Lighthouse 8302234386","Lighthouse 6367102379","Lighthouse 1572675044","Punta Ballena","Lighthouse 1576209851","Lighthouse 1293082078","\u0422\u0435\u0440\u0438\u0431\u0435\u0440\u0441\u043a\u0438\u0439","Lighthouse 303765533","Mercusuar Cirebon","Lighthouse 3073675847","Lighthouse 1579311833","San Fernando Point Lighthouse","Lighthouse 1216465472","Tvikobben","Faros","Lighthouse 13074696188","Lighthouse 6816680116","Lighthouse 1295700390","K\u0101ingaroa automatic light","H\u1ea3i \u0111\u0103ng S\u01a1n Ch\u00e0","S\u00f8ndre Katland fyr","Lighthouse 9848807466","Lighthouse 9848807460","H\u1ea3i \u0111\u0103ng Cao Trang","Lighthouse 2266550487","Lighthouse 3790789305","Danushkodi Lighthouse","Lighthouse 5587474511","Lighthouse 4168011445","Lighthouse 6274650580","North Ronaldsay Lighthouse","\u8d64\u7901\u5d0e\u706f\u53f0","Lighthouse 1572630476","St. Abbs Lighthouse","Lighthouse 1124285588","\u5b89\u57fa\u706f\u5854","Rose Blanche Lighthouse","Faro de San Felipe","Homlungen fyr","Baileys harbor Upper Range Light"],"top_values":[],"top_words":[["lighthouse",8487],["faro",780],["de",739],["light",622],["point",395],["fyr",354],["phare",341],["island",274],["farol",217],["\u043c\u0430\u044f\u043a",193],["punta",167],["do",128],["la",125],["di",121],["tulepaak",121],["cape",118],["da",118],["h\u1ea3i",96],["\u0111\u0103ng",95],["head",92],["range",89],["du",88],["mercusuar",85],["\u03c6\u03ac\u03c1\u03bf\u03c2",79],["leuchtturm",75]],"vocab_skipped":null,"word_histogram":{"counts":[1557,0,9038,0,0,2381,0,0,1118,0,340,0,0,106,0,0,26,0,0,10,0,4,0,0,2,0,0,0,0,3],"edges":[1.0,1.3666666666666667,1.7333333333333334,2.0999999999999996,2.466666666666667,2.833333333333333,3.1999999999999997,3.5666666666666664,3.933333333333333,4.3,4.666666666666666,5.033333333333333,5.3999999999999995,5.766666666666667,6.133333333333333,6.5,6.866666666666666,7.2333333333333325,7.6,7.966666666666666,8.333333333333332,8.7,9.066666666666666,9.433333333333334,9.799999999999999,10.166666666666666,10.533333333333333,10.899999999999999,11.266666666666666,11.633333333333333,12.0]}},"kind":"text","n":14585,"n_null":0,"n_unique":14239,"null_rate":0.0,"stats":{"allcaps_rate":0.057936235858759,"boilerplate_rate":0.0,"duplicate_rate":0.023723003085361672,"emoji_rate":0.0,"len_max":91,"len_mean":18.947685978745287,"len_median":21.0,"len_min":2,"len_p95":27.0,"n_duplicates":346,"n_empty":0,"one_word_rate":0.10675351388412753,"readability_flesch_mean":75.78385,"url_rate":0.0,"vocab_size":14670,"word_mean":2.326911210147412,"word_median":2.0}},{"alerts":[{"code":"outliers","level":"warn","message":"8.9% rows beyond 1.5 IQR"}],"column":"lat","extras":{"histogram":{"counts":[3,2,38,44,29,92,108,112,147,101,93,113,58,92,58,130,129,168,121,239,549,274,241,383,273,226,1062,1554,1300,1976,1257,625,900,1039,410,357,179,73,22,8],"edges":[-63.3959402,-59.76603041,-56.13612062,-52.50621083,-48.87630104,-45.24639125,-41.616481459999996,-37.98657167,-34.35666188,-30.726752089999998,-27.0968423,-23.46693251,-19.837022719999993,-16.207112929999994,-12.577203139999995,-8.947293349999995,-5.317383559999996,-1.6874737699999969,1.9424360200000024,5.572345810000002,9.2022556,12.83216539,16.46207518,20.09198497,23.721894760000012,27.35180455000001,30.98171434000001,34.61162413000001,38.24153392000001,41.87144371000001,45.50135350000001,49.13126329000001,52.761173080000006,56.391082870000005,60.020992660000005,63.650902450000004,67.28081224,70.91072203,74.54063182,78.17054161,81.8004514]},"sample":[41.6714041,50.7783104,43.8510395,17.9184021,8.2693766,51.9928567,-22.9379526,-52.4558419,66.0765851,44.1344915,33.9542667,57.8568937,54.4403358,-55.9635495,53.9264838,22.3288382,49.7737356,44.0863076,44.203861,55.9543672,44.1531001,45.0564222,43.8735333,59.2113333,44.0664852,44.2076771,35.676386,63.1144881,53.6215619,44.0052476,43.3903332,49.8594791,43.9222227,49.3912747,57.1396436,10.383199,43.947995,12.1896131,44.3121629,48.7709607,69.0444998,53.5463116,13.3085045,59.8828953,59.7554518,59.4532248,59.3664653,58.8249234,55.6604444,-45.7736721,-36.4450733,-45.3918842,-40.9235928,61.2691607,62.4307248,11.9521043,54.2271605,59.9011496,43.6555624,42.359108,38.0204086,-2.8038383,-3.8450354,-16.0286572,-22.9343148,-7.465,-0.9102729,-12.9287235,-22.8997339,-33.2025318,-19.8340168,-31.8853476,-12.24,-3.8748564,-23.0440671,-20.3187463,-12.5774542,47.12506,47.2169187,69.7184425,64.7687067,45.0388572,32.6953636,34.6762224,34.6578291,33.9617482,49.4483171,60.2744235,39.0784249,22.6209167,13.6343902,52.514957,44.7296902,42.893183,34.6196273,48.2721074,9.998408,-15.7280687,46.9043906,45.8577344,65.5280773,61.7311028,30.9297448,45.5374127,51.9813825,33.7535841,45.0235419,43.4294953,39.0135117,38.47445,38.2016785,38.21195,47.9265763,36.3577023,60.7270144,60.6863539,38.6194269,45.4789943,57.8553362,57.9987531,57.6345776,49.4349667,50.4055851,59.7276904,65.5004707,60.1063694,38.5879695,41.1845688,42.1245803,42.394113,43.357353,43.4714252,43.4738022,43.7203898,43.3919342,37.9346685,37.9364881,37.6378303,39.3493811,38.7595469,34.938957,35.465775,36.6323003,36.9305418,37.1425328,33.0596166,39.4162752,42.550668,35.4179909,35.8634532,36.4150167,37.2953845,37.5782333,38.1956957,35.1468737,11.7899394,-33.9073861,40.3560885,45.3942365,46.1335147,48.6847062,64.5607156,-46.8033235,60.8469866,45.3874997,20.4022974,44.2490013,44.8607453,60.0002322,46.5935166,19.2037991,38.1535519,44.9145512,8.8806411,13.5988205,-16.9261124,45.820269,45.259679,45.1969648,45.2082582,-33.5196236,46.6660625,45.559044,40.2360972,40.2375601,52.9246731,34.1753669,34.2152204,30.7346622,33.8730162,18.4642652,31.2027887,38.2035779,-34.7265294,9.823124,51.4486063,-31.9055328,6.9656508,42.148593,58.934,33.9450281,43.8926067,-53.6872882,50.1832557,58.8779332,61.2614787,52.0608629,50.8604775,-16.1600776,12.2372512,23.1933206,32.7730951,-4.5029704,34.4685373,34.5124177,55.9093,35.0242558,-14.9005878,52.9091437,49.4011798,30.3649874,25.131638,43.3508242,43.3507116,37.8903097,36.219807,35.5500307,-51.7617727,32.6449455,41.2342217,35.536717,32.9452837,35.8993295,-16.0557479,-11.3079027,9.4580799,22.4636148,16.3255138,33.8719945,8.8676992,39.5907755,35.0808024,40.9030654,19.6590195,20.1979851,50.8723949,45.5043487,9.8790874,9.724893,48.1817569,38.6221144,26.3430178,18.263524,59.3741733,18.2790147,44.3213525,40.3532652,7.3709738,44.0948804,33.9391423,36.9713473,39.6216954,42.4221825,-12.3210412,44.1478159,59.9970763,-39.3848285,-49.739583,44.4499615,51.4604395,22.2272222,22.1793056,25.7815901,46.0438814,6.1683577,-50.4754085,-3.7945841,33.3849165,13.0090044,32.6570056,15.6584425,50.4421201,34.5676969,36.1109763,7.5791491,10.0560392,32.7615713,33.255417,39.4535883,60.9116918,12.1033402,35.6630848,-39.6923322,30.2941182,33.6927468,36.339269,21.4275556,38.9632635,58.9140792,47.0821609,31.0708579,33.9199765,40.8963461,35.3519633,32.1837374,-20.0615561,17.6140628,32.7752227,72.5635571,11.6032424,64.0028306,16.9559952,9.426569,25.1148052,46.3720012,46.4898188,7.2979655,42.5614804,24.8488922,10.6391719,9.464487,66.8223187,34.4900904,30.5897531,37.1187761,24.8135763,27.2243136,40.0052366,68.243444,73.4814403,68.5016646,42.7346097,69.7112185,73.2429683,73.3332395,-26.8016505,35.9909355,52.4392446,32.6426611,18.6825091,33.9678996,25.4579908,43.587194,50.0327973,41.0375734,44.1778388,19.0218697,46.8450033,43.2933615,-16.9049701,33.5377305,55.7073792,11.9955841,54.9609737,37.5004848,34.4941006,34.46536,34.4310853,54.6048019,34.2319396,7.819389,33.5919086,42.0984113,46.0050231,42.0593258,56.4892628,33.5510418,21.2845641,12.399333,-20.7179242,36.0265494,38.5994861,35.7285556,-1.7191211,17.0540543,54.2426369,39.0098228,69.4279549,7.9433388,41.3629619,52.9351267,-40.3116955,33.8514004,43.2009832,47.2336398,43.9711673,67.5313774,40.3276119,36.6603108,40.02071,44.8332861,30.4350857,36.2141194,45.1483441,45.6491728,43.2948854,48.4235201,44.9906157,41.7156241,47.8748384,54.4062779,39.1405673,43.3938733,48.8011173,39.3485816,43.3967018,68.2575179,60.1199849,43.1249348,43.3598816,46.4898263,64.8637148,27.864073,54.3997301,54.6251133,39.3663321,54.6986234,66.5361791,36.3063263,57.6830563,41.9526422,51.5046019,54.2873154,41.3687815,38.209523,43.5518912,43.6231105,-13.216804,61.1446267,10.8080865,44.375308,49.3303925,41.1478258,60.4116695,33.2472249,6.4462109,8.2254425,15.2929235,54.3946082,41.711158,40.8370692,-46.6120495,40.7782797,31.7903589,42.6398123,50.2142405,68.1176868,54.4949656,51.6857553,-15.0533519,57.5258668,-12.9636452,45.6562085,57.2703937,41.4702633,42.2697065,41.4621479,37.1992415,51.0488813,31.8695808,-43.0511105,43.6358687,55.7211233,50.4930988,58.0722835,43.6135848,42.7341089,19.1339671,44.6845359,63.8016937,19.3686016,51.1546685,41.3250717,-17.1573719,56.1548942,42.133275,44.0143648,-5.7027759,44.5256391,33.8598229,37.0831661,66.5083215,46.5158036,56.8419842,33.2566829,16.1222128,19.3397646,24.7939694,-1.6611695,34.7372649,31.3912039,43.3852924,58.4114173,43.9536174,-2.8825512,34.3651228,0.9355287,29.5525157,47.6743867,33.0388856]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14572,"null_rate":0.0,"stats":{"iqr":20.8711248,"kurtosis":2.028331985362966,"max":81.8004514,"mean":34.520122738196775,"median":40.8121914,"min":-63.3959402,"n_outliers":1295,"outlier_rate":0.08878985258827563,"q1":28.1114396,"q3":48.9825644,"skew":-1.4577018596013198,"std":24.643138078763677,"zero_rate":0.0}},{"alerts":[],"column":"lon","extras":{"histogram":{"counts":[39,3,20,6,8,179,235,78,38,132,544,805,835,379,272,178,73,128,277,1272,770,1383,1441,659,258,141,77,38,131,73,57,245,388,955,1230,818,199,66,49,106],"edges":[-179.1974645,-170.2340229375,-161.270581375,-152.3071398125,-143.34369825,-134.38025668749998,-125.416815125,-116.45337356249999,-107.489932,-98.5264904375,-89.56304887499999,-80.5996073125,-71.63616575,-62.672724187499995,-53.709282625,-44.745841062500006,-35.7823995,-26.818957937499988,-17.855516375000008,-8.892074812499999,0.07136675000000992,9.03480831249999,17.998249875,26.961691437500008,35.92513299999999,44.8885745625,53.852016125000006,62.815457687500015,71.77889925,80.74234081249998,89.70578237499998,98.66922393749999,107.6326655,116.59610706250001,125.55954862500002,134.52299018750003,143.48643174999998,152.4498733125,161.413314875,170.3767564375,179.340198]},"sample":[-69.9498826,-1.0890453,13.0154223,-76.1843645,73.0260912,-7.5864951,-43.1343108,-69.5448795,-18.6472571,-68.4476379,134.6990296,-6.6419659,16.3785579,-67.2208155,-3.007515,120.3665068,31.4377407,39.0660575,-76.3096098,15.7054122,-87.5624102,-87.4928913,-88.3626168,23.501,-76.54982,-76.6377608,10.8944317,7.6630584,9.530418,-77.6829057,-65.6214909,-126.674116,135.5198919,-63.5939486,-2.0749327,124.0185041,-76.7986737,-69.9985805,-64.1683878,-123.3670634,33.0428068,8.5700998,-59.6483749,19.0848224,19.1029125,18.3874517,18.3105313,17.7288133,21.1541334,170.7287703,174.9246366,170.8662094,173.8335696,4.8111752,5.7590512,-66.6766568,13.3881068,30.0965885,-65.098764,14.4085758,12.3341334,-41.7293394,-32.421383,-38.9219871,-43.1463773,-34.7933333,-46.1861887,-38.5195887,-43.1677034,-52.7060307,-40.063374,-52.1511314,-37.78,-32.461515,-44.112767,-40.2722127,-38.001642,-70.7019282,-70.2747805,170.4553553,-23.61616,142.5153648,-79.8836242,135.2958859,134.5832303,130.9539036,-2.5329503,26.4390762,17.1359364,120.2568333,121.0366264,141.2054224,136.3370365,133.8825541,128.5479852,-70.2023251,76.221506,46.3045128,-60.4519598,-61.5035073,23.5570631,-114.5826109,74.6220759,13.6541445,4.0814067,11.0124443,-74.653645,3.7775268,20.9135191,20.805,20.4422071,22.9545333,-53.3629774,25.3571183,28.6777687,28.7240539,15.8285398,-66.0826168,11.5530275,11.5154079,18.2733346,-2.7023667,-3.5129427,21.4006353,24.842823,-1.3474847,-0.0548546,-8.7041572,-8.8426715,-8.8187621,-8.3785539,-8.2328734,-8.2401715,-7.8140825,-1.7919056,23.678821,23.6619747,23.1581604,22.9865987,22.8606866,23.9906774,25.2165366,24.8602098,24.7332277,24.5161367,-16.3151756,3.2680785,27.6613658,26.932672,26.2322965,27.4027,26.7630352,26.3979833,26.7826964,35.9213244,-15.6521382,18.4337014,27.9575949,-75.8584586,-63.7771959,-123.2935602,39.8111337,-75.6384872,33.7621977,-63.6312015,121.9588101,-66.3924711,-62.4492247,20.0013211,-63.8663732,-96.1204052,128.6109343,-75.1470456,76.5659814,121.2624826,146.0020551,-61.0391791,-61.2854805,-61.1360903,-61.158725,27.1059243,32.0126994,-80.5039532,23.2806677,23.2788394,158.4173824,132.8003406,132.3948726,130.9906443,131.0101796,-69.876351,129.4751994,128.6031604,135.9938315,125.4404834,0.4373143,-71.5268547,79.8309252,139.5212143,23.4828333,132.4403696,-64.7082431,-67.8475931,-61.2631686,17.9283203,-117.574564,-127.948127,-127.6123145,49.7527424,124.37429,119.4170214,132.6288878,129.8884365,133.9586698,134.0158009,12.7248167,138.8972873,-148.6296635,158.4525394,154.8277267,-89.08967,121.8230444,-8.2172131,-8.2167432,-76.2359672,136.13142,134.1914655,-72.5088736,-16.9111231,29.1121447,134.0074417,132.457999,14.5229825,-69.1330542,131.7651988,-13.8336425,120.4386261,-98.5668733,132.0172548,112.8309975,-9.0765107,24.7378857,27.4683067,-80.1090654,92.5378732,156.6662327,32.644781,115.5158155,114.2918068,-123.110316,24.125767,126.7480187,109.7287014,144.7265006,109.7254184,28.6946009,26.6905908,149.169791,-87.6482945,126.3295155,137.586988,16.7518099,18.5469158,13.5955564,28.6722779,-1.1577004,-62.01455,-67.7222532,-73.2761076,-56.8584513,91.8001944,91.8211667,119.6591391,141.9190877,102.3453107,-75.2779636,-38.5944772,133.2832766,123.3203624,14.268834,-96.5004153,-125.5071383,134.1042033,133.131882,123.1673671,123.8990986,128.7432234,129.134007,141.9627407,30.4107946,-68.9335089,135.3026333,-73.3971905,120.2747747,135.3267071,133.2820277,111.2534167,-9.4234396,13.9208159,39.1285765,130.6547706,118.3040172,12.9644329,129.344998,129.9678943,148.9640267,-101.5567589,132.9341633,129.5584363,43.1434406,-51.6985306,120.4357409,126.4465763,62.3304717,6.4657751,-1.8047658,124.0204659,-83.3376968,50.853767,122.725376,123.2291162,13.3420768,135.3697908,111.3929468,-8.6179512,118.7684351,33.8430185,-85.7442261,44.2324831,80.4208008,73.58153,133.0965113,60.4885874,56.2325167,70.0639637,153.1374485,129.567418,-127.2737694,131.7653279,108.6931639,136.2189835,119.8482041,-5.9203866,-117.415853,40.4880288,28.6620736,-104.3343311,-71.1317683,-2.1322736,35.1673335,129.966993,52.305161,121.3836117,20.2636993,137.3512781,-114.362625,-114.3478737,-114.3156722,-130.536998,134.6486367,110.500944,134.3649065,142.9842489,-1.1187028,11.8298266,-2.7873036,129.8950817,-89.7030118,-81.474333,-45.6924731,129.5019046,-90.1014564,120.0026389,108.6930938,-25.3572454,-130.3439966,26.6068193,-93.8899428,-76.755327,141.3404229,12.4525195,147.8013914,118.3524898,27.9216834,-2.1827221,47.6392499,-64.027771,50.6115621,51.509885,52.986681,50.180515,122.5269029,29.9868326,29.7591901,13.7559746,17.0086784,-4.7789355,-81.2481508,2.9340098,-4.1128429,18.6611996,20.2485174,4.9857623,-3.3954685,-123.8261727,3.7017247,54.211904,-1.1214739,17.1964422,-8.8445136,-1.8047692,-24.039024,-15.3846326,41.9937956,8.3909609,-74.4141541,-5.5135479,-16.0261614,-121.9016965,-4.036471,19.1497344,3.4735211,8.6520818,-72.3762975,-0.5136679,10.289594,-70.2078665,12.7145274,21.3027473,124.908975,-73.2313355,-123.264586,16.8482414,19.5926832,134.1756524,3.3976309,81.4119494,-23.7711208,-164.7446054,140.9643155,140.7429602,168.3599905,14.0890313,129.7825466,18.1132715,-5.4766405,13.5832711,11.2382099,39.2516451,40.7866203,-7.694643,40.481187,-84.4655214,-133.6315528,-70.5674599,-70.7591384,-71.3629064,15.1840912,2.3644202,129.9378626,147.3427436,-79.404905,-4.9669837,5.8625121,8.0528155,-79.3434132,-87.7782198,72.781459,-66.6931392,-16.636568,105.9310834,2.7299857,2.1522518,-39.5097167,21.0232826,41.6611112,-68.7264429,53.6494304,14.4483442,-78.6371462,136.723132,-16.5439325,142.3749087,-5.1240102,-8.495484,108.21388,105.825075,66.9775751,127.4088618,119.5203616,119.7075536,-87.8596764,-134.9550234,-86.4694213,107.3397178,136.9084789,104.9225825,119.1126251,-124.4869054,-16.4060836]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14565,"null_rate":0.0,"stats":{"iqr":152.9449913,"kurtosis":-0.95366208364038,"max":179.340198,"mean":23.038825234981143,"median":14.9745494,"min":-179.1974645,"n_outliers":0,"outlier_rate":0.0,"q1":-40.4996302,"q3":112.4453611,"skew":0.052602242829746154,"std":79.39557268151874,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"11 singleton categories"},{"code":"null_rate","level":"warn","message":"99.6% null"}],"column":"country","extras":{"singletons":11,"top_values":[["LV",18],["DE",8],["EE",7],["HT",6],["US",5],["GB",2],["PT",2],["IM",2],["JP",1],["KY",1],["PH",1],["IN",1],["FI",1],["PL",1],["IT",1],["RU",1],["UZ",1],["MX",1],["TW",1]]},"kind":"categorical","n":14585,"n_null":14524,"n_unique":19,"null_rate":0.9958176208433321,"stats":{"cardinality":19,"entropy":3.441728321839467,"entropy_ratio":0.8102135243474123,"top_rate":0.29508196721311475,"top_value":"LV"}},{"alerts":[{"code":"long_tail","level":"info","message":"197 singleton categories"},{"code":"null_rate","level":"warn","message":"90.2% null"}],"column":"height","extras":{"singletons":197,"top_values":[["15",59],["12",55],["14",54],["10",51],["8",45],["18",44],["20",43],["13",38],["6",37],["17",33],["25",31],["11",28],["30",28],["16",27],["23",26],["21",21],["19",20],["28",19],["9",19],["26",18]]},"kind":"categorical","n":14585,"n_null":13153,"n_unique":316,"null_rate":0.9018169352074049,"stats":{"cardinality":316,"entropy":6.837224026149949,"entropy_ratio":0.8233868684033955,"top_rate":0.04120111731843575,"top_value":"15"}},{"alerts":[{"code":"long_tail","level":"info","message":"271 singleton categories"},{"code":"null_rate","level":"warn","message":"93.2% null"}],"column":"year_built","extras":{"singletons":271,"top_values":[["C19",31],["1875",16],["1872",15],["1881",12],["C20",12],["1906",11],["1871",11],["1877",11],["1882",11],["1874",11],["1873",11],["1939",10],["1898",10],["1897",9],["1870",9],["1909",8],["1884",8],["1890",8],["1911",7],["1950",7]]},"kind":"categorical","n":14585,"n_null":13593,"n_unique":429,"null_rate":0.9319849160095989,"stats":{"cardinality":429,"entropy":8.142359110821328,"entropy_ratio":0.9311050686755546,"top_rate":0.03125,"top_value":"C19"}},{"alerts":[{"code":"long_tail","level":"info","message":"167 singleton categories"},{"code":"null_rate","level":"warn","message":"92.7% null"}],"column":"operator","extras":{"singletons":167,"top_values":[["U.S. Coast Guard",82],["Plovput",49],["INEA",42],["Tagbilaran Station, Philippine Coast Guard",27],["Cebu Station, Philippine Coast Guard",26],["Catbalogan Station, Philippine Coast Guard",18],["Surigao Station, Philippine Coast Guard",18],["Masbate Station, Philippine Coast Guard",17],["Maasin Station, Philippine Coast Guard",17],["\u6d77\u4e0a\u4fdd\u5b89\u5e81",16],["Directorate General of Lighthouses and Lightships",15],["Romblon Station, Philippine Coast Guard",15],["Iloilo Station, Philippine Coast Guard",14],["Puerto Princesa Station, Philippine Coast Guard",14],["Bacolod Station, Philippine Coast Guard",13],["Sorsogon Station, Philippine Coast Guard",13],["Cagayan de Oro Station, Philippine Coast Guard",13],["Marine Department of Sabah",13],["Dumaguete Station, Philippine Coast Guard",12],["Appari Station, Philippine Coast Guard",12]]},"kind":"categorical","n":14585,"n_null":13520,"n_unique":283,"null_rate":0.926979773740144,"stats":{"cardinality":283,"entropy":7.013278518406073,"entropy_ratio":0.8610893556618491,"top_rate":0.07699530516431925,"top_value":"U.S. Coast Guard"}},{"alerts":[{"code":"null_rate","level":"warn","message":"48.0% null"}],"column":"seamark_type","extras":{"singletons":7,"top_values":[["light_minor",3496],["light_major",3051],["landmark",716],["beacon_special_purpose",146],["beacon_lateral",102],["beacon_cardinal",19],["light",9],["beacon",5],["beacon_isolated_danger",5],["building",4],["daymark",4],["radio_station",3],["pile",3],["signal_station_traffic",3],["signal_station_warning",3],["buoy_lateral",3],["navigation_line",2],["fishing_facility",2],["light_vessel",1],["cone",1]]},"kind":"categorical","n":14585,"n_null":7002,"n_unique":25,"null_rate":0.4800822763112787,"stats":{"cardinality":25,"entropy":1.6574304304948864,"entropy_ratio":0.3569082165258199,"top_rate":0.4610312541210603,"top_value":"light_minor"}},{"alerts":[{"code":"null_rate","level":"warn","message":"71.3% null"}],"column":"light_character","extras":{"singletons":4,"top_values":[["Fl",3126],["Iso",249],["F",245],["Q",182],["Oc",151],["LFl",148],["VQ",24],["Al.Fl",24],["Mo",14],["FFl",4],["Al",4],["IQ",3],["Al.LFl",2],["Al.Oc",2],["Q+LFl",2],["IVQ",1],["Fl(2)",1],["LFl W 10s",1],["Al.Iso",1]]},"kind":"categorical","n":14585,"n_null":10401,"n_unique":19,"null_rate":0.7131299280082276,"stats":{"cardinality":19,"entropy":1.5033610368706383,"entropy_ratio":0.35390458808745956,"top_rate":0.747131931166348,"top_value":"Fl"}},{"alerts":[{"code":"null_rate","level":"warn","message":"96.9% null"}],"column":"heritage","extras":{"singletons":1,"top_values":[["2",343],["3",52],["4",26],["yes",25],["1",4],["regional",2],["no",1]]},"kind":"categorical","n":14585,"n_null":14132,"n_unique":7,"null_rate":0.9689406924922866,"stats":{"cardinality":7,"entropy":1.2438994308704119,"entropy_ratio":0.4430859173156191,"top_rate":0.7571743929359823,"top_value":"2"}},{"alerts":[{"code":"near_unique","level":"info","message":"97.9% of rows are unique strings"},{"code":"null_rate","level":"warn","message":"86.2% null"}],"column":"wikipedia","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[29,74,20,9,17,22,24,45,107,93,132,139,178,307,147,129,103,168,56,38,38,22,38,11,13,5,20,5,2,2,2,9,0,0,0,0,1,0,1,2],"edges":[6.0,7.225,8.45,9.675,10.9,12.125,13.350000000000001,14.575000000000001,15.8,17.025,18.25,19.475,20.700000000000003,21.925,23.150000000000002,24.375,25.6,26.825000000000003,28.05,29.275000000000002,30.5,31.725,32.95,34.175,35.400000000000006,36.625,37.85,39.075,40.300000000000004,41.525000000000006,42.75,43.975,45.2,46.425000000000004,47.650000000000006,48.875,50.1,51.325,52.550000000000004,53.775000000000006,55.0]},"near_unique":true,"sample":["de:Neuer Leuchtturm Borkum","hr:Svjetionik Hrid Blitvenica","en:Gay Head Light","ja:\u845b\u767b\u652f\u5cac\u706f\u53f0","en:Sentinel Island Light","it:Faro di Scilla","es:Faro Punta Bajos","en:Grassy Island Range Lights","fr:Phare de K\u00e9r\u00e9on","fr:Phare de Tuskar Rock","gl:Faro das Illas Sisargas","ca:Far de Tossa","fr:Phare de Ho\u015fk\u00f6y","fr:Phare de Pen-Men","en:Cape Beale Light","ja:\u5e73\u4e45\u4fdd\u57fc\u706f\u53f0","it:Faro di Capo Testa","fr:Phare de la Gacholle","en:Boulder Bank Lighthouse","en:Cape Kumukahi Light","fr:Phare d'Isla de Aves","ca:Far del Cap de Sant Antoni","pt:Farol da Ba\u00eda da Trai\u00e7\u00e3o","en:Nab Tower","it:Faro dismesso della Diga Curvilinea","ja:\u7f85\u81fc\u706f\u53f0","pl:Latarnia Volty","af:Vo\u00ebleiland-vuurtoring","en:Wadjemup Lighthouse","af:Donkin Hill-vuurtoring","de:Pilsumer Leuchtturm","de:Leuchtturm Greifswalder Oie","it:Faro di Capo Carbonara","tr:\u0130\u011fneada Feneri","en:Reedy Island Range Rear Light","es:Faro Santa Cruz","zh:\u6771\u5409\u5dbc\u71c8\u5854","de:Leuchtturm Staberhuk","fr:Phare de Point-Amour","en:Au Sable Light","en:Mukilteo Light","sr:\u0421\u0432\u0435\u0442\u0438\u043e\u043d\u0438\u0446\u0438 \u043d\u0430 \u0443\u0448\u045b\u0443 \u0422\u0430\u043c\u0438\u0448\u0430 \u0443 \u0414\u0443\u043d\u0430\u0432","en:South Stack Lighthouse","en:Craighill Channel Lower Range Front Light","en:Pond Island Light","en:Bradleys Head Light","en:Ruhnu Lighthouse","ja:\u9678\u4e2d\u9ed2\u57fc\u706f\u53f0","da:Ryvarden fyr","en:Queen's Wharf Lighthouse"],"top_values":[],"top_words":[["light",477],["de",357],["lighthouse",335],["fr:phare",195],["es:faro",181],["island",128],["point",121],["de:leuchtturm",89],["punta",70],["pt:farol",65],["fyr",64],["it:faro",56],["en:cape",56],["di",52],["la",46],["du",44],["head",43],["da",42],["cabo",41],["hr:svjetionik",41],["harbor",35],["range",30],["del",28],["en:point",28],["gl:faro",27]],"vocab_skipped":null,"word_histogram":{"counts":[153,0,0,461,0,0,0,877,0,0,0,346,0,0,0,121,0,0,38,0,0,0,6,0,0,0,4,0,0,2],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":14585,"n_null":12577,"n_unique":1965,"null_rate":0.8623243057936236,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.02141434262948207,"emoji_rate":0.0,"len_max":55,"len_mean":22.296314741035857,"len_median":22.0,"len_min":6,"len_p95":33.0,"n_duplicates":43,"n_empty":0,"one_word_rate":0.07619521912350598,"readability_flesch_mean":48.91560000000001,"url_rate":0.0,"vocab_size":2370,"word_mean":2.9955179282868527,"word_median":3.0}},{"alerts":[],"column":"osm_id","extras":{"histogram":{"counts":[1272,1457,998,2358,1852,348,245,266,212,223,234,178,187,216,220,241,180,113,143,248,351,94,185,147,150,131,163,210,199,422,102,91,107,97,198,162,158,161,132,134],"edges":[13391742.0,351194351.875,688996961.75,1026799571.625,1364602181.5,1702404791.375,2040207401.25,2378010011.125,2715812621.0,3053615230.875,3391417840.75,3729220450.625,4067023060.5,4404825670.375,4742628280.25,5080430890.125,5418233500.0,5756036109.875,6093838719.75,6431641329.625,6769443939.5,7107246549.375,7445049159.25,7782851769.125,8120654379.0,8458456988.875,8796259598.75,9134062208.625,9471864818.5,9809667428.375,10147470038.25,10485272648.125,10823075258.0,11160877867.875,11498680477.75,11836483087.625,12174285697.5,12512088307.375,12849890917.25,13187693527.125,13525496137.0]},"sample":[257496069.0,269331345.0,277063575.0,290802041.0,316624604.0,322520727.0,322767085.0,345518842.0,367452790.0,367795300.0,387744952.0,388215533.0,414678633.0,418621659.0,475564875.0,476662470.0,498342239.0,527372298.0,529166256.0,553326916.0,582707558.0,582711628.0,582715740.0,677529069.0,687193062.0,687193063.0,705194180.0,733230569.0,856616788.0,867798786.0,870425741.0,870822946.0,891114739.0,896638055.0,898145504.0,927670772.0,937255146.0,967990618.0,970996565.0,973813067.0,993454873.0,1015989176.0,1017716538.0,1030530369.0,1030699139.0,1033001753.0,1033048198.0,1035157745.0,1038134988.0,1052816798.0,1052817308.0,1052817376.0,1052818017.0,1115686209.0,1115686597.0,1124747028.0,1140833417.0,1141447490.0,1147492562.0,1163031158.0,1170470340.0,1181448690.0,1181450779.0,1181454393.0,1181456363.0,1181456551.0,1181456927.0,1181459419.0,1181487370.0,1181489879.0,1181491434.0,1181491592.0,1181492375.0,1181492599.0,1181494151.0,1181497206.0,1181499916.0,1206538071.0,1207646801.0,1208590623.0,1208592533.0,1211610673.0,1219931322.0,1223938651.0,1223938763.0,1223939632.0,1251978042.0,1255497399.0,1258031782.0,1271039565.0,1271040202.0,1271041687.0,1271041814.0,1271041846.0,1271043104.0,1274845171.0,1293790323.0,1295700444.0,1316325490.0,1325500623.0,1336828827.0,1342633746.0,1346126598.0,1347582375.0,1352210001.0,1396142254.0,1419264827.0,1420666397.0,1427393582.0,1431246153.0,1431273924.0,1431277845.0,1435090056.0,1446175116.0,1464343749.0,1471742488.0,1511519712.0,1518364509.0,1531116614.0,1531129767.0,1533280278.0,1543875388.0,1546187887.0,1550164884.0,1551816219.0,1552943834.0,1566344543.0,1568411704.0,1572564061.0,1572630476.0,1572649626.0,1572671377.0,1572674715.0,1572676426.0,1572817975.0,1574284874.0,1574285300.0,1574289106.0,1575974635.0,1576184962.0,1576208850.0,1576210131.0,1576226091.0,1576239261.0,1576239703.0,1577313045.0,1578665290.0,1578792158.0,1579308033.0,1579308061.0,1579308856.0,1579314776.0,1579315215.0,1579316275.0,1581598638.0,1581753513.0,1581821895.0,1587867548.0,1607156650.0,1635832883.0,1676341025.0,1708934514.0,1745142248.0,1747283270.0,1749004607.0,1779228447.0,1802194462.0,1820917891.0,1851202396.0,1918057029.0,1922148046.0,1948384294.0,2173695042.0,2222721291.0,2279145540.0,2282340081.0,2332459660.0,2336557282.0,2362575442.0,2362576251.0,2394067787.0,2414429522.0,2450039128.0,2474402398.0,2474402405.0,2548274918.0,2819877848.0,2819878132.0,3073675953.0,3074371583.0,3085625775.0,3090584881.0,3194528389.0,3213913862.0,3218331761.0,3264209317.0,3355313876.0,3365506089.0,3377941302.0,3382406087.0,3412041316.0,3523929335.0,3607789181.0,3621592518.0,3666245109.0,3673886182.0,3772706530.0,3772706535.0,3864988145.0,3867189595.0,3869478973.0,3928356747.0,3958654205.0,4041431688.0,4041518908.0,4097748289.0,4110476524.0,4182942012.0,4198431679.0,4212014963.0,4282019929.0,4322578745.0,4335168799.0,4335168800.0,4345307906.0,4370568053.0,4394710881.0,4440025799.0,4455877089.0,4472474312.0,4491583362.0,4499197712.0,4564081941.0,4643267859.0,4677140819.0,4730639098.0,4741525333.0,4756528571.0,4855633587.0,4856233148.0,4874639411.0,4959779581.0,5037921138.0,5118328681.0,5157009043.0,5165791360.0,5179731552.0,5206098615.0,5206187367.0,5237331900.0,5247496381.0,5267412818.0,5313721639.0,5343906622.0,5428863284.0,5429066874.0,5506911318.0,5526253460.0,5591060389.0,5619644612.0,5697555021.0,5840150856.0,5866016356.0,6148579245.0,6335111208.0,6518138123.0,6527917890.0,6527952717.0,6548110059.0,6548305562.0,6614816932.0,6614816933.0,6655955004.0,6665521201.0,6812359181.0,6828735107.0,6845958332.0,6853394171.0,6883900742.0,6898065786.0,7017083015.0,7023497023.0,7026412555.0,7057956266.0,7074204106.0,7106447484.0,7108534304.0,7111623411.0,7291797818.0,7457144284.0,7472086539.0,7499857816.0,7555099928.0,7595568326.0,7617368642.0,7705093769.0,8022046757.0,8115173938.0,8221607979.0,8231127670.0,8347693964.0,8606611310.0,8619096499.0,8764167716.0,8780330749.0,8858648535.0,8947736422.0,9043868605.0,9119257342.0,9141815449.0,9214278912.0,9427782660.0,9454129933.0,9456834639.0,9457793876.0,9487144663.0,9503896862.0,9570033891.0,9587237790.0,9626161277.0,9635285467.0,9668568332.0,9678566108.0,9776555283.0,9814816440.0,9816669244.0,9824706387.0,9858514837.0,9894237212.0,9903473593.0,9904059792.0,9980993519.0,9982446570.0,9982446583.0,9982516560.0,9990849240.0,10052865693.0,10165146320.0,10173471339.0,10200444430.0,10314577161.0,10584104648.0,10651832163.0,10703399054.0,10731968951.0,10804813653.0,10840142187.0,11112724664.0,11185215932.0,11227946992.0,11303039533.0,11367132320.0,11496715033.0,11520843452.0,11524440137.0,11575677696.0,11575790659.0,11577750630.0,11747031180.0,11795094666.0,11824244928.0,11834603179.0,11868104716.0,11918045878.0,12082533804.0,12230550831.0,12271416571.0,12323979912.0,12403509943.0,12405712541.0,12460930452.0,12580272635.0,12633335813.0,12659675630.0,12743351027.0,12745896980.0,12777880729.0,12807479812.0,12826431860.0,12848821265.0,12944958311.0,12968529221.0,13128312541.0,13141631671.0,13236941803.0,13418912420.0,13431866297.0,13441897419.0,13442767715.0,13446883933.0,13449573832.0,13500905352.0,13524254168.0,32537305.0,36428916.0,39575805.0,42960770.0,66794294.0,80909785.0,92538232.0,95569655.0,97595085.0,99128901.0,118638646.0,134243077.0,135812154.0,147934784.0,151365977.0,159236340.0,179215196.0,179499177.0,184505000.0,195715600.0,200827110.0,222880922.0,230036850.0,245105046.0,248116909.0,255390082.0,255838543.0,258806147.0,277713167.0,295622226.0,304156195.0,305074608.0,305790645.0,329793373.0,340980091.0,358509216.0,361836251.0,372184185.0,406020994.0,417996498.0,446265458.0,478815009.0,482032593.0,490008774.0,498318575.0,540625695.0,568504506.0,584353658.0,611120060.0,614055102.0,616626229.0,627267808.0,651660980.0,658171880.0,658625167.0,659098092.0,680560861.0,684552685.0,686941830.0,689668921.0,694787512.0,713406713.0,713423319.0,713487173.0,737203438.0,751717753.0,792556099.0,798995900.0,804641654.0,842463364.0,859462115.0,877965875.0,878933152.0,929199010.0,932435071.0,963211868.0,966485291.0,967471567.0,1016510991.0,1022084360.0,1027037562.0,1034573800.0,1044468047.0,1054398239.0,1062126247.0,1073103740.0,1106996678.0,1127602302.0,1223829884.0,1265788955.0,1268442078.0,1271710463.0,1273256840.0,1273268450.0,1280095702.0,1283855695.0,1288957672.0,1288957689.0,1320730392.0,1339186128.0,1339212907.0,1339816348.0,1350348321.0,1358094430.0,1362459642.0,1383615096.0,1425362173.0]},"kind":"numeric","n":14585,"n_null":0,"n_unique":14584,"null_rate":0.0,"stats":{"iqr":5401041935.0,"kurtosis":-0.1990653461721621,"max":13525496137.0,"mean":3722756855.24107,"median":1574285300.0,"min":13391742.0,"n_outliers":0,"outlier_rate":0.0,"q1":1000644758.0,"q3":6401686693.0,"skew":1.0734905451396348,"std":3828250123.608008,"zero_rate":0.0}},{"alerts":[],"column":"osm_type","extras":{"singletons":0,"top_values":[["node",11358],["way",3227]]},"kind":"categorical","n":14585,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.762451964535013,"entropy_ratio":0.762451964535013,"top_rate":0.7787452862529997,"top_value":"node"}}],"insights":{"errors":[],"insights":[{"confidence":"medium","critiques":[],"evidence_keys":["row_count","column_count","seamark_type.top_values","seamark_type.null_rate","light_character.top_rate","light_character.top_value","light_character.null_rate","lat.mean","lat.median","lat.skew","lat.n_outliers","country.null_rate","operator.null_rate","year_built.null_rate","height.null_rate","osm_type.top_values"],"featured_charts":[{"caption":"Look for the dominance of light_minor and light_major together accounting for the majority of seamarks, with landmark a distant third.","column":"seamark_type","kind":"bar"},{"caption":"Notice how 'Fl' (flashing) overwhelms all other light patterns, representing nearly three-quarters of all classified lights.","column":"light_character","kind":"bar"},{"caption":"Watch for the left skew and outlier cluster revealing that most structures sit in the Northern Hemisphere, with a tail of Southern Hemisphere locations.","column":"lat","kind":"histogram"},{"caption":"Check the roughly 78/22 split between node and way OSM types, reflecting how most lighthouses are mapped as single points rather than polygons.","column":"osm_type","kind":"donut"},{"caption":"Among the ~10% of records with height data, look for the concentration around 10\u201320 units, suggesting a typical structural height range for these seamarks.","column":"height","kind":"bar"}],"model":"anthropic:default","narrative":"This dataset contains 14,585 lighthouse and seamark records sourced from OpenStreetMap, covering navigational lights and related structures worldwide. The most immediately striking feature is that many descriptive columns \u2014 country, operator, year_built, height, and heritage \u2014 have null rates of 90% or higher, meaning the richest analysis must focus on the minority of well-filled records. Two columns worth close inspection are seamark_type, which cleanly splits records into light_minor (3,496), light_major (3,051), and landmark (716) with no nulls beyond the 48% gap, and light_character, where 'Fl' (flashing) dominates at 74.7% of non-null values across 19 pattern types. Geographically, all 14,585 records carry latitude and longitude, revealing a notable left skew in latitude (mean 34.5\u00b0, median 40.8\u00b0) with 1,295 outliers, suggesting a clustering of records in the Northern Hemisphere with some Southern outliers worth mapping.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_words","n_unique","n_duplicates","n","duplicate_rate"],"model":"anthropic:default","narrative":"This column contains Wikipedia article titles or slugs for lighthouse-related entries, as evidenced by top words including 'lighthouse', 'light', 'fr:phare', 'es:faro', 'de:leuchtturm', and 'pt:farol' \u2014 indicating multilingual cross-references. The column is extremely sparse, with an 86.23% null rate across 14,585 rows, meaning only ~2,027 rows carry a value. Despite being flagged near-unique, there are 43 duplicate values across 1,965 unique entries, which is low but worth noting if this is intended as a one-to-one reference.","role":"metadata","scope":"column","target":"wikipedia","treatment":"Use as an optional external reference link; impute or exclude nulls before joining, and investigate the 43 duplicates for data quality issues."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","entropy_ratio","alerts"],"model":"anthropic:default","narrative":"This column represents a numeric height measurement stored as a categorical/string type, with values that appear to be small integers (range visible: 6\u201320, likely in inches or some domain-specific unit). The most alarming signal is a null rate of 90.18%, meaning only ~1,433 of 14,585 rows carry any value at all \u2014 this is a severely sparse field. Among non-null values, 316 unique levels exist with modest concentration (top value '15' appears in only 4.12% of rows) and a high entropy ratio of 0.823, indicating the non-null values are broadly spread rather than clustered.","role":"feature","scope":"column","target":"height","treatment":"Cast to numeric after handling nulls; impute or flag missing values given the 90.18% null rate before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","entropy_ratio","alerts","top_values"],"model":"anthropic:default","narrative":"This column records the operating authority or agency responsible for a navigational aid or maritime infrastructure asset \u2014 dominated by coast guard entities (U.S., Philippine, and Chinese \u6d77\u4e0a\u4fdd\u5b89\u5385) alongside European bodies like Plovput and INEA. The 92.7% null rate is the critical anomaly: only 1,074 of 14,585 rows carry a value, meaning operator attribution is nearly absent across the dataset. Among populated rows, entropy ratio of 0.861 across 283 unique values signals a long tail of rarely-seen operators beyond the top few, and the top value 'U.S. Coast Guard' covers only 7.7% of non-null rows. The presence of CJK characters (\u6d77\u4e0a\u4fdd\u5b89\u5e81) confirms a multilingual mix requiring normalisation before any grouping or analysis.","role":"label","scope":"column","target":"operator","treatment":"Impute or flag nulls explicitly, normalise multilingual variants to a canonical form, then use as a categorical grouping variable with an 'Unknown' level for the 92.7% missing."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","top_value","top_rate","top_values","alerts","kind"],"model":"anthropic:default","narrative":"This column represents the year a structure was built, but it has been parsed as categorical rather than numeric, likely because it contains century-level codes such as 'C19' and 'C20' mixed with specific years like '1875' and '1906'. The null rate is extremely high at 93.2%, meaning only about 990 of 14,585 rows carry any value at all. Among populated values, 429 distinct entries exist with the top value 'C19' appearing only 31 times (3.1% of non-null rows), indicating a very long tail. The mixed format \u2014 century codes alongside specific years \u2014 signals data quality issues that require harmonisation before any temporal analysis.","role":"feature","scope":"column","target":"year_built","treatment":"Impute or exclude 93.2% nulls, normalise century codes ('C19' \u2192 1800\u20131899) to numeric ranges, then convert to integer or ordinal decade bins before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","n_outliers","outlier_rate","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column represents geographic latitude, with values ranging from -63.4\u00b0 (near Antarctica) to 81.8\u00b0 (high Arctic), consistent with global location data across 14,585 records. The distribution is notably left-skewed (skew = -1.46) with a mean of 34.5\u00b0 but a median of 40.8\u00b0, suggesting a concentration of records in mid-to-high Northern Hemisphere latitudes pulled down by a Southern Hemisphere tail. Nearly 9% of values (1,295) are flagged as outliers, likely corresponding to records in the Southern Hemisphere or extreme polar regions which are genuinely sparse in most datasets. Uniqueness is near-perfect (14,572 of 14,585 values distinct), implying precise coordinate capture with minimal rounding.","role":"feature","scope":"column","target":"lat","treatment":"Use as-is or pair with longitude for spatial modelling; consider binning into latitude bands or projecting to Cartesian coordinates for distance-based algorithms."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","entropy_ratio","top_values","cardinality"],"model":"anthropic:default","narrative":"This column captures the classification of maritime seamarks (navigational aids such as lights, beacons, and landmarks), drawn from what appears to be nautical/GIS data. Nearly half the rows (48.01%) are null, which is flagged as an alert and likely reflects features in the dataset that are not seamarks at all. Among the 7,588 populated rows, the distribution is heavily skewed toward light types: 'light_minor' alone accounts for 46.1% of non-null values, and combined with 'light_major' these two dominate, while the remaining 23 categories (beacons, landmarks, buildings, etc.) cover a long tail. Entropy ratio of 0.357 confirms moderate but uneven spread across the 25 categories.","role":"label","scope":"column","target":"seamark_type","treatment":"Impute nulls only if missingness is structurally meaningful (i.e., non-seamark features); otherwise keep as-is, one-hot or ordinal encode the 25 categories, and consider grouping rare categories (\u22645 occurrences) into an 'other' bucket before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","cardinality","top_values"],"model":"anthropic:default","narrative":"This column encodes the light character (flashing pattern) of maritime navigational aids \u2014 values like 'Fl' (flashing), 'Iso' (isophase), 'Oc' (occulting), and 'LFl' (long flashing) are standard IALA light notation. The dominant concern is a 71.31% null rate, meaning nearly three-quarters of records carry no light character, likely because many features in the dataset are unlighted aids or non-light structures. Among populated rows, 'Fl' accounts for 74.7% of non-null values, making the distribution heavily skewed across 19 categories.","role":"feature","scope":"column","target":"light_character","treatment":"Impute nulls with an explicit 'None/Unlighted' category, then one-hot or ordinal encode the 19 light character types before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","n_duplicates","top_words","word_median","len_mean","alerts"],"model":"anthropic:default","narrative":"This column contains the proper names of lighthouses, drawn from a multilingual global dataset \u2014 'lighthouse', 'faro' (Spanish/Italian), 'fyr' (Scandinavian), 'phare' (French), 'farol' (Portuguese), and '\u043c\u0430\u044f\u043a' (Russian/Cyrillic) all appear in the top words, confirming broad geographic coverage. With 14,239 unique values across 14,585 rows, the near-unique alert is expected for a name field; the 346 duplicates (2.37%) likely reflect shared names for distinct structures (e.g., 'North Light'). Average name length is ~19 characters with a median of 2 words, consistent with short proper-noun phrases. The multilingual vocabulary mix is the key surprise and warrants attention if name-matching or NLP is planned.","role":"label","scope":"column","target":"name","treatment":"Use as a display label or entity identifier; normalize Unicode and language variants before any string-matching or embedding."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","iqr","n","n_unique","null_rate","zero_rate"],"model":"anthropic:default","narrative":"This column contains geographic longitude values, spanning the full valid range from -179.2 to 179.3 degrees with a mean near 23\u00b0 and median near 15\u00b0, suggesting a slight concentration of observations in Europe/Africa relative to the Americas and East Asia. The distribution is nearly symmetric (skew \u2248 0.05) and platykurtic (kurtosis \u2248 -0.95), consistent with a broad, flat spread of coordinates across the globe rather than clustering around a single region. Near-uniqueness (14,565 distinct values out of 14,585 rows) and zero nulls confirm these are precise geospatial measurements, not coarse bins. The IQR of ~153 degrees reinforces global coverage.","role":"feature","scope":"column","target":"lon","treatment":"Pair with latitude for spatial modelling; consider map-projection or trigonometric encoding (sin/cos) to handle the -180/180 wraparound boundary."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","min","max","iqr","skew","stats.q1","stats.q3"],"model":"anthropic:default","narrative":"This column contains OpenStreetMap object identifiers, a well-known external reference system where numeric IDs are assigned sequentially as features are added to the OSM database. Nearly all 14,585 rows have a unique ID (only 1 duplicate exists across 14,584 unique values), and the null rate is zero, indicating high integrity. The wide IQR of ~5.4 billion and right skew (1.07) reflect OSM's historical ID growth \u2014 older features have lower IDs while newer additions push into the 10+ billion range, which is consistent with OSM's current ID space.","role":"foreign_key","scope":"column","target":"osm_id","treatment":"Use as a join key to OSM data sources; do not use as a numeric feature in modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","null_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column captures the OpenStreetMap geometry type, distinguishing between point features ('node') and linear/polygon features ('way'). With only 2 distinct values across 14,585 rows and zero nulls, it is a clean binary indicator. The distribution is notably skewed: 'node' dominates at 77.9% (11,358 records) versus 'way' at 22.1% (3,227 records), reflecting the typical OSM pattern where point features outnumber area/line features. Entropy ratio of 0.76 confirms moderate imbalance but not extreme dominance.","role":"feature","scope":"column","target":"osm_type","treatment":"One-hot encode or binary-encode (node=1, way=0) before modelling; consider as a stratification variable given the 78/22 class split."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":3769,"prompt_tokens":11477,"total_tokens":15246}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:32:58+00:00","mode":"full","row_count":14585,"sampled_rows":14585,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/lighthouses.json"},"notes":[],"saturn_version":"0.2.0","schema":{"country":"categorical","height":"categorical","heritage":"categorical","lat":"numeric","light_character":"categorical","lon":"numeric","name":"text","operator":"categorical","osm_id":"numeric","osm_type":"categorical","seamark_type":"categorical","wikipedia":"text","year_built":"categorical"}}
