{"columns":[{"alerts":[],"column":"latitude","extras":{"histogram":{"counts":[1,0,2,0,0,40,69,231,980,1226,1111,984,2936,1664,2204,1445,552,652,711,1326,1234,2293,1440,2557,1843,1639,3183,1376,3246,4519,7862,12910,7883,3329,3266,2437,2174,1260,64,29],"edges":[-77.722234,-73.8125762775,-69.902918555,-65.9932608325,-62.08360311,-58.1739453875,-54.264287665,-50.3546299425,-46.44497222,-42.535314497499996,-38.625656774999996,-34.715999052499996,-30.806341329999995,-26.896683607499995,-22.987025884999994,-19.077368162499994,-15.167710439999993,-11.258052717499993,-7.3483949949999925,-3.438737272499992,0.4709204500000084,4.380578172500009,8.29023589500001,12.19989361750001,16.10955134000001,20.01920906250001,23.92886678500001,27.83852450750001,31.748182230000012,35.65783995250001,39.56749767500001,43.47715539750001,47.386813120000014,51.29647084250003,55.206128565000014,59.1157862875,63.025444010000015,66.93510173250003,70.84475945500002,74.7544171775,78.6640749]},"sample":[63.3177541,-17.9248638,-20.7487046,47.1647903,60.9784369,41.6577905,35.0920058,0.0441254,45.5674168,47.6385934,35.2845942,45.3246277,41.3151,60.8397478,36.1343901,57.5395025,15.2101766,46.342282,46.3181643,46.0378643,34.5561523,31.8672999,32.446955,49.5015202,45.5756673,36.5524153,49.3767343,50.7548126,41.2414616,-39.2069635,-41.2954189,-39.7375572,-39.3197204,-39.0450644,-43.2224268,-35.267538,-39.1428688,-43.8945391,-45.1738406,-42.8580206,-43.8506072,-44.6539808,-43.1835672,-38.1523496,-38.014149,46.2958508,44.7598026,57.5653936,54.9110112,56.083093,22.0072876,2.8840644,48.4818212,49.9548147,47.5102201,56.6168002,46.9255648,36.757942,44.3921762,54.3737118,44.1068585,44.2810327,-27.7528477,36.8550494,45.6177544,19.1125688,-21.776509,59.687524,-8.4287746,46.8001704,38.1676631,-21.329622,47.5776194,9.920395,-22.1720789,48.0498134,-4.6242968,52.1187486,45.1895723,40.9764779,41.5900821,42.4531275,43.2443089,32.6713553,32.7298066,56.1562629,48.5397996,-20.5993183,56.3204251,43.2333982,-21.4603603,47.35765
75,-29.2375634,47.4238218,66.8114294,45.4526577,19.7158364,35.5464239,35.6868,35.6623089,41.5107618,47.7520929,43.4410772,41.8363794,38.5335978,47.5640396,48.7779562,35.0740558,45.2798349,45.6454782,17.9158088,42.7018912,-29.2839701,-29.1279303,-23.0648506,42.7525364,-28.0331647,65.8320972,50.5900144,-24.7375383,-24.5575736,44.89188,50.1228409,49.0177383,-19.5582219,35.4410161,35.2504531,60.831378,58.018113,44.3476988,37.4122623,37.4119482,60.3392484,60.4747567,37.5539,-31.1495446,45.6210185,15.366364,13.9554481,-26.9067942,45.2816391,33.3002593,41.9179537,40.1376602,-29.4368545,54.2045161,-7.3455658,4.1306344,33.4504259,-29.2660608,61.641793,62.2004868,-12.7682169,62.747772,43.3106242,-27.5022514,49.5141627,54.450112,48.6550042,-21.001538,44.5026993,42.743641,45.5942537,-31.6591153,-24.5589311,10.4671327,42.8840204,47.693571,-41.4849085,50.8961136,50.6018774,2.8747491,-28.9711264,20.3630955,51.8739408,-39.3083085,47.1239389,41.9724659,-22.7837517,-16.2839309,50.0833377,15.6789114,8.2187808,27.6792032,50.2759385,47.5774897,25.040822,50.8725995,4.6846828,7.0870201,46.0945793,19.8467497,54.7830826,43.2184125,54.4061847,33.9771041,68.4551868,45.0738905,-13.2512046,50.6595605,-25.1423764,51.744077,-29.5679295,11.1317026,44.5028945,47.6810143,47.7341723,49.9687775,46.608069,50.8924417,-4.1169693,14.2105086,37.7741918,51.3117904,46.2237782,42.1741687,46.4352062,42.0469372,40.7642917,7.7415093,-16.7689806,61.2249745,32.7669272,48.0922267,48.3946507,35.9145787,34.9696717,-13.4208132,68.1600764,10.6861848,-17.0016262,64.168371,64.3920481,1.1013801,45.6491315,-47.1770028,-3.2322585,-26.2024624,37.2671791,24.8824996,-27.3370765,64.6915702,20.4776838,-2.4868282,-18.3754431,9.3548832,45.4259761,-26.3367309,23.6062534,42.4193598,45.2090212,45.3444419,46.9369046,36.8654086,46.2448942,-18.7900085,-18.5028546,25.7310077,46.5025002,49.0016647,41.0982898,45.2630582,39.9058854,-0.8104916,-17.3554069,19.2864898,46.7080074,-26.4308204,34.1040894,41.5400515,-5.4534276,24.7923303,-18.97959
51,-19.2085402,12.6371062,5.6646719,35.7745232,69.7618951,11.3645804,-19.5038636,47.9182999,24.077868,-33.7677549,24.6455115,-26.3472849,-26.9648364,26.1398167,-19.4853099,25.9359088,34.7593819,42.5095638,45.8043096,-24.2872147,55.2701998,39.8900706,55.2077958,55.3277842,55.616889,-25.96747,56.5171197,56.9983359,57.5942006,-21.7637254,11.7773509,44.2405328,29.2011303,49.2465418,25.9055032,52.2562317,36.3517832,-15.8159177,52.2852393,-37.2224011,42.0701217,35.7797097,54.3640588,51.305069,54.4837064,34.5029945,37.8464704,45.9168219,-28.1313961,49.2894798,-28.0998009,-28.1162184,-28.0808927,-28.0742953,43.8515252,-6.7813805,25.3202036,25.2592434,47.5344687,22.382608,25.1336232,42.7121651,63.2634539,5.1514291,37.2334034,41.9262669,30.2731781,38.3452947,50.8686359,46.7893939,-3.0208643,6.672822,45.705239,8.6572226,25.0822446,46.1884001,44.2044111,41.6156298,25.4225506,47.7477919,43.2113171,24.8515082,44.7922346,60.0811404,46.350368,25.4552229,15.004793,45.3116239,10.3725934,49.3510777,22.2759424,5.7263256,40.6217123,44.0795913,50.1042686,42.7887755,41.719809,46.9922616,50.7240177,61.7833293,59.8609108,-27.7165358,42.4529359,-39.7295531,34.0282329,14.4863399,39.6273803,51.154086,51.0697562,47.2910604,66.1469407,-22.1315979,12.459028,65.4464424,44.1188138,20.0478331,35.8112383,47.1545144,67.0656043,67.8953446,68.710929,45.8549065,45.8434501,9.2174611,35.9826517,42.9813343,-20.2153857,57.8878507,46.6343904,37.9369449,42.7136526,68.3429003,-27.2923625,45.8507815,16.7480491,43.5726848,48.6981464,43.1558809,-8.290989,16.5980811,42.6249494,15.6750502,48.3393515,7.6106974,6.4227224,15.7210877,47.770757,44.5623974,42.2789355,49.7243726,60.4635971,36.6435439,-17.8628668,-8.639293,3.1302213,-35.4984708,-36.8781295,39.5586946,40.6998886,52.1531475,-35.1388815,-49.986371,41.9241485,41.7744929,16.9246776,53.4760442,39.0619849,48.265967,44.0336856,14.814276,44.1287816,11.835987,43.9987121,35.7911719,43.1436658,48.2222625,22.2989839,-45.9348569,26.4709668,26.3525772,59.204439,17.5157172
,26.4473589,-6.8001872,25.5511002,-21.3316686,35.6428049,26.3692417,8.4615548,-22.8315297,43.8490398,45.9190946,58.18976,25.8234046,54.2091489,61.5211182,61.5133971,25.6824378,26.0393145,19.7256009,27.3394132,-37.4236344,41.5841933,51.0169426,42.7623513,66.244893,43.5729084,61.5807155,44.5585159,-25.6300352,45.9949292,17.5156636,15.9685734,47.4074118,69.3270707,4.8024339,-29.2628868,49.2452357,4.6954489,5.6900471,65.3779329,19.0085139,5.1999327,5.5907584,41.6206872,67.9410919,5.6959856]},"kind":"numeric","n":80678,"n_null":0,"n_unique":80650,"null_rate":0.0,"stats":{"iqr":37.81987485,"kurtosis":-0.2826767393011993,"max":78.6640749,"mean":27.148406794299564,"median":40.311778000000004,"min":-77.722234,"n_outliers":298,"outlier_rate":0.0036936959270185182,"q1":9.65716525,"q3":47.477040099999996,"skew":-0.9359450639051901,"std":30.045322878500286,"zero_rate":0.0}},{"alerts":[],"column":"longitude","extras":{"histogram":{"counts":[21,5,332,160,42,1803,3492,1627,566,922,3019,4949,2531,2294,4760,1428,76,959,901,4266,7904,10863,4161,2370,4414,1659,555,512,785,865,833,2020,1082,1555,842,1654,1855,345,743,1508],"edges":[-179.9906627,-171.005601225,-162.02053975,-153.035478275,-144.0504168,-135.06535532499998,-126.08029385,-117.095232375,-108.1101709,-99.125109425,-90.14004795,-81.154986475,-72.169925,-63.184863525,-54.19980205,-45.21474057500001,-36.2296791,-27.24461762499999,-18.25955615000001,-9.274494675,-0.2894331999999906,8.69562827499999,17.68068975,26.665751225000008,35.65081269999999,44.635874175,53.62093565000001,62.605997125000016,71.5910586,80.57612007499998,89.56118154999999,98.546243025,107.5313045,116.51636597500001,125.50142745000002,134.48648892500003,143.47155039999998,152.456611875,161.44167335,170.426734825,179.4117963]},"sample":[13.3565338,25.8589179,-41.8575388,9.5915342,29.8366925,-80.8601946,-82.8957932,36.370469,25.6436169,12.0111304,-82.2772451,15.3999534,-89.0185475,6.1750361,-82.5366065,-5.8539281,-90.2196241,12.8915441,12.9306098,9.1180235,136.09
89912,130.7933351,131.392809,-122.612473,8.9415049,-118.7935724,16.3741082,15.5703079,-122.025035,175.4911854,174.965922,174.9150612,175.5026284,177.1099073,170.6089052,173.7249817,177.0549292,169.9451996,168.8588493,171.6691983,169.7014289,168.3654071,170.9401229,178.1601282,176.3451257,7.703197,4.2911195,-6.1527298,-120.7507031,-4.1666045,-159.5290299,-77.1236166,-69.3340159,87.7029599,11.7168072,-6.0610724,-53.9232109,-117.9039886,38.6138965,-3.0889477,-103.6492296,40.3461275,-48.5427014,-3.738523,-121.9513891,-96.9290011,-46.6094446,-135.2414481,18.2849826,13.5971535,15.4806762,148.9423829,9.8332546,123.953987,-57.5187572,6.746044,-79.4115507,102.0245951,5.9584993,47.9158669,33.4008499,2.6705257,-79.9077863,-112.3869076,-109.89398,47.1370796,26.2559763,-46.2286294,-3.3980338,-79.9738826,-50.845973,8.8673146,-51.1560367,7.8799898,30.8474484,5.8790586,-155.1339507,-85.0385731,-84.934,-83.6199909,70.0450966,12.5929097,-111.3771467,9.2650737,67.5663099,10.1192823,9.1899088,-82.5968071,-88.1986767,10.196542,73.6547801,25.0766485,-50.1348644,-50.0118477,47.169251,44.3624798,-49.6429065,-16.3999677,-115.2005116,-50.1193923,-50.5307608,34.7048315,-92.0193787,19.2820105,169.3932979,-85.5537291,-85.7467624,5.8834943,14.4636355,17.2400443,-110.0726308,-110.1171607,6.7928454,6.9943623,-109.2728062,-64.365617,25.5584985,-61.2520196,105.9350053,-51.1404189,2.9820066,126.580936,-72.8566636,29.1415066,-50.0141018,-7.8919859,-73.691358,-52.2000007,-111.4435703,-51.9578831,7.5595075,7.5392094,-72.6316576,7.7545694,22.1757717,-49.8775529,18.3805205,-2.2863985,-124.5901973,-42.4754736,38.9470756,-0.333507,7.3963486,-65.0358551,-50.2545831,123.3252172,-71.708947,14.0924409,146.3021595,14.0609052,-1.2830678,-52.9737043,152.098569,-13.1105303,-3.2976578,176.8826187,12.9332188,47.9283911,-46.0290908,-52.635467,19.3979531,74.1896746,124.3030557,84.4249687,12.983479,14.614506,80.6756492,58.2870741,100.8742905,125.3398364,-121.9140906,102.2284277,83.2081543,-122.9477285,-3.0533426,75.3539
264,16.4139583,24.2217267,-39.5844808,14.4877263,-48.8526275,-0.491153,-50.9201734,-73.854764,33.8612625,7.9489788,7.930206,86.5680551,141.9686991,15.374163,138.8942139,121.4829234,140.1537064,9.4020225,-122.1892387,2.3067307,13.473621,2.4916391,44.8718675,126.2997777,-40.1278639,10.4375931,-17.0359488,-90.4447166,23.0942059,138.4195054,135.7851335,-72.5653219,17.909168,-12.2130687,13.2446538,13.611744,-19.1111347,-76.6144419,-122.0935267,-72.4709402,-79.1432118,-53.6731697,127.9423494,93.3897113,-53.7427089,13.067622,-12.8690412,-78.776468,-59.595976,-83.5054564,25.4450774,-49.158907,120.657006,-76.8494032,5.6535494,15.4903475,4.3200879,137.9906872,13.6177337,145.88874,145.7770465,-100.6526434,7.7197951,23.6174158,22.1896629,15.5453445,-79.4905964,-78.879862,145.853734,100.7881501,7.5623142,-50.9799197,135.9356028,70.1460638,143.2047594,9.6089709,146.0439861,146.0693481,-8.1064889,-66.0929903,137.4639921,25.1560363,104.1040148,146.983145,24.2923337,10.7511174,151.1174355,121.5076598,-49.2872623,-50.3151968,6.7946416,-41.0556601,7.4829393,135.6776454,43.0723475,9.5579036,-51.9386795,-132.1951381,139.737104,-131.2386019,-131.4257421,-132.1423852,-54.5828223,-133.5096944,-133.2173724,-134.3217626,-46.6044019,107.2149699,39.1901058,121.0061485,-124.3488462,8.9874896,7.7523376,51.1178002,135.7141255,5.7379645,144.1894813,12.8759219,138.2541472,142.7620614,142.930485,-3.0503311,-96.9705569,-79.0769628,1.186294,-49.4020218,20.3487405,-49.4272054,-50.4884133,-49.7078706,-49.7577954,19.8231928,-79.2032812,91.5976222,9.0850555,19.0516217,114.0733521,8.9460379,13.3702894,8.9358376,102.8003724,42.846035,46.2159281,-97.7354756,40.9020942,-1.2927812,-60.6547326,28.3604068,-62.3351121,11.1291906,124.9817592,9.4095921,8.8228069,39.149368,2.0076991,9.3345743,28.4556447,6.4280687,51.1246481,38.5031836,10.986396,8.8766511,8.4587705,-24.4275257,14.2742457,-84.8285329,-122.6578261,114.1335025,-66.8918038,-110.8926134,11.0947754,-124.3313699,1.1095502,44.7839275,9.4927308,13.6178635,30.
7829493,30.0548385,-49.1803374,42.3753327,174.4174304,134.3166231,101.1122874,115.6205734,-125.8247196,-125.9851331,9.7481752,13.0234754,-47.7092856,-84.8505288,12.7753729,39.1761197,-155.4040613,137.3040274,9.0680221,14.324391,13.0073364,17.8946028,11.1969566,11.2038874,47.5422017,-81.8542525,-70.9443896,-49.5440885,-4.553585,12.5275753,14.6212379,77.040771,66.1414987,-50.0433059,9.0426383,52.9520255,6.2327314,-122.4710497,77.1214439,115.0383747,52.691119,0.9309865,52.1631449,23.1710501,124.952821,126.1811152,52.1554738,10.9500339,38.3193094,-2.5229663,18.8223106,12.35966,-105.4685703,-63.5118023,18.4795337,101.4814026,-70.7024159,-71.1274678,-120.6269856,9.2551438,-7.9713356,-70.5184642,-73.2113645,-74.4510602,13.6990048,44.6379903,-128.5197803,16.5188277,24.8859108,39.6080361,-24.3641042,-74.544932,107.0136299,39.5278783,-82.2355088,-71.9557318,24.7175074,103.8720124,-72.0745731,7.2938034,7.4492615,6.4051735,44.1867276,7.4202575,142.9352647,7.3240346,117.5800858,-93.728076,7.4396056,-80.8444458,-45.9976262,11.2732395,12.1361634,-71.4130854,8.4086126,-7.9039185,-6.87623,-6.8624466,8.4033394,8.2316472,-155.143521,88.4878896,146.5551402,24.018752,15.3687405,1.0606806,-22.0594424,12.491344,7.6432055,5.0283357,29.9781409,10.5183095,21.5947759,101.91345,11.1753295,-53.9095679,-60.7686289,-51.0569162,20.2521989,-61.8632412,-61.6819292,-71.862651,103.2696274,-62.1393958,-62.634158,44.7673754,-50.2472196,-62.5021327]},"kind":"numeric","n":80678,"n_null":0,"n_unique":80650,"null_rate":0.0,"stats":{"iqr":100.2692399,"kurtosis":-0.41192885783942623,"max":179.4117963,"mean":0.9626313955390575,"median":7.8029868,"min":-179.9906627,"n_outliers":0,"outlier_rate":0.0,"q1":-61.70840265,"q3":38.56083725,"skew":0.28654549459258427,"std":76.85904576354724,"zero_rate":0.0}},{"alerts":[{"code":"duplicates","level":"warn","message":"65.7% duplicate 
strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[217,1507,685,1501,1940,1737,4325,4350,2013,52248,3568,1444,2068,1215,387,545,308,94,175,61,84,62,18,31,31,11,17,8,2,6,7,1,2,3,1,4,0,0,0,2],"edges":[1.0,2.65,4.3,5.949999999999999,7.6,9.25,10.899999999999999,12.549999999999999,14.2,15.85,17.5,19.15,20.799999999999997,22.45,24.099999999999998,25.75,27.4,29.049999999999997,30.7,32.349999999999994,34.0,35.65,37.3,38.949999999999996,40.599999999999994,42.25,43.9,45.55,47.199999999999996,48.849999999999994,50.5,52.15,53.8,55.449999999999996,57.099999999999994,58.75,60.4,62.05,63.699999999999996,65.35,67.0]},"near_unique":false,"sample":["Str\u00f8mslifossen","Unnamed Waterfall","Rau\u00f0fossar","Unnamed Waterfall","Little Niagara Falls","Unnamed Waterfall","Price\u2019s Falls","Unnamed Waterfall","\u5f4c\u6771\u98db\u7011","Cascada de Arriba","Storfallet","\u96cc\u6edd","Lower Bishop Falls","Mystic Falls","Gruta da Serra","Morphy Falls 12","Unnamed Waterfall","\u0e19\u0e49\u0e33\u0e15\u0e01\u0e2a\u0e32\u0e22\u0e23\u0e38\u0e49\u0e07\u0e25\u0e30\u0e2d\u0e2d\u0e07\u0e14\u0e32\u0e27","Unnamed Waterfall","Dlh\u00fd vodop\u00e1d","Unnamed Waterfall","Unnamed Waterfall","Freixa","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Bull Falls","Unnamed Waterfall","Unnamed Waterfall","\u10e7\u10d0\u10e0\u10d8\u10db\u10d0\u10dc\u10d0\u10e1 \u10e9\u10d0\u10dc\u10e9\u10e5\u10d4\u10e0\u10d8","Unnamed Waterfall","Unnamed Waterfall","Gertelbach-Wasserf\u00e4lle","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Heuston Falls","Unnamed Waterfall","Tunnel Cave Falls","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall","Unnamed Waterfall"],"top_values":[["Unnamed 
Waterfall",48168],["Rapid",105],["Cascada",85],["Fossen",85],["Storfossen",78],["Cascade",49],["Sagfossen",47],["Twin Falls",45],["Fossane",40],["Rainbow Falls",37],["Bridal Veil Falls",35],["Cascata",33],["Storforsen",32],["\u4e0d\u52d5\u6edd",31],["Hidden Falls",29],["Upper Falls",28],["\u1021\u102f\u1014\u103a\u1038\u1010\u1005\u102f \u101b\u1031\u1010\u1036\u1001\u103d\u1014\u103a",28],["Lower Falls",26],["High Falls",26],["Cascade Falls",26]],"top_words":[["waterfall",12228],["unnamed",11979],["falls",1772],["cachoeira",492],["de",452],["cascada",353],["do",298],["cascade",293],["cascata",273],["salto",204],["la",178],["del",173],["creek",171],["da",166],["\u0432\u043e\u0434\u043e\u043f\u0430\u0434",150],["du",79],["upper",72],["wasserfall",66],["vodop\u00e1d",66],["lower",65],["el",65],["air",54],["cascades",54],["terjun",53],["di",51]],"vocab_skipped":null,"word_histogram":{"counts":[8971,0,59981,0,0,8432,0,2424,0,0,581,0,188,0,0,68,0,18,0,0,6,0,3,0,0,2,0,2,0,2],"edges":[1.0,1.4,1.8,2.2,2.6,3.0,3.4000000000000004,3.8000000000000003,4.2,4.6,5.0,5.4,5.800000000000001,6.2,6.6000000000000005,7.0,7.4,7.800000000000001,8.2,8.600000000000001,9.0,9.4,9.8,10.200000000000001,10.600000000000001,11.0,11.4,11.8,12.200000000000001,12.600000000000001,13.0]}},"kind":"text","n":80678,"n_null":0,"n_unique":27697,"null_rate":0.0,"stats":{"allcaps_rate":0.034619103101217186,"boilerplate_rate":0.0,"duplicate_rate":0.6566969929844567,"emoji_rate":1.2394952775229926e-05,"len_max":67,"len_mean":16.120949949180694,"len_median":17.0,"len_min":1,"len_p95":21.0,"n_duplicates":52981,"n_empty":0,"one_word_rate":0.11119512134658767,"readability_flesch_mean":17.608900000000013,"url_rate":0.0,"vocab_size":8093,"word_mean":2.0914499615756466,"word_median":2.0}},{"alerts":[{"code":"long_tail","level":"info","message":"403 singleton categories"}],"column":"description","extras":{"singletons":403,"top_values":[["Waterfall",72565],["Waterfall, 3m",551],["Waterfall, 2m",520],["Waterfall, 
5m",460],["Waterfall, 10m",426],["Waterfall, 4m",423],["Waterfall, 1m",358],["Waterfall, 6m",329],["Waterfall, 20m",298],["Waterfall, 15m",257],["Waterfall, 8m",240],["Waterfall, 7m",214],["Waterfall, 30m",170],["Waterfall, 12m",159],["Waterfall, 25m",125],["Waterfall, 40m",114],["Waterfall, 1.5m",103],["Waterfall, 50m",79],["Waterfall, 9m",79],["Waterfall, 60m",74]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":775,"null_rate":0.0,"stats":{"cardinality":775,"entropy":1.139861526797745,"entropy_ratio":0.11875966783663179,"top_rate":0.8994397481345596,"top_value":"Waterfall"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"category","extras":{"singletons":0,"top_values":[["usgs_waterfalls",80678]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"usgs_waterfalls"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"date","extras":{"singletons":0,"top_values":[["",80678]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"4 singleton categories"},{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"country","extras":{"singletons":4,"top_values":[["",80650],["VE",24],["DE",1],["LB",1],["HN",1],["BR",1]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":6,"null_rate":0.0,"stats":{"cardinality":6,"entropy":0.0047937009607278,"entropy_ratio":0.0018544566737004684,"top_rate":0.9996529413222935,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"403 singleton 
categories"}],"column":"height","extras":{"singletons":403,"top_values":[["",72565],["3",551],["2",520],["5",460],["10",426],["4",423],["1",358],["6",329],["20",298],["15",257],["8",240],["7",214],["30",170],["12",159],["25",125],["40",114],["1.5",103],["50",79],["9",79],["60",74]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":775,"null_rate":0.0,"stats":{"cardinality":775,"entropy":1.139861526797745,"entropy_ratio":0.11875966783663179,"top_rate":0.8994397481345596,"top_value":""}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"source","extras":{"singletons":0,"top_values":[["OpenStreetMap",80678]]},"kind":"categorical","n":80678,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"OpenStreetMap"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","description.top_rate","description.top_value","height.top_rate","height.top_values","name.top_values","name.n_duplicates","name.duplicate_rate","latitude.min","latitude.max","country.top_rate","source.top_value"],"featured_charts":[{"caption":"Look at how sharply the distribution drops after the most common values (2\u201310 m), revealing that most recorded heights are small and the vast majority of falls have no height at all.","column":"height","kind":"bar"},{"caption":"The near-total dominance of the plain 'Waterfall' label versus all other descriptions highlights just how little structured metadata exists beyond the basic classification.","column":"description","kind":"bar"},{"caption":"Name length distribution shows a tight median around 17 characters, with 'Unnamed Waterfall' accounting for the large spike \u2014 a useful signal of data completeness.","column":"name","kind":"length"},{"caption":"The latitude histogram reveals a pronounced concentration in the northern mid-latitudes (Europe, North America) with a thinner 
southern-hemisphere tail.","column":"latitude","kind":"histogram"},{"caption":"Longitude spreads widely across the full global range but shows clustering around Europe and the Americas, reflecting OpenStreetMap contributor density.","column":"longitude","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset is a global catalogue of 80,678 waterfalls sourced entirely from OpenStreetMap, covering geographic coordinates and basic descriptive attributes. The most striking finding is how sparse the data quality is: 89.9% of records carry only the generic description 'Waterfall' with no height recorded, and 59.7% of entries are named 'Unnamed Waterfall', suggesting the dataset is geographically broad but informationally thin. Height data is worth a closer look \u2014 where it does exist, values cluster at small measurements (2\u201310 metres), hinting at a possible recording bias toward easily measured falls. The geographic spread is genuinely global (latitude ranges from -77.7 to 78.7), but the country field is nearly empty for 99.97% of records, so spatial analysis should rely on the raw coordinates rather than the country column.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","top_values","null_rate","alerts"],"model":"anthropic:default","narrative":"This column is intended to capture country of origin or residence, with only 6 distinct values across 80,678 rows. The overwhelming surprise is that 99.97% of records (80,650 out of 80,678) contain an empty string rather than a valid country code, making the field effectively unpopulated. 
The remaining 28 records split across five ISO country codes (VE with 24 occurrences, and DE, LB, HN, BR each with 1), suggesting the field was rarely filled in rather than being systematically captured.","role":"feature","scope":"column","target":"country","treatment":"Treat empty strings as missing; with 99.97% blank rate this column carries near-zero signal and should be dropped unless the rare non-empty values have specific analytical value."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column contains the names of waterfalls or water features, drawn from what appears to be a global geographic dataset (evidenced by multilingual terms: 'Cachoeira'/'Cascada'/'Cascata'/'Fossen'/'Salto'). The dominant signal is that 48,168 of 80,678 rows \u2014 nearly 60% \u2014 carry the value 'Unnamed Waterfall', driving a duplicate rate of 65.7% and collapsing effective cardinality to just 27,697 unique values out of 80,678 total. The vocab includes Portuguese, Spanish, Norwegian, and English terms, confirming a multilingual mix that an analyst should be aware of when grouping or filtering by name.","role":"label","scope":"column","target":"name","treatment":"Treat 'Unnamed Waterfall' as a missing-name sentinel; flag or separate those 48,168 rows before any name-based grouping or NLP embedding."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","cardinality","top_rate","top_value","null_rate","n"],"model":"anthropic:default","narrative":"This column is a dataset category tag, representing the data source or classification for every record \u2014 here uniformly 'usgs_waterfalls'. With cardinality of 1, top_rate of 1.0, and zero nulls across all 80,678 rows, it carries no discriminative information whatsoever. 
This is a constant column, almost certainly a provenance/partition label added when merging multiple source datasets.","role":"metadata","scope":"column","target":"category","treatment":"Drop before modelling \u2014 zero-variance constant; retain only if merging with other source datasets where the value varies."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","top_rate","top_value","cardinality","entropy"],"model":"anthropic:default","narrative":"This column is labeled 'date' but contains no actual date values \u2014 every single one of its 80,678 rows holds an empty string, giving it a cardinality of 1 and a top_rate of 1.0. The column is entirely blank with zero nulls, meaning missing values were stored as empty strings rather than proper nulls. It carries zero information and will contribute nothing to any analysis or model.","role":"other","scope":"column","target":"date","treatment":"Drop this column; it is entirely empty strings with no informational content."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","cardinality","entropy","top_rate","top_value","null_rate"],"model":"anthropic:default","narrative":"This column records the data source attribution for all 80,678 rows, and every single record carries the value 'OpenStreetMap' \u2014 making it a constant with cardinality of 1, entropy of 0, and a top_rate of 1.0. It provides zero discriminative information and will contribute nothing to any model or analysis. 
The imbalance alert is technically correct but understates the situation: this is a fully degenerate column, not merely skewed.","role":"metadata","scope":"column","target":"source","treatment":"Drop before modelling; if provenance tracking is needed, note the constant value in dataset documentation instead."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","entropy_ratio","alerts","top_values","n"],"model":"anthropic:default","narrative":"This column is a free-text description of the mapped feature, overwhelmingly dominated by the bare label 'Waterfall' (72,565 of 80,678 rows, ~89.9%), with the remaining values being 'Waterfall' variants qualified by a height suffix in metres (e.g., 'Waterfall, 3m', 'Waterfall, 2m', 'Waterfall, 5m'). The extreme concentration in a single value \u2014 an entropy ratio of only 0.119 \u2014 and the long-tail alert indicate that despite 775 unique values, almost all signal is captured by one category. Surprising: with 775 distinct values but ~90% mass in one label, the tail consists of hundreds of rare height-qualified variants that may need normalisation.","role":"label","scope":"column","target":"description","treatment":"Normalise tail variants (e.g., parse the height suffix into a separate numeric feature in metres), then one-hot or ordinal encode; consider collapsing rare variants below a frequency threshold."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n","n_unique","null_rate","alerts","entropy_ratio"],"model":"anthropic:default","narrative":"This column purports to store height values but is classified as categorical, with 775 unique string values across 80,678 rows. The dominant signal is alarming: 72,565 rows (89.9%) contain an empty string, meaning the field is effectively missing for nearly 9 in 10 records despite a reported null_rate of 0.0. 
The non-empty values are small numeric strings, not exclusively integers (e.g., '1', '1.5', '2', '3', '5', '10', '20'), most plausibly heights in metres, but the extreme sparsity and long-tail alert make this column unreliable as a feature without significant imputation or domain clarification.","role":"feature","scope":"column","target":"height","treatment":"Treat empty strings as missing (true null_rate \u2248 0.90); confirm the unit (most plausibly metres) and cast to numeric, then impute or drop depending on task requirements before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","iqr","n_unique","n","null_rate","n_outliers"],"model":"anthropic:default","narrative":"This column contains geographic latitude coordinates, spanning from -77.72\u00b0 (Antarctic region) to 78.66\u00b0 (Arctic region), covering nearly the full terrestrial range. With 80,650 unique values out of 80,678 rows and zero nulls, it is essentially a high-cardinality continuous measurement. The distribution is notably left-skewed (skew = -0.94) with a mean of 27.1\u00b0 and median of 40.3\u00b0, indicating a concentration of records in mid-to-high Northern Hemisphere latitudes but with a meaningful tail toward the Southern Hemisphere. The IQR of 37.8\u00b0 and slightly negative kurtosis (-0.28) suggest a broadly spread distribution rather than a single tight cluster, although the histogram still peaks sharply between roughly 40\u00b0 and 51\u00b0N.","role":"feature","scope":"column","target":"latitude","treatment":"Use as-is for spatial modelling; consider pairing with longitude and binning into geohash or grid cells for aggregation tasks."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","iqr","kurtosis","skew","n_unique","n","null_rate","outlier_rate"],"model":"anthropic:default","narrative":"This column is geographic longitude, with values spanning nearly the full valid range of \u2212179.99 to 179.41 degrees, indicating globally distributed records. 
The distribution is notably flat (kurtosis \u22120.41, IQR of 100.27\u00b0) and only mildly right-skewed (skew 0.29), suggesting broad geographic spread rather than concentration in any single region. The median of 7.80\u00b0 (near Western Europe/West Africa) sits above the mean of 0.96\u00b0, hinting at a slight pull of the mean toward Western (negative) longitudes. Near-perfect uniqueness (80,650 unique values out of 80,678 rows) confirms these are precise coordinate readings, not bucketed regions.","role":"feature","scope":"column","target":"longitude","treatment":"Use as-is for spatial modelling; consider pairing with latitude and applying geographic projections or clustering (e.g., H3/geohash) before feeding into non-spatial models."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":2865,"prompt_tokens":8268,"total_tokens":11133}},"language_counts":{},"meta":{"generated_at":"2026-06-22T00:25:28+00:00","mode":"full","row_count":80678,"sampled_rows":80678,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/geographic/waterfalls/waterfalls_worldwide.json"},"notes":[],"saturn_version":"0.2.0","schema":{"category":"categorical","country":"categorical","date":"categorical","description":"categorical","height":"categorical","latitude":"numeric","longitude":"numeric","name":"text","source":"categorical"}}
