{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"ID","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27037,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["east1459","tarp1240","kech1244","kona1243","jehm1239","east2441","apal1256","mala1473","bauk1238","land1262","sens1241","khot1252","rioc1237","kond1302","bahi1255","eltu1238","mand1443","mbuk1244","wadi1250","sart1248","phra1235","lowe1441","sate1243","hond1244","cuat1239","aneu1237","kupa1238","gunn1248","miri1274","lont1237","rodi1239","dede1238","roge1238","nort3388","east2321","nucl1614","elip1240","sibo1242","tula1250","konm1234","daka1243","haek1241","aewa1238","nigb1238","nuku1258","xong1238","nati1243","tunj1244","nakr1234","nucl1513"],"top_values":[],"top_words":[["cent1996",1],["chan1318",1],["teke1274",1],["cepe1238",1],["bwai1242",1],["bago1245",1],["kwan1286",1],["tumb1248",1],["pira1254",1],["quba1247",1],["kayu1242",1],["mama1277",1],["laco1243",1],["yiid1234",1],["koty1238",1],["ship1256",1],["ukpe1247",1],["bame1268",1],["east2648",1],["iban1269",1],["nort2921",1],["benz1238",1],["rond1238",1],["long1394",1],["pana1301",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27037,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":27037,"n_null":0,"n_unique":27037,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":92.03300000000003,"url_rate":0.0,"vocab_size":20000,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"66.7% rows are a single word"}],"column":"Name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[565,8233,6520,2380,3551,2357,937,1041,722,241,217,137,69,16,26,9,5,4,3,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,3.7,6.4,9.100000000000001,11.8,14.5,17.200000000000003,19.900000000000002,22.6,25.3,28.0,30.700000000000003,33.400000000000006,36.1,38.800000000000004,41.5,44.2,46.900000000000006,49.6,52.300000000000004,55.0,57.7,60.400000000000006,63.1,65.80000000000001,68.5,71.2,73.9,76.60000000000001,79.30000000000001,82.0,84.7,87.4,90.10000000000001,92.80000000000001,95.5,98.2,100.9,103.60000000000001,106.30000000000001,109.0]},"near_unique":true,"sample":["East Bird's Head","Tarpia","Kechi","Konawe","Jeh Mang Ram","East Lagoon","Apali","Mala","Baukan","Land Dayak (Retired)","Sensh\u016b","Khotan","R\u00edo Casacar\u00e1","Konda-Yahadian","Bahinemic","Elt Ulid","Mander","Mbuk","Wadimbisa","Sartul","Phrae Pwo Karen","Lower Pokomo","Sater\u00e9-Maw\u00e9","Honduran M\u00edskito","Cuatzoquitengo","Aneuk Jamee","Kupa","Gun-narda","Miriwunic","Lontes","Rodiya","Dede","Rogede","Northern Sumatra Malay","East Khowar","Nuclear Gadsup","Elip-Gunu","Sibo","Tula-Waja","Kon-Miku","Dakaka","Haeke-Bwatoo","Aewa (Western Province)","Nigbi","Nukumanu","Xonga","Natioro","Tunjung","Nakrehe","Nuclear Mutu"],"top_values":[],"top_words":[["nuclear",340],["central",315],["western",271],["northern",251],["eastern",242],["southern",242],["sign",229],["language",199],["south",161],["north",148],["east",133],["west",132],["of",105],["english",102],["unclassified",82],["pidgin",80],["arabic",75],["upper",72],["san",69],["new",64],["zapotec",59],["mixtec",59],["creole",58],["old",57],["lower",56]],"vocab_skipped":null,"word_histogram":{"counts":[18047,0,6711,0,0,1703,0,451,0,0,102,0,17,0,0,2,0,2,0,0,0,0,1,0,0,0,0,0,0,1],"edges":[1.0,1.4,1.8,2.2,2.6,3.0,3.4000000000000004,3.8000000000000003,4.2,4.6,5.0,5.4,5.800000000000001,6.2,6.6000000000000005,7.0,7.4,7.800000000000001,8.2,8.600000000000001,9.0,9.4,9.8,10.200000000000001,10.600000000000001,11.0,11.4,11.8,12.200000000000001,12.600000000000001,13.0]}},"kind":"text","n":27037,"n_null":0,"n_unique":27037,"null_rate":0.0,"stats":{"allcaps_rate":0.0001479454081443947,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":109,"len_mean":10.439101971372564,"len_median":8.0,"len_min":1,"len_p95":23.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.6674926951954728,"readability_flesch_mean":29.907075000000027,"url_rate":0.0,"vocab_size":18126,"word_mean":1.4442060879535452,"word_median":1.0}},{"alerts":[],"column":"Macroarea","extras":{"singletons":6,"top_values":[["Eurasia",8060],["Africa",8020],["Papunesia",6326],["North America",1782],["South America",1524],["Australia",919],["Africa;Eurasia",29],["Eurasia;Papunesia",22],["Africa;Eurasia;North America;Papunesia;South America",18],["Africa;Australia;Eurasia;North America;Papunesia;South America",17],["North America;South America",15],["Eurasia;North America",12],["Africa;North America",12],["Eurasia;South America",11],["Eurasia;Papunesia;South America",8],["Africa;Eurasia;Papunesia;South America",7],["Eurasia;North America;South America",5],["Eurasia;North America;Papunesia;South America",4],["Africa;Australia;Eurasia;North America;Papunesia",3],["Papunesia;South America",3]]},"kind":"categorical","n":27037,"n_null":224,"n_unique":30,"null_rate":0.008284942856086105,"stats":{"cardinality":30,"entropy":2.2710778709895956,"entropy_ratio":0.46283442166453115,"top_rate":0.3006004550031701,"top_value":"Eurasia"}},{"alerts":[],"column":"Latitude","extras":{"histogram":{"counts":[20,6,3,11,17,51,70,90,139,276,323,446,670,665,1558,2186,1999,1102,1636,2277,2361,993,1013,696,956,1358,620,733,938,558,736,374,418,527,185,143,204,109,66,25],"edges":[-55.2748,-52.064544999999995,-48.85429,-45.644035,-42.43378,-39.223524999999995,-36.01327,-32.803015,-29.59276,-26.382505,-23.17225,-19.961995,-16.751739999999998,-13.541484999999994,-10.331229999999998,-7.120975000000001,-3.9107199999999978,-0.7004649999999941,2.5097900000000024,5.720044999999999,8.930300000000003,12.140555000000006,15.350809999999996,18.561065,21.771320000000003,24.981575000000007,28.19183000000001,31.402085,34.61234,37.82259500000001,41.032849999999996,44.243105,47.45336,50.66361500000001,53.87387000000001,57.084125,60.294380000000004,63.50463500000001,66.71489,69.925145,73.1354]},"sample":[27.27,-15.764624999999995,39.37643333333333,-3.06952,8.848165,-5.568969,-16.72547,36.2161,26.737266666666667,6.6112448146659855,-1.82635,19.5392,-3.5332,-3.5486775833333333,21.5096,16.64,6.08332,-2.780485,30.3027,40.4102,7.133276996429586,-11.710753668663314,-14.681217777777778,-1.842985,-2.44717,-4.94491,29.04,38.8301425,19.2467,-7.63603,8.43391,30.76738,59.500948,-2.9408,47.33,48.179025,-9.59575,-9.080784,-4.934003122213147,52.1228,8.491555,8.196588219154787,-22.757193043986337,-16.65807,-17.4516,-4.49162,-8.09371,24.5024,52.59,-12.683215,23.93497166666667,-8.095455,55.0,25.383679660172387,-3.70748,-9.92982,-7.62223,9.126966913580246,29.9446,9.09034,-21.9803,0.5048725000000001,28.54661142222222,27.1027,19.5183,-8.41526,15.2948,47.0,18.3857,21.8358,55.5,-18.9988,-3.60065,-3.37841,56.12921,60.717,11.05,-4.99504,-4.14421,-3.43705,68.3217,-9.09451,-25.484542625,-13.4957,-27.56865,14.1794,55.98450400000001,-14.1754,-9.026411666666666,35.9053,28.381735,63.0377,60.3844,13.103579999999996,23.3235,24.5209,13.438011,7.851371666666667,2.764837958000044,-3.67233,-4.22936,-1.04518,-0.459785,26.0686,26.3241,27.4177713,21.92559542700842,27.373850000000004,21.8358,11.05,-5.80804,-5.778625,17.32754033333333,15.3648,7.02206,4.993625833333333,7.084225,11.1324,13.8941,-14.810589,10.8427,18.2836,49.3231,7.88153,68.6436,-8.37312,11.32054,41.850397,24.5209,-6.3285,2.75927,1.41405,-3.3245399995843035,-8.57376,-3.72704,-3.95195,6.16512,6.69756,33.9479,36.33736,0.579755,24.114500000000003,26.0789,26.01396,25.74,26.5158555,27.14808,5.28022,6.568783333333333,5.3944,55.8257,56.0,-25.0,4.13257,25.0317,43.22,-11.5178,-3.461612,20.5447,51.7045,-4.322858,-7.95814,-6.35337,-5.12627,-6.30218,-5.28029,8.13911,8.32448,9.42222,6.25971,5.992136524877154,6.67251,12.7531,-20.1494,-1.676121,2.31461,-12.62,-2.92,-2.68501,63.8977,4.49119,42.382940000000005,-30.4207,-25.0747,41.850397,5.272691148000092,-9.00299,1.24813,-2.78384,16.2176,17.3654,26.3241,22.8735,34.5177,26.9107,27.2572,27.381782,36.5137,14.6308,11.9777,10.8021,61.0,40.4321,20.236,-6.17824,-5.2219,17.02093,8.72567,2.36213,5.9503,4.75111,9.50768,12.4871,9.79442,9.40172,11.133,9.86954,9.11127,9.525,8.9352,5.20497,5.58288,6.27562,9.788898732320645,6.5805,11.8203,51.2403,39.8155,37.1405,49.250402,48.51614,9.158,10.270273333333332,11.1513,34.9992,0.46863,6.74176,17.29154,12.1959,19.1,11.8,61.972,42.1206,-0.9907051156537504,15.2315,2.756735,4.25064,4.267704999999999,-3.79673,-2.42661,-7.33458,1.45841,-1.80261,-1.80261,35.0,-4.54988,11.6816,11.225505,17.756,19.3435,22.2750324,24.0381,31.5655,27.1606,27.152,27.152,5.05,-6.29363,-4.91191,9.78323,9.49451,7.16751,6.58269,4.62705,10.6462,8.727387499999999,8.4809,6.5423278125,6.5805,12.75752,12.829,-28.289642,-29.042737,51.2403,59.0,34.931823,34.931823,63.4837,10.943,11.0243,11.54345,-2.64636,25.5946,-2.29891,-13.542726,-9.045164237334966,12.516050281047306,1.85856,-24.80155,-2.87434,17.2238,35.0,16.9887,12.70645,23.1253503,32.2766,5.17103,43.565,23.1024,53.1763,-9.30537,-4.562946,8.67053,9.91175,9.02294,10.2077,9.95914,5.4753675,7.33708,-13.98051,51.2403,9.9325,10.072,12.5176,10.0666,10.93854,10.827,5.37869,8.11879,8.33561,22.8346,31.1056,8.20383,23.1868,-10.21751,-10.8,-4.62059,-4.387792509481574,-2.94411,-1.7543,-0.9523,-0.9523,6.38148,16.6646,16.30448,-8.55748,-9.55099,12.14611,17.03036,17.1427,24.278,13.3501,6.34756,5.42441,6.16926,8.92577,8.92577,6.25838,-17.20415,3.16141,1.075766,43.89875,27.66625500112351,21.228,36.6963,42.98,8.79947,10.1258,7.22812,7.22812,17.63759833335,-9.77284,-16.2975,-13.892528,-15.5169,-8.689764603340782,-3.74711,-5.65906,-6.92151,-9.4791,-10.5826,7.72124,-2.93987,-3.6111,17.6163,-9.56709,22.5638,11.8226,23.5,23.1,29.59203,-25.60551,9.84629,8.829761,2.79827,8.48801,10.4789,9.62456,6.88039,-11.3304,-14.8509,-14.8509,-1.95077,0.037455,34.6515,34.6515,29.1708,37.1344,34.2212,42.068306,46.0137,50.947,51.152,-0.183215,19.0,-15.40319,-5.84776,-5.059174259684999,-6.33644,-6.18,3.4667,4.42907,4.42907,17.3879,16.1724,10.467,0.625865,4.47632125,-19.7933,-20.1239,-24.1930875,-2.21871,-1.82559,-3.60164,-0.66322,3.45776,31.8161,46.1064,54.6409,61.291358,-9.34361,-4.9078,-5.68289,10.520219,23.585,7.90997,6.21563,5.87893,8.15211,6.75724,6.650434,-18.1927,-3.07772,40.1897,33.2851,27.53636,16.5734,29.49928,-1.40124,18.5562,-20.4035,-10.2173,-4.30986,-5.167174999999999,23.031191,10.9703,7.15345,6.36656,5.18158,5.56777,-15.4126,40.1897,18.404,27.8834,47.0516,53.0,-1.40124,36.75,18.34,-10.1246,5.47421,5.47421,1.25037,0.40456,-3.30638,41.453,41.32435,24.7458,5.83,-5.64103,13.2335066667,48.7,-16.17259,-4.369018282552084,-28.9,1.16839,-1.6386670833333334,-0.477445,-0.477445,-2.781206125]},"kind":"numeric","n":27037,"n_null":479,"n_unique":13231,"null_rate":0.01771646262529127,"stats":{"iqr":29.74711,"kurtosis":-0.19118003909389714,"max":73.1354,"mean":11.589907074531045,"median":8.52697,"min":-55.2748,"n_outliers":48,"outlier_rate":0.0018073650124256345,"q1":-3.74711,"q3":26.0,"skew":0.42112006859415485,"std":20.569749085950182,"zero_rate":0.0}},{"alerts":[],"column":"Longitude","extras":{"histogram":{"counts":[25,11,23,47,39,45,357,122,212,627,151,543,558,337,144,79,0,7,378,1242,1732,2769,1327,1659,1017,729,168,342,849,765,1092,1350,904,1659,883,1695,1807,330,469,65],"edges":[-178.785,-169.8296040641,-160.8742081282,-151.91881219229998,-142.9634162564,-134.0080203205,-125.0526243846,-116.0972284487,-107.1418325128,-98.1864365769,-89.23104064099999,-80.27564470509999,-71.32024876919999,-62.36485283329999,-53.40945689739999,-44.454060961500005,-35.49866502559999,-26.543269089699976,-17.58787315379999,-8.632477217900004,0.32291871800001104,9.278314653900026,18.233710589800012,27.189106525699998,36.14450246160001,45.09989839750003,54.05529433340001,63.0106902693,71.96608620520001,80.92148214110003,89.87687807699999,98.8322740129,107.78766994880002,116.74306588470003,125.69846182060004,134.6538577565,143.60925369240002,152.56464962830003,161.5200455642,170.4754415001,179.43083743600005]},"sample":[92.63,-57.43435,-120.802,151.537,-70.975342,-80.828247,-68.97119,74.8236,88.88823333333335,-8.627691334892132,132.06,-102.154,142.87,142.80410314351855,77.2647,-92.74,-72.0833,36.59766,-82.3132,-120.645,2.5232967915022413,-69.53743134464393,126.30159,-72.2139,137.629,143.700473,66.56,-123.1244825,-101.631,145.415,25.0,-94.62848,69.085972,140.7495,-102.0,-105.3955,143.773,143.030921,145.49978924469698,-174.29,-11.465745,9.076721308162636,116.47092238900724,145.14981666666665,145.544,143.593,146.908,82.297,-128.53,-63.9917,111.3049,142.435,-125.0,109.69219712607868,142.697,148.303,-66.5662,19.942824938271603,-110.681,18.43689,-58.0654,-69.962425,94.59132339444446,97.0738,97.4138,141.034,-1.69301,3.0,82.771,77.5541,-130.08,-41.2299,142.79,142.715,52.63548,55.756,35.93,143.891,139.753,136.096,-133.532,148.376,139.812369,141.599,153.46239,-2.57999,24.134609,-72.6686,-77.29105,-106.124,30.834959,170.8825,165.653,21.021283333333333,120.887,121.388,144.74474,122.85361666666664,125.45045006600004,142.683,142.401,-78.5392,-75.5547,117.376,94.3824,92.2255891,101.35260644425284,91.07253333333333,77.5541,35.93,146.464,146.523,-90.16342166666666,-90.8893,31.285,15.631520833333331,8.428529999999999,-3.64763,-16.3916,141.788992,-71.8399,-66.8042,-123.014,-5.883345,170.045,142.706,20.87674,43.78613,121.388,106.11372,117.293,117.942,129.01645135033567,118.445,141.864,142.307,34.9436,33.988,-116.772,102.80434,-69.5763,109.06900000000002,94.9669,94.04355,94.25,99.2581092,86.47646,-4.62352,-10.461733333333331,-5.82696,12.3187,44.0,135.0,101.477,87.8185,-123.36,-53.0743,142.917031,-97.6852,-127.221,-70.280457,147.579,145.883,139.455,145.098,145.574,32.3829,-10.3377,11.4439,1.67727,9.998286588008607,9.79396,-12.4796,146.67,-71.149307,128.44,-75.7,141.83,141.258,166.695,7.23558,-73.320255,19.7711,17.9767,43.78613,163.01921634200005,125.159,124.654,111.136,121.878,-93.3633,94.3824,92.8208,100.485,87.2133,96.3855,96.024393,48.4302,108.694,106.798,107.293,-130.0,-122.863,-97.81,145.139,145.589,-91.42348,26.4858,31.0016,15.2569,16.6721,-6.15264,-5.28245,15.8936,-3.47051,-4.88274,12.545,1.60169,8.37644,8.90506,8.35234,9.55918,10.0342,9.998335868193172,10.2267,-15.3221,14.4098,21.9129,22.7341,-122.443668,-123.757114,15.81149,9.926684166666664,8.7804,-4.51293,37.1008,38.3729,53.99526,52.2282,13.0665,13.13,94.689,46.0332,130.7132078365815,120.164,113.91649999999998,115.5,114.02,103.137,102.193,109.716,103.766,115.876,115.876,135.0,143.123,18.1464,18.432365,-96.788,83.6552,112.7199202,93.6136,101.928,87.5717,95.3903,95.3903,-8.85,139.693,139.926,12.0719,11.0698,0.5886,1.92015,7.23441,5.352593333333334,6.52459,11.9603,10.560321312499998,10.2267,-15.7354766667,-15.9217,152.151281,152.780249,14.4098,50.0,70.087592,70.087592,-19.0212,13.8181,13.7362,13.7548,141.104,107.726,147.81,167.343342,161.0287870814277,123.7538258162338,103.0,45.9,128.265,121.455,135.0,-97.7974,79.95209,112.9476548,103.192,-8.1409,42.2606,99.108,-124.452,-59.4769,-53.750438,-5.17372,11.7206,8.34119,6.61686,8.48301,7.815440000000001,5.97843,141.81611,14.4098,9.51838,12.0256,10.8732,13.3343,13.0521,14.0578,37.2737,37.9891,38.4763,38.9237,35.0179,-9.23439,104.993,123.96373,165.8,152.892,152.40248307359255,151.332,135.866,135.879,135.879,101.41,121.113,120.78633,120.329,119.349,17.64653,-97.782372,-96.3352,103.759,-3.36685,-1.33254,-0.59545,8.23592,10.9642,10.9642,11.0906,22.55045,11.6365,9.600739,15.8319,84.54675270918312,74.337,49.3742,44.61,9.15279,10.2542,-8.24535,-8.24535,103.4376166665,124.144,168.211,167.428701,166.95,146.73773529215114,152.493,151.20299999999995,146.895,159.917,161.863,125.16,103.945,128.068,121.0615,119.49165,101.95,75.5203,99.33,104.45,82.4549,-57.08816,11.52551,12.850757,27.8282,-0.72757,8.93337,9.95,9.26287,34.8959,38.8052,38.8052,25.043698,29.44695,75.287,75.287,82.437,49.8359,53.521,48.264749,11.319499999999998,6.927,12.692,34.7552,102.46,166.67531,151.033,145.77811216077836,146.126,146.4,114.523,116.177,116.177,121.0365,-96.2372,76.833,24.498,11.0759125,34.9829,33.1739,33.477425,26.3477,28.1017,38.5037,39.793,15.5246,54.4469,11.315,8.58489,13.830399,150.24,145.779,122.791,78.825989,101.546,0.59728,20.9827,22.4666,-0.2755,11.1843,11.06,32.8052,37.5382,44.4839,75.8707,67.84754,76.9717,66.02877,41.234,104.658,164.193,150.421,152.114,154.628,103.418697,-0.38756,3.67225,11.7002,10.1442,10.1266,13.9704,44.4839,73.994,81.7761,8.46419,-1.0,41.234,43.0,104.729,150.172,10.0214,10.0214,23.6907,33.3942,39.6648,1.569,15.171636,88.9157,-55.33,154.732,-15.33254,9.4,-146.321755,13.485859291666666,30.2,28.7626,14.302116666666665,14.2877,14.2877,10.984256]},"kind":"numeric","n":27037,"n_null":479,"n_unique":13203,"null_rate":0.01771646262529127,"stats":{"iqr":110.16804,"kurtosis":-0.4517500124120386,"max":179.43083743600005,"mean":51.8238390094288,"median":44.065281,"min":-178.785,"n_outliers":51,"outlier_rate":0.0019203253257022365,"q1":9.22546,"q3":119.3935,"skew":-0.4680314121051467,"std":74.04628459501451,"zero_rate":0.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Glottocode","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27037,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["east1459","tarp1240","kech1244","kona1243","jehm1239","east2441","apal1256","mala1473","bauk1238","land1262","sens1241","khot1252","rioc1237","kond1302","bahi1255","eltu1238","mand1443","mbuk1244","wadi1250","sart1248","phra1235","lowe1441","sate1243","hond1244","cuat1239","aneu1237","kupa1238","gunn1248","miri1274","lont1237","rodi1239","dede1238","roge1238","nort3388","east2321","nucl1614","elip1240","sibo1242","tula1250","konm1234","daka1243","haek1241","aewa1238","nigb1238","nuku1258","xong1238","nati1243","tunj1244","nakr1234","nucl1513"],"top_values":[],"top_words":[["cent1996",1],["chan1318",1],["teke1274",1],["cepe1238",1],["bwai1242",1],["bago1245",1],["kwan1286",1],["tumb1248",1],["pira1254",1],["quba1247",1],["kayu1242",1],["mama1277",1],["laco1243",1],["yiid1234",1],["koty1238",1],["ship1256",1],["ukpe1247",1],["bame1268",1],["east2648",1],["iban1269",1],["nort2921",1],["benz1238",1],["rond1238",1],["long1394",1],["pana1301",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27037,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":27037,"n_null":0,"n_unique":27037,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":92.03300000000003,"url_rate":0.0,"vocab_size":20000,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"null_rate","level":"warn","message":"69.7% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"ISO639P3code","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[2.5,2.525,2.55,2.575,2.6,2.625,2.65,2.675,2.7,2.725,2.75,2.775,2.8,2.825,2.85,2.875,2.9,2.925,2.95,2.975,3.0,3.025,3.05,3.075,3.1,3.125,3.15,3.175,3.2,3.225,3.25,3.275,3.3,3.325,3.35,3.375,3.4,3.425,3.45,3.475,3.5]},"near_unique":true,"sample":["aqp","lak","avd","kak","fam","kri","gwt","nnz","zac","wlg","gvp","krp","bfs","yuz","stg","mdj","zml","bfj","kmw","lro","ubu","ccy","ilg","tme","dos","ngg","gis","siq","gti","aek","mss","mdt","mky","mjx","coc","cab","nuh","kyg","tia","mgi","seu","cam","gan","yoy","pex","flr","mvu","mlf","xop","kpg"],"top_values":[],"top_words":[["faa",1],["was",1],["ttd",1],["pue",1],["tud",1],["mrf",1],["ano",1],["pby",1],["omc",1],["aqp",1],["xwa",1],["kbh",1],["dby",1],["kun",1],["yet",1],["ggk",1],["cbu",1],["bsk",1],["con",1],["ule",1],["sua",1],["cas",1],["tiw",1],["caz",1],["gbc",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8180,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":27037,"n_null":18857,"n_unique":8180,"null_rate":0.6974516403447129,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":3,"len_mean":3.0,"len_median":3.0,"len_min":3,"len_p95":3.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":119.10500000000003,"url_rate":0.0,"vocab_size":8180,"word_mean":1.0,"word_median":1.0}},{"alerts":[],"column":"Level","extras":{"singletons":0,"top_values":[["dialect",13593],["language",8612],["family",4832]]},"kind":"categorical","n":27037,"n_null":0,"n_unique":3,"null_rate":0.0,"stats":{"cardinality":3,"entropy":1.4684809100435534,"entropy_ratio":0.9265082986981688,"top_rate":0.5027554832266894,"top_value":"dialect"}},{"alerts":[{"code":"null_rate","level":"warn","message":"66.4% null"}],"column":"Countries","extras":{"singletons":367,"top_values":[["PG",905],["ID",708],["NG",512],["AU",476],["IN",402],["MX",316],["CN",315],["BR",277],["US",255],["CM",205],["PH",188],["CD",162],["VU",129],["RU",104],["TZ",103],["PE",102],["MY",88],["TD",88],["NP",82],["CO",80]]},"kind":"categorical","n":27037,"n_null":17956,"n_unique":737,"null_rate":0.6641269371601879,"stats":{"cardinality":737,"entropy":6.561602766109232,"entropy_ratio":0.6888445154457216,"top_rate":0.09965862790441582,"top_value":"PG"}},{"alerts":[],"column":"Family_ID","extras":{"singletons":5,"top_values":[["atla1278",4861],["aust1307",4108],["indo1319",3173],["sino1245",1926],["afro1255",1458],["nucl1709",834],["pama1250",642],["aust1305",526],["otom1299",385],["book1242",382],["sign1238",343],["mand1469",322],["drav1251",281],["turk1311",273],["cent2225",267],["taik1256",261],["ural1272",236],["nilo1247",235],["nakh1245",190],["araw1281",188]]},"kind":"categorical","n":27037,"n_null":429,"n_unique":297,"null_rate":0.015867145023486334,"stats":{"cardinality":297,"entropy":4.937838732774008,"entropy_ratio":0.601125748848755,"top_rate":0.1826894167167769,"top_value":"atla1278"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"null_rate","level":"warn","message":"49.7% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"77.1% duplicate strings"}],"column":"Language_ID","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13593,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":false,"sample":["kuna1268","mixt1426","mewa1249","meke1243","gban1260","stan1288","ande1247","amah1245","kuan1248","foii1241","alac1244","lopi1242","amer1248","kari1254","kwal1258","bahn1262","kang1288","nige1255","stan1290","tase1235","thur1255","lafo1243","murl1244","sulu1241","yamb1251","umbu1257","nort2646","nene1249","gari1253","stan1288","soma1255","dann1241","dann1241","mara1382","kaid1239","gbar1246","muna1247","bhot1235","czec1258","mape1249","bora1271","swis1247","noct1238","main1267","lomb1257","haho1237","sout2840","lush1249","yaki1237","luxe1241"],"top_values":[["nucl1643",119],["stan1293",114],["west2369",63],["homs1234",58],["dale1238",40],["stan1288",38],["kham1282",32],["kony1248",32],["mand1415",31],["assy1241",31],["mode1248",30],["occi1239",30],["dann1241",29],["biak1248",29],["uppe1400",29],["wuch1236",28],["nort2641",28],["izon1238",27],["nucl1302",27],["shon1251",27]],"top_words":[["nucl1643",119],["stan1293",114],["west2369",63],["homs1234",58],["dale1238",40],["stan1288",38],["kham1282",32],["kony1248",32],["mand1415",31],["assy1241",31],["mode1248",30],["occi1239",30],["dann1241",29],["biak1248",29],["uppe1400",29],["wuch1236",28],["nort2641",28],["izon1238",27],["nucl1302",27],["shon1251",27],["tase1235",26],["nort2697",25],["java1254",25],["amdo1237",25],["okpa1238",25]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13593,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":27037,"n_null":13444,"n_unique":3110,"null_rate":0.49724451677331066,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.7712057676745384,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":10483,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":86.53400000000002,"url_rate":0.0,"vocab_size":3110,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"null_rate","level":"warn","message":"21.3% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"61.6% duplicate strings"}],"column":"Closest_ISO369P3code","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21283,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[2.5,2.525,2.55,2.575,2.6,2.625,2.65,2.675,2.7,2.725,2.75,2.775,2.8,2.825,2.85,2.875,2.9,2.925,2.95,2.975,3.0,3.025,3.05,3.075,3.1,3.125,3.15,3.175,3.2,3.225,3.25,3.275,3.3,3.325,3.35,3.375,3.4,3.425,3.45,3.475,3.5]},"near_unique":false,"sample":["xbn","ebu","ksl","beb","vam","nso","kzp","bns","arg","yag","env","tig","yua","sln","srr","dak","plo","kgn","kty","amx","hur","mwp","tbr","mgd","bhb","gom","umi","asc","ulk","biz","kyj","ztp","zps","tug","ggb","ilw","roh","iso","lnd","hia","lse","nto","szb","nkw","nso","ccd","cok","wbf","lgg","mxc"],"top_values":[["jpn",120],["eng",115],["pes",64],["hyw",59],["spa",39],["qer",37],["khg",33],["nbe",33],["cmn",32],["aii",32],["ell",31],["oci",31],["bhw",30],["sxu",30],["wuu",29],["kmr",29],["ijc",28],["dnj",28],["sna",28],["kat",27]],"top_words":[["jpn",109],["eng",106],["pes",59],["hyw",55],["spa",34],["qer",34],["aii",31],["khg",31],["ell",31],["bhw",30],["cmn",30],["oci",30],["sxu",29],["wuu",29],["nbe",28],["sna",28],["kmr",27],["dnj",27],["kat",27],["adx",25],["nst",25],["evn",25],["ijc",25],["opa",25],["azj",25]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21283,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":27037,"n_null":5754,"n_unique":8180,"null_rate":0.2128194696157118,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.6156556876380209,"emoji_rate":0.0,"len_max":3,"len_mean":3.0,"len_median":3.0,"len_min":3,"len_p95":3.0,"n_duplicates":13103,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":117.41300000000003,"url_rate":0.0,"vocab_size":7877,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"99.2% null"}],"column":"First_Year_Of_Documentation","extras":{"histogram":{"counts":[2,4,3,1,14,26,12,13,15,19,12,10,19,65],"edges":[-2100.0,-1812.0,-1524.0,-1236.0,-948.0,-660.0,-372.0,-84.0,204.0,492.0,780.0,1068.0,1356.0,1644.0,1932.0]},"sample":[1829.0,1741.0,1644.0,1721.0,1792.0,1732.0,1732.0,1698.0,1857.0,1792.0,1612.0,1886.0,1607.0,1829.0,1689.0,-400.0,1875.0,-2000.0,1782.0,1609.0,-250.0,1885.0,300.0,-860.0,-300.0,-500.0,-500.0,-700.0,-600.0,-500.0,-700.0,-1800.0,-600.0,1853.0,1580.0,1700.0,1840.0,1932.0,1932.0,1200.0,-1200.0,300.0,622.0,1688.0,895.0,1883.0,1886.0,1907.0,-500.0,50.0,-600.0,-1650.0,1699.0,300.0,1185.0,1770.0,1826.0,1755.0,1755.0,1755.0,1661.0,1819.0,1600.0,1680.0,1600.0,1580.0,-500.0,1820.0,1886.0,1837.0,1831.0,-800.0,-150.0,400.0,-600.0,450.0,600.0,1500.0,-800.0,1341.0,-2100.0,1700.0,1826.0,1826.0,400.0,1767.0,1300.0,900.0,700.0,800.0,601.0,58.0,1400.0,1829.0,1899.0,1847.0,1887.0,1820.0,1740.0,1819.0,1819.0,1820.0,-1600.0,-100.0,-550.0,-300.0,-350.0,-600.0,-1500.0,-550.0,300.0,1500.0,350.0,300.0,1912.0,1607.0,1831.0,700.0,100.0,1605.0,860.0,1820.0,200.0,1883.0,1878.0,1899.0,1873.0,1845.0,1903.0,1650.0,-550.0,-500.0,-500.0,-1400.0,-700.0,-600.0,-800.0,-500.0,-750.0,-800.0,-900.0,1500.0,1625.0,-1300.0,-1600.0,50.0,-600.0,-100.0,1500.0,900.0,-500.0,-300.0,224.0,500.0,600.0,500.0,800.0,-925.0,500.0,1746.0,400.0,700.0,1042.0,1524.0,-500.0,-700.0,-500.0,-500.0,500.0,-300.0,500.0,200.0,-500.0,-400.0,150.0,750.0,1100.0,500.0,-700.0,300.0,-500.0,1600.0,1200.0,600.0,500.0,-200.0,400.0,1350.0,196.0,1050.0,1200.0,450.0,800.0,-300.0,-814.0,1780.0,1600.0,700.0,200.0,100.0,-200.0,711.0,1300.0,800.0,1700.0,1788.0,1100.0,200.0,272.0,200.0,200.0,1497.0,700.0,1000.0,842.0]},"kind":"numeric","n":27037,"n_null":26822,"n_unique":114,"null_rate":0.9920479343122388,"stats":{"iqr":2010.5,"kurtosis":-0.9205597192550989,"max":1932.0,"mean":673.7302325581395,"median":711.0,"min":-2100.0,"n_outliers":0,"outlier_rate":0.0,"q1":-300.0,"q3":1710.5,"skew":-0.4581417445026361,"std":1055.2568725539652,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"96.0% null"},{"code":"high_skew","level":"info","message":"skew=-3.35"},{"code":"outliers","level":"warn","message":"15.9% rows beyond 1.5 IQR"}],"column":"Last_Year_Of_Documentation","extras":{"histogram":{"counts":[2,0,0,1,2,1,0,0,1,1,4,2,3,1,2,3,9,8,14,10,7,7,4,13,9,9,17,7,18,21,96,796],"edges":[-3100.0,-2939.875,-2779.75,-2619.625,-2459.5,-2299.375,-2139.25,-1979.125,-1819.0,-1658.875,-1498.75,-1338.625,-1178.5,-1018.375,-858.25,-698.125,-538.0,-377.875,-217.75,-57.625,102.5,262.625,422.75,582.875,743.0,903.125,1063.25,1223.375,1383.5,1543.625,1703.75,1863.875,2024.0]},"sample":[1934.0,1918.0,1989.0,1995.0,1873.0,1905.0,1965.0,1900.0,1942.0,1867.0,1940.0,1934.0,2010.0,1767.0,1750.0,2003.0,1877.0,1960.0,1940.0,1955.0,1794.0,1858.0,1778.0,1965.0,100.0,1768.0,-2250.0,1978.0,2007.0,1900.0,-1500.0,1916.0,2002.0,300.0,1957.0,1975.0,1939.0,1972.0,2004.0,1990.0,1990.0,-2300.0,1961.0,1903.0,2018.0,-300.0,-400.0,1965.0,1805.0,-250.0,-500.0,1925.0,1787.0,1985.0,1985.0,1960.0,1955.0,1955.0,1975.0,1975.0,1977.0,2010.0,1988.0,2016.0,2004.0,1855.0,2000.0,2013.0,1875.0,1805.0,1971.0,1915.0,1985.0,1917.0,2012.0,1985.0,1750.0,1900.0,-1300.0,1965.0,1965.0,2013.0,1975.0,2003.0,1750.0,1607.0,2005.0,1888.0,1975.0,2005.0,1855.0,1954.0,1945.0,1985.0,1400.0,1974.0,2008.0,2011.0,400.0,2014.0,629.0,2015.0,2003.0,2015.0,2010.0,1975.0,1908.0,1908.0,1526.0,1911.0,1985.0,1925.0,2015.0,1931.0,1940.0,1931.0,1930.0,2005.0,1938.0,1867.0,1965.0,1985.0,1976.0,1898.0,2005.0,1985.0,1975.0,1965.0,1983.0,1960.0,1799.0,1985.0,1985.0,1995.0,1984.0,-400.0,500.0,-100.0,-1180.0,1925.0,1935.0,2019.0,1910.0,1875.0,1928.0,1972.0,-3000.0,2016.0,1995.0,1981.0,2005.0,1550.0,2005.0,1985.0,1907.0,1975.0,2015.0,1915.0,1644.0,1800.0,1893.0,1827.0,1930.0,1908.0,1705.0,1850.0,1867.0,1975.0,1950.0,1925.0,1805.0,1939.0,1905.0,2014.0,1995.0,1910.0,1859.0,1700.0,1927.0,1985.0,1997.0,1910.0,1985.0,2013.0,1975.0,2010.0,1995.0,1985.0,1985.0,1985.0,2016.0,1909.0,1996.0,1922.0,1867.0,2005.0,1921.0,1985.0,2005.0,1976.0,1935.0,1965.0,1963.0,1985.0,1925.0,1985.0,1985.0,1975.0,1960.0,1965.0,1985.0,1887.0,1985.0,2000.0,1875.0,1867.0,1985.0,1985.0,-200.0,1100.0,950.0,950.0,1725.0,300.0,1965.0,1980.0,1650.0,-2500.0,1836.0,1929.0,1970.0,1000.0,1900.0,1928.0,1633.0,2005.0,1985.0,1915.0,1950.0,1700.0,1500.0,1400.0,1995.0,2002.0,1930.0,2014.0,2005.0,1150.0,1982.0,2013.0,639.0,2010.0,1890.0,1890.0,1925.0,1900.0,1935.0,2003.0,1840.0,1925.0,1575.0,1960.0,1965.0,1983.0,2005.0,2020.0,2009.0,1876.0,1995.0,1985.0,1995.0,1975.0,1918.0,1886.0,1965.0,1973.0,1965.0,1971.0,1988.0,2003.0,1997.0,1931.0,2016.0,1960.0,1985.0,1995.0,1960.0,1965.0,1985.0,1983.0,1995.0,1979.0,1985.0,1831.0,1820.0,1884.0,1905.0,-1400.0,200.0,-350.0,-150.0,-1100.0,-450.0,400.0,2008.0,1996.0,1795.0,1800.0,1899.0,1875.0,1928.0,1968.0,1925.0,1853.0,1985.0,1995.0,1091.0,1995.0,1867.0,1925.0,1950.0,2013.0,2013.0,1931.0,1859.0,900.0,2003.0,2002.0,2005.0,1965.0,1929.0,2010.0,1975.0,1986.0,1985.0,2005.0,1995.0,1985.0,1887.0,1886.0,1948.0,1800.0,1986.0,1904.0,1985.0,1976.0,1985.0,2015.0,1965.0,1950.0,1985.0,2015.0,2000.0,1633.0,1833.0,-100.0,1113.0,1575.0,-300.0,-400.0,2005.0,1975.0,400.0,100.0,500.0,1925.0,1775.0,1845.0,1990.0,1995.0,2024.0,1900.0,2008.0,1975.0,1985.0,1695.0,1991.0,1985.0,1925.0,1972.0,1985.0,1965.0,1875.0,1921.0,2012.0,1971.0,1900.0,1910.0,1970.0,1975.0,1975.0,1965.0,2015.0,1985.0,-1400.0,1756.0,1100.0,100.0,700.0,1150.0,700.0,1875.0,1750.0,1935.0,1911.0,1500.0,2005.0,1985.0,2019.0,1965.0,1499.0,1857.0,1975.0,1995.0,2011.0,2005.0,1985.0,1975.0,1990.0,1965.0,1985.0,1995.0,1985.0,-400.0,-200.0,-300.0,900.0,1453.0,900.0,500.0,20.0,500.0,1300.0,1924.0,1940.0,1995.0,-300.0,1100.0,1905.0,1975.0,1995.0,625.0,1975.0,1970.0,1932.0,1963.0,1983.0,2015.0,1200.0,-100.0,1000.0,1450.0,1500.0,1500.0,1200.0,1900.0,2005.0,1999.0,1600.0,-850.0,2000.0,1995.0,1770.0,2005.0,2010.0,1974.0,1777.0,2000.0,-200.0,1100.0,2005.0,2005.0,1985.0,1985.0,1985.0,2001.0,1898.0,2017.0,1987.0,1995.0,700.0,1990.0,1985.0,1995.0,1985.0,1925.0,1997.0,1995.0,1995.0,1950.0,1100.0,1200.0,1998.0,1995.0,1925.0,1995.0,1500.0,1965.0,2022.0,2005.0]},"kind":"numeric","n":27037,"n_null":25969,"n_unique":269,"null_rate":0.9604985760254466,"stats":{"iqr":129.5,"kurtosis":12.315434020317383,"max":2024.0,"mean":1700.0159176029963,"median":1960.0,"min":-3100.0,"n_outliers":170,"outlier_rate":0.15917602996254682,"q1":1857.75,"q3":1987.25,"skew":-3.345161936161892,"std":699.3357785376286,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"68.1% null"},{"code":"imbalance","level":"warn","message":"top value is 97.9% of rows"}],"column":"Is_Isolate","extras":{"singletons":0,"top_values":[["False",8430],["True",182]]},"kind":"categorical","n":27037,"n_null":18425,"n_unique":2,"null_rate":0.6814735362651182,"stats":{"cardinality":2,"entropy":0.14775728775311614,"entropy_ratio":0.14775728775311614,"top_rate":0.9788666976312123,"top_value":"False"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["Level","Macroarea","Family_ID","Countries","Is_Isolate","Last_Year_Of_Documentation","First_Year_Of_Documentation","Latitude","Longitude","Name"],"featured_charts":[{"caption":"Roughly half of all entries are dialects, with languages and families making up the rest.","column":"Level","kind":"donut"},{"caption":"Eurasia and Africa dominate, together accounting for over half of the entries.","column":"Macroarea","kind":"bar"},{"caption":"A handful of large families (atla1278, aust1307, indo1319) carry most of the rows out of 297 families.","column":"Family_ID","kind":"bar"},{"caption":"Shows the geographic spread of languages, skewed toward equatorial and northern latitudes.","column":"Latitude","kind":"histogram"},{"caption":"Papua New Guinea, Indonesia, and Nigeria lead \u2014 useful context for where linguistic diversity concentrates.","column":"Countries","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a Glottolog catalogue of 27,037 language entries with 15 columns covering identifiers (Glottocode, ISO codes), geographic info (Latitude, Longitude, Countries, Macroarea), classification (Family_ID, Level, Is_Isolate), and documentation years. The Level column shows the catalogue is split across dialects (about 50%), languages, and families, while Macroarea is dominated by Eurasia and Africa with Papunesia close behind. The Family_ID distribution is heavily concentrated in a few large families (atla1278, aust1307, indo1319) out of 297 total. Note that documentation-year fields are almost entirely null (Last_Year ~96%, First_Year ~99%) and Is_Isolate is missing for ~68% of rows, so those columns are unreliable for analysis. The geographic coordinates are nearly complete and would support mapping work.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.word_mean","stats.one_word_rate","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Fixed-length 8-character single-token codes (len_min=len_max=8, word_mean=1.0) that are perfectly unique across all 27037 rows with zero nulls or duplicates. Sample values like 'cent1996' and 'chan1318' look like 4-letter prefix plus 4-digit suffix codes, consistent with Glottolog-style language identifiers rather than arbitrary surrogate keys.","role":"identifier","scope":"column","target":"ID","treatment":"Use as the row key for joins; exclude from modelling features."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_mean","stats.len_median","stats.len_max","stats.one_word_rate","stats.word_mean","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"`Name` is a fully unique short label column (27037 rows, 27037 distinct values, no nulls or duplicates), with a mean length of 10.4 characters and 66.7% of entries being a single word. The vocabulary of 18126 tokens skews toward geographic and topical descriptors \u2014 'nuclear', 'central', 'western', 'northern', 'eastern', 'southern' lead the frequency list \u2014 suggesting these are entity or category names rather than personal names. The combination of perfect uniqueness and short, often one-word values flags it as an identifier-like label.","role":"identifier","scope":"column","target":"Name","treatment":"Treat as a unique key; drop from modelling features or use only for joins and display."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic macroarea label for each record, almost certainly tagging languages or populations by world region. Six canonical regions dominate (Eurasia 8060, Africa 8020, Papunesia 6326, North America 1782, South America 1524, Australia 919), but cardinality is 30 because some rows carry semicolon-joined multi-region strings like 'Africa;Eurasia' (29) or even all six regions concatenated (17). Null rate is low at 0.83% and entropy_ratio of 0.46 reflects the heavy Eurasia/Africa/Papunesia concentration (top_rate 0.30).","role":"feature","scope":"column","target":"Macroarea","treatment":"Split the semicolon-delimited compound values into a multi-hot encoding over the six base regions before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","skew","std","null_rate","n_outliers","outlier_rate","iqr"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic latitude in decimal degrees, spanning -55.2748 to 73.1354, which fits the global range. The distribution is mildly right-skewed (0.42) with a median of 8.52697, consistent with land mass concentrated in the Northern Hemisphere. About 1.77% of rows are null and only 48 outliers (0.18%) sit outside the IQR fence, so the column is largely clean.","role":"feature","scope":"column","target":"Latitude","treatment":"Pair with longitude for geospatial features; impute or drop the 1.77% nulls before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","std","iqr","skew","kurtosis","n","n_unique","null_rate","n_outliers","outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This column captures geographic longitude, with values spanning -178.785 to 179.43 \u2014 essentially the full -180/180 globe. The distribution is wide (std 74.05, IQR 110.17) and slightly left-skewed (-0.47), with 13,203 unique values across 27,037 rows and a 1.77% null rate. Only 51 outliers (0.19%) flag, which is expected since longitude is bounded.","role":"feature","scope":"column","target":"Longitude","treatment":"Pair with latitude for geospatial features; consider sin/cos encoding to handle the -180/180 wraparound."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Glottocodes\u2014the standard 8-character identifiers used by the Glottolog language catalogue (e.g. 'cent1996', 'chan1318'). Every one of the 27,037 rows is unique with a fixed length of 8 and exactly one word, and there are no nulls or duplicates, so it functions as a primary key for languages/dialects. Nothing surprising in the distribution; it behaves exactly like a clean ID field.","role":"identifier","scope":"column","target":"Glottocode","treatment":"Use as a primary key to left-join against Glottolog metadata; do not feed into models as a feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.vocab_size","stats.n_duplicates","stats.n_empty","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds ISO 639-3 language codes \u2014 exactly 3 characters, one word, every value lowercase alphabetic. It is 69.75% null and the 8,180 unique codes across 27,037 rows suggest each code maps to a distinct language entry, consistent with a language-registry foreign key rather than a feature. No duplicates or empties among the populated rows.","role":"foreign_key","scope":"column","target":"ISO639P3code","treatment":"Treat as a language-code key; left-join to an ISO 639-3 reference table and encode missingness explicitly."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a low-cardinality categorical taxonomy field with exactly 3 levels: dialect, language, and family. Distribution is uneven but not pathological \u2014 dialect dominates at 50.3% (13,593 of 27,037), followed by language (8,612) and family (4,832), yielding entropy ratio 0.93. No nulls, suggesting a curated classification scheme likely from a linguistic dataset.","role":"label","scope":"column","target":"Level","treatment":"one-hot or ordinal encode for modelling; safe to use as a stratification key."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Two-letter ISO country codes, with 737 distinct values across 27,037 rows. Two-thirds of rows are null (null_rate 0.6641), and even among present values the distribution is broad (entropy_ratio 0.69) with PG topping out at just 9.97%. The presence of 737 distinct codes is surprising since ISO 3166-1 alpha-2 only defines ~250, suggesting multi-country concatenations or non-standard codes mixed in.","role":"feature","scope":"column","target":"Countries","treatment":"Normalize/split non-standard codes, add an explicit missing indicator, then group rare levels before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Family_ID holds Glottolog-style language family codes (e.g., atla1278, aust1307, indo1319), making it a categorical grouping key across 27,037 rows with 297 distinct families. The distribution is heavily skewed: the top family atla1278 alone covers 18.27% of rows, and the top three account for the bulk of the data, yielding an entropy ratio of 0.60. Null rate is low at 1.59%.","role":"foreign_key","scope":"column","target":"Family_ID","treatment":"left-join on this id to a language-family reference, or group-by for stratified analysis."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.duplicate_rate","stats.vocab_size","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds 8-character single-token codes (len_min/max=8, one_word_rate=1.0) that look like Glottolog language identifiers (e.g., 'nucl1643', 'stan1293'). With 3110 unique values across 27037 rows and a 0.7712 duplicate rate, it behaves like a categorical foreign key into a language registry. Note that 0.4972 of rows are null, so nearly half the dataset has no language assignment.","role":"foreign_key","scope":"column","target":"Language_ID","treatment":"Left-join on this id to a language reference table; treat missing as a separate category."},{"confidence":"high","critiques":[],"evidence_keys":["len_mean","len_min","len_max","one_word_rate","n_unique","null_rate","duplicate_rate","n_duplicates","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds ISO 639-3 three-letter language codes: every value is exactly 3 characters and one word (len_mean 3.0, one_word_rate 1.0), with 8180 unique codes led by jpn (120), eng (115), and pes (64). Notable signals: 21.28% nulls and a 61.57% duplicate rate (13103 duplicates), so coverage is partial but the field is a clean categorical.","role":"feature","scope":"column","target":"Closest_ISO369P3code","treatment":"Treat as a categorical language code; impute or flag the 21% nulls and join to an ISO 639-3 reference table for names/families."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.min","stats.max","stats.median","stats.q1","stats.q3","stats.skew"],"model":"anthropic:claude-opus-4-7","narrative":"This column appears to record the earliest year an item was documented, spanning from -2100 (BCE) to 1932 CE with a median of 711. Severe nullity is the headline: 99.2% of the 27,037 rows are missing, leaving only ~215 populated values across 114 unique years. The wide IQR (-300 to 1710.5) and negative skew indicate a long tail into antiquity rather than a modern-era concentration.","role":"metadata","scope":"column","target":"First_Year_Of_Documentation","treatment":"Drop or treat as a sparse indicator; too null to use as a feature without heavy imputation."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.min","stats.max","stats.median","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be the last year a record was documented, populated for only ~4% of rows (null_rate 0.9605). Values span an implausible range from -3100 to 2024 with a median of 1960, and the heavy left skew (-3.35) plus kurtosis of 12.3 yields 170 outliers (15.9% of non-null entries). The negative minimum suggests BCE-style dating or sentinel values rather than clean calendar years.","role":"timestamp","scope":"column","target":"Last_Year_Of_Documentation","treatment":"Validate or clip the year range and treat as mostly-missing; impute or flag presence rather than relying on the raw value."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.cardinality","stats.top_rate","stats.top_value","stats.entropy","top_values","n"],"model":"anthropic:claude-opus-4-7","narrative":"Boolean flag indicating isolate status, present on only ~32% of the 27,037 rows (null_rate 0.6815). Among non-null values, 'False' dominates at 0.9789 with just 182 'True' cases, yielding very low entropy (0.148).","role":"feature","scope":"column","target":"Is_Isolate","treatment":"Impute or add a missingness indicator; near-constant, so expect little predictive lift."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4744,"prompt_tokens":19531,"total_tokens":24275}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:05:46+00:00","mode":"full","row_count":27037,"sampled_rows":27037,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/cache/glottolog_languages.parquet"},"notes":[],"saturn_version":"0.2.0","schema":{"Closest_ISO369P3code":"text","Countries":"categorical","Family_ID":"categorical","First_Year_Of_Documentation":"numeric","Glottocode":"text","ID":"text","ISO639P3code":"text","Is_Isolate":"boolean","Language_ID":"text","Last_Year_Of_Documentation":"numeric","Latitude":"numeric","Level":"categorical","Longitude":"numeric","Macroarea":"categorical","Name":"text"}}
