{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"glottocode","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19401,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["abaz1241","sand1275","texm1235","stan1298","kuik1246","yait1239","kyon1245","tuyu1244","subi1246","apah1238","niha1238","blaf1238","guil1240","acad1238","hara1259","daoo1238","bela1254","taid1252","zaac1239","kizi1242","huac1245","arab1268","chih1238","bert1249","patt1251","sara1315","mofu1251","ddra1238","bana1282","yage1238","nung1288","nyor1246","nyon1242","khar1290","kama1357","kuuk1238","ward1243","mane1261","lalo1239","mayo1276","sout2729","vute1245","guai1246","vols1237","west2347","yany1243","jiee1239","khas1272","chan1314","tonk1249"],"top_values":[],"top_words":[["aala1237",1],["aant1238",1],["aari1239",1],["aari1240",1],["aasa1238",1],["aata1238",1],["abaa1238",1],["abab1239",1],["abad1241",1],["abad1240",1],["abag1245",1],["abai1239",1],["abai1240",1],["abai1241",1],["abak1242",1],["abak1243",1],["abal1238",1],["abam1239",1],["aban1243",1],["aban1242",1],["abau1245",1],["abaw1238",1],["abay1238",1],["abaz1241",1],["abbe1238",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19401,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":19401,"n_null":0,"n_unique":19401,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":93.30200000000005,"url_rate":0.0,"vocab_size":19401,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"71.7% rows are a single word"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[52,455,4355,2890,3995,1115,813,1417,724,1264,439,524,214,175,343,143,173,58,86,26,26,43,12,24,11,8,3,1,2,1,3,1,2,1,0,0,0,0,0,2],"edges":[1.0,2.425,3.85,5.275,6.7,8.125,9.55,10.975,12.4,13.825000000000001,15.25,16.675,18.1,19.525000000000002,20.95,22.375,23.8,25.225,26.650000000000002,28.075,29.5,30.925,32.35,33.775,35.2,36.625,38.050000000000004,39.475,40.9,42.325,43.75,45.175000000000004,46.6,48.025,49.45,50.875,52.300000000000004,53.725,55.15,56.575,58.0]},"near_unique":true,"sample":["Abaza","Sandiwar","Texmelucan Zapotec","Standard Braj of Mathura","Kuik\u00faro-Kalap\u00e1lo","Yaitepec Chatino","Kyon","Tuyuca","Subiya","Apahapsili","Nihali","Mblafe-R\u00e1nmo","Guilia","Acadian","Harava","Dao","Belarusian","Tai Do","Zaachila Zapotec","Kiziere","Huachipaire","Arabela","Chihuahua Pima Bajo","Berti","Pattinjo","Sarar","Mofu-Gudur Sign Language","Ddralo","Banapari","Yage","N\u00f9ng Q\u00fay Rin","Nyoro","Nyongwe","Khartamche","Kamara","Kuuku-Ya'u","Warduji","Ma\u00f1egu","Lalomerui","Mayo-Plata","Southern Pumi","Vute Mbanjo","Guaicaro","Volscian","West Samogitian","Yanyuwa","Jie","Khaskhong","Changnoi","Tonkawa"],"top_values":[],"top_words":[["nuclear",341],["sign",204],["language",198],["central",182],["southern",181],["western",181],["northern",173],["eastern",163],["north",110],["south",106],["of",87],["east",76],["san",75],["arabic",74],["west",72],["zapotec",70],["new",62],["mixtec",59],["guinea)",54],["pidgin",54],["naga",54],["(papua",53],["old",47],["upper",47],["creole",42]],"vocab_skipped":null,"word_histogram":{"counts":[13908,0,0,4255,0,0,915,0,0,0,244,0,0,65,0,0,11,0,0,0,0,0,0,2,0,0,0,0,0,1],"edges":[1.0,1.3,1.6,1.9,2.2,2.5,2.8,3.1,3.4,3.6999999999999997,4.0,4.3,4.6,4.9,5.2,5.5,5.8,6.1,6.3999999999999995,6.7,7.0,7.3,7.6,7.8999999999999995,8.2,8.5,8.8,9.1,9.4,9.7,10.0]}},"kind":"text","n":19401,"n_null":0,"n_unique":19401,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":58,"len_mean":9.211483944126591,"len_median":7.0,"len_min":1,"len_p95":20.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.7168702644193599,"readability_flesch_mean":60.53085000000003,"url_rate":0.0,"vocab_size":17861,"word_mean":1.3687954229163446,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"null_rate","level":"warn","message":"59.2% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"isocodes","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7922,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[2.5,2.525,2.55,2.575,2.6,2.625,2.65,2.675,2.7,2.725,2.75,2.775,2.8,2.825,2.85,2.875,2.9,2.925,2.95,2.975,3.0,3.025,3.05,3.075,3.1,3.125,3.15,3.175,3.2,3.225,3.25,3.275,3.3,3.325,3.35,3.375,3.4,3.425,3.45,3.475,3.5]},"near_unique":true,"sample":["aba","sgr","thd","nbl","gvc","yad","krh","tuk","sgz","aoa","ndg","mpj","gsm","yif","hmu","cuq","zbt","tlr","yoy","kiv","hot","ans","cbk","bel","pay","svs","zmf","dec","bmx","yaf","nnj","tyh","kkc","kzh","kmh","kfy","wap","xdy","kya","zmm","tla","wbb","goz","vun","wik","yaq","jee","kpn","tuf","xtg"],"top_values":[],"top_words":[["aiw",1],["aay",1],["aas",1],["kbt",1],["abg",1],["abf",1],["abm",1],["aau",1],["abq",1],["aba",1],["abp",1],["abi",1],["bsa",1],["axb",1],["abk",1],["aob",1],["abo",1],["abr",1],["ado",1],["abn",1],["aah",1],["abz",1],["kgr",1],["abu",1],["mgj",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7922,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":19401,"n_null":11479,"n_unique":7922,"null_rate":0.5916705324467811,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":3,"len_mean":3.0,"len_median":3.0,"len_min":3,"len_p95":3.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":118.68200000000003,"url_rate":0.0,"vocab_size":7922,"word_mean":1.0,"word_median":1.0}},{"alerts":[],"column":"level","extras":{"singletons":0,"top_values":[["dialect",10920],["language",8481]]},"kind":"categorical","n":19401,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.9885693839651513,"entropy_ratio":0.9885693839651513,"top_rate":0.5628575846605846,"top_value":"dialect"}},{"alerts":[],"column":"macroarea","extras":{"singletons":0,"top_values":[["Africa",5955],["Eurasia",5028],["Papunesia",4847],["South America",1095],["North America",1035],["Australia",602]]},"kind":"categorical","n":19401,"n_null":839,"n_unique":6,"null_rate":0.043245193546724396,"stats":{"cardinality":6,"entropy":2.175977454253035,"entropy_ratio":0.8417829866568578,"top_rate":0.32081672233595515,"top_value":"Africa"}},{"alerts":[{"code":"null_rate","level":"warn","message":"59.1% null"}],"column":"latitude","extras":{"histogram":{"counts":[5,1,1,4,7,16,29,26,47,77,125,141,280,256,495,788,681,378,468,663,710,303,384,233,318,371,167,143,178,113,138,79,77,76,46,21,41,23,14,6],"edges":[-55.2748,-52.064544999999995,-48.85429,-45.644035,-42.43378,-39.223524999999995,-36.01327,-32.803015,-29.59276,-26.382505,-23.17225,-19.961995,-16.751739999999998,-13.541484999999994,-10.331229999999998,-7.120975000000001,-3.9107199999999978,-0.7004649999999941,2.5097900000000024,5.720044999999999,8.930300000000003,12.140555000000006,15.350809999999996,18.561065,21.771320000000003,24.981575000000007,28.19183000000001,31.402085,34.61234,37.82259500000001,41.032849999999996,44.243105,47.45336,50.66361500000001,53.87387000000001,57.084125,60.294380000000004,63.50463500000001,66.71489,69.925145,73.1354]},"sample":[6.50546,7.64567,-5.88478,15.3307,-3.57026,-12.8322,3.19607,8.34986,5.22371,23.59,23.0,-10.21751,16.2707,-4.63762,-0.53751,25.6841,-4.98449,7.57714,42.4669,-2.08016,-6.72266,-6.65252,-6.24342,16.6633,9.62456,-8.41627,42.3966,-8.35714,7.32924,3.3466,3.34642,3.51211,-2.28732,4.19522,3.80765,25.0046,2.12787,2.35498,-5.26465,47.9232,10.1609,9.40519,53.2307,22.58346,1.17596,-10.0874,6.40701,10.4257,20.8542,11.4839,16.3071,19.1512,1.79156,15.2315,-5.8626,24.8484,-3.48707,-17.927,4.456045,4.60791,-9.13618,24.1025,14.43,-16.41653,-4.03527,17.2239,-21.3777,17.7248,29.6869,9.00924,-6.10712,7.77188,1.267,-20.8489,-5.80391,4.88038,32.7415,7.004574,37.0972,-5.1149,9.359643,6.05387,42.6864,15.6078,41.869481,-13.5,8.15211,-17.3428,9.1793,48.0707,38.3285,8.86601,-28.289642,-14.990954,6.16926,10.209,35.1674,-5.53634,55.8257,-5.70892,-4.837565,27.548,26.6283,-6.32531,27.6544,-1.57408,-21.0909,30.0368,9.71107,7.78428,6.75763,5.54442,2.79312,-17.052,-24.9202,35.7267,58.947,10.425,11.5764,-9.14409,6.27645,-9.73317,2.98585,19.4991,-6.56,50.8154,-8.02035,5.39619,27.253,6.57645,24.1666666667,-15.8247,-17.726974,-15.4296,6.13837,8.21291,11.8081,-3.68407,7.57377,-10.2741,48.0751,9.951721,-2.49047,22.67753,8.486,-54.7,33.3708,-10.2647,-1.52891,-6.25217,31.016,-3.12875,20.6189,-5.97212,-9.70693,0.97588,4.49119,63.4837,5.21188,-1.9915,-7.96772,31.784,40.0511,5.81898,-12.80431,17.0269,7.30076,-7.908,10.5426,11.6089,11.5034,16.8912,10.1675,37.0529,23.83,14.0546,-1.78077,-8.16666,-13.5393,8.44163,2.03781,36.8044,-5.74133,-2.22799,-2.09315,-4.50417,49.0,9.79704,21.0793,4.16535,31.83,10.7321,35.1741,5.92596,-8.37424,41.263356,8.67039,8.04788,-30.4207,-6.39399,63.8977,-4.06603,-6.46414,6.64027,1.07554,36.0451,6.423,18.0769,19.2043,-14.201,-8.2384,31.8339,23.7562,-20.4035,-5.34485,43.0,6.53881,16.4708,-1.50636,10.7074,4.88273,10.1181,-8.75779,2.39354,-5.52462,-4.46971,-6.45512,27.6325,-13.2,27.6237,11.0243,50.8021,-13.5043,9.158,-17.6042,31.8202,8.52193,27.2703,51.015,57.566,0.85544,1.95461,9.40172,0.88267,-0.02586,9.4066,-33.9470874,-25.54522,15.9497,0.63989,67.2759,-5.63017,-15.4316,-9.23498,-4.50212,-10.4673,17.913525,-5.49975,-6.35558,-5.08137,9.068012,9.85,6.51218,-26.789,-13.8754,12.8438,-16.7256,-2.00717,13.0333,-11.554,-13.175,-16.9402,14.1259,2.79827,23.546095,5.13981,-0.57181,4.50635,10.9276,41.1174,-6.56072,-31.79729,27.418,6.43213,10.1474,44.556,-1.4313,-4.57497,-19.811,19.71,11.9765,-1.44105,25.97,9.31975,8.60006,22.848716,-36.2301,7.6302,-7.54628,6.73205,2.94789,-8.28776,27.688,52.9658,-20.6791,41.100052,-1.58463,-11.6129,9.67581,-5.43194,4.5893,-5.93647,14.3253,40.4102,23.9386,55.1928,27.296,8.7283,32.2766,26.4348,36.3167,5.9503,10.7025,-6.40741,8.95481,9.18404,15.8909,3.4997,14.8817,7.66745,66.31,35.2873,21.0,-3.81041,7.29515,32.0,-6.45912,-7.85988,4.89504,-15.3,24.5271,-25.60551,17.0411,-3.74711,-3.25404,7.82186,9.62092,35.5096,37.1390706,-3.05368,0.91716,40.955,-7.01666,13.8694,1.38035,2.65327,3.42467,36.9471,12.3231,-5.83155,14.6066,-8.22481,46.5538,-4.58919,-26.7329,-12.713,21.6176,10.571,41.170545,-1.44174,-7.24959,0.3652,-5.64812,-2.85653,35.5457,8.19236,16.3514,-4.4376,18.1999,18.74,21.6402,-10.3302,5.84903,-1.7094,40.748,23.614,0.51111,-8.29105,-11.9396,11.1285,-3.6111,28.9251,35.7776,-3.36682,21.5995,-2.97663,31.6862,-0.38398,8.483,-6.23022,-0.24587,42.171599,-7.0,-3.01976,41.89,16.9081,5.328596,-25.96551,17.21219,-9.30995,6.82744,37.327706,34.7873,-19.57953,32.497,-10.6606,11.2473,25.9996,-14.970047,22.5908,-17.6485,-4.05047,13.6555,11.4926,63.4046,5.49808,-3.43455,-2.71805,-0.02668,-8.58405,-10.8425,11.7176,6.03205,10.8828,-3.74322,6.35815,42.98,0.42569,13.6331,-6.43051,1.0539,14.40749,-16.9575,-3.87428,14.2584,9.115793,16.6384,40.9,-4.94407,40.2612,-14.712,-15.898299,21.3624,-7.52674,13.2436,-4.99312,46.8191,6.13603,-13.8627,-5.17834,0.50714,-8.81936,11.8634,-8.34522,-1.51945,-18.6529,-3.07263,-3.23021,25.1154,3.133111,16.2495,-8.49669,39.0548,-13.8691,-3.20238,-1.62746,7.37957,25.7707,-1.73938,-31.7612,15.9748,34.228154,14.3103,24.3401,21.7895612,-3.55,-15.7621,-29.49049805,-11.1086,-25.4446,-3.57371,8.97395,-1.68798,-4.86227,-11.3704,60.87784,17.0311,51.4981,31.066667,35.0056]},"kind":"numeric","n":19401,"n_null":11472,"n_unique":7786,"null_rate":0.5913097263027679,"stats":{"iqr":24.411569999999998,"kurtosis":0.3048040164132768,"max":73.1354,"mean":8.16420867839847,"median":6.2918,"min":-55.2748,"n_outliers":135,"outlier_rate":0.0170261066969353,"q1":-5.13857,"q3":19.273,"skew":0.5425118320676289,"std":18.95536602027926,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"59.1% null"}],"column":"longitude","extras":{"histogram":{"counts":[13,4,10,11,10,17,123,47,78,280,59,235,218,150,60,40,0,4,105,275,443,751,322,429,228,126,35,79,210,207,269,454,239,497,316,598,667,122,186,12],"edges":[-178.785,-169.83272499999998,-160.88045,-151.928175,-142.9759,-134.02362499999998,-125.07135,-116.119075,-107.1668,-98.214525,-89.26225,-80.309975,-71.3577,-62.405424999999994,-53.453149999999994,-44.50087499999998,-35.54859999999999,-26.596325000000007,-17.644049999999993,-8.691774999999978,0.2605000000000075,9.212774999999993,18.165050000000008,27.117325000000022,36.06960000000001,45.021874999999994,53.97415000000001,62.92642500000002,71.87870000000001,80.830975,89.78325000000004,98.73552500000002,107.68780000000001,116.640075,125.59234999999998,134.54462500000002,143.4969,152.449175,161.40145000000004,170.35372500000003,179.306]},"sample":[0.727155,6.13085,141.532,-91.3153,141.784,-60.9716,-55.6294,8.52085,-4.42647,101.933,103.0,123.96373,-96.4003,144.991,-72.0869,93.8668,144.801,34.0267,-0.00919,132.983,146.991,146.919,150.427,-96.8575,9.95,160.721,46.1041,115.075,23.0108,28.04453,26.861,114.482,137.054,31.0723,10.4213,121.857,98.2517,99.1761,145.611,13.246,10.4204,7.59566,25.6038,74.59014,110.254,148.835,5.70426,-1.67332,99.9862,11.2322,119.853,83.8141,26.0814,120.164,144.086,106.187,142.495,122.233,114.429505,114.517,125.216,107.711,-3.49,167.73124,143.015,121.091,165.076,-97.7637,-91.6133,9.16071,-45.1299,126.373,-69.91,165.13,138.471,-7.88667,-2.40872,118.455978,43.5027,-75.6778,-82.568207,-0.10955,-124.376,-91.2869,47.41111,-66.3,-0.2755,37.1231,124.721,-123.511,-122.994,10.816,152.151281,-39.43634,8.23592,-63.018,33.0943,145.457,12.3187,146.572,-70.873718,85.0074,88.3798,134.638,90.648,114.979,-61.7179,75.6702,-11.7496,2.27039,2.64908,8.90959,36.7619,168.399,-57.8892,50.3677,24.566,16.997,39.5605,148.59,37.23808,149.854,28.7911,96.9492,-43.0,7.30478,156.552,10.352,68.363,37.05,-110.3,-61.6934,-39.61441,141.635,-6.51291,6.69128,5.07897,37.6827,37.7577,150.256,-122.029,15.590037,24.2299,102.851673333,7.168,-65.5,64.5105,123.377,145.075,-64.70047,77.144,129.387,-97.9347,142.963,147.562,-53.3269,7.23558,-19.0212,8.0831,34.3589,129.651,77.064,-95.5213,5.5862,-64.35748,-89.8734,5.84326,112.378,-5.21959,92.658,19.0954,-95.8484,-3.70994,49.7603,69.0,-9.96404,132.312,-37.5,131.098,77.2465,112.383,-96.8571,147.917,138.157,33.5326,140.443,117.0,-4.24384,101.327,114.367,-96.5,31.5399,73.3207,7.96202,138.0675,-123.310776,-10.9214,-8.89972,19.7711,155.742,166.695,139.645,145.66,1.72346,-69.5705,44.6365,-10.5284,105.31,83.5859,141.709,125.004,77.3772,120.976,164.193,123.154,47.0,10.2073,79.5153,34.5049,11.4953,10.3453,16.5129,160.804,10.0812,144.119,152.678,120.483,90.3336,-60.5,98.4694,13.7362,4.31312,167.332,15.81149,168.202,36.0829,8.35562,87.8859,5.87705,22.0262,124.117,127.772,-3.47051,24.6862,103.548,11.0511,141.5854211,20.588827,-95.6876,99.74549,17.8057,146.407,167.241,33.9826,145.096,40.0274,105.52746,145.757,146.576,144.806,8.73608,76.97,18.7935,144.892,129.844,124.002,22.5983,139.04,-86.0,35.0056,-55.869,-40.7891,-84.3439,27.8282,105.516304,9.92766,24.0443,10.9924,9.81365,22.3782,147.137,-67.569581,94.69,10.2541,98.6765,-122.857,120.009,144.988,34.613,93.995,15.0373,149.624,102.598,16.1282,17.2691,108.366165,140.158,-1.30881,38.958,11.11047,-56.0182,124.192,96.349,139.206,164.797,-123.179627,112.794,43.36,13.8266,141.158,10.9157,140.068,102.986,-120.645,121.59,-77.7505,95.606,-10.4063,103.192,-107.379,-119.721,15.2569,1.27435,146.875,8.62164,5.40031,101.515,33.73651,104.33,15.6675,67.09,-120.669,85.0,21.4223,6.24719,54.0,145.765,145.751,9.15209,14.35,82.251,-57.08816,122.37,152.493,128.779,12.2518,11.044,71.8421,111.9656111,128.197,124.712,-89.1881,-38.0,-13.4482,116.089,113.93,116.443,-94.6634,75.6265,145.937,107.859,124.36,9.92812,144.689,32.6167,142.569,98.0915,8.21883,24.44567,31.3191,31.7614,34.0335,146.078,17.9669,102.819,11.7366,-94.7001,142.369,-96.8833,-97.5936,100.099,-76.2753,125.499,119.349,8.39332,46.4714,128.086,125.078,27.5144,-4.58203,128.068,83.8064,51.4363,129.243,98.0342,31.0681,78.3865,114.693,-13.235,155.423,33.8997,47.332115,156.83,142.049,14.7,105.001,132.221282,28.11864,-97.01454,-76.9854,93.8042,-118.371838,-106.661,169.35603,77.116,150.21,122.551,94.4235,168.057941,8.73169,-149.45,144.506,45.6411,18.2295,-143.338,115.553,139.061,150.923,-76.5761,125.583,122.963,29.1632,35.0823,30.1567,140.591,22.7362,89.18,120.89,-3.82228,30.0807,124.652,-3.10006,24.3751,140.803,13.1571,10.029445,-92.2786,47.7236,145.514,65.3864,166.606,167.309973,73.8652,36.9316,-61.1963,30.5699,14.8451,11.8839,131.893,145.726,-69.5032,160.741,29.1228,32.4375,12.8951,137.608,142.012,138.456,91.6266,-61.210785,-97.3244,123.298,99.3112,141.532,144.089,135.484,143.916,102.367,142.851,20.1733,-96.1927,108.960571,-89.8235,123.801,111.966667,142.527,136.055,140.9591946,-73.3087,115.58,141.88,9.78757,138.028,12.6362,-72.0409,89.74791,-97.2293,3.85428,81.3125,-108.782]},"kind":"numeric","n":19401,"n_null":11472,"n_unique":7745,"null_rate":0.5913097263027679,"stats":{"iqr":116.96426000000001,"kurtosis":-0.776480086053744,"max":179.306,"mean":51.216551346678315,"median":47.565486,"min":-178.785,"n_outliers":13,"outlier_rate":0.0016395510152604363,"q1":7.17974,"q3":124.144,"skew":-0.4813559160272718,"std":81.14929416094796,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns","kinds"],"featured_charts":[{"caption":"Shows Africa, Eurasia, and Papunesia dominate while Australia is smallest \u2014 useful for understanding geographic coverage.","column":"macroarea","kind":"bar"},{"caption":"Reveals the dialect-vs-language split (about 56% dialects, 44% languages).","column":"level","kind":"donut"},{"caption":"Shows the latitudinal spread of languoids; note ~59% are null so the chart only reflects the geocoded subset.","column":"latitude","kind":"histogram"},{"caption":"Shows the bimodal east-west distribution of languoid locations across the globe.","column":"longitude","kind":"histogram"},{"caption":"Most names are short (median 7 characters, ~72% one word), but a long tail extends to 58 characters.","column":"name","kind":"length"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a Glottolog languoid catalogue with 19,401 rows and 7 columns covering identifiers (glottocode, isocodes, name), geographic coordinates (latitude, longitude), and classification fields (macroarea, level). The most striking feature is missingness: roughly 59% of rows lack ISO codes and coordinates, so any geographic or ISO-based analysis will only cover about 40% of entries. Worth a closer look first: the macroarea distribution (Africa leads at 32%, followed by Eurasia and Papunesia) and the level split between dialect (56%) and language (44%). The name field is mostly single words but contains recurring qualifiers like 'nuclear', 'sign', 'central', and 'southern' that hint at naming conventions worth exploring.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.duplicate_rate","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Glottocodes \u2014 fixed 8-character identifiers (len_min=len_max=8) that uniquely tag languages in the Glottolog catalogue. Every one of the 19,401 rows is unique (n_unique=19401, duplicate_rate=0.0) and single-token (one_word_rate=1.0), matching the canonical four-letters-plus-four-digits pattern visible in top_words like 'aala1237' and 'aari1239'. There are no nulls and no collisions, so this is a clean primary key rather than a feature.","role":"identifier","scope":"column","target":"glottocode","treatment":"Use as the primary key; left-join on this id rather than feeding it to a model."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_median","stats.one_word_rate","stats.word_mean","stats.vocab_size","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"The `name` column is a short text identifier, with all 19,401 values unique and no nulls or duplicates \u2014 effectively a primary key or label. Entries are terse (mean 9.2 chars, median 1 word) and 71.7% are single words, yet the top tokens (`nuclear`, `language`, `central`, `southern`, `western`) suggest these are entity or topic names rather than personal names. Vocabulary is wide (17,861 distinct words) for such short strings.","role":"identifier","scope":"column","target":"name","treatment":"Treat as a unique key; do not feature-engineer directly, but tokenize if used for matching or embedding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.vocab_size","stats.n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds 3-character ISO-style codes: every non-null value is exactly 3 characters and one word (len_min/len_max=3, one_word_rate=1.0). It is sparsely populated with a 0.5917 null rate, and of the 19401 rows there are 7922 distinct codes with no duplicates among the populated entries (vocab_size=7922, n_duplicates=0). The top_words sample (aiw, aay, aas, kbt\u2026) suggests ISO 639-3 language codes rather than country codes, each appearing only once.","role":"identifier","scope":"column","target":"isocodes","treatment":"Treat as a categorical/foreign key; left-join to an ISO code lookup and impute or flag the ~59% missing values."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"A binary categorical flag distinguishing 'dialect' (10,920 rows, 56.3%) from 'language' (8,481 rows). The split is fairly balanced, with entropy at 0.989 of the maximum, and there are no nulls across all 19,401 rows.","role":"label","scope":"column","target":"level","treatment":"One-hot or binary-encode for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical macro-region label with just 6 values covering the world's linguistic areas (Africa, Eurasia, Papunesia, South America, North America, Australia). Distribution is moderately balanced (entropy ratio 0.84) with Africa leading at 32.1% and a long tail in Australia at 602 rows; 4.32% are null. No single category dominates, but the Americas and Australia are markedly underrepresented relative to Africa and Eurasia.","role":"feature","scope":"column","target":"macroarea","treatment":"one-hot or target-encode; impute or flag the ~4% missing before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic latitude coordinates ranging from -55.2748 to 73.1354, consistent with a worldwide point dataset. Nearly 59% of rows are null, which dominates any spatial analysis, and the median of 6.29 with mean 8.16 hints at a slight northern-hemisphere lean despite moderate skew (0.54).","role":"feature","scope":"column","target":"latitude","treatment":"Pair with longitude for geospatial features; impute or filter the 59% nulls before mapping."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.min","stats.max","stats.median","stats.mean","stats.skew","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic longitude coordinate spanning the full globe (min -178.785, max 179.306) with median 47.565486, suggesting a worldwide dataset weighted toward Eurasia. The 59.13% null rate is the dominant concern\u2014most records lack location\u2014while only 13 outliers (0.16%) appear and skew is mild (-0.48).","role":"feature","scope":"column","target":"longitude","treatment":"Impute or flag the 59% missing values before any geospatial modelling; pair with latitude for joint use."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":2483,"prompt_tokens":8751,"total_tokens":11234}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:05:30+00:00","mode":"full","row_count":19401,"sampled_rows":19401,"seed":42,"source":"/home/coolhand/servers/diachronica/data_raw/glottolog_languoid.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"glottocode":"text","isocodes":"text","latitude":"numeric","level":"categorical","longitude":"numeric","macroarea":"categorical","name":"text"}}
