{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"glottocode","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19401,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["abaz1241","sand1275","texm1235","stan1298","kuik1246","yait1239","kyon1245","tuyu1244","subi1246","apah1238","niha1238","blaf1238","guil1240","acad1238","hara1259","daoo1238","bela1254","taid1252","zaac1239","kizi1242","huac1245","arab1268","chih1238","bert1249","patt1251","sara1315","mofu1251","ddra1238","bana1282","yage1238","nung1288","nyor1246","nyon1242","khar1290","kama1357","kuuk1238","ward1243","mane1261","lalo1239","mayo1276","sout2729","vute1245","guai1246","vols1237","west2347","yany1243","jiee1239","khas1272","chan1314","tonk1249"],"top_values":[],"top_words":[["aala1237",1],["aant1238",1],["aari1239",1],["aari1240",1],["aasa1238",1],["aata1238",1],["abaa1238",1],["abab1239",1],["abad1241",1],["abad1240",1],["abag1245",1],["abai1239",1],["abai1240",1],["abai1241",1],["abak1242",1],["abak1243",1],["abal1238",1],["abam1239",1],["aban1243",1],["aban1242",1],["abau1245",1],["abaw1238",1],["abay1238",1],["abaz1241",1],["abbe1238",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19401,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":19401,"n_null":0,"n_unique":19401,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":93.30200000000005,"url_rate":0.0,"vocab_size":19401,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"iso_639_3","extras":{},"kind":"unknown","n":19401,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"71.7% rows are a single word"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[52,455,4355,2890,3995,1115,813,1417,724,1264,439,524,214,175,343,143,173,58,86,26,26,43,12,24,11,8,3,1,2,1,3,1,2,1,0,0,0,0,0,2],"edges":[1.0,2.425,3.85,5.275,6.7,8.125,9.55,10.975,12.4,13.825000000000001,15.25,16.675,18.1,19.525000000000002,20.95,22.375,23.8,25.225,26.650000000000002,28.075,29.5,30.925,32.35,33.775,35.2,36.625,38.050000000000004,39.475,40.9,42.325,43.75,45.175000000000004,46.6,48.025,49.45,50.875,52.300000000000004,53.725,55.15,56.575,58.0]},"near_unique":true,"sample":["Abaza","Sandiwar","Texmelucan Zapotec","Standard Braj of Mathura","Kuik\u00faro-Kalap\u00e1lo","Yaitepec Chatino","Kyon","Tuyuca","Subiya","Apahapsili","Nihali","Mblafe-R\u00e1nmo","Guilia","Acadian","Harava","Dao","Belarusian","Tai Do","Zaachila Zapotec","Kiziere","Huachipaire","Arabela","Chihuahua Pima Bajo","Berti","Pattinjo","Sarar","Mofu-Gudur Sign Language","Ddralo","Banapari","Yage","N\u00f9ng Q\u00fay Rin","Nyoro","Nyongwe","Khartamche","Kamara","Kuuku-Ya'u","Warduji","Ma\u00f1egu","Lalomerui","Mayo-Plata","Southern Pumi","Vute Mbanjo","Guaicaro","Volscian","West Samogitian","Yanyuwa","Jie","Khaskhong","Changnoi","Tonkawa"],"top_values":[],"top_words":[["nuclear",341],["sign",204],["language",198],["central",182],["southern",181],["western",181],["northern",173],["eastern",163],["north",110],["south",106],["of",87],["east",76],["san",75],["arabic",74],["west",72],["zapotec",70],["new",62],["mixtec",59],["guinea)",54],["pidgin",54],["naga",54],["(papua",53],["old",47],["upper",47],["creole",42]],"vocab_skipped":null,"word_histogram":{"counts":[13908,0,0,4255,0,0,915,0,0,0,244,0,0,65,0,0,11,0,0,0,0,0,0,2,0,0,0,0,0,1],"edges":[1.0,1.3,1.6,1.9,2.2,2.5,2.8,3.1,3.4,3.6999999999999997,4.0,4.3,4.6,4.9,5.2,5.5,5.8,6.1,6.3999999999999995,6.7,7.0,7.3,7.6,7.8999999999999995,8.2,8.5,8.8,9.1,9.4,9.7,10.0]}},"kind":"text","n":19401,"n_null":0,"n_unique":19401,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":58,"len_mean":9.211483944126591,"len_median":7.0,"len_min":1,"len_p95":20.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.7168702644193599,"readability_flesch_mean":60.53085000000003,"url_rate":0.0,"vocab_size":17861,"word_mean":1.3687954229163446,"word_median":1.0}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"family_name","extras":{},"kind":"unknown","n":19401,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"family_glottocode","extras":{},"kind":"unknown","n":19401,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[],"column":"macroarea","extras":{"singletons":0,"top_values":[["Africa",5955],["Eurasia",5028],["Papunesia",4847],["South America",1095],["North America",1035],["Australia",602]]},"kind":"categorical","n":19401,"n_null":839,"n_unique":6,"null_rate":0.043245193546724396,"stats":{"cardinality":6,"entropy":2.175977454253035,"entropy_ratio":0.8417829866568578,"top_rate":0.32081672233595515,"top_value":"Africa"}},{"alerts":[{"code":"null_rate","level":"warn","message":"59.1% null"}],"column":"latitude","extras":{"histogram":{"counts":[5,1,1,4,7,16,29,26,47,77,125,141,280,256,495,788,681,378,468,663,710,303,384,233,318,371,167,143,178,113,138,79,77,76,46,21,41,23,14,6],"edges":[-55.2748,-52.064544999999995,-48.85429,-45.644035,-42.43378,-39.223524999999995,-36.01327,-32.803015,-29.59276,-26.382505,-23.17225,-19.961995,-16.751739999999998,-13.541484999999994,-10.331229999999998,-7.120975000000001,-3.9107199999999978,-0.7004649999999941,2.5097900000000024,5.720044999999999,8.930300000000003,12.140555000000006,15.350809999999996,18.561065,21.771320000000003,24.981575000000007,28.19183000000001,31.402085,34.61234,37.82259500000001,41.032849999999996,44.243105,47.45336,50.66361500000001,53.87387000000001,57.084125,60.294380000000004,63.50463500000001,66.71489,69.925145,73.1354]},"sample":[6.50546,7.64567,-5.88478,15.3307,-3.57026,-12.8322,3.19607,8.34986,5.22371,23.59,23.0,-10.21751,16.2707,-4.63762,-0.53751,25.6841,-4.98449,7.57714,42.4669,-2.08016,-6.72266,-6.65252,-6.24342,16.6633,9.62456,-8.41627,42.3966,-8.35714,7.32924,3.3466,3.34642,3.51211,-2.28732,4.19522,3.80765,25.0046,2.12787,2.35498,-5.26465,47.9232,10.1609,9.40519,53.2307,22.58346,1.17596,-10.0874,6.40701,10.4257,20.8542,11.4839,16.3071,19.1512,1.79156,15.2315,-5.8626,24.8484,-3.48707,-17.927,4.456045,4.60791,-9.13618,24.1025,14.43,-16.41653,-4.03527,17.2239,-21.3777,17.7248,29.6869,9.00924,-6.10712,7.77188,1.267,-20.8489,-5.80391,4.88038,32.7415,7.004574,37.0972,-5.1149,9.359643,6.05387,42.6864,15.6078,41.869481,-13.5,8.15211,-17.3428,9.1793,48.0707,38.3285,8.86601,-28.289642,-14.990954,6.16926,10.209,35.1674,-5.53634,55.8257,-5.70892,-4.837565,27.548,26.6283,-6.32531,27.6544,-1.57408,-21.0909,30.0368,9.71107,7.78428,6.75763,5.54442,2.79312,-17.052,-24.9202,35.7267,58.947,10.425,11.5764,-9.14409,6.27645,-9.73317,2.98585,19.4991,-6.56,50.8154,-8.02035,5.39619,27.253,6.57645,24.1666666667,-15.8247,-17.726974,-15.4296,6.13837,8.21291,11.8081,-3.68407,7.57377,-10.2741,48.0751,9.951721,-2.49047,22.67753,8.486,-54.7,33.3708,-10.2647,-1.52891,-6.25217,31.016,-3.12875,20.6189,-5.97212,-9.70693,0.97588,4.49119,63.4837,5.21188,-1.9915,-7.96772,31.784,40.0511,5.81898,-12.80431,17.0269,7.30076,-7.908,10.5426,11.6089,11.5034,16.8912,10.1675,37.0529,23.83,14.0546,-1.78077,-8.16666,-13.5393,8.44163,2.03781,36.8044,-5.74133,-2.22799,-2.09315,-4.50417,49.0,9.79704,21.0793,4.16535,31.83,10.7321,35.1741,5.92596,-8.37424,41.263356,8.67039,8.04788,-30.4207,-6.39399,63.8977,-4.06603,-6.46414,6.64027,1.07554,36.0451,6.423,18.0769,19.2043,-14.201,-8.2384,31.8339,23.7562,-20.4035,-5.34485,43.0,6.53881,16.4708,-1.50636,10.7074,4.88273,10.1181,-8.75779,2.39354,-5.52462,-4.46971,-6.45512,27.6325,-13.2,27.6237,11.0243,50.8021,-13.5043,9.158,-17.6042,31.8202,8.52193,27.2703,51.015,57.566,0.85544,1.95461,9.40172,0.88267,-0.02586,9.4066,-33.9470874,-25.54522,15.9497,0.63989,67.2759,-5.63017,-15.4316,-9.23498,-4.50212,-10.4673,17.913525,-5.49975,-6.35558,-5.08137,9.068012,9.85,6.51218,-26.789,-13.8754,12.8438,-16.7256,-2.00717,13.0333,-11.554,-13.175,-16.9402,14.1259,2.79827,23.546095,5.13981,-0.57181,4.50635,10.9276,41.1174,-6.56072,-31.79729,27.418,6.43213,10.1474,44.556,-1.4313,-4.57497,-19.811,19.71,11.9765,-1.44105,25.97,9.31975,8.60006,22.848716,-36.2301,7.6302,-7.54628,6.73205,2.94789,-8.28776,27.688,52.9658,-20.6791,41.100052,-1.58463,-11.6129,9.67581,-5.43194,4.5893,-5.93647,14.3253,40.4102,23.9386,55.1928,27.296,8.7283,32.2766,26.4348,36.3167,5.9503,10.7025,-6.40741,8.95481,9.18404,15.8909,3.4997,14.8817,7.66745,66.31,35.2873,21.0,-3.81041,7.29515,32.0,-6.45912,-7.85988,4.89504,-15.3,24.5271,-25.60551,17.0411,-3.74711,-3.25404,7.82186,9.62092,35.5096,37.1390706,-3.05368,0.91716,40.955,-7.01666,13.8694,1.38035,2.65327,3.42467,36.9471,12.3231,-5.83155,14.6066,-8.22481,46.5538,-4.58919,-26.7329,-12.713,21.6176,10.571,41.170545,-1.44174,-7.24959,0.3652,-5.64812,-2.85653,35.5457,8.19236,16.3514,-4.4376,18.1999,18.74,21.6402,-10.3302,5.84903,-1.7094,40.748,23.614,0.51111,-8.29105,-11.9396,11.1285,-3.6111,28.9251,35.7776,-3.36682,21.5995,-2.97663,31.6862,-0.38398,8.483,-6.23022,-0.24587,42.171599,-7.0,-3.01976,41.89,16.9081,5.328596,-25.96551,17.21219,-9.30995,6.82744,37.327706,34.7873,-19.57953,32.497,-10.6606,11.2473,25.9996,-14.970047,22.5908,-17.6485,-4.05047,13.6555,11.4926,63.4046,5.49808,-3.43455,-2.71805,-0.02668,-8.58405,-10.8425,11.7176,6.03205,10.8828,-3.74322,6.35815,42.98,0.42569,13.6331,-6.43051,1.0539,14.40749,-16.9575,-3.87428,14.2584,9.115793,16.6384,40.9,-4.94407,40.2612,-14.712,-15.898299,21.3624,-7.52674,13.2436,-4.99312,46.8191,6.13603,-13.8627,-5.17834,0.50714,-8.81936,11.8634,-8.34522,-1.51945,-18.6529,-3.07263,-3.23021,25.1154,3.133111,16.2495,-8.49669,39.0548,-13.8691,-3.20238,-1.62746,7.37957,25.7707,-1.73938,-31.7612,15.9748,34.228154,14.3103,24.3401,21.7895612,-3.55,-15.7621,-29.49049805,-11.1086,-25.4446,-3.57371,8.97395,-1.68798,-4.86227,-11.3704,60.87784,17.0311,51.4981,31.066667,35.0056]},"kind":"numeric","n":19401,"n_null":11472,"n_unique":7786,"null_rate":0.5913097263027679,"stats":{"iqr":24.411569999999998,"kurtosis":0.3048040164132768,"max":73.1354,"mean":8.16420867839847,"median":6.2918,"min":-55.2748,"n_outliers":135,"outlier_rate":0.0170261066969353,"q1":-5.13857,"q3":19.273,"skew":0.5425118320676289,"std":18.95536602027926,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"59.1% null"}],"column":"longitude","extras":{"histogram":{"counts":[13,4,10,11,10,17,123,47,78,280,59,235,218,150,60,40,0,4,105,275,443,751,322,429,228,126,35,79,210,207,269,454,239,497,316,598,667,122,186,12],"edges":[-178.785,-169.83272499999998,-160.88045,-151.928175,-142.9759,-134.02362499999998,-125.07135,-116.119075,-107.1668,-98.214525,-89.26225,-80.309975,-71.3577,-62.405424999999994,-53.453149999999994,-44.50087499999998,-35.54859999999999,-26.596325000000007,-17.644049999999993,-8.691774999999978,0.2605000000000075,9.212774999999993,18.165050000000008,27.117325000000022,36.06960000000001,45.021874999999994,53.97415000000001,62.92642500000002,71.87870000000001,80.830975,89.78325000000004,98.73552500000002,107.68780000000001,116.640075,125.59234999999998,134.54462500000002,143.4969,152.449175,161.40145000000004,170.35372500000003,179.306]},"sample":[0.727155,6.13085,141.532,-91.3153,141.784,-60.9716,-55.6294,8.52085,-4.42647,101.933,103.0,123.96373,-96.4003,144.991,-72.0869,93.8668,144.801,34.0267,-0.00919,132.983,146.991,146.919,150.427,-96.8575,9.95,160.721,46.1041,115.075,23.0108,28.04453,26.861,114.482,137.054,31.0723,10.4213,121.857,98.2517,99.1761,145.611,13.246,10.4204,7.59566,25.6038,74.59014,110.254,148.835,5.70426,-1.67332,99.9862,11.2322,119.853,83.8141,26.0814,120.164,144.086,106.187,142.495,122.233,114.429505,114.517,125.216,107.711,-3.49,167.73124,143.015,121.091,165.076,-97.7637,-91.6133,9.16071,-45.1299,126.373,-69.91,165.13,138.471,-7.88667,-2.40872,118.455978,43.5027,-75.6778,-82.568207,-0.10955,-124.376,-91.2869,47.41111,-66.3,-0.2755,37.1231,124.721,-123.511,-122.994,10.816,152.151281,-39.43634,8.23592,-63.018,33.0943,145.457,12.3187,146.572,-70.873718,85.0074,88.3798,134.638,90.648,114.979,-61.7179,75.6702,-11.7496,2.27039,2.64908,8.90959,36.7619,168.399,-57.8892,50.3677,24.566,16.997,39.5605,148.59,37.23808,149.854,28.7911,96.9492,-43.0,7.30478,156.552,10.352,68.363,37.05,-110.3,-61.6934,-39.61441,141.635,-6.51291,6.69128,5.07897,37.6827,37.7577,150.256,-122.029,15.590037,24.2299,102.851673333,7.168,-65.5,64.5105,123.377,145.075,-64.70047,77.144,129.387,-97.9347,142.963,147.562,-53.3269,7.23558,-19.0212,8.0831,34.3589,129.651,77.064,-95.5213,5.5862,-64.35748,-89.8734,5.84326,112.378,-5.21959,92.658,19.0954,-95.8484,-3.70994,49.7603,69.0,-9.96404,132.312,-37.5,131.098,77.2465,112.383,-96.8571,147.917,138.157,33.5326,140.443,117.0,-4.24384,101.327,114.367,-96.5,31.5399,73.3207,7.96202,138.0675,-123.310776,-10.9214,-8.89972,19.7711,155.742,166.695,139.645,145.66,1.72346,-69.5705,44.6365,-10.5284,105.31,83.5859,141.709,125.004,77.3772,120.976,164.193,123.154,47.0,10.2073,79.5153,34.5049,11.4953,10.3453,16.5129,160.804,10.0812,144.119,152.678,120.483,90.3336,-60.5,98.4694,13.7362,4.31312,167.332,15.81149,168.202,36.0829,8.35562,87.8859,5.87705,22.0262,124.117,127.772,-3.47051,24.6862,103.548,11.0511,141.5854211,20.588827,-95.6876,99.74549,17.8057,146.407,167.241,33.9826,145.096,40.0274,105.52746,145.757,146.576,144.806,8.73608,76.97,18.7935,144.892,129.844,124.002,22.5983,139.04,-86.0,35.0056,-55.869,-40.7891,-84.3439,27.8282,105.516304,9.92766,24.0443,10.9924,9.81365,22.3782,147.137,-67.569581,94.69,10.2541,98.6765,-122.857,120.009,144.988,34.613,93.995,15.0373,149.624,102.598,16.1282,17.2691,108.366165,140.158,-1.30881,38.958,11.11047,-56.0182,124.192,96.349,139.206,164.797,-123.179627,112.794,43.36,13.8266,141.158,10.9157,140.068,102.986,-120.645,121.59,-77.7505,95.606,-10.4063,103.192,-107.379,-119.721,15.2569,1.27435,146.875,8.62164,5.40031,101.515,33.73651,104.33,15.6675,67.09,-120.669,85.0,21.4223,6.24719,54.0,145.765,145.751,9.15209,14.35,82.251,-57.08816,122.37,152.493,128.779,12.2518,11.044,71.8421,111.9656111,128.197,124.712,-89.1881,-38.0,-13.4482,116.089,113.93,116.443,-94.6634,75.6265,145.937,107.859,124.36,9.92812,144.689,32.6167,142.569,98.0915,8.21883,24.44567,31.3191,31.7614,34.0335,146.078,17.9669,102.819,11.7366,-94.7001,142.369,-96.8833,-97.5936,100.099,-76.2753,125.499,119.349,8.39332,46.4714,128.086,125.078,27.5144,-4.58203,128.068,83.8064,51.4363,129.243,98.0342,31.0681,78.3865,114.693,-13.235,155.423,33.8997,47.332115,156.83,142.049,14.7,105.001,132.221282,28.11864,-97.01454,-76.9854,93.8042,-118.371838,-106.661,169.35603,77.116,150.21,122.551,94.4235,168.057941,8.73169,-149.45,144.506,45.6411,18.2295,-143.338,115.553,139.061,150.923,-76.5761,125.583,122.963,29.1632,35.0823,30.1567,140.591,22.7362,89.18,120.89,-3.82228,30.0807,124.652,-3.10006,24.3751,140.803,13.1571,10.029445,-92.2786,47.7236,145.514,65.3864,166.606,167.309973,73.8652,36.9316,-61.1963,30.5699,14.8451,11.8839,131.893,145.726,-69.5032,160.741,29.1228,32.4375,12.8951,137.608,142.012,138.456,91.6266,-61.210785,-97.3244,123.298,99.3112,141.532,144.089,135.484,143.916,102.367,142.851,20.1733,-96.1927,108.960571,-89.8235,123.801,111.966667,142.527,136.055,140.9591946,-73.3087,115.58,141.88,9.78757,138.028,12.6362,-72.0409,89.74791,-97.2293,3.85428,81.3125,-108.782]},"kind":"numeric","n":19401,"n_null":11472,"n_unique":7745,"null_rate":0.5913097263027679,"stats":{"iqr":116.96426000000001,"kurtosis":-0.776480086053744,"max":179.306,"mean":51.216551346678315,"median":47.565486,"min":-178.785,"n_outliers":13,"outlier_rate":0.0016395510152604363,"q1":7.17974,"q3":124.144,"skew":-0.4813559160272718,"std":81.14929416094796,"zero_rate":0.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"status","extras":{"singletons":0,"top_values":[["living",19401]]},"kind":"categorical","n":19401,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"living"}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"speakers_count","extras":{},"kind":"unknown","n":19401,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"null_rate","level":"warn","message":"88.8% null"},{"code":"high_skew","level":"info","message":"skew=+2.32"}],"column":"phoneme_count","extras":{"histogram":{"counts":[19,208,460,285,348,233,200,115,116,51,45,14,21,14,14,9,2,3,2,3,3,2,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[11.0,16.5,22.0,27.5,33.0,38.5,44.0,49.5,55.0,60.5,66.0,71.5,77.0,82.5,88.0,93.5,99.0,104.5,110.0,115.5,121.0,126.5,132.0,137.5,143.0,148.5,154.0,159.5,165.0,170.5,176.0,181.5,187.0,192.5,198.0,203.5,209.0,214.5,220.0,225.5,231.0]},"sample":[41.0,27.0,34.0,27.0,39.0,26.0,36.0,79.0,49.0,34.0,42.0,27.0,24.0,20.0,86.0,52.0,26.0,36.0,36.0,40.0,91.0,25.0,34.0,32.0,59.0,56.0,33.0,46.0,49.0,46.0,36.0,50.0,24.0,68.0,36.0,44.0,23.0,25.0,47.0,17.0,36.0,43.0,34.0,67.0,26.0,62.0,49.0,132.0,29.0,28.0,62.0,27.0,33.0,27.0,23.0,41.0,21.0,34.0,28.0,41.0,35.0,97.0,27.0,30.0,32.0,89.0,46.0,36.0,24.0,55.0,17.0,58.0,30.0,21.0,59.0,29.0,54.0,18.0,45.0,31.0,41.0,43.0,17.0,44.0,26.0,47.0,33.0,17.0,16.0,23.0,38.0,29.0,31.0,59.0,23.0,39.0,26.0,53.0,21.0,30.0,25.0,31.0,58.0,55.0,40.0,28.0,36.0,55.0,42.0,34.0,39.0,39.0,31.0,25.0,36.0,94.0,74.0,41.0,21.0,54.0,30.0,56.0,42.0,27.0,50.0,32.0,24.0,57.0,27.0,26.0,36.0,56.0,58.0,43.0,43.0,26.0,45.0,19.0,17.0,21.0,79.0,25.0,20.0,23.0,23.0,57.0,49.0,23.0,21.0,29.0,56.0,47.0,25.0,26.0,35.0,25.0,69.0,33.0,40.0,31.0,22.0,57.0,54.0,35.0,51.0,37.0,33.0,58.0,33.0,27.0,55.0,56.0,29.0,51.0,34.0,65.0,38.0,44.0,43.0,28.0,47.0,35.0,41.0,57.0,24.0,66.0,30.0,23.0,24.0,47.0,25.0,26.0,33.0,24.0,30.0,30.0,37.0,25.0,39.0,231.0,32.0,42.0,41.0,45.0,25.0,27.0,37.0,43.0,61.0,20.0,45.0,31.0,43.0,56.0,50.0,33.0,23.0,43.0,35.0,40.0,25.0,27.0,50.0,31.0,129.0,57.0,45.0,25.0,25.0,20.0,54.0,42.0,22.0,70.0,50.0,45.0,48.0,59.0,23.0,53.0,23.0,21.0,35.0,21.0,32.0,60.0,27.0,22.0,40.0,20.0,35.0,20.0,55.0,41.0,43.0,54.0,48.0,81.0,23.0,39.0,17.0,55.0,46.0,29.0,46.0,31.0,44.0,45.0,29.0,38.0,45.0,33.0,25.0,41.0,33.0,24.0,25.0,49.0,20.0,41.0,25.0,29.0,30.0,30.0,78.0,34.0,22.0,34.0,36.0,28.0,20.0,25.0,55.0,24.0,21.0,47.0,33.0,41.0,23.0,23.0,27.0,40.0,68.0,35.0,40.0,49.0,23.0,42.0,23.0,25.0,18.0,34.0,40.0,33.0,20.0,68.0,18.0,60.0,27.0,28.0,32.0,24.0,32.0,42.0,33.0,54.0,37.0,74.0,53.0,56.0,24.0,20.0,63.0,33.0,53.0,40.0,20.0,43.0,24.0,34.0,31.0,20.0,68.0,25.0,43.0,38.0,25.0,27.0,17.0,44.0,30.0,30.0,30.0,50.0,23.0,14.0,24.0,39.0,92.0,23.0,22.0,38.0,41.0,63.0,42.0,22.0,67.0,18.0,18.0,80.0,27.0,43.0,29.0,20.0,21.0,19.0,31.0,33.0,50.0,74.0,56.0,23.0,42.0,28.0,60.0,40.0,25.0,25.0,43.0,63.0,57.0,35.0,47.0,49.0,64.0,70.0,67.0,29.0,43.0,60.0,25.0,38.0,21.0,20.0,35.0,38.0,36.0,25.0,24.0,26.0,25.0,47.0,29.0,29.0,82.0,31.0,26.0,17.0,60.0,42.0,34.0,14.0,41.0,41.0,27.0,51.0,23.0,38.0,42.0,40.0,39.0,55.0,26.0,25.0,20.0,42.0,33.0,20.0,37.0,30.0,46.0,82.0,29.0,26.0,27.0,26.0,46.0,46.0,26.0,23.0,28.0,33.0,19.0,26.0,23.0,21.0,54.0,21.0,46.0,42.0,46.0,51.0,45.0,24.0,23.0,28.0,48.0,24.0,19.0,25.0,42.0,35.0,24.0,24.0,20.0,30.0,35.0,44.0,19.0,20.0,36.0,38.0,38.0,90.0,23.0,26.0,23.0,26.0,23.0,38.0,22.0,16.0,35.0,21.0,32.0,37.0,68.0,30.0,50.0,61.0]},"kind":"numeric","n":19401,"n_null":17228,"n_unique":100,"null_rate":0.8879954641513325,"stats":{"iqr":20.0,"kurtosis":11.53709354015922,"max":231.0,"mean":38.1974229176254,"median":34.0,"min":11.0,"n_outliers":79,"outlier_rate":0.03635526921306949,"q1":26.0,"q3":46.0,"skew":2.3245998893723896,"std":17.779837132462095,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["phoneme_count","latitude","longitude","macroarea","status","name","glottocode"],"featured_charts":[{"caption":"Shows the geographic distribution of languages, with Africa, Eurasia, and Papunesia accounting for the bulk of entries.","column":"macroarea","kind":"bar"},{"caption":"Reveals the right-skewed distribution of phoneme inventories \u2014 note that this is based on only ~11% of rows due to high null rate.","column":"phoneme_count","kind":"histogram"},{"caption":"Indicates where languages cluster latitudinally, concentrated near the equator and tropics.","column":"latitude","kind":"histogram"},{"caption":"Shows longitudinal spread across the globe, useful for spotting regional concentrations.","column":"longitude","kind":"histogram"},{"caption":"Most language names are one or two words; check the long tail for compound or descriptive names.","column":"name","kind":"length"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogues 19,401 world languages, each identified by a unique Glottocode and name, with attributes like geographic coordinates, macroarea, language family, ISO code, and phoneme count. Two things stand out for closer inspection: phoneme_count is missing for 88.8% of rows and is heavily right-skewed (mean ~38, max 231), so any analysis of phonological inventories will rely on a small subsample with notable outliers. Latitude and longitude are also null for 59.1% of rows, which will limit mapping coverage. On the categorical side, macroarea is well-distributed across six regions but dominated by Africa (32%), while the status column is uninformative since every language is labelled 'living'.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.duplicate_rate","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Glottocodes, the standard 8-character identifiers for languages in the Glottolog catalogue (e.g. 'aala1237', 'aari1239'). Every one of the 19,401 rows is unique with length exactly 8 and a single token, and there are no nulls or duplicates, consistent with a primary key over languages. Nothing anomalous: the column is a clean identifier rather than analysable text.","role":"identifier","scope":"column","target":"glottocode","treatment":"Use as the primary key; left-join other language metadata on this code rather than modelling it."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","column","kind","n","null_rate","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"This column is named iso_639_3, suggesting it should hold ISO 639-3 three-letter language codes. Saturn skipped profiling (kind=unknown), so no uniqueness or value distribution is available; only the row count of 19401 and a 0.0 null rate are confirmed. Without cardinality or sample values, the actual contents and their alignment with the ISO 639-3 standard cannot be verified here.","role":"metadata","scope":"column","target":"iso_639_3","treatment":"Re-profile with explicit string typing to recover cardinality, then use as a categorical language tag."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.word_median","stats.len_mean","stats.len_max","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is a short-text 'name' field with 19401 entirely unique values (n_unique equals n) and no nulls or duplicates. Entries are mostly single tokens \u2014 71.7% are one_word and word_median is 1 \u2014 averaging 9.2 characters and capping at 58. The top vocabulary (nuclear, sign, language, central, southern, western, northern, eastern) suggests these are labels for things like categories, regions, or articles rather than person names.","role":"identifier","scope":"column","target":"name","treatment":"Treat as a unique key \u2014 join or display only, do not use as a model feature."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","column","kind","n","null_rate","n_unique","stats"],"model":"anthropic:claude-opus-4-7","narrative":"This column is named family_name and was skipped by the profiler, so no type, uniqueness, or value statistics are available beyond a row count of 19401 and a null rate of 0.0. The name suggests surnames or a taxonomic family label, but without distribution evidence the actual content cannot be confirmed. The only notable signal is the explicit 'skipped' alert, meaning downstream consumers are flying blind on this field.","role":"other","scope":"column","target":"family_name","treatment":"Re-run profiling with this column included before deciding how to use it."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","n","null_rate","kind"],"model":"anthropic:claude-opus-4-7","narrative":"The column was skipped by the profiler, so beyond a complete absence of nulls across 19,401 rows there is no distributional evidence to draw on. The name family_glottocode points to Glottolog family identifiers (a linguistic taxonomy code), but uniqueness, cardinality, and value distribution are all unknown here.","role":"foreign_key","scope":"column","target":"family_glottocode","treatment":"Re-profile with string handling enabled, then left-join to a Glottolog reference table."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","top_value","top_rate","top_values","entropy_ratio"],"model":"anthropic:claude-opus-4-7","narrative":"Six-valued geographic grouping that bins records into continental-scale macroareas (Africa, Eurasia, Papunesia, South America, North America, Australia), suggesting linguistic or biogeographic data. Distribution is uneven: Africa leads at 32.1% of rows while Australia holds only 602, and entropy ratio of 0.84 confirms moderate but not extreme imbalance. About 4.3% of rows are null.","role":"feature","scope":"column","target":"macroarea","treatment":"One-hot encode and impute or flag the 4.3% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.min","stats.max","stats.median","stats.mean","stats.skew","stats.q1","stats.q3"],"model":"anthropic:claude-opus-4-7","narrative":"This is a geographic latitude coordinate, with values spanning -55.2748 to 73.1354 and a median of 6.2918, consistent with degrees north/south of the equator. Nearly 59% of rows are null, which is the dominant concern; among present values, the distribution is mildly right-skewed (0.54) and roughly centered north of the equator (mean 8.16). Only 7,786 unique values across 19,401 rows suggests repeated locations rather than per-row precise coordinates.","role":"feature","scope":"column","target":"latitude","treatment":"Pair with longitude for geospatial features; impute or flag missingness given the 59% null rate before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","mean","std","skew","kurtosis","null_rate","n","n_unique","n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic longitude in decimal degrees, with values spanning -178.785 to 179.306 and a median of 47.57 consistent with a global coordinate range. The distribution is mildly left-skewed (-0.48) and platykurtic (-0.78), with only 13 outliers (0.16%), but 59.13% of rows are null, meaning location is missing for most records. Of 19,401 rows, 7,745 unique values suggest repeated locations rather than per-record GPS fixes.","role":"feature","scope":"column","target":"longitude","treatment":"Pair with latitude for geospatial features; impute or add a missingness flag given the high null rate."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a single-value categorical field where every one of the 19,401 rows is \"living\". With cardinality 1 and entropy 0, it carries no information and cannot discriminate between records.","role":"metadata","scope":"column","target":"status","treatment":"Drop; constant column with zero entropy."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","column","kind","n","n_unique","null_rate","stats"],"model":"anthropic:claude-opus-4-7","narrative":"The column is named speakers_count, which suggests a numeric tally of speakers per record, but saturn skipped profiling and returned no type, uniqueness, or distribution stats. The only confirmed signals are 19401 rows and a 0.0 null rate. Without kind or summary statistics, nothing further can be said about its actual contents.","role":"other","scope":"column","target":"speakers_count","treatment":"Re-profile this column with type inference forced before deciding on downstream use."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.min","stats.max","stats.median","stats.iqr","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Counts of phonemes per record, ranging 11 to 231 with median 34 and IQR 20. The distribution is heavily right-skewed (skew 2.32, kurtosis 11.5) with 79 outliers (3.6%), and critically 88.8% of rows are null so only ~2,170 values are present.","role":"feature","scope":"column","target":"phoneme_count","treatment":"Impute or flag missingness and log-transform before modelling given the 88.8% null rate and skew of 2.32."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":3183,"prompt_tokens":10699,"total_tokens":13882}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:52:19+00:00","mode":"full","row_count":19401,"sampled_rows":19401,"seed":42,"source":"/home/coolhand/servers/diachronica/etymology_atlas/parquet/languages.parquet"},"notes":[],"saturn_version":"0.2.0","schema":{"family_glottocode":"unknown","family_name":"unknown","glottocode":"text","iso_639_3":"unknown","latitude":"numeric","longitude":"numeric","macroarea":"categorical","name":"text","phoneme_count":"numeric","speakers_count":"unknown","status":"categorical"}}
