{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"id","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23740,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["abbe1238","sanm1298","thur1255","suar1238","kukn1238","yagu1244","labu1252","uist1237","suku1258","arak1251","nkpa1238","meif1236","grik1245","acha1250","haid1248","dang1261","beni1250","taiy1243","yuuy1234","kitu1246","hoch1237","araw1280","chik1249","bhoj1244","peer1241","scha1237","momv1238","dawa1241","bang1364","yace1238","nung1290","nyor1246","nyon1241","kham1291","kale1248","kwaa1269","waru1269","mann1248","lame1259","mbin1238","sout3052","wada1259","gosh1237","vunj1238","west2804","yang1307","jeme1245","khan1274","chan1307","tren1240"],"top_values":[],"top_words":[["melk1240",1],["yang1299",1],["yiss1240",1],["east2553",1],["sout3158",1],["taba1263",1],["buxi1237",1],["taob1238",1],["koda1254",1],["tibe1272",1],["kham1284",1],["ishi1240",1],["tong1325",1],["sako1234",1],["naur1244",1],["micr1243",1],["nucl1525",1],["west2857",1],["loni1238",1],["khai1248",1],["chru1240",1],["whit1262",1],["soko1247",1],["west2359",1],["sang1336",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23740,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":23740,"n_null":0,"n_unique":23740,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":86.11100000000002,"url_rate":0.0,"vocab_size":20000,"word_mean":1.0,"word_median":1.0}},{"alerts":[],"column":"family_id","extras":{"singletons":6,"top_values":[["atla1278",4663],["aust1307",3850],["indo1319",2201],["sino1245",1666],["afro1255",1259],["nucl1709",762],["pama1250",598],["aust1305",503],["book1242",399],["otom1299",338],["mand1469",303],["sign1238",259],["drav1251",255],["cent2225",251],["turk1311",229],["taik1256",223],["nilo1247",201],["ural1272",185],["japo1237",179],["tupi1275",157]]},"kind":"categorical","n":23740,"n_null":429,"n_unique":287,"null_rate":0.018070766638584666,"stats":{"cardinality":287,"entropy":4.885598149147087,"entropy_ratio":0.5983654428668718,"top_rate":0.20003431856205225,"top_value":"atla1278"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"68.5% duplicate strings"}],"column":"parent_id","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23311,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":false,"sample":["abee1242","cent2144","yuag1237","mnon1258","pama1253","raic1241","kenh1234","uygh1240","taih1244","book1242","mend1261","nucl1709","gres1241","unun9958","band1343","sout3248","east2804","kain1274","uppe1415","suku1261","gras1249","unat1236","nort3200","khas1269","taro1266","khmu1256","russ1270","haus1257","miji1239","ewon1239","nung1283","unun9919","diii1241","khan1274","cent2336","kako1241","waru1263","hata1242","soga1242","bena1258","nort3200","kiwa1251","kain1273","punu1240","prew1234","book1242","kara1480","noct1238","kang1292","cuaa1241"],"top_values":[["book1242",399],["uncl1493",121],["vill1244",45],["pidg1258",44],["unat1236",42],["deaf1237",41],["kony1248",32],["biak1248",29],["tase1235",29],["izon1238",27],["mala1545",25],["nort2940",22],["bund1253",22],["isok1239",22],["pama1250",21],["cent2146",21],["mixe1287",21],["nort2697",20],["yoru1245",20],["sino1245",20]],"top_words":[["book1242",349],["uncl1493",98],["vill1244",41],["pidg1258",36],["unat1236",34],["deaf1237",33],["kony1248",28],["tase1235",25],["biak1248",25],["izon1238",25],["mala1545",22],["pama1250",20],["isok1239",20],["cent2146",20],["nort2940",19],["nucl1708",18],["lsfi1234",18],["mixe1287",17],["bund1253",17],["bhil1254",16],["doma1258",16],["vlax1238",16],["sina1266",16],["nucl1240",16],["asli1244",16]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23311,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":23740,"n_null":429,"n_unique":7338,"null_rate":0.018070766638584666,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.6852129895757367,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":15973,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":91.18700000000003,"url_rate":0.0,"vocab_size":7189,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"69.5% rows are a single word"}],"column":"name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[54,496,4648,3145,4510,1373,1069,1997,1023,1768,644,848,340,281,518,224,298,101,140,46,44,69,24,34,12,9,4,2,3,3,5,1,2,1,0,2,0,0,0,2],"edges":[1.0,2.425,3.85,5.275,6.7,8.125,9.55,10.975,12.4,13.825000000000001,15.25,16.675,18.1,19.525000000000002,20.95,22.375,23.8,25.225,26.650000000000002,28.075,29.5,30.925,32.35,33.775,35.2,36.625,38.050000000000004,39.475,40.9,42.325,43.75,45.175000000000004,46.6,48.025,49.45,50.875,52.300000000000004,53.725,55.15,56.575,58.0]},"near_unique":true,"sample":["Abbey-Ve","San Mart\u00edn Itunyoso Triqui","Thuri","Asabano","Kukna","Yagua","Labuan","Uis Tasae","Sukurase","Araki (Iran)","Nkpam","Meifu","Grik","Achagua","Haida","Dang","Beni Touzine","Taiyuan","Yuu-Yuu","Kituba (Democratic Republic of Congo)","Ho Chi Minh City Sign Language","Arawa","Chikonono","Bhojpuri","Peere","Scharans Sutsilvan","Momveda","Dawas","Bangkalan","Yace","Wubuy","Nyoro","Nyong","Khamyang","Kaledupa","Kwaami","Warup","Mann","Lamenu-Lewo","Mbinga","Southern Buduma","Wadamkong","Gosho","Vunjo","Western South Slavic","Yangjiang","Towa","Kh\u00e1ng","Chandari","Trentino Western"],"top_values":[],"top_words":[["nuclear",368],["central",281],["western",238],["eastern",227],["northern",223],["southern",218],["sign",197],["language",172],["north",154],["south",151],["east",138],["west",132],["unclassified",91],["of",76],["arabic",74],["san",68],["pidgin",68],["new",67],["zapotec",63],["mixtec",60],["upper",54],["naga",53],["lower",49],["guinea)",49],["(papua",48]],"vocab_skipped":null,"word_histogram":{"counts":[16506,0,0,5540,0,0,1287,0,0,0,321,0,0,72,0,0,11,0,0,0,0,0,0,2,0,0,0,0,0,1],"edges":[1.0,1.3,1.6,1.9,2.2,2.5,2.8,3.1,3.4,3.6999999999999997,4.0,4.3,4.6,4.9,5.2,5.5,5.8,6.1,6.3999999999999995,6.7,7.0,7.3,7.6,7.8999999999999995,8.2,8.5,8.8,9.1,9.4,9.7,10.0]}},"kind":"text","n":23740,"n_null":0,"n_unique":23740,"null_rate":0.0,"stats":{"allcaps_rate":0.00016849199663016007,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":58,"len_mean":9.950126368997473,"len_median":8.0,"len_min":1,"len_p95":22.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.6952822240943555,"readability_flesch_mean":42.624975000000035,"url_rate":0.0,"vocab_size":17915,"word_mean":1.3977674810446503,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 98.3% of rows"}],"column":"bookkeeping","extras":{"singletons":0,"top_values":[["False",23341],["True",399]]},"kind":"categorical","n":23740,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.12311671360591762,"entropy_ratio":0.12311671360591762,"top_rate":0.9831929233361415,"top_value":"False"}},{"alerts":[],"column":"level","extras":{"singletons":0,"top_values":[["dialect",10920],["language",8481],["family",4339]]},"kind":"categorical","n":23740,"n_null":0,"n_unique":3,"null_rate":0.0,"stats":{"cardinality":3,"entropy":1.4939899650987318,"entropy_ratio":0.9426027205179732,"top_rate":0.45998315080033697,"top_value":"dialect"}},{"alerts":[],"column":"status","extras":{"singletons":0,"top_values":[["safe",18965],["definitely endangered",1814],["vulnerable",1194],["extinct",889],["critically endangered",465],["severely endangered",413]]},"kind":"categorical","n":23740,"n_null":0,"n_unique":6,"null_rate":0.0,"stats":{"cardinality":6,"entropy":1.1495352452368632,"entropy_ratio":0.44470093663492777,"top_rate":0.7988626790227464,"top_value":"safe"}},{"alerts":[{"code":"null_rate","level":"warn","message":"66.5% null"}],"column":"latitude","extras":{"histogram":{"counts":[5,1,1,4,7,16,29,26,48,78,125,141,281,256,495,788,681,379,469,664,710,303,387,233,318,373,167,144,179,113,138,79,78,76,46,21,41,23,14,6],"edges":[-55.2748,-52.064544999999995,-48.85429,-45.644035,-42.43378,-39.223524999999995,-36.01327,-32.803015,-29.59276,-26.382505,-23.17225,-19.961995,-16.751739999999998,-13.541484999999994,-10.331229999999998,-7.120975000000001,-3.9107199999999978,-0.7004649999999941,2.5097900000000024,5.720044999999999,8.930300000000003,12.140555000000006,15.350809999999996,18.561065,21.771320000000003,24.981575000000007,28.19183000000001,31.402085,34.61234,37.82259500000001,41.032849999999996,44.243105,47.45336,50.66361500000001,53.87387000000001,57.084125,60.294380000000004,63.50463500000001,66.71489,69.925145,73.1354]},"sample":[5.834,18.1046,7.64567,-3.49104,10.21,6.85716,-53.435719,4.60014,17.4137,44.408,-12.4931,12.0993,-2.71119,-3.29664,-11.443,6.24999,-2.26271,-2.78397,0.13727,7.56324,-7.95526,1.21652,9.2634,-8.85301,4.53307,0.30801,42.67,7.32924,-33.3036,3.51211,-1.5667,-8.4885,11.989,9.94207,25.0046,-0.862394,14.4153,10.1115,10.2077,22.4405233333,3.18438,9.40519,53.2307,0.24553,23.656,-4.39732,10.2868,24.4578,-4.46238,9.52724,0.39044,9.27518,-4.32114,7.496728,8.88093,42.9331,27.7074,48.2452,9.39794,51.6688,6.48655,-17.8297,-8.32587,10.4822,4.44624,12.5008,-4.90828,-3.23686,3.99999,29.156486,39.0566,48.1968,-9.72027,-13.3544,42.41,23.6818,-5.80391,46.5741,23.00148,32.5476,34.0662,-4.25154,30.605768,16.6562,58.3637,-17.6354,27.358,17.6819,11.0417,-10.5879,11.4653,34.5701,49.7233,41.7,45.3,-8.55268,-3.13905,-5.53634,7.22812,6.40718,-15.6894,-22.196,-10.8098,-14.1361,-5.55315,-3.37081,55.8307,-6.76213,20.5991,6.75763,-3.11078,23.475,16.8693,7.09089,-3.74242,70.11,58.55,10.2955,25.8583,44.5988,28.411,-17.5446,6.15034,8.43391,4.75405,8.8956,30.1168,-9.4791,5.23264,5.01201,6.52106,2.55849,27.357,-19.44157406,17.8583,-8.74742,27.41687,-0.78872,10.3069,67.4307,18.911,18.795135,25.5143,-5.26603,31.784,19.6511,8.486,21.6311,10.942,47.7534,8.61202,-8.14754,-9.57135,20.6189,40.855526,21.954,42.0808,10.4814,16.4263,5.15279,6.95522,-7.84385,0.31776,43.11,53.2186,43.0464,-2.18342,-7.20938,5.17986,26.9682,36.0221,-3.45787,29.1708,-3.78596,9.70781,-1.7124,7.34022,0.75068,-12.2661,-6.26656,1.59855,25.6999,38.5656,-20.445,35.4145,1.1513,9.88165,-7.15104,26.88946,20.2463,34.9403,13.1798,11.69648,-1.43973,5.92596,19.2414,7.85002,5.39111,0.32395,-30.4207,14.046,-5.3655,63.8977,5.31,-16.4823,65.6093,39.8412,-17.7176,-8.38204,-14.0756,9.18,27.1606,9.20815,-7.1135,43.0,-11.9133,-21.92,16.4708,-1.50636,10.1485,4.88273,-17.9026,-8.75779,2.39354,-4.66964,10.4847,9.14965,42.1328,46.3699,11.0243,14.3658,-3.6211,14.9387,-4.94672,-2.04783,-16.7612,-3.18895,2.26808,27.2703,23.6818,-10.8069,1.95461,9.40172,-16.0381,-0.02586,-4.61444,50.6963,-33.9470874,-28.937029,17.3879,35.6171,3.62499,10.943,9.2888,8.3548,-11.8701,-5.61613,35.8884,-4.19082,8.38023,12.8218,25.3836,-26.789,-13.8754,-2.00717,-3.29547,-22.234,13.076,-11.554,-4.3553,13.1027,-1.10038,-18.56,17.0156,23.546095,6.82543,-4.72863,6.53792,-1.04518,-2.52264,27.736,4.17,-1.57031,-28.097,7.956663,-6.3409,48.7686,-8.5692,24.154,-4.69751,30.8983,-9.75639,14.49,4.42115,28.76714,23.9386,-16.0973,-9.37935,-17.6417,27.688,26.72057,-5.02368,27.2614,41.100052,46.3073,23.312,10.2523,-20.9235,1.45357,-19.06,5.43898,14.4674,29.3016,3.56357,11.0441,12.4182,15.6913,54.004532,69.582,12.1967,25.218,9.42495,-19.7933,-7.0,26.4240223,20.8965,27.4301,-29.7054,6.82649,-18.8743,-33.1199,10.0666,5.1785,44.1415,4.74842,7.33708,52.37,-7.85988,1.88296,-7.81309,-8.79019,-6.36069,3.63993,-25.60551,-4.91161,32.6963,-3.25404,-24.7993,23.316718,19.0383,-27.821692,0.50714,4.83332,-41.188369,39.91,-8.405661,7.10677,58.5,24.662,22.570376,1.27026,-2.37609,14.6254,-11.615,44.9292,14.34,-3.07772,37.130642,-15.4048,21.6176,37.0878,25.506,10.571,-3.022268,-15.9534,-10.5522,24.6043,-6.18726,23.76,16.2202,5.444878,-4.4376,17.0777,10.5376,4.54729,8.50634,-5.08824,-6.31958,-2.33349,-5.99286,3.42701,-1.37295,-5.00659,2.82799,-3.41983,35.778,-17.20849,52.3261,31.6862,-6.43149,-3.60838,-8.26947,-5.96037,15.23054,22.9777,42.5111,23.8192,6.90525,2.58367,26.3902,27.8408,7.02206,-16.5116,53.2616,23.058,49.3231,41.453,-11.5229,-3.16082,-2.30765,13.0211,-2.58205,14.06,-5.29223,-7.88553,1.00003,-5.5471,-4.44425,8.74437,-8.48063,-20.6614,28.9506,-2.84694,5.1796,-8.07231,9.8289,1.0946,14.5672,-4.03577,8.20383,0.42569,-5.30721,0.57291,-6.19532,35.3101,-11.518,36.49562,7.31095,-2.676394,-6.55533,-4.82756,7.05085,-14.712,-2.68501,10.8301,60.3353,-13.892528,13.2436,46.8191,-12.8596,-10.41792,1.2341,37.1768,-18.3727,-4.78822,-22.7078,-18.6529,-3.07263,7.50851,-4.13587,18.687,34.9733,33.8162,-14.058,-4.22936,-3.20238,-3.41642,-2.64636,-19.0,15.9748,27.8482649,14.194497,14.341898,24.3401,9.44956,-15.7621,-11.1086,-25.4446,-15.1117,-33.89,-1.68798,36.3167,9.87186,-11.3704,0.735585,-2.26367,-8.18719,25.3499,8.43,24.0649]},"kind":"numeric","n":23740,"n_null":15797,"n_unique":7798,"null_rate":0.6654170176916596,"stats":{"iqr":24.47247,"kurtosis":0.3006308684501371,"max":73.1354,"mean":8.170347097363058,"median":6.30619,"min":-55.2748,"n_outliers":129,"outlier_rate":0.016240715095052247,"q1":-5.13662,"q3":19.33585,"skew":0.5403424604551778,"std":18.961612711714917,"zero_rate":0.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"66.5% null"}],"column":"longitude","extras":{"histogram":{"counts":[13,4,10,11,10,17,124,47,78,280,59,235,218,150,60,40,0,4,105,275,444,751,322,430,228,126,35,80,210,208,269,457,239,502,316,598,667,123,186,12],"edges":[-178.785,-169.83272499999998,-160.88045,-151.928175,-142.9759,-134.02362499999998,-125.07135,-116.119075,-107.1668,-98.214525,-89.26225,-80.309975,-71.3577,-62.405424999999994,-53.453149999999994,-44.50087499999998,-35.54859999999999,-26.596325000000007,-17.644049999999993,-8.691774999999978,0.2605000000000075,9.212774999999993,18.165050000000008,27.117325000000022,36.06960000000001,45.021874999999994,53.97415000000001,62.92642500000002,71.87870000000001,80.830975,89.78325000000004,98.73552500000002,107.68780000000001,116.640075,125.59234999999998,134.54462500000002,143.4969,152.449175,161.40145000000004,170.35372500000003,179.306]},"sample":[-0.151,82.8487,6.13085,142.387,9.89205,9.96261,-71.04126,18.0352,-96.7438,-123.94,-70.5533,29.4332,-49.303,-76.6056,142.242,-76.05,139.502,119.319,-67.7282,5.89556,142.4,-78.3401,8.60225,140.753,9.522,115.332,21.17,23.0108,136.079,114.482,119.342,143.002,8.4331,6.58105,121.857,130.646,120.49,119.025,6.61686,92.92553,29.9842,7.59566,25.6038,28.6681,113.391,152.334,8.19726,92.0157,143.058,3.80058,123.888,-11.597,141.45,0.362291,-83.2124,17.6704,84.348,-3.78934,-83.0539,-0.4431,10.418,125.631,32.8801,12.3007,38.4646,17.6011,26.298,11.9595,-74.0,115.434262,9.03935,20.3129,-71.1733,-66.6277,-76.5969,107.184,138.471,-84.1543,92.339,76.1796,78.2389,142.317,103.970947,-91.9417,-110.832,-59.9172,86.9824,105.773,5.54073,-60.8312,5.26544,-98.424,-124.967,9.25537,34.4,-70.6648,38.6217,145.457,-8.24535,2.68609,32.7731,114.937,122.658,133.002,146.082,119.871,48.405,134.288,-98.0978,2.64908,137.387,99.865,-97.1912,5.84494,140.903,-152.01,25.82,12.6479,78.2187,28.0277,82.6749,139.426,36.67211,25.5173,-7.66699,11.9527,10.0165,159.917,-7.50317,20.0194,0.16932,-76.582,-108.279,144.6482584,-99.0023,161.023,86.28988,34.76751,9.11434,-142.963,-72.394,-72.136115,89.8594,154.662,77.064,56.1562,7.168,-158.002,13.6881,-102.528,125.071,17.572,-75.594,-97.9347,111.743889,-104.062,46.1369,13.526,120.695,7.86688,8.27362,125.902,-78.3729,45.03,-7.61509,12.6489,139.341,146.983,5.51702,63.535,139.079,28.9978,82.437,143.681,12.1508,34.3923,36.1718,28.7394,-53.4651,146.083,27.0186,74.7355,47.4329,118.613,70.9826,33.4791,15.1419,22.9027,93.99325,101.671,-98.9042,-9.49151,30.47166,111.905,7.96202,73.9166,-8.25793,37.53,29.8696,19.7711,-1.96187,-70.5094,166.695,162.98,39.7602,-154.229,66.3805,16.58,124.791,141.724,8.85,87.5717,11.04651,146.57,47.0,133.723,18.0,79.5153,34.5049,7.96524,10.3453,19.256,160.804,10.0812,143.855,9.59527,11.228,47.0809,-103.95,13.7362,107.336,17.7887,-16.8816,20.5674,147.207,168.334,152.606,16.1874,87.8859,107.184,123.085,127.772,-3.47051,37.2437,103.548,16.7259,15.9667,141.5854211,153.48612,121.0365,-78.4531,31.8471,13.8181,11.0232,-7.57109,-64.3201,148.445,14.4508,142.862,25.9024,-15.9202,94.0035,144.892,129.844,139.04,130.601,-61.901,19.2769,35.0056,-70.2079,-3.25128,17.82247,141.46,-95.2833,105.516304,11.1316,38.3519,10.2556,-78.5392,140.326,76.947,96.25,25.104,-60.4145,117.030502,147.79,109.005,140.688,90.6317,149.489,79.4664,38.8762,-3.02,114.011,84.22852,121.59,39.9062,148.237,19.1631,96.349,86.00255,143.029,84.9577,-123.179627,-116.408,92.83,0.70509,117.203,20.14148,-169.86,7.76345,103.863,109.759,21.85251,-3.0124,-4.47538,121.454,-132.079257,-143.77,108.849,110.045,17.9136,34.9829,113.0,98.8862088,104.286,90.534,19.0841,0.37152,146.216,118.151,13.3343,-2.81123,6.82979,6.9058,5.97843,9.72,145.751,104.008,145.34,148.09,155.708,-51.6246,-57.08816,-49.0217,76.5966,128.779,29.4718,-109.614813,101.218,153.304503,-69.5032,-71.5,146.559906,-8.1,122.690759,-67.5996,92.0,93.982,103.141292,109.539,136.352,121.453,160.219,26.2924,107.447,37.5382,122.434269,166.843,98.0915,49.8801,91.469,8.21883,29.485248,168.228,121.847,121.067,146.702,98.825,-96.3362,125.481047,142.369,-97.5432,22.1637,-54.0238,17.8785,145.578,146.759,136.79,138.643,114.178,120.378,39.12249,102.806,140.77,53.32,29.75725,88.4344,78.3865,155.412,142.397,156.541,147.353,-91.7041,120.315,-124.379,93.6884,11.4826,34.7595,106.481,100.622,31.285,167.477,-132.008,101.412,-123.014,1.569,153.488,32.8765,125.966,121.305,120.139,121.747,144.66,126.336,112.975,-37.509155,152.095,11.3894,124.271,-56.0329,83.4672,16.4765,115.826,146.206,16.209,128.154,108.114,121.888,-9.23439,120.89,151.828,30.6389,-61.7416,72.5316,-61.42,10.03348,0.38008,-68.435669,155.706,144.924,-11.3262,166.606,141.258,13.6633,34.7865,167.428701,-61.1963,14.8451,130.737,150.37814,127.494,73.9777,143.943,145.419,122.325,137.608,142.012,-59.3528,123.1,97.4877,-107.517,36.484,141.925,142.401,144.089,136.703,141.104,-47.0,-96.1927,112.7688127,-89.790974,-90.256519,123.801,10.2881,136.055,-73.3087,115.58,141.767,144.22,138.028,-119.721,1.81099,-72.0409,-69.6955,34.0542,159.255,93.6463,-9.51,93.9253]},"kind":"numeric","n":23740,"n_null":15797,"n_unique":7757,"null_rate":0.6654170176916596,"stats":{"iqr":116.887005,"kurtosis":-0.7744929127998281,"max":179.306,"mean":51.27021088091976,"median":47.7236,"min":-178.785,"n_outliers":13,"outlier_rate":0.0016366612111292963,"q1":7.234995,"q3":124.122,"skew":-0.4832001961607399,"std":81.13831220600203,"zero_rate":0.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"null_rate","level":"warn","message":"66.4% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"iso639P3code","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7968,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[2.5,2.525,2.55,2.575,2.6,2.625,2.65,2.675,2.7,2.725,2.75,2.775,2.8,2.825,2.85,2.875,2.9,2.925,2.95,2.975,3.0,3.025,3.05,3.075,3.1,3.125,3.15,3.175,3.2,3.225,3.25,3.275,3.3,3.325,3.35,3.375,3.4,3.425,3.45,3.475,3.5]},"near_unique":true,"sample":["aau","mat","twx","sui","ktr","ygw","key","tui","ssk","agg","wni","mpj","gcf","ado","haf","csf","btu","tks","nua","sjd","hoo","anj","cga","bkv","pay","skc","mpt","dgd","bmx","yak","now","zin","mie","kmt","wkl","ksl","wno","ccm","kvi","zmj","zpd","msn","gqr","rmy","wlv","yaq","jje","kyq","poo","als"],"top_values":[],"top_words":[["aiz",1],["aiw",1],["aay",1],["aas",1],["kbt",1],["abg",1],["abf",1],["abm",1],["mij",1],["aau",1],["abq",1],["aba",1],["abp",1],["abi",1],["bsa",1],["axb",1],["ash",1],["abk",1],["aob",1],["abo",1],["abr",1],["abn",1],["aah",1],["abz",1],["kgr",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7968,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":23740,"n_null":15772,"n_unique":7968,"null_rate":0.6643639427127211,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":3,"len_mean":3.0,"len_median":3.0,"len_min":3,"len_p95":3.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":119.52800000000003,"url_rate":0.0,"vocab_size":7968,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"description","extras":{},"kind":"unknown","n":23740,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"markup_description","extras":{},"kind":"unknown","n":23740,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+44.40"},{"code":"outliers","level":"warn","message":"9.2% rows beyond 1.5 IQR"}],"column":"child_family_count","extras":{"histogram":{"counts":[23588,93,27,6,4,3,2,2,1,1,0,0,1,2,1,1,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,1,0,1],"edges":[0.0,21.475,42.95,64.42500000000001,85.9,107.375,128.85000000000002,150.32500000000002,171.8,193.275,214.75,236.22500000000002,257.70000000000005,279.175,300.65000000000003,322.125,343.6,365.07500000000005,386.55,408.02500000000003,429.5,450.975,472.45000000000005,493.925,515.4000000000001,536.875,558.35,579.825,601.3000000000001,622.7750000000001,644.25,665.725,687.2,708.6750000000001,730.1500000000001,751.625,773.1,794.575,816.0500000000001,837.5250000000001,859.0]},"sample":[0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,46.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,10.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,53.0,0.0,0.0,0.0,0.0,0.0,3.0,29.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,4.0,6.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":23740,"n_null":0,"n_unique":88,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":2352.9443321410663,"max":859.0,"mean":0.8792333614153328,"median":0.0,"min":0.0,"n_outliers":2179,"outlier_rate":0.0917860151642797,"q1":0.0,"q3":0.0,"skew":44.397877944596296,"std":13.203601499216601,"zero_rate":0.9082139848357204}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+41.86"},{"code":"outliers","level":"warn","message":"18.3% rows beyond 1.5 IQR"}],"column":"child_language_count","extras":{"histogram":{"counts":[23547,121,37,6,3,4,3,2,2,0,1,1,0,1,1,2,1,0,0,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,1,1],"edges":[0.0,35.875,71.75,107.625,143.5,179.375,215.25,251.125,287.0,322.875,358.75,394.625,430.5,466.375,502.25,538.125,574.0,609.875,645.75,681.625,717.5,753.375,789.25,825.125,861.0,896.875,932.75,968.625,1004.5,1040.375,1076.25,1112.125,1148.0,1183.875,1219.75,1255.625,1291.5,1327.375,1363.25,1399.125,1435.0]},"sample":[0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0,3.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,4.0,0.0,0.0,18.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,54.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,105.0,0.0,0.0,0.0,0.0,0.0,5.0,32.0,11.0,2.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,8.0,19.0,0.0,0.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,29.0,7.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,15.0,0.0,0.0,0.0,0.0,6.0,3.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,0.0,68.0,2.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,8.0,6.0,2.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":23740,"n_null":0,"n_unique":126,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":2115.0784915515087,"max":1435.0,"mean":1.9960825610783488,"median":0.0,"min":0.0,"n_outliers":4339,"outlier_rate":0.18277169334456614,"q1":0.0,"q3":0.0,"skew":41.858673461484905,"std":23.407876183271224,"zero_rate":0.8172283066554339}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+42.22"},{"code":"outliers","level":"warn","message":"18.0% rows beyond 1.5 IQR"}],"column":"child_dialect_count","extras":{"histogram":{"counts":[23575,99,24,18,4,2,0,1,1,1,1,2,0,1,0,3,0,0,1,1,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,1],"edges":[0.0,59.225,118.45,177.675,236.9,296.125,355.35,414.575,473.8,533.025,592.25,651.475,710.7,769.9250000000001,829.15,888.375,947.6,1006.825,1066.05,1125.275,1184.5,1243.7250000000001,1302.95,1362.175,1421.4,1480.625,1539.8500000000001,1599.075,1658.3,1717.525,1776.75,1835.9750000000001,1895.2,1954.425,2013.65,2072.875,2132.1,2191.3250000000003,2250.55,2309.775,2369.0]},"sample":[0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,5.0,0.0,2.0,0.0,0.0,7.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,10.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,3.0,4.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,4.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,117.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,25.0,71.0,0.0,0.0,0.0,0.0,0.0,18.0,67.0,10.0,1.0,5.0,0.0,0.0,0.0,0.0,19.0,0.0,0.0,0.0,0.0,16.0,22.0,0.0,0.0,0.0,0.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,61.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,8.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,10.0,3.0,0.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,5.0,0.0,3.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,25.0,7.0,0.0,0.0,0.0,11.0,18.0,2.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,3.0,0.0,0.0,0.0,22.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":23740,"n_null":0,"n_unique":164,"null_rate":0.0,"stats":{"iqr":1.0,"kurtosis":2159.2626686620392,"max":2369.0,"mean":3.3890901432181972,"median":0.0,"min":0.0,"n_outliers":4272,"outlier_rate":0.17994945240101096,"q1":0.0,"q3":1.0,"skew":42.218971232840026,"std":36.798748606432454,"zero_rate":0.7441870261162595}},{"alerts":[{"code":"null_rate","level":"warn","message":"64.2% null"}],"column":"country_ids","extras":{"singletons":339,"top_values":[["PG",874],["ID",695],["NG",480],["AU",432],["IN",356],["MX",297],["CN",271],["BR",263],["US",247],["CM",196],["PH",177],["CD",156],["VU",118],["SD",99],["PE",97],["TZ",93],["MY",90],["TD",88],["RU",83],["CO",82]]},"kind":"categorical","n":23740,"n_null":15250,"n_unique":680,"null_rate":0.6423757371524853,"stats":{"cardinality":680,"entropy":6.493208901665357,"entropy_ratio":0.6900774923409273,"top_rate":0.10294464075382803,"top_value":"PG"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["level","status","country_ids","family_id","latitude","longitude","iso639P3code","child_dialect_count","bookkeeping"],"featured_charts":[{"caption":"Shows the split between dialect, language, and family entries \u2014 the core taxonomy of the dataset.","column":"level","kind":"donut"},{"caption":"Highlights how many languoids are safe versus various endangerment levels, including 889 already extinct.","column":"status","kind":"bar"},{"caption":"Reveals the dominant language families; atla1278 and aust1307 alone account for over a third of records.","column":"family_id","kind":"bar"},{"caption":"Shows geographic concentration, with Papua New Guinea, Indonesia, and Nigeria leading by languoid count.","column":"country_ids","kind":"bar"},{"caption":"Distribution of languoid latitudes (where known) \u2014 note the ~66% null rate limits coverage.","column":"latitude","kind":"histogram"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a Glottolog languoid catalog with 23,740 rows and 16 columns describing languages, dialects, and families along with geographic and endangerment metadata. The `level` field splits the records into three classes \u2014 dialect (10,920), language (8,481), and family (4,339) \u2014 making it the natural primary lens. Endangerment `status` is dominated by 'safe' (~79.9%), but the remaining categories flag thousands of vulnerable to extinct languages worth investigating. Geography is concentrated: `country_ids` is led by PG (874), ID (695), and NG (480), and `family_id` is heavily skewed toward atla1278 (4,663) and aust1307 (3,850). Note that `iso639P3code`, `latitude`, and `longitude` are ~66% null, so spatial analysis will only cover about a third of rows.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Fixed 8-character single-token codes (e.g., 'melk1240', 'yang1299'), unique across all 23,740 rows with no nulls or duplicates. The pattern of four letters followed by four digits is consistent with Glottolog-style language identifiers, making this a primary key rather than analyzable text.","role":"identifier","scope":"column","target":"id","treatment":"Use as the row key for joins; exclude from modelling features."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Looks like a Glottolog-style language family identifier (e.g. 'atla1278' for Atlantic-Congo, 'aust1307' for Austronesian) tagging each of the 23,740 rows. The distribution is heavily skewed: the top family alone covers 20.0% of rows and the top 10 of 287 families dominate, yielding an entropy ratio of 0.598. Null rate is low at 1.81%.","role":"foreign_key","scope":"column","target":"family_id","treatment":"left-join on this id to a language-family reference table; consider grouping the long tail before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.duplicate_rate","stats.n_duplicates","stats.vocab_size","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Fixed-width 8-character single-token codes (e.g. 'book1242', 'uncl1493') with len_min=len_max=8 and one_word_rate=1.0 \u2014 these look like Glottolog-style language/family identifiers used as a parent reference. With 7338 unique values across 23740 rows and a 68.5% duplicate_rate, many children share parents; 'book1242' alone accounts for 399 rows. Null rate is low at 1.81%.","role":"foreign_key","scope":"column","target":"parent_id","treatment":"left-join on this id to a parent/language lookup table."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_median","stats.len_max","stats.one_word_rate","stats.word_mean","stats.vocab_size","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"The `name` column holds 23,740 fully unique short labels (null_rate 0.0, n_unique equals n), with a mean length of 9.95 characters and 69.5% being a single word. Top tokens like `nuclear`, `central`, `western`, `eastern`, `northern`, `southern`, and `language` suggest these are entity/topic names rather than person names. Vocabulary is broad (17,915 distinct words across only ~1.4 words per row), and there are no duplicates, URLs, or emoji.","role":"identifier","scope":"column","target":"name","treatment":"Treat as a unique label key; drop from modelling features or hash/embed if semantic content is needed."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary boolean flag indicating whether a record involves bookkeeping, with only two values across 23740 rows and no nulls. The distribution is severely imbalanced: 'False' covers 98.3% (23341 rows) versus only 399 'True' cases, yielding an entropy ratio of just 0.12.","role":"feature","scope":"column","target":"bookkeeping","treatment":"Encode as boolean and apply class-imbalance handling (e.g., stratification or reweighting) if used as a target."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical taxonomy tag with exactly 3 levels (dialect, language, family) and no nulls across 23,740 rows. The distribution is well-spread (entropy_ratio 0.94), with 'dialect' leading at 46.0%, followed by 'language' (8,481) and 'family' (4,339). Looks like a linguistic classification level rather than a free-form attribute.","role":"feature","scope":"column","target":"level","treatment":"one-hot encode for modelling or use directly as a stratification key."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical status column with 6 levels matching UNESCO-style language endangerment categories (safe, definitely endangered, vulnerable, extinct, critically endangered, severely endangered). The distribution is heavily imbalanced: 'safe' accounts for 79.9% of 23,740 rows, while the rarest level 'severely endangered' has only 413 records. Entropy ratio is 0.44, confirming low diversity despite 6 classes.","role":"label","scope":"column","target":"status","treatment":"Treat as ordinal target; stratify or rebalance before classification given the 80/20 dominance of 'safe'."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.min","stats.max","stats.median","stats.skew","stats.iqr","stats.mean"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic latitude coordinate spanning -55.27 to 73.14, consistent with valid Earth latitudes. Two-thirds of rows are null (null_rate 0.6654), which severely limits coverage. The distribution is mildly right-skewed (0.54) with median 6.31 and IQR ~24.5, suggesting a bias toward northern-hemisphere but tropical-leaning observations.","role":"feature","scope":"column","target":"latitude","treatment":"Impute or filter the 66% nulls before any spatial modelling; pair with longitude for geo-features."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.min","stats.max","stats.median","stats.skew","stats.n_outliers","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Geographic longitude in decimal degrees, with values spanning -178.785 to 179.306, consistent with the global WGS84 range. Two-thirds of rows are null (null_rate 0.6654), so coverage is the dominant concern; among populated rows the distribution is mildly left-skewed (-0.48) with a median of 47.72 suggesting an Eastern-Hemisphere bias. Only 13 outliers (0.16%) and no zeros, so the populated values themselves look clean.","role":"feature","scope":"column","target":"longitude","treatment":"Pair with latitude for geospatial features; impute or filter the 66.5% missing before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.vocab_size","alerts","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds ISO 639-3 language codes: every non-null value is exactly 3 characters and one word (len_min=len_max=3, one_word_rate=1.0), with 7968 distinct codes across 23740 rows. Two-thirds are missing (null_rate=0.6644), and the cardinality is near-unique among populated rows, suggesting one code per language entry rather than a repeated categorical.","role":"identifier","scope":"column","target":"iso639P3code","treatment":"Treat as a language-code key; left-join to an ISO 639-3 reference table and handle the 66% nulls explicitly."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","column","kind","n","n_unique","null_rate","stats"],"model":"anthropic:claude-opus-4-7","narrative":"This column is named 'description' but saturn skipped profiling, so no kind, uniqueness, or value statistics are available. Only the row count (23740) and a null rate of 0.0 are reported. Without further stats, the content and structure cannot be characterized.","role":"free_text","scope":"column","target":"description","treatment":"Re-profile or sample manually before deciding; if textual, tokenize and embed before modelling."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","column","kind","n","null_rate","stats"],"model":"anthropic:claude-opus-4-7","narrative":"This column was skipped by the profiler, so no statistics, uniqueness count, or value samples are available beyond a row count of 23,740 and a null rate of 0.0. The name suggests it holds markup or descriptive text (likely HTML or formatted product/item descriptions), but that is inferred from the label alone, not from evidence. No distributional signal can be assessed here.","role":"free_text","scope":"column","target":"markup_description","treatment":"Re-run the profiler with text handling enabled, then tokenize and embed before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.zero_rate","stats.median","stats.q1","stats.q3","stats.iqr","stats.max","stats.mean","stats.std","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"A numeric count of child families per record, with 23740 rows and only 88 distinct values. It is overwhelmingly zero (zero_rate 0.9082) so q1, median, and q3 are all 0 and IQR is 0, yet a long tail pushes max to 859 with mean 0.879 and std 13.20. Skew of 44.40 and kurtosis of 2352.94 confirm an extreme heavy-tailed distribution, and 2179 rows (9.18%) flag as outliers.","role":"feature","scope":"column","target":"child_family_count","treatment":"Binarize (zero vs non-zero) or apply log1p before modelling to tame the extreme skew."},{"confidence":"high","critiques":[],"evidence_keys":["zero_rate","q1","median","q3","skew","kurtosis","max","n_outliers","outlier_rate","mean","std"],"model":"anthropic:claude-opus-4-7","narrative":"A numeric count of child languages per record, where 81.7% of rows are zero and Q1=median=Q3=0, so the typical entity has none. The distribution is extremely long-tailed (skew 41.86, kurtosis 2115) with a max of 1435 and 4339 outliers (18.3% outlier rate), suggesting a small set of hub-like records dominate.","role":"feature","scope":"column","target":"child_language_count","treatment":"Binarize (has_children vs none) or log1p-transform before modelling given the heavy zero-inflation and skew."},{"confidence":"high","critiques":[],"evidence_keys":["zero_rate","median","q3","max","skew","kurtosis","outlier_rate","mean","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"This is a count of child dialects per record, dominated by zeros (zero_rate 0.7442) with a median and Q3 of 0/1 yet a max of 2369. Skew of 42.22 and kurtosis of 2159 confirm an extreme long tail, and 17.99% of rows flag as outliers. The mean of 3.39 is pulled far above the median, so any aggregate using it will mislead.","role":"feature","scope":"column","target":"child_dialect_count","treatment":"Log1p-transform or bin (zero / one / many) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds ISO-style country codes (PG, ID, NG, AU, IN\u2026) as a categorical feature with 680 distinct values across 23,740 rows. Coverage is poor \u2014 64.24% of rows are null \u2014 and the non-null distribution is broad (entropy 6.49, ratio 0.69) with Papua New Guinea leading at only 10.29%. The 680 distinct codes far exceed the ~250 real countries, suggesting multi-value concatenations or non-standard tokens behind the 'country_ids' plural.","role":"feature","scope":"column","target":"country_ids","treatment":"Split multi-code entries, normalise to ISO-3166, and impute or flag the 64% missing before one-hot or target encoding."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":4794,"prompt_tokens":18075,"total_tokens":22869}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:07:06+00:00","mode":"full","row_count":23740,"sampled_rows":23740,"seed":42,"source":"/home/coolhand/datasets/language-data/glottolog_languoid.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"bookkeeping":"boolean","child_dialect_count":"numeric","child_family_count":"numeric","child_language_count":"numeric","country_ids":"categorical","description":"unknown","family_id":"categorical","id":"text","iso639P3code":"text","latitude":"numeric","level":"categorical","longitude":"numeric","markup_description":"unknown","name":"text","parent_id":"text","status":"categorical"}}
