{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"cognate_id","extras":{"language_counts":{},"language_sample_size":4981,"length_histogram":{"counts":[5,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,0,0,0,477,0,0,0,0,0,0,0,0,0,0,0,0,4455],"edges":[7.0,7.075,7.15,7.225,7.3,7.375,7.45,7.525,7.6,7.675,7.75,7.825,7.9,7.975,8.05,8.125,8.2,8.275,8.35,8.425,8.5,8.575,8.65,8.725,8.8,8.875,8.95,9.025,9.1,9.175,9.25,9.325,9.4,9.475,9.55,9.625,9.7,9.775,9.85,9.925,10.0]},"near_unique":true,"sample":["iecor:12","iecor:8032","iecor:9076","iecor:8599","iecor:5170","iecor:9758","iecor:5291","iecor:9282","iecor:8613","iecor:322","iecor:6792","iecor:6234","iecor:2808","iecor:29","iecor:3033","iecor:1643","iecor:855","iecor:8714","iecor:9897","iecor:4969","iecor:3168","iecor:334","iecor:1412","iecor:894","iecor:7550","iecor:8146","iecor:6402","iecor:1700","iecor:663","iecor:9774","iecor:7198","iecor:9956","iecor:7258","iecor:4828","iecor:4163","iecor:5266","iecor:9553","iecor:6020","iecor:5376","iecor:6221","iecor:8570","iecor:9492","iecor:2794","iecor:9488","iecor:9681","iecor:9829","iecor:3828","iecor:4838","iecor:1378","iecor:9223"],"top_values":[],"top_words":[["iecor:3",1],["iecor:4",1],["iecor:5",1],["iecor:7",1],["iecor:9",1],["iecor:11",1],["iecor:12",1],["iecor:13",1],["iecor:14",1],["iecor:15",1],["iecor:18",1],["iecor:21",1],["iecor:22",1],["iecor:24",1],["iecor:25",1],["iecor:27",1],["iecor:28",1],["iecor:29",1],["iecor:38",1],["iecor:39",1],["iecor:41",1],["iecor:42",1],["iecor:44",1],["iecor:45",1],["iecor:46",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4981,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.76
66666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":4981,"n_null":0,"n_unique":4981,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":10,"len_mean":9.883557518570568,"len_median":10.0,"len_min":7,"len_p95":10.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4981,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"concept","extras":{"singletons":0,"top_values":[["",4981]]},"kind":"categorical","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+6.84"},{"code":"outliers","level":"warn","message":"13.0% rows beyond 1.5 
IQR"}],"column":"word_count","extras":{"histogram":{"counts":[3848,484,194,96,90,107,32,25,15,4,8,8,6,5,3,3,8,3,3,3,1,1,2,4,5,2,3,2,2,3,0,1,1,0,2,2,0,0,1,4],"edges":[1.0,4.9,8.8,12.7,16.6,20.5,24.4,28.3,32.2,36.1,40.0,43.9,47.8,51.699999999999996,55.6,59.5,63.4,67.3,71.2,75.1,79.0,82.89999999999999,86.8,90.7,94.6,98.5,102.39999999999999,106.3,110.2,114.1,118.0,121.89999999999999,125.8,129.7,133.6,137.5,141.4,145.29999999999998,149.2,153.1,157.0]},"sample":[7.0,29.0,14.0,25.0,1.0,7.0,22.0,23.0,21.0,10.0,50.0,53.0,5.0,52.0,1.0,21.0,18.0,5.0,5.0,2.0,3.0,109.0,3.0,2.0,3.0,3.0,3.0,4.0,3.0,12.0,16.0,8.0,5.0,14.0,10.0,24.0,6.0,2.0,2.0,88.0,2.0,2.0,20.0,22.0,14.0,14.0,21.0,1.0,17.0,3.0,6.0,5.0,11.0,8.0,6.0,7.0,8.0,7.0,9.0,28.0,29.0,24.0,22.0,3.0,34.0,8.0,1.0,41.0,4.0,5.0,5.0,5.0,2.0,6.0,2.0,8.0,6.0,7.0,9.0,6.0,2.0,1.0,4.0,10.0,2.0,12.0,8.0,5.0,6.0,7.0,8.0,4.0,5.0,8.0,7.0,8.0,3.0,2.0,4.0,4.0,8.0,2.0,3.0,1.0,3.0,3.0,3.0,2.0,1.0,3.0,6.0,2.0,1.0,30.0,2.0,7.0,4.0,2.0,2.0,3.0,2.0,2.0,95.0,5.0,2.0,1.0,5.0,7.0,2.0,13.0,4.0,2.0,8.0,5.0,11.0,2.0,10.0,2.0,2.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,2.0,3.0,2.0,1.0,7.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,10.0,1.0,1.0,4.0,1.0,7.0,1.0,3.0,1.0,2.0,11.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0,3.0,2.0,3.0,1.0,1.0,11.0,3.0,98.0,75.0,19.0,57.0,7.0,1.0,2.0,6.0,14.0,9.0,26.0,1.0,11.0,18.0,3.0,2.0,1.0,3.0,3.0,3.0,7.0,14.0,17.0,2.0,2.0,3.0,1.0,2.0,7.0,3.0,2.0,1.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,4.0,2.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,1.0,5.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,5.0,1.0,1.0,2.0,2.0,28.0,1.0,6.0,3.0,2.0,1.0,2.0,4.0,1.0,2.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,21.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,4.0,1.0,7.0,2.0,1.0,1.0,1.0,2.0,1.0,19.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0
,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,6.0,1.0,6.0,3.0,7.0,3.0,8.0,5.0,1.0,7.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,6.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,10.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":93,"null_rate":0.0,"stats":{"iqr":3.0,"kurtosis":59.740272006473056,"max":157.0,"mean":5.167837783577595,"median":2.0,"min":1.0,"n_outliers":649,"outlier_rate":0.1302951214615539,"q1":1.0,"q3":4.0,"skew":6.837154477764645,"std":12.134749527988367,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+6.84"},{"code":"outliers","level":"warn","message":"13.0% rows beyond 1.5 IQR"}],"column":"language_count","extras":{"histogram":{"counts":[3849,483,194,96,90,107,32,25,15,4,8,8,6,5,3,3,8,3,3,4,0,1,2,4,5,2,3,2,2,3,0,1,1,0,2,2,0,0,1,4],"edges":[1.0,4.9,8.8,12.7,16.6,20.5,24.4,28.3,32.2,36.1,40.0,43.9,47.8,51.699999999999996,55.6,59.5,63.4,67.3,71.2,75.1,79.0,82.89999999999999,86.8,90.7,94.6,98.5,102.39999999999999,106.3,110.2,114.1,118.0,121.89999999999999,125.8,129.7,133.6,137.5,141.4,145.29999999999998,149.2,153.1,157.0]},"sample":[7.0,29.0,14.0,25.0,1.0,7.0,22.0,23.0,21.0,10.0,50.0,53.0,5.0,52.0,1.0,21.0,18.0,5.0,5.0,2.0,3.0,109.0,3.0,2.0,3.0,3.0,3.0,4.0,3.0,12.0,16.0,8.0,5.0,14.0,10.0,24.0,6.0,2.0,2.0,88.0,2.0,2.0,20.0,22.0,14.0,14.0,21.0,1.0,17.0,3.0,6.0,5.0,11.0,8.0,6.0,7.0,8.0,7.0,9.0,28.0,29.0,24.0,22.0,3.0,34.0,8.0,1.0,41.0,4.0,5.0,5.0,5.0,2.0,6.0,2.0,8.0,6.0,7.0,9.0,6.0,2.0,1.0,4.0,10.0,2.0,12.0,8.0,5.0,6.0,7.0,8.0,4.0,5.0,8.0,7.0,8.0,3.0,2.0,4.0,4.0,8.0,2.0,3.0,1.0,3.0,3.0,3.0,2.0,1.0,3.0,6.0,2.0,1.0,30.0,2.0,7.0,4.0,2.0,2.0,3.0,2.0,2.0,95.0,5.0,2.0,1.0,5.0,7.0,2.0,13.0,4.0,2.
0,8.0,5.0,11.0,2.0,10.0,2.0,2.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,2.0,3.0,2.0,1.0,7.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,10.0,1.0,1.0,4.0,1.0,7.0,1.0,3.0,1.0,2.0,11.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0,3.0,2.0,3.0,1.0,1.0,11.0,3.0,98.0,75.0,19.0,57.0,7.0,1.0,2.0,6.0,14.0,9.0,26.0,1.0,11.0,18.0,3.0,2.0,1.0,3.0,3.0,3.0,7.0,14.0,17.0,2.0,2.0,3.0,1.0,2.0,7.0,3.0,2.0,1.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,4.0,2.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,1.0,5.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,5.0,1.0,1.0,2.0,2.0,28.0,1.0,6.0,3.0,2.0,1.0,2.0,4.0,1.0,2.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,21.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,4.0,1.0,7.0,2.0,1.0,1.0,1.0,2.0,1.0,19.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,6.0,1.0,6.0,3.0,7.0,3.0,8.0,5.0,1.0,7.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,6.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,10.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":94,"null_rate":0.0,"stats":{"iqr":3.0,"kurtosis":59.77488401145233,"max":157.0,"mean":5.166231680385465,"median":2.0,"min":1.0,"n_outliers":649,"outlier_rate":0.1302951214615539,"q1":1.0,"q3":4.0,"skew":6.83821712056036,"std":12.130385693042744,"zero_rate":0.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.6% of rows are unique 
strings"}],"column":"words","extras":{"language_counts":{},"language_sample_size":4981,"length_histogram":{"counts":[3857,475,191,99,93,98,36,27,14,6,8,7,6,4,4,4,4,6,3,4,1,0,3,3,4,2,3,3,2,2,1,0,2,1,1,2,0,0,2,3],"edges":[83.0,454.825,826.65,1198.475,1570.3,1942.125,2313.95,2685.775,3057.6,3429.4249999999997,3801.25,4173.075,4544.9,4916.724999999999,5288.55,5660.375,6032.2,6404.025,6775.849999999999,7147.675,7519.5,7891.325,8263.15,8634.975,9006.8,9378.625,9750.449999999999,10122.275,10494.1,10865.925,11237.75,11609.574999999999,11981.4,12353.225,12725.05,13096.875,13468.699999999999,13840.525,14212.35,14584.175,14956.0]},"near_unique":true,"sample":["[{\"form\": \"s\\u016bxtan\", \"language\": \"Persian: Tehran\", \"iso_639_3\": \"pes\", \"glottocode\": \"west2369\"}, {\"form\": \"su\\u0292yn\", \"language\": \"Ossetic: Iron\", \"iso_639_3\": \"oss\", \"glottocode\": \"iron1242\"}, {\"form\": \"so\\u0292un\", \"language\": \"Ossetic: Digor\", \"iso_639_3\": \"oss\", \"glottocode\": \"digo1242\"}, {\"form\": \"s\\u016b\\u015bt\\u00e4\", \"language\": \"Khotanese\", \"iso_639_3\": \"kho\", \"glottocode\": \"khot1251\"}, {\"form\": \"saoca\", \"language\": \"Avestan: Younger\", \"iso_639_3\": \"ave\", \"glottocode\": \"aves1237\"}, {\"form\": \"s\\u014dz\\u0113d\", \"language\": \"Middle Persian\", \"iso_639_3\": \"xmn\", \"glottocode\": \"pahl1241\"}, {\"form\": \"suxs-\", \"language\": \"Yaghnobi\", \"iso_639_3\": \"yai\", \"glottocode\": \"yagn1238\"}, {\"form\": \"sohdak\", \"language\": \"Bakhtiari\", \"iso_639_3\": \"bqi\", \"glottocode\": \"bakh1245\"}, {\"form\": \"suzene\", \"language\": \"Mazanderani\", \"iso_639_3\": \"mzn\", \"glottocode\": \"maza1291\"}, {\"form\": \"suxsa\", \"language\": \"Sogdian\", \"iso_639_3\": \"sog\", \"glottocode\": \"sogd1245\"}, {\"form\": \"as\\u016bt\\u0131t\", \"language\": \"Kurdish C.: Jafi\", \"iso_639_3\": \"ckb\", \"glottocode\": \"cent1972\"}, {\"form\": \"misujiye\", \"language\": \"Tati\", \"iso_639_3\": \"tks\", 
\"glottocode\": \"take1255\"}, {\"form\": \"so\\u010d\\u012bt\", \"language\": \"Balochi: Sistani\", \"iso_639_3\": \"bgn\", \"glottocode\": \"west2368\"}, {\"form\": \"asote\", \"language\": \"Lari\", \"iso_639_3\": \"lrl\", \"glottocode\": \"lari1253\"}, {\"form\": \"miso\\u03c7e\", \"language\": \"Delvari\", \"iso_639_3\": \"fay\", \"glottocode\": \"sout2645\"}, {\"form\": \"mas\\u016b\\u010do\", \"language\": \"Hawrami\", \"iso_639_3\": \"hac\", \"glottocode\": \"hawr1243\"}, {\"form\": \"s\\u016bz\\u0113\", \"language\": \"Kurdish S.: Elami\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}, {\"form\": \"\\u1ea1suje\", \"language\": \"Raji: Barzoki\", \"iso_639_3\": \"\", \"glottocode\": \"cent2264\"}, {\"form\": \"swazedal\", \"language\": \"Pashto\", \"iso_639_3\": \"pbu\", \"glottocode\": \"yusu1238\"}, {\"form\": \"d\\u0268so\\u017eit\", \"language\": \"Kurdish N.: Bahdini\", \"iso_639_3\": \"kmr\", \"glottocode\": \"nort2641\"}, {\"form\": \"s\\u014d\\u010d\\u0113d\", \"language\": \"Parthian\", \"iso_639_3\": \"xpr\", \"glottocode\": \"part1239\"}, {\"form\": \"a-s\\u016bz-\\u0113\", \"language\": \"Kurdish S.: Qorveh\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}]","[{\"form\": \"r\\u00e9st\\u00e2\", \"language\": \"Franco-Proven\\u00e7al\", \"iso_639_3\": \"frp\", \"glottocode\": \"fran1269\"}]","[{\"form\": \"s\\u0101pt\", \"language\": \"Sogdian\", \"iso_639_3\": \"sog\", \"glottocode\": \"sogd1245\"}]","[{\"form\": \"wa\\u0113\", \"language\": \"Kurdish S.: Elami\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}, {\"form\": \"a-w(a)-\\u0101\", \"language\": \"Kurdish S.: Qorveh\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}]","[{\"form\": \"sp\\u00edti\", \"language\": \"Greek: Modern Std\", \"iso_639_3\": \"ell\", \"glottocode\": \"mode1248\"}, {\"form\": \"sp\\u00edtin\", \"language\": \"Greek: Cypriot\", \"iso_639_3\": \"ell\", \"glottocode\": \"cypr1249\"}, {\"form\": \"sp\\u00edti\", \"language\": \"Greek: 
Italiot\", \"iso_639_3\": \"ell\", \"glottocode\": \"apul1236\"}, {\"form\": \"spit\", \"language\": \"Greek: Cappadocian\", \"iso_639_3\": \"cpg\", \"glottocode\": \"capp1239\"}, {\"form\": \"osp\\u00edti(n)\", \"language\": \"Greek: Pontic\", \"iso_639_3\": \"pnt\", \"glottocode\": \"pont1253\"}]","[{\"form\": \"\\u01f0\\u0259tu\", \"language\": \"Pashai: North-West\", \"iso_639_3\": \"glh\", \"glottocode\": \"nort2665\"}]","[{\"form\": \"c\\u014dgit\\u0101re\", \"language\": \"Latin\", \"iso_639_3\": \"lat\", \"glottocode\": \"lati1261\"}, {\"form\": \"cuider\", \"language\": \"Anglo-Norman\", \"iso_639_3\": \"xno\", \"glottocode\": \"angl1258\"}, {\"form\": \"cuidier\", \"language\": \"Old French\", \"iso_639_3\": \"fro\", \"glottocode\": \"oldf1239\"}]","[{\"form\": \"peden\", \"language\": \"Old Occitan\", \"iso_639_3\": \"pro\", \"glottocode\": \"oldp1253\"}]","[{\"form\": \"t\\u0113\\u03b3\", \"language\": \"Khwarazmian\", \"iso_639_3\": \"xco\", \"glottocode\": \"khwa1238\"}]","[{\"form\": \"denken\", \"language\": \"Dutch\", \"iso_639_3\": \"nld\", \"glottocode\": \"dutc1256\"}, {\"form\": \"think\", \"language\": \"English\", \"iso_639_3\": \"eng\", \"glottocode\": \"stan1293\"}, {\"form\": \"denken\", \"language\": \"Flemish\", \"iso_639_3\": \"vls\", \"glottocode\": \"vlaa1240\"}, {\"form\": \"tinke\", \"language\": \"Frisian\", \"iso_639_3\": \"frs\", \"glottocode\": \"west2354\"}, {\"form\": \"denken\", \"language\": \"German\", \"iso_639_3\": \"deu\", \"glottocode\": \"stan1295\"}, {\"form\": \"denken\", \"language\": \"Luxembourgish\", \"iso_639_3\": \"ltz\", \"glottocode\": \"luxe1241\"}, {\"form\": \"thenken\", \"language\": \"Old High German\", \"iso_639_3\": \"goh\", \"glottocode\": \"oldh1241\"}, {\"form\": \"thenkian\", \"language\": \"Old Saxon\", \"iso_639_3\": \"osx\", \"glottocode\": \"olds1250\"}, {\"form\": \"\\u00feen\\u010ban\", \"language\": \"Old English\", \"iso_639_3\": \"ang\", \"glottocode\": \"olde1238\"}, {\"form\": 
\"denken\", \"language\": \"Middle High German\", \"iso_639_3\": \"gmh\", \"glottocode\": \"midd1343\"}, {\"form\": \"denken\", \"language\": \"Middle Dutch\", \"iso_639_3\": \"dum\", \"glottocode\": \"midd1321\"}, {\"form\": \"thentsa\", \"language\": \"Old Frisian\", \"iso_639_3\": \"ofs\", \"glottocode\": \"oldf1241\"}, {\"form\": \"\\u00feagkjan\", \"language\": \"Gothic\", \"iso_639_3\": \"got\", \"glottocode\": \"goth1244\"}]","[{\"form\": \"mast\", \"language\": \"Ossetic: Iron\", \"iso_639_3\": \"oss\", \"glottocode\": \"iron1242\"}, {\"form\": \"mast\", \"language\": \"Ossetic: Digor\", \"iso_639_3\": \"oss\", \"glottocode\": \"digo1242\"}]","[{\"form\": \"de\\u00edsa\", \"language\": \"Greek: Pontic\", \"iso_639_3\": \"pnt\", \"glottocode\": \"pont1253\"}]","[{\"form\": \"sia\\u0169ras\", \"language\": \"Lithuanian\", \"iso_639_3\": \"lit\", \"glottocode\": \"lith1251\"}, {\"form\": \"\\u0161aurs\", \"language\": \"Latvian\", \"iso_639_3\": \"lav\", \"glottocode\": \"latv1249\"}, {\"form\": \"\\u0161aurs\", \"language\": \"Latgalian\", \"iso_639_3\": \"ltg\", \"glottocode\": \"east2282\"}]","[{\"form\": \"starga\", \"language\": \"Pashto\", \"iso_639_3\": \"pbu\", \"glottocode\": \"yusu1238\"}]","[{\"form\": \"bh\\u0101b\\u0101\", \"language\": \"Bengali\", \"iso_639_3\": \"ben\", \"glottocode\": \"beng1280\"}, {\"form\": \"bh\\u0101w\\u0101\", \"language\": \"Assamese\", \"iso_639_3\": \"asm\", \"glottocode\": \"assa1263\"}]","[{\"form\": \"p\\u0101\\u1e45kha\", \"language\": \"Hindi\", \"iso_639_3\": \"hin\", \"glottocode\": \"hind1269\"}, {\"form\": \"py\\u0101m\\u0310kh\", \"language\": \"Nepali\", \"iso_639_3\": \"nep\", \"glottocode\": \"east1436\"}, {\"form\": \"pa\\u1e45kh\", \"language\": \"Maithili\", \"iso_639_3\": \"mai\", \"glottocode\": \"mait1250\"}, {\"form\": \"pa\\u1e45kha\", \"language\": \"Marathi\", \"iso_639_3\": \"mar\", \"glottocode\": \"mara1378\"}, {\"form\": \"pa\\u1e45kh\", \"language\": \"Magahi\", \"iso_639_3\": \"mag\", 
\"glottocode\": \"maga1260\"}, {\"form\": \"p\\u0101\\u1e45kh\", \"language\": \"Bhojpuri\", \"iso_639_3\": \"bho\", \"glottocode\": \"bhoj1244\"}, {\"form\": \"p\\u0101khi\", \"language\": \"Assamese\", \"iso_639_3\": \"asm\", \"glottocode\": \"assa1263\"}]","[{\"form\": \"gran\", \"language\": \"Catalan\", \"iso_639_3\": \"cat\", \"glottocode\": \"stan1289\"}, {\"form\": \"grand\", \"language\": \"French\", \"iso_639_3\": \"fra\", \"glottocode\": \"stan1290\"}, {\"form\": \"grande\", \"language\": \"Italian\", \"iso_639_3\": \"ita\", \"glottocode\": \"ital1282\"}, {\"form\": \"gran\", \"language\": \"Ladin\", \"iso_639_3\": \"lld\", \"glottocode\": \"ladi1250\"}, {\"form\": \"grande\", \"language\": \"Portuguese\", \"iso_639_3\": \"por\", \"glottocode\": \"port1283\"}, {\"form\": \"grande\", \"language\": \"Spanish\", \"iso_639_3\": \"spa\", \"glottocode\": \"stan1288\"}, {\"form\": \"grand\", \"language\": \"Walloon\", \"iso_639_3\": \"wln\", \"glottocode\": \"wall1255\"}, {\"form\": \"grant\", \"language\": \"Friulian\", \"iso_639_3\": \"fur\", \"glottocode\": \"friu1240\"}, {\"form\": \"grant\", \"language\": \"Anglo-Norman\", \"iso_639_3\": \"xno\", \"glottocode\": \"angl1258\"}, {\"form\": \"grande\", \"language\": \"Old Spanish\", \"iso_639_3\": \"osp\", \"glottocode\": \"olds1249\"}, {\"form\": \"grant\", \"language\": \"Old French\", \"iso_639_3\": \"fro\", \"glottocode\": \"oldf1239\"}, {\"form\": \"gran\", \"language\": \"Franco-Proven\\u00e7al\", \"iso_639_3\": \"frp\", \"glottocode\": \"fran1269\"}, {\"form\": \"gran\", \"language\": \"Old Occitan\", \"iso_639_3\": \"pro\", \"glottocode\": \"oldp1253\"}, {\"form\": \"gran\", \"language\": \"Old Catalan\", \"iso_639_3\": \"cat\", \"glottocode\": \"oldc1251\"}, {\"form\": \"grande\", \"language\": \"Portuguese: Brazilian\", \"iso_639_3\": \"por\", \"glottocode\": \"braz1246\"}, {\"form\": \"gr\\u00e0nt\", \"language\": \"Milanese\", \"iso_639_3\": \"lmo\", \"glottocode\": \"mila1243\"}]","[{\"form\": 
\"mustambeda\", \"language\": \"Khwarazmian\", \"iso_639_3\": \"xco\", \"glottocode\": \"khwa1238\"}]","[{\"form\": \"n\\u2032il\", \"language\": \"V\\u00e2si-vari: Pa\\u1e63ki\", \"iso_639_3\": \"prn\", \"glottocode\": \"lowe1390\"}]","[{\"form\": \"loyr\", \"language\": \"Old Welsh\", \"iso_639_3\": \"owl\", \"glottocode\": \"oldw1241\"}, {\"form\": \"loar, lo\\u00e0r, loer\", \"language\": \"Middle Breton\", \"iso_639_3\": \"xbm\", \"glottocode\": \"midd1359\"}, {\"form\": \"loir, loer\", \"language\": \"Old Breton\", \"iso_639_3\": \"obt\", \"glottocode\": \"oldb1248\"}, {\"form\": \"loar\", \"language\": \"Breton: Treger\", \"iso_639_3\": \"bre\", \"glottocode\": \"treg1244\"}, {\"form\": \"l\\u00fbr\", \"language\": \"Late Cornish\", \"iso_639_3\": \"cor\", \"glottocode\": \"oldc1252\"}, {\"form\": \"lor\", \"language\": \"Middle Cornish\", \"iso_639_3\": \"cnx\", \"glottocode\": \"oldc1252\"}, {\"form\": \"luer\", \"language\": \"Breton: Gwened\", \"iso_639_3\": \"bre\", \"glottocode\": \"vann1244\"}, {\"form\": \"lleuad\", \"language\": \"Welsh: North\", \"iso_639_3\": \"cym\", \"glottocode\": \"nort2668\"}, {\"form\": \"lloer\", \"language\": \"Middle Welsh\", \"iso_639_3\": \"wlm\", \"glottocode\": \"midd1363\"}]","[{\"form\": \"prithb\\u012b\", \"language\": \"Maithili\", \"iso_639_3\": \"mai\", \"glottocode\": \"mait1250\"}]","[{\"form\": \"wassen\", \"language\": \"Dutch\", \"iso_639_3\": \"nld\", \"glottocode\": \"dutc1256\"}, {\"form\": \"wash\", \"language\": \"English\", \"iso_639_3\": \"eng\", \"glottocode\": \"stan1293\"}, {\"form\": \"wasschen\", \"language\": \"Flemish\", \"iso_639_3\": \"vls\", \"glottocode\": \"vlaa1240\"}, {\"form\": \"waschen\", \"language\": \"German\", \"iso_639_3\": \"deu\", \"glottocode\": \"stan1295\"}, {\"form\": \"w\\u00e4schen\", \"language\": \"Luxembourgish\", \"iso_639_3\": \"ltz\", \"glottocode\": \"luxe1241\"}, {\"form\": \"waskje\", \"language\": \"Frisian\", \"iso_639_3\": \"frs\", \"glottocode\": 
\"west2354\"}, {\"form\": \"w\\u00e4sche\", \"language\": \"German: Bernese\", \"iso_639_3\": \"gsw\", \"glottocode\": \"swis1247\"}, {\"form\": \"uuaskan\", \"language\": \"Old High German\", \"iso_639_3\": \"goh\", \"glottocode\": \"oldh1241\"}, {\"form\": \"uuaskan\", \"language\": \"Old Saxon\", \"iso_639_3\": \"osx\", \"glottocode\": \"olds1250\"}, {\"form\": \"wascan\", \"language\": \"Old English\", \"iso_639_3\": \"ang\", \"glottocode\": \"olde1238\"}, {\"form\": \"waschen\", \"language\": \"Middle High German\", \"iso_639_3\": \"gmh\", \"glottocode\": \"midd1343\"}, {\"form\": \"wasschen\", \"language\": \"Middle Dutch\", \"iso_639_3\": \"dum\", \"glottocode\": \"midd1321\"}]","[{\"form\": \"padda- / padd-\", \"language\": \"Hittite\", \"iso_639_3\": \"hit\", \"glottocode\": \"hitt1242\"}, {\"form\": \"fodere\", \"language\": \"Latin\", \"iso_639_3\": \"lat\", \"glottocode\": \"lati1261\"}, {\"form\": \"fuir\", \"language\": \"Anglo-Norman\", \"iso_639_3\": \"xno\", \"glottocode\": \"angl1258\"}, {\"form\": \"fuir\", \"language\": \"Old French\", \"iso_639_3\": \"fro\", \"glottocode\": \"oldf1239\"}]","[{\"form\": \"ca\\u00e7ar\", \"language\": \"Catalan\", \"iso_639_3\": \"cat\", \"glottocode\": \"stan1289\"}, {\"form\": \"chasser\", \"language\": \"French\", \"iso_639_3\": \"fra\", \"glottocode\": \"stan1290\"}, {\"form\": \"cacciare\", \"language\": \"Italian\", \"iso_639_3\": \"ita\", \"glottocode\": \"ital1282\"}, {\"form\": \"ciac\\u00e9\", \"language\": \"Ladin\", \"iso_639_3\": \"lld\", \"glottocode\": \"ladi1250\"}, {\"form\": \"ca\\u00e7ar\", \"language\": \"Portuguese\", \"iso_639_3\": \"por\", \"glottocode\": \"port1283\"}, {\"form\": \"cazzare\", \"language\": \"Sardinian: Logudoro\", \"iso_639_3\": \"src\", \"glottocode\": \"sout2614\"}, {\"form\": \"cassare\", \"language\": \"Sardinian: Nuoro\", \"iso_639_3\": \"src\", \"glottocode\": \"barb1262\"}, {\"form\": \"cazar\", \"language\": \"Spanish\", \"iso_639_3\": \"spa\", \"glottocode\": 
\"stan1288\"}, {\"form\": \"tch\\u00e8ss\\u00ee\", \"language\": \"Walloon\", \"iso_639_3\": \"wln\", \"glottocode\": \"wall1255\"}, {\"form\": \"cja\\u00e7\\u00e2\", \"language\": \"Friulian\", \"iso_639_3\": \"fur\", \"glottocode\": \"friu1240\"}, {\"form\": \"\\u00ec a caccia\", \"language\": \"Neapolitan\", \"iso_639_3\": \"nap\", \"glottocode\": \"neap1235\"}, {\"form\": \"chacer\", \"language\": \"Anglo-Norman\", \"iso_639_3\": \"xno\", \"glottocode\": \"angl1258\"}, {\"form\": \"ca\\u00e7ar\", \"language\": \"Old Spanish\", \"iso_639_3\": \"osp\", \"glottocode\": \"olds1249\"}, {\"form\": \"chacier\", \"language\": \"Old French\", \"iso_639_3\": \"fro\", \"glottocode\": \"oldf1239\"}, {\"form\": \"shafi\", \"language\": \"Franco-Proven\\u00e7al\", \"iso_639_3\": \"frp\", \"glottocode\": \"fran1269\"}, {\"form\": \"c\\u0103\\u0163\\u00e1ri\", \"language\": \"Megleno-Romanian\", \"iso_639_3\": \"ruq\", \"glottocode\": \"megl1237\"}, {\"form\": \"cassar\", \"language\": \"Old Occitan\", \"iso_639_3\": \"pro\", \"glottocode\": \"oldp1253\"}, {\"form\": \"ca\\u00e7ar\", \"language\": \"Old Catalan\", \"iso_639_3\": \"cat\", \"glottocode\": \"oldc1251\"}, {\"form\": \"ca\\u00e7ar\", \"language\": \"Portuguese: Brazilian\", \"iso_639_3\": \"por\", \"glottocode\": \"braz1246\"}, {\"form\": \"cas\\u00e0a\", \"language\": \"Milanese\", \"iso_639_3\": \"lmo\", \"glottocode\": \"mila1243\"}]","[{\"form\": \"cuci\\u1e45\", \"language\": \"Maithili\", \"iso_639_3\": \"mai\", \"glottocode\": \"mait1250\"}]","[{\"form\": \"m\\u0101r\\u0121\\u0101ne\", \"language\": \"Mazanderani\", \"iso_639_3\": \"mzn\", \"glottocode\": \"maza1291\"}, {\"form\": \"m\\u01ddr\\u03b3iz\\u0101t\\u0113\", \"language\": \"Sogdian\", \"iso_639_3\": \"sog\", \"glottocode\": \"sogd1245\"}]","[{\"form\": \"combate\", \"language\": \"Ladin\", \"iso_639_3\": \"lld\", \"glottocode\": \"ladi1250\"}]","[{\"form\": \"s\\u00fail\", \"language\": \"Gaelic: Irish\", \"iso_639_3\": \"gle\", \"glottocode\": 
\"iris1253\"}, {\"form\": \"s\\u00f9il\", \"language\": \"Gaelic: Scottish\", \"iso_639_3\": \"gla\", \"glottocode\": \"scot1245\"}, {\"form\": \"s\\u00fail\", \"language\": \"Old Irish\", \"iso_639_3\": \"sga\", \"glottocode\": \"oldi1245\"}, {\"form\": \"sooill\", \"language\": \"Gaelic: Manx\", \"iso_639_3\": \"glv\", \"glottocode\": \"manx1243\"}]","[{\"form\": \"\\u01f0owr\", \"language\": \"Armenian: Western\", \"iso_639_3\": \"hyw\", \"glottocode\": \"homs1234\"}, {\"form\": \"\\u01f0owr\", \"language\": \"Armenian: Eastern\", \"iso_639_3\": \"hye\", \"glottocode\": \"nucl1235\"}, {\"form\": \"\\u01f0owr\", \"language\": \"Armenian: Classical\", \"iso_639_3\": \"xcl\", \"glottocode\": \"clas1249\"}]","[{\"form\": \"kerm\\u012br\", \"language\": \"Sogdian\", \"iso_639_3\": \"sog\", \"glottocode\": \"sogd1245\"}]","[{\"form\": \"\\u2032\\u00fc\\u0148 e-\", \"language\": \"Kamviri\", \"iso_639_3\": \"xvi\", \"glottocode\": \"kamv1242\"}]","[{\"form\": \"x\\u00f4k\\u00f4t\", \"language\": \"Assamese\", \"iso_639_3\": \"asm\", \"glottocode\": \"assa1263\"}]","[{\"form\": \"apala\", \"language\": \"Kala\\u1e63a-al\\u00e2: Ni\\u0161eigr\\u00e2m\", \"iso_639_3\": \"wbk\", \"glottocode\": \"chim1297\"}]","[{\"form\": \"kurjawa\", \"language\": \"Sorbian: Upper\", \"iso_639_3\": \"hsb\", \"glottocode\": \"uppe1395\"}, {\"form\": \"kurjawa\", \"language\": \"Sorbian: Lower\", \"iso_639_3\": \"dsb\", \"glottocode\": \"lowe1385\"}]","[{\"form\": \"\\u1e6d\\u0101n\\u0101\", \"language\": \"Bengali\", \"iso_639_3\": \"ben\", \"glottocode\": \"beng1280\"}, {\"form\": \"t\\u0101nnu\", \"language\": \"Nepali\", \"iso_639_3\": \"nep\", \"glottocode\": \"east1436\"}, {\"form\": \"\\u1e6d\\u0101n\\u0101\", \"language\": \"Assamese\", \"iso_639_3\": \"asm\", \"glottocode\": \"assa1263\"}, {\"form\": \"ndanj\", \"language\": \"Albanian: Arb\\u00ebresh\", \"iso_639_3\": \"aae\", \"glottocode\": \"arbe1236\"}]","[{\"form\": \"thrutna\", \"language\": \"Old Swedish\", \"iso_639_3\": 
\"swe\", \"glottocode\": \"swed1254\"}]","[{\"form\": \"cariah\", \"language\": \"Late Cornish\", \"iso_639_3\": \"cor\", \"glottocode\": \"oldc1252\"}]","[{\"form\": \"fysa\\u00ednomai\", \"language\": \"Greek: Italiot\", \"iso_639_3\": \"ell\", \"glottocode\": \"apul1236\"}]","[{\"form\": \"i\\u0161ki\\u0161-\", \"language\": \"Hittite\", \"iso_639_3\": \"hit\", \"glottocode\": \"hitt1242\"}]","[{\"form\": \"ro\\u1e2fz\\u014d\", \"language\": \"Greek: Pontic\", \"iso_639_3\": \"pnt\", \"glottocode\": \"pont1253\"}]","[{\"form\": \"h\\u0131q ka\\u0113\", \"language\": \"Kurdish S.: Elami\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}]","[{\"form\": \"a\\u0142tot\", \"language\": \"Armenian: Western\", \"iso_639_3\": \"hyw\", \"glottocode\": \"homs1234\"}, {\"form\": \"a\\u0142te\\u0142i\", \"language\": \"Armenian: Classical\", \"iso_639_3\": \"xcl\", \"glottocode\": \"clas1249\"}]","[{\"form\": \"r\\u0101ha\\u1e47e\", \"language\": \"Marathi\", \"iso_639_3\": \"mar\", \"glottocode\": \"mara1378\"}, {\"form\": \"rahan\\u0101\", \"language\": \"Magahi\", \"iso_639_3\": \"mag\", \"glottocode\": \"maga1260\"}]","[{\"form\": \"tseq\", \"language\": \"Khowar\", \"iso_639_3\": \"khw\", \"glottocode\": \"khow1242\"}, {\"form\": \"\\u0107iki\", \"language\": \"Kala\\u1e63a-al\\u00e2: Ni\\u0161eigr\\u00e2m\", \"iso_639_3\": \"wbk\", \"glottocode\": \"chim1297\"}]","[{\"form\": \"r\\u012b\\u0301\\u0161a\", \"language\": \"Yaghnobi\", \"iso_639_3\": \"yai\", \"glottocode\": \"yagn1238\"}, {\"form\": \"ri\\u0161a\", \"language\": \"Kurdish S.: Qorveh\", \"iso_639_3\": \"sdh\", \"glottocode\": \"sout2640\"}]","[{\"form\": \"zab\\u0101n\", \"language\": \"Urdu\", \"iso_639_3\": \"urd\", \"glottocode\": \"urdu1245\"}]","[{\"form\": \"l\\u016bdere\", \"language\": \"Latin\", \"iso_639_3\": \"lat\", \"glottocode\": \"lati1261\"}]","[{\"form\": \"\\u0161tom\", \"language\": \"Sorbian: Upper\", \"iso_639_3\": \"hsb\", \"glottocode\": \"uppe1395\"}]","[{\"form\": 
\"stuttur\", \"language\": \"Faroese\", \"iso_639_3\": \"fao\", \"glottocode\": \"faro1244\"}, {\"form\": \"stuttur\", \"language\": \"Icelandic\", \"iso_639_3\": \"isl\", \"glottocode\": \"icel1247\"}, {\"form\": \"stutt\", \"language\": \"Elfdalian\", \"iso_639_3\": \"ovd\", \"glottocode\": \"elfd1234\"}]","[{\"form\": \"xrt\", \"language\": \"Hawrami\", \"iso_639_3\": \"hac\", \"glottocode\": \"hawr1243\"}]"],"top_values":[],"top_words":[["\"language\":",12303],["\"iso_639_3\":",11941],["\"glottocode\":",11771],["{\"form\":",7657],["[{\"form\":",4981],["\"greek:",797],["\"old",650],["\"armenian:",440],["\"albanian:",418],["\"ell\",",371],["\"gaelic:",359],["\"middle",331],["\"kurdish",296],["\"ossetic:",252],["\"oss\",",252],["eastern\",",246],["irish\",",242],["\"grc\",",223],["\"sorbian:",207],["\"tocharian",199],["\"breton:",185],["\"bre\",",182],["breton\",",170],["cornish\",",169],["\"ben\",",167]],"vocab_skipped":null,"word_histogram":{"counts":[4097,383,147,123,96,31,17,7,12,6,5,3,10,4,4,2,2,3,7,2,4,3,2,0,2,1,3,0,1,4],"edges":[8.0,52.7,97.4,142.10000000000002,186.8,231.5,276.20000000000005,320.90000000000003,365.6,410.3,455.0,499.70000000000005,544.4000000000001,589.1,633.8000000000001,678.5,723.2,767.9000000000001,812.6,857.3000000000001,902.0,946.7,991.4000000000001,1036.1000000000001,1080.8000000000002,1125.5,1170.2,1214.9,1259.6000000000001,1304.3000000000002,1349.0]}},"kind":"text","n":4981,"n_null":0,"n_unique":4963,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0036137321822927123,"emoji_rate":0.0,"len_max":14956,"len_mean":498.88094760088336,"len_median":184.0,"len_min":83,"len_p95":1988.0,"n_duplicates":18,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":48.4334418527202,"url_rate":0.0,"vocab_size":12094,"word_mean":44.53382854848424,"word_median":16.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of 
rows"}],"column":"source_dataset","extras":{"singletons":0,"top_values":[["iecor",4981]]},"kind":"categorical","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"iecor"}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct value"}],"column":"confidence","extras":{"histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4981,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.525,0.55,0.575,0.6,0.625,0.65,0.675,0.7,0.725,0.75,0.775,0.8,0.825,0.8500000000000001,0.875,0.9,0.925,0.95,0.9750000000000001,1.0,1.025,1.05,1.0750000000000002,1.1,1.125,1.15,1.175,1.2000000000000002,1.225,1.25,1.275,1.3,1.3250000000000002,1.35,1.375,1.4,1.425,1.4500000000000002,1.475,1.5]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":1.0,"mean":1.0,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":1.0,"q3":1.0,"skew":0.0,"std":0.0,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["language_count","word_count","words","cognate_id","concept","confidence","source_dataset"],"featured_charts":[{"caption":"Highly skewed: most cognate sets cover just 2 languages but a few reach 157.","column":"language_count","kind":"histogram"},{"caption":"Mirrors language_count closely \u2014 check whether the two are effectively redundant.","column":"word_count","kind":"histogram"},{"caption":"Text payload length ranges from 83 to ~15k characters; the long tail marks the richest cognate sets.","column":"words","kind":"length"},{"caption":"Confirms every row comes from the single 'iecor' source.","column":"source_dataset","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 4,981 cognate sets from the IECoR source, with each row identified by a unique cognate_id and accompanied by a JSON-like 'words' payload listing language 
entries. The numeric columns language_count and word_count are nearly identical twins, both highly skewed (skew ~6.84) with a median of 2 but a max of 157 and ~13% outliers \u2014 a small set of cognate groups is dramatically larger than the rest. Three columns (concept, confidence, source_dataset) are constant or empty and carry no analytic signal. Start by examining the distribution of language_count to understand the long tail of cross-linguistic coverage, and inspect the longest 'words' entries (len_max ~14,956) to see which cognate sets dominate.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.vocab_size","stats.len_min","stats.len_max","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is a unique cognate identifier column, with every one of the 4981 rows carrying a distinct single-token value (vocab_size 4981, one_word_rate 1.0, null_rate 0). Values follow a fixed `iecor:<n>` scheme with lengths between 7 and 10 characters, consistent with a namespaced primary key from the IE-CoR lexical database. There is nothing to model here \u2014 it is pure row identity with zero duplicates.","role":"identifier","scope":"column","target":"cognate_id","treatment":"Use as a join key; drop before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"The 'concept' column is constant: all 4981 rows hold the empty string, with cardinality 1, entropy 0, and a top_rate of 1.0. 
There is no variation to exploit and no non-empty category was observed.","role":"metadata","scope":"column","target":"concept","treatment":"Drop the column; it carries zero information."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.std","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Counts of word entries per cognate set (presumably the number of items in the record's 'words' array), with 4981 non-null integer values ranging 1 to 157. The distribution is heavily right-skewed (skew 6.84, kurtosis 59.74): median is 2 and Q3 is 4, yet the max reaches 157, dragging the mean to 5.17 with std 12.13. About 13% of rows (649) are flagged as outliers, indicating a long tail of unusually large cognate sets.","role":"feature","scope":"column","target":"word_count","treatment":"log-transform or clip before modelling to tame the long right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.median","stats.q1","stats.q3","stats.mean","stats.std","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"`language_count` is a positive integer feature counting languages per record, ranging from 1 to 157 with a median of 2 and Q3 of 4. The distribution is severely right-skewed (skew 6.84, kurtosis 59.77) with 649 outliers (13.0% outlier rate) stretching the mean to 5.17 against a std of 12.13. 
No nulls or zeros, and only 94 distinct values across 4981 rows.","role":"feature","scope":"column","target":"language_count","treatment":"Log1p-transform or cap at a high quantile before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.len_max","stats.len_mean","stats.len_median","stats.len_min","stats.n_duplicates","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds serialized JSON arrays of word entries, each carrying fields like \"form\", \"language\", \"iso_639_3\", and \"glottocode\" \u2014 every one of the 4981 rows starts with `[{\"form\":`. Values are nearly unique (4963/4981) with 18 duplicates, and lengths vary wildly from 83 to 14956 characters (mean 498, median 184), indicating variable-size nested records rather than free prose. Top tokens reveal a multilingual etymology dataset spanning Greek, Old (Iranian/English?), Armenian, Albanian, etc., so Flesch readability (48.4) is meaningless here.","role":"free_text","scope":"column","target":"words","treatment":"Parse as JSON and explode into a child table of word entries before any modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the source dataset provenance, but every one of the 4981 rows carries the single value \"iecor\". 
With cardinality 1 and entropy 0, it conveys no information and serves only as a constant tag.","role":"metadata","scope":"column","target":"source_dataset","treatment":"Drop before modelling; retain only as a provenance note."},{"confidence":"high","critiques":[],"evidence_keys":["alerts","n","n_unique","stats.min","stats.max","stats.mean","stats.std"],"model":"anthropic:claude-opus-4-7","narrative":"This column is labelled 'confidence' and appears to be a numeric score, but every one of the 4981 rows holds the value 1.0 with zero standard deviation. It carries no information and was flagged constant. Likely an upstream default or placeholder rather than a measured confidence.","role":"metadata","scope":"column","target":"confidence","treatment":"drop, constant column with no variance"}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":2315,"prompt_tokens":8057,"total_tokens":10372}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:52:34+00:00","mode":"full","row_count":4981,"sampled_rows":4981,"seed":42,"source":"/home/coolhand/servers/diachronica/etymology_atlas/parquet/cognate_sets.parquet"},"notes":[],"saturn_version":"0.2.0","schema":{"cognate_id":"text","concept":"categorical","confidence":"numeric","language_count":"numeric","source_dataset":"categorical","word_count":"numeric","words":"text"}}
