{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"cognate_id","extras":{"language_counts":{},"language_sample_size":4981,"length_histogram":{"counts":[5,0,0,0,0,0,0,0,0,0,0,0,0,44,0,0,0,0,0,0,0,0,0,0,0,0,477,0,0,0,0,0,0,0,0,0,0,0,0,4455],"edges":[7.0,7.075,7.15,7.225,7.3,7.375,7.45,7.525,7.6,7.675,7.75,7.825,7.9,7.975,8.05,8.125,8.2,8.275,8.35,8.425,8.5,8.575,8.65,8.725,8.8,8.875,8.95,9.025,9.1,9.175,9.25,9.325,9.4,9.475,9.55,9.625,9.7,9.775,9.85,9.925,10.0]},"near_unique":true,"sample":["iecor:12","iecor:8032","iecor:9076","iecor:8599","iecor:5170","iecor:9758","iecor:5291","iecor:9282","iecor:8613","iecor:322","iecor:6792","iecor:6234","iecor:2808","iecor:29","iecor:3033","iecor:1643","iecor:855","iecor:8714","iecor:9897","iecor:4969","iecor:3168","iecor:334","iecor:1412","iecor:894","iecor:7550","iecor:8146","iecor:6402","iecor:1700","iecor:663","iecor:9774","iecor:7198","iecor:9956","iecor:7258","iecor:4828","iecor:4163","iecor:5266","iecor:9553","iecor:6020","iecor:5376","iecor:6221","iecor:8570","iecor:9492","iecor:2794","iecor:9488","iecor:9681","iecor:9829","iecor:3828","iecor:4838","iecor:1378","iecor:9223"],"top_values":[],"top_words":[["iecor:3",1],["iecor:4",1],["iecor:5",1],["iecor:7",1],["iecor:9",1],["iecor:11",1],["iecor:12",1],["iecor:13",1],["iecor:14",1],["iecor:15",1],["iecor:18",1],["iecor:21",1],["iecor:22",1],["iecor:24",1],["iecor:25",1],["iecor:27",1],["iecor:28",1],["iecor:29",1],["iecor:38",1],["iecor:39",1],["iecor:41",1],["iecor:42",1],["iecor:44",1],["iecor:45",1],["iecor:46",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4981,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":4981,"n_null":0,"n_unique":4981,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":10,"len_mean":9.883557518570568,"len_median":10.0,"len_min":7,"len_p95":10.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4981,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"concept","extras":{"singletons":0,"top_values":[["",4981]]},"kind":"categorical","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"words","extras":{},"kind":"unknown","n":4981,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"source_dataset","extras":{"singletons":0,"top_values":[["iecor",4981]]},"kind":"categorical","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"iecor"}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct value"}],"column":"confidence","extras":{"histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4981,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.525,0.55,0.575,0.6,0.625,0.65,0.675,0.7,0.725,0.75,0.775,0.8,0.825,0.8500000000000001,0.875,0.9,0.925,0.95,0.9750000000000001,1.0,1.025,1.05,1.0750000000000002,1.1,1.125,1.15,1.175,1.2000000000000002,1.225,1.25,1.275,1.3,1.3250000000000002,1.35,1.375,1.4,1.425,1.4500000000000002,1.475,1.5]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":1.0,"mean":1.0,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":1.0,"q3":1.0,"skew":0.0,"std":0.0,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+6.84"},{"code":"outliers","level":"warn","message":"13.0% rows beyond 1.5 IQR"}],"column":"word_count","extras":{"histogram":{"counts":[3848,484,194,96,90,107,32,25,15,4,8,8,6,5,3,3,8,3,3,3,1,1,2,4,5,2,3,2,2,3,0,1,1,0,2,2,0,0,1,4],"edges":[1.0,4.9,8.8,12.7,16.6,20.5,24.4,28.3,32.2,36.1,40.0,43.9,47.8,51.699999999999996,55.6,59.5,63.4,67.3,71.2,75.1,79.0,82.89999999999999,86.8,90.7,94.6,98.5,102.39999999999999,106.3,110.2,114.1,118.0,121.89999999999999,125.8,129.7,133.6,137.5,141.4,145.29999999999998,149.2,153.1,157.0]},"sample":[7.0,29.0,14.0,25.0,1.0,7.0,22.0,23.0,21.0,10.0,50.0,53.0,5.0,52.0,1.0,21.0,18.0,5.0,5.0,2.0,3.0,109.0,3.0,2.0,3.0,3.0,3.0,4.0,3.0,12.0,16.0,8.0,5.0,14.0,10.0,24.0,6.0,2.0,2.0,88.0,2.0,2.0,20.0,22.0,14.0,14.0,21.0,1.0,17.0,3.0,6.0,5.0,11.0,8.0,6.0,7.0,8.0,7.0,9.0,28.0,29.0,24.0,22.0,3.0,34.0,8.0,1.0,41.0,4.0,5.0,5.0,5.0,2.0,6.0,2.0,8.0,6.0,7.0,9.0,6.0,2.0,1.0,4.0,10.0,2.0,12.0,8.0,5.0,6.0,7.0,8.0,4.0,5.0,8.0,7.0,8.0,3.0,2.0,4.0,4.0,8.0,2.0,3.0,1.0,3.0,3.0,3.0,2.0,1.0,3.0,6.0,2.0,1.0,30.0,2.0,7.0,4.0,2.0,2.0,3.0,2.0,2.0,95.0,5.0,2.0,1.0,5.0,7.0,2.0,13.0,4.0,2.0,8.0,5.0,11.0,2.0,10.0,2.0,2.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,2.0,3.0,2.0,1.0,7.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,10.0,1.0,1.0,4.0,1.0,7.0,1.0,3.0,1.0,2.0,11.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0,3.0,2.0,3.0,1.0,1.0,11.0,3.0,98.0,75.0,19.0,57.0,7.0,1.0,2.0,6.0,14.0,9.0,26.0,1.0,11.0,18.0,3.0,2.0,1.0,3.0,3.0,3.0,7.0,14.0,17.0,2.0,2.0,3.0,1.0,2.0,7.0,3.0,2.0,1.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,4.0,2.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,1.0,5.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,5.0,1.0,1.0,2.0,2.0,28.0,1.0,6.0,3.0,2.0,1.0,2.0,4.0,1.0,2.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,21.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,4.0,1.0,7.0,2.0,1.0,1.0,1.0,2.0,1.0,19.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,6.0,1.0,6.0,3.0,7.0,3.0,8.0,5.0,1.0,7.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,6.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,10.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":93,"null_rate":0.0,"stats":{"iqr":3.0,"kurtosis":59.740272006473056,"max":157.0,"mean":5.167837783577595,"median":2.0,"min":1.0,"n_outliers":649,"outlier_rate":0.1302951214615539,"q1":1.0,"q3":4.0,"skew":6.837154477764645,"std":12.134749527988367,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+6.84"},{"code":"outliers","level":"warn","message":"13.0% rows beyond 1.5 IQR"}],"column":"language_count","extras":{"histogram":{"counts":[3849,483,194,96,90,107,32,25,15,4,8,8,6,5,3,3,8,3,3,4,0,1,2,4,5,2,3,2,2,3,0,1,1,0,2,2,0,0,1,4],"edges":[1.0,4.9,8.8,12.7,16.6,20.5,24.4,28.3,32.2,36.1,40.0,43.9,47.8,51.699999999999996,55.6,59.5,63.4,67.3,71.2,75.1,79.0,82.89999999999999,86.8,90.7,94.6,98.5,102.39999999999999,106.3,110.2,114.1,118.0,121.89999999999999,125.8,129.7,133.6,137.5,141.4,145.29999999999998,149.2,153.1,157.0]},"sample":[7.0,29.0,14.0,25.0,1.0,7.0,22.0,23.0,21.0,10.0,50.0,53.0,5.0,52.0,1.0,21.0,18.0,5.0,5.0,2.0,3.0,109.0,3.0,2.0,3.0,3.0,3.0,4.0,3.0,12.0,16.0,8.0,5.0,14.0,10.0,24.0,6.0,2.0,2.0,88.0,2.0,2.0,20.0,22.0,14.0,14.0,21.0,1.0,17.0,3.0,6.0,5.0,11.0,8.0,6.0,7.0,8.0,7.0,9.0,28.0,29.0,24.0,22.0,3.0,34.0,8.0,1.0,41.0,4.0,5.0,5.0,5.0,2.0,6.0,2.0,8.0,6.0,7.0,9.0,6.0,2.0,1.0,4.0,10.0,2.0,12.0,8.0,5.0,6.0,7.0,8.0,4.0,5.0,8.0,7.0,8.0,3.0,2.0,4.0,4.0,8.0,2.0,3.0,1.0,3.0,3.0,3.0,2.0,1.0,3.0,6.0,2.0,1.0,30.0,2.0,7.0,4.0,2.0,2.0,3.0,2.0,2.0,95.0,5.0,2.0,1.0,5.0,7.0,2.0,13.0,4.0,2.0,8.0,5.0,11.0,2.0,10.0,2.0,2.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,2.0,3.0,2.0,1.0,7.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,10.0,1.0,1.0,4.0,1.0,7.0,1.0,3.0,1.0,2.0,11.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0,3.0,2.0,3.0,1.0,1.0,11.0,3.0,98.0,75.0,19.0,57.0,7.0,1.0,2.0,6.0,14.0,9.0,26.0,1.0,11.0,18.0,3.0,2.0,1.0,3.0,3.0,3.0,7.0,14.0,17.0,2.0,2.0,3.0,1.0,2.0,7.0,3.0,2.0,1.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,4.0,2.0,1.0,2.0,1.0,3.0,1.0,1.0,1.0,3.0,5.0,1.0,2.0,1.0,5.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,3.0,5.0,1.0,1.0,2.0,2.0,28.0,1.0,6.0,3.0,2.0,1.0,2.0,4.0,1.0,2.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,3.0,21.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,4.0,1.0,7.0,2.0,1.0,1.0,1.0,2.0,1.0,19.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,6.0,1.0,6.0,3.0,7.0,3.0,8.0,5.0,1.0,7.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,6.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,10.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":4981,"n_null":0,"n_unique":94,"null_rate":0.0,"stats":{"iqr":3.0,"kurtosis":59.77488401145233,"max":157.0,"mean":5.166231680385465,"median":2.0,"min":1.0,"n_outliers":649,"outlier_rate":0.1302951214615539,"q1":1.0,"q3":4.0,"skew":6.83821712056036,"std":12.130385693042744,"zero_rate":0.0}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"sources","extras":{},"kind":"unknown","n":4981,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","language_count","word_count","concept","confidence","source_dataset","cognate_id"],"featured_charts":[{"caption":"Look at the long right tail \u2014 most sets cover just 1-2 languages but a few reach 157.","column":"language_count","kind":"histogram"},{"caption":"Mirrors language_count almost exactly; check whether the two are effectively duplicates.","column":"word_count","kind":"histogram"},{"caption":"Confirms every row comes from a single source ('iecor'), so no cross-source comparison is possible.","column":"source_dataset","kind":"donut"},{"caption":"ID strings cluster tightly at length 10, useful as a sanity check on identifier formatting.","column":"cognate_id","kind":"length"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset contains 4,981 cognate sets sourced entirely from the 'iecor' source_dataset, each identified by a unique cognate_id. The two main numeric signals are language_count and word_count, which are nearly identical in distribution: both have a median of 2 and mean around 5.17, but stretch out to a maximum of 157 with skew above 6.8 and roughly 13% of rows flagged as outliers. That long tail is the most interesting story \u2014 most cognate sets are small, but a minority span very many languages/words and deserve a closer look. Note that concept is empty for every row, confidence is constant at 1.0, and source_dataset has only one value, so those columns carry no analytic signal.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.one_word_rate","stats.duplicate_rate","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is a primary identifier column: every one of the 4981 values is unique, non-null, single-token, and follows an `iecor:<n>` pattern (length 7-10 chars). With n_unique == n and duplicate_rate 0, it functions as a cognate-set key from the IECoR resource rather than a modelling feature.","role":"identifier","scope":"column","target":"cognate_id","treatment":"Use as the join key to cognate metadata; exclude from any model features."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"The 'concept' column is a categorical field that is entirely constant: all 4981 rows hold the same empty-string value, giving cardinality 1 and entropy 0. It carries no information and was flagged for imbalance with a top_rate of 1.0.","role":"other","scope":"column","target":"concept","treatment":"Drop; the column is constant and contributes no signal."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","kind","n","null_rate","stats"],"model":"anthropic:claude-opus-4-7","narrative":"The column is named \"words\" but saturn skipped profiling it, so its kind is unknown and no descriptive statistics were computed. The only confirmed signals are 4981 non-null rows with a null rate of 0.0; uniqueness, type, and value distribution are all unavailable.","role":"other","scope":"column","target":"words","treatment":"Re-profile or manually inspect this column before any downstream use, since saturn skipped it."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the originating dataset for each row, but every one of the 4981 records carries the single value \"iecor\". Cardinality is 1 and entropy is 0, so the field carries no information for modelling or grouping.","role":"metadata","scope":"column","target":"source_dataset","treatment":"Drop; constant column with no discriminative signal."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.min","stats.max","stats.mean","stats.std","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"The column 'confidence' is a numeric field that is entirely constant: all 4981 rows hold the value 1.0, with zero standard deviation and a single unique value. It carries no information for downstream modelling and likely reflects a default or hard-coded score rather than a measured probability.","role":"metadata","scope":"column","target":"confidence","treatment":"drop, constant column with no variance"},{"confidence":"high","critiques":[],"evidence_keys":["stats.min","stats.max","stats.median","stats.mean","stats.std","stats.q1","stats.q3","stats.iqr","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","n_unique","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Counts of words per record, ranging from 1 to 157 with a median of 2 and IQR of 3. The distribution is severely right-skewed (skew 6.84, kurtosis 59.74) with 649 outliers (13.0% of rows) pulling the mean to 5.17 against std 12.13. Most entries are very short while a long tail of verbose records dominates the variance.","role":"feature","scope":"column","target":"word_count","treatment":"Apply a log1p transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","median","q1","q3","iqr","skew","kurtosis","n_outliers","outlier_rate","mean","std","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"A count of languages per record, ranging from 1 to 157 with a median of just 2 and IQR of 3. The distribution is severely right-skewed (skew 6.84, kurtosis 59.77) with 649 outliers (13.0%), meaning a small number of records list dozens of languages while most list only a handful.","role":"feature","scope":"column","target":"language_count","treatment":"Log-transform or cap before modelling to tame the heavy tail."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","kind","n","null_rate","n_unique","stats"],"model":"anthropic:claude-opus-4-7","narrative":"The column 'sources' was skipped by the profiler, so its kind is unknown and no descriptive statistics are available. We can only confirm it has 4981 rows with a null rate of 0.0 and no recorded unique count. Without further evidence, the content and structure cannot be characterised.","role":"other","scope":"column","target":"sources","treatment":"Re-profile or manually inspect this column before deciding on downstream handling."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":2226,"prompt_tokens":7726,"total_tokens":9952}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:37:03+00:00","mode":"full","row_count":4981,"sampled_rows":4981,"seed":42,"source":"/home/coolhand/servers/diachronica/etymology_atlas/processed/cognate_sets.json"},"notes":[],"saturn_version":"0.2.0","schema":{"cognate_id":"text","concept":"categorical","confidence":"numeric","language_count":"numeric","source_dataset":"categorical","sources":"unknown","word_count":"numeric","words":"unknown"}}
