{"columns":[{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 99.9% of rows"}],"column":"**Please note** 2021 data in columns H, K, R, and U are populated with 2020 data until current data is released.  ","extras":{"singletons":1,"top_values":[["SSA-SA-FYWL.csv",1092],["File Name",1]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.010554540306416187,"entropy_ratio":0.010554540306416187,"top_rate":0.9990850869167429,"top_value":"SSA-SA-FYWL.csv"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 99.9% of rows"}],"column":"","extras":{"singletons":1,"top_values":[["2",1092],["File Version",1]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.010554540306416187,"entropy_ratio":0.010554540306416187,"top_rate":0.9990850869167429,"top_value":"2"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 99.9% of rows"}],"column":"_duplicated_0","extras":{"singletons":1,"top_values":[["3/13/2023",1092],["Update Date",1]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.010554540306416187,"entropy_ratio":0.010554540306416187,"top_rate":0.9990850869167429,"top_value":"3/13/2023"}},{"alerts":[],"column":"_duplicated_1","extras":{"singletons":1,"top_values":[["ATL",168],["DEN",126],["BOS",126],["PHL",126],["CHI",126],["DAL",105],["SEA",84],["SFO",84],["KCM",84],["NYC",63],["Region Code",1]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":11,"null_rate":0.0,"stats":{"cardinality":11,"entropy":3.2771638580905007,"entropy_ratio":0.9473128014541898,"top_rate":0.1537053979871912,"top_value":"ATL"}},{"alerts":[],"column":"_duplicated_2","extras":{"singletons":1,"top_values":[["AK ",21],["AL ",21],["AR ",21],["AZ ",21],["CA ",21],["CO ",21],["CT ",21],["DC ",21],["DE ",21],["FL ",21],["GA ",21],["HI ",21],["IA ",21],["ID ",21],["IL ",21],["IN ",21],["KS ",21],["KY ",21],["LA ",21],["MA ",21]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":53,"null_rate":0.0,"stats":{"cardinality":53,"entropy":5.705778851569065,"entropy_ratio":0.996134443002522,"top_rate":0.0192131747483989,"top_value":"AK "}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 99.9% of rows"}],"column":"_duplicated_3","extras":{"singletons":1,"top_values":[["FY",1092],["Date Type",1]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.010554540306416187,"entropy_ratio":0.010554540306416187,"top_rate":0.9990850869167429,"top_value":"FY"}},{"alerts":[],"column":"_duplicated_4","extras":{"singletons":1,"top_values":[["2001",52],["2002",52],["2003",52],["2004",52],["2005",52],["2006",52],["2007",52],["2008",52],["2009",52],["2010",52],["2011",52],["2012",52],["2013",52],["2014",52],["2015",52],["2016",52],["2017",52],["2018",52],["2019",52],["2020",52]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":22,"null_rate":0.0,"stats":{"cardinality":22,"entropy":4.398853374409258,"entropy_ratio":0.9864157028499182,"top_rate":0.04757548032936871,"top_value":"2001"}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_5","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[265,0,744,0,0,83,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[6.0,6.375,6.75,7.125,7.5,7.875,8.25,8.625,9.0,9.375,9.75,10.125,10.5,10.875,11.25,11.625,12.0,12.375,12.75,13.125,13.5,13.875,14.25,14.625,15.0,15.375,15.75,16.125,16.5,16.875,17.25,17.625,18.0,18.375,18.75,19.125,19.5,19.875,20.25,20.625,21.0]},"near_unique":false,"sample":["407208","5197780","802274","4470992","5075318","3483629","862241","384373","830302","4629213","674710","6290121","3516597","3791503","913983","2824057","2154292","2390431","18383024","856257","1213219","393282","1467615","2771314","2968363","6538569","675189","3896924","3860724","6166127","4466054","4067108","795106","613258","405691","6176933","2696516","3592939","7187818","4286235","1045810","654813","3954898","3766212","3792234","6613461","7184696","12497615","1136880","1880718"],"top_values":[["2899027",2],["6155356",2],["5593754",2],["2465064",2],["486940",2],["3003328",2],["1827226",2],["4526491",2],["25552778",2],["3792234",2],["2242773",2],["508106",2],["598831",2],["13116128",2],["6763217",2],["1096206",2],["1902326",2],["1095747",2],["7861963",2],["4146685",2]],"top_words":[["2899027",2],["6155356",2],["5593754",2],["2465064",2],["486940",2],["3003328",2],["1827226",2],["4526491",2],["25552778",2],["3792234",2],["2242773",2],["508106",2],["598831",2],["13116128",2],["6763217",2],["1096206",2],["1902326",2],["1095747",2],["7861963",2],["4146685",2],["1775835",2],["2748320",2],["2840609",2],["4460035",2],["3833865",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1037,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.05123513266239707,"emoji_rate":0.0,"len_max":21,"len_mean":6.846294602012809,"len_median":7.0,"len_min":6,"len_p95":8.0,"n_duplicates":56,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1039,"word_mean":1.0018298261665142,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.7% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_6","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[397,678,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[5.0,5.875,6.75,7.625,8.5,9.375,10.25,11.125,12.0,12.875,13.75,14.625,15.5,16.375,17.25,18.125,19.0,19.875,20.75,21.625,22.5,23.375,24.25,25.125,26.0,26.875,27.75,28.625,29.5,30.375,31.25,32.125,33.0,33.875,34.75,35.625,36.5,37.375,38.25,39.125,40.0]},"near_unique":true,"sample":["12791","293982","31643","288035","262109","159132","35481","29628","77327","203305","54254","398609","168180","172447","45228","253545","93218","177467","779040","50745","76767","19891","59034","197990","241320","415076","53285","201789","163952","450051","260667","213312","32268","33747","25865","461127","147743","209516","482496","295353","61221","34656","215660","276103","121476","412177","433152","766245","128291","88733"],"top_values":[],"top_words":[["91371",2],["18795",2],["158314",2],["ssa",1],["disability",1],["beneficiaries",1],["age",1],["18-64*",1],["12791",1],["211792",1],["122081",1],["137268",1],["890019",1],["91320",1],["88615",1],["19553",1],["22294",1],["489839",1],["264615",1],["25573",1],["75024",1],["32828",1],["338361",1],["172447",1],["65398",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1666666666666667,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333,5.0,5.166666666666666,5.333333333333333,5.5,5.666666666666666,5.833333333333333,6.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1090,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.0027447392497712718,"emoji_rate":0.0,"len_max":40,"len_mean":5.683440073193046,"len_median":6.0,"len_min":5,"len_p95":6.0,"n_duplicates":3,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1094,"word_mean":1.0045745654162854,"word_median":1.0}},{"alerts":[],"column":"_duplicated_7","extras":{"singletons":232,"top_values":[["5.50",11],["5.07",9],["4.90",9],["4.19",8],["5.08",8],["4.70",8],["4.96",7],["5.29",7],["4.11",6],["4.55",6],["5.18",6],["4.45",6],["6.18",6],["4.98",6],["5.63",6],["7.16",6],["5.33",5],["5.15",5],["5.45",5],["4.71",5]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":511,"null_rate":0.0,"stats":{"cardinality":511,"entropy":8.710274951613991,"entropy_ratio":0.9681117254655754,"top_rate":0.010064043915827997,"top_value":"5.50"}},{"alerts":[{"code":"near_unique","level":"info","message":"95.2% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_8","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[279,0,733,0,80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0,10.5,11.0,11.5,12.0,12.5,13.0,13.5,14.0,14.5,15.0,15.5,16.0,16.5,17.0,17.5,18.0,18.5,19.0,19.5,20.0,20.5,21.0,21.5,22.0,22.5,23.0,23.5,24.0,24.5,25.0,25.5,26.0]},"near_unique":true,"sample":["394417","4903798","770616","4182957","4813209","3318849","826760","354780","752975","4425908","433577","5891512","3348417","3619056","868755","2570512","2061074","2212964","17571182","805512","1136452","373391","1408581","2573324","2727043","6123493","621904","3695135","3696772","5694506","4324075","3853796","762838","579511","379826","5715806","2548765","3383423","6705322","3990882","984589","620155","3739238","3490077","3663815","6184902","6751544","11731370","1008589","1791932"],"top_values":[],"top_words":[["468802",2],["2702811",2],["1646445",2],["4321562",2],["24535016",2],["3663815",2],["2129448",2],["481747",2],["563707",2],["12365676",2],["6360628",2],["1065248",2],["1797954",2],["1035220",2],["7459448",2],["3878500",2],["1681488",2],["2471250",2],["2601121",2],["4187603",2],["3648540",2],["747811",2],["5694506",2],["3318849",2],["3490077",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1041,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.04757548032936871,"emoji_rate":0.0,"len_max":26,"len_mean":6.835315645013724,"len_median":7.0,"len_min":6,"len_p95":8.0,"n_duplicates":52,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1043,"word_mean":1.0018298261665142,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"98.9% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_9","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[1,0,0,269,0,0,0,720,0,0,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[3.0,3.275,3.55,3.825,4.1,4.375,4.65,4.925000000000001,5.2,5.475,5.75,6.025,6.300000000000001,6.575,6.8500000000000005,7.125,7.4,7.675000000000001,7.95,8.225000000000001,8.5,8.775,9.05,9.325,9.600000000000001,9.875,10.15,10.425,10.700000000000001,10.975000000000001,11.25,11.525,11.8,12.075000000000001,12.350000000000001,12.625,12.9,13.175,13.450000000000001,13.725000000000001,14.0]},"near_unique":true,"sample":["3487","42558","6257","37868","53646","22580","7902","4431","8934","41298","9499","65083","36136","37808","10261","47778","19699","35864","119223","10071","15815","4937","14932","46715","43954","56181","9927","46077","39950","57314","47757","39175","5791","7270","4308","98215","17519","41757","108494","51167","9437","4324","48633","49860","17970","63300","102200","136492","24595","14731"],"top_values":[],"top_words":[["4190",2],["8630",2],["19699",2],["4508",2],["9499",2],["3966",2],["48235",2],["46818",2],["37012",2],["21500",2],["12142",2],["30785",2],["adult",1],["receipts",1],["3487",1],["42416",1],["24423",1],["31148",1],["198004",1],["19412",1],["17339",1],["4598",1],["113158",1],["60785",1],["6564",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1081,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.010978956999085087,"emoji_rate":0.0,"len_max":14,"len_mean":4.8536139066788655,"len_median":5.0,"len_min":3,"len_p95":6.0,"n_duplicates":12,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1082,"word_mean":1.000914913083257,"word_median":1.0}},{"alerts":[],"column":"_duplicated_10","extras":{"singletons":47,"top_values":[["0.97",25],["1.11",24],["1.01",23],["1.04",19],["0.92",19],["1.08",18],["1.02",17],["1.12",16],["1.07",16],["1.15",16],["0.96",15],["1.00",14],["1.13",14],["1.10",14],["0.89",13],["1.23",13],["0.94",13],["0.90",13],["1.05",13],["0.85",13]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":199,"null_rate":0.0,"stats":{"cardinality":199,"entropy":7.096703174030909,"entropy_ratio":0.9292984173845246,"top_rate":0.022872827081427266,"top_value":"0.97"}},{"alerts":[{"code":"near_unique","level":"info","message":"97.2% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_11","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[11,552,529,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[3.0,3.675,4.35,5.025,5.7,6.375,7.050000000000001,7.7250000000000005,8.4,9.075,9.75,10.425,11.100000000000001,11.775,12.450000000000001,13.125,13.8,14.475000000000001,15.15,15.825000000000001,16.5,17.175,17.85,18.525,19.200000000000003,19.875,20.55,21.225,21.900000000000002,22.575000000000003,23.25,23.925,24.6,25.275000000000002,25.950000000000003,26.625,27.3,27.975,28.650000000000002,29.325000000000003,30.0]},"near_unique":true,"sample":["1573","15940","2107","14243","18698","8951","3441","1938","2851","16121","2967","14514","11636","11441","3386","13228","7129","9012","39247","4603","5048","1806","6550","13890","12230","17714","3059","13919","14732","21491","16579","9282","3358","2592","1993","32802","6054","14998","33980","19352","3301","1786","15221","18860","4821","18893","26718","56886","6678","4238"],"top_values":[],"top_words":[["6632",3],["1573",2],["3705",2],["3848",2],["13358",2],["3405",2],["2396",2],["6678",2],["3240",2],["1585",2],["1006",2],["2847",2],["7831",2],["15869",2],["6063",2],["18070",2],["1993",2],["3724",2],["11159",2],["6031",2],["2273",2],["20695",2],["9282",2],["1697",2],["3901",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1062,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.028362305580969808,"emoji_rate":0.0,"len_max":30,"len_mean":4.497712717291857,"len_median":4.0,"len_min":3,"len_p95":5.0,"n_duplicates":31,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1064,"word_mean":1.0018298261665142,"word_median":1.0}},{"alerts":[],"column":"_duplicated_12","extras":{"singletons":11,"top_values":[["0.38",55],["0.34",45],["0.32",43],["0.40",41],["0.35",39],["0.44",36],["0.37",35],["0.31",35],["0.36",33],["0.33",33],["0.39",33],["0.46",32],["0.43",31],["0.48",30],["0.45",30],["0.42",29],["0.30",29],["0.41",27],["0.52",26],["0.54",25]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":69,"null_rate":0.0,"stats":{"cardinality":69,"entropy":5.527114803477987,"entropy_ratio":0.9048199516243182,"top_rate":0.05032021957913998,"top_value":"0.38"}},{"alerts":[{"code":"near_unique","level":"info","message":"98.7% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_13","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[283,0,710,0,99,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0,10.5,11.0,11.5,12.0,12.5,13.0,13.5,14.0,14.5,15.0,15.5,16.0,16.5,17.0,17.5,18.0,18.5,19.0,19.5,20.0,20.5,21.0,21.5,22.0,22.5,23.0,23.5,24.0]},"near_unique":true,"sample":["3369","41577","6096","37132","46098","22283","7360","4370","8916","39609","9376","54078","35837","33536","10015","46690","19256","33239","129071","8860","15061","3593","14552","45039","42453","57308","9239","46402","37652","56493","46317","37761","5071","6530","4243","96783","15917","44472","109746","49298","9121","4172","47541","47493","15505","60909","101039","127973","22630","13612"],"top_values":[],"top_words":[["17955",2],["5808",2],["40394",2],["2534",2],["23292",2],["6096",2],["2475",2],["3363",2],["7863",2],["30662",2],["4532",2],["46294",2],["29987",2],["4507",2],["all",1],["adult",1],["determinations",1],["3369",1],["39021",1],["24843",1],["27934",1],["189867",1],["17689",1],["3457",1],["4087",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1079,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.012808783165599268,"emoji_rate":0.0,"len_max":24,"len_mean":4.84903934126258,"len_median":5.0,"len_min":4,"len_p95":6.0,"n_duplicates":14,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1081,"word_mean":1.0018298261665142,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"707 singleton categories"}],"column":"_duplicated_14","extras":{"singletons":707,"top_values":[["31.13",4],["44.89",3],["33.20",3],["47.46",3],["30.73",3],["35.51",3],["41.78",3],["40.12",3],["36.06",3],["29.74",3],["36.98",3],["37.02",3],["38.32",3],["29.63",3],["36.17",3],["30.34",3],["32.50",3],["36.14",3],["32.47",3],["31.93",3]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":883,"null_rate":0.0,"stats":{"cardinality":883,"entropy":9.685883358069683,"entropy_ratio":0.9897421312309679,"top_rate":0.0036596523330283625,"top_value":"31.13"}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"98.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_15","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[21,0,0,0,0,0,0,0,530,0,541,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.575,2.15,2.7249999999999996,3.3,3.875,4.449999999999999,5.0249999999999995,5.6,6.175,6.75,7.324999999999999,7.8999999999999995,8.475,9.049999999999999,9.625,10.2,10.774999999999999,11.35,11.924999999999999,12.5,13.075,13.649999999999999,14.225,14.799999999999999,15.374999999999998,15.95,16.525,17.099999999999998,17.674999999999997,18.25,18.825,19.4,19.974999999999998,20.549999999999997,21.125,21.7,22.275,22.849999999999998,23.424999999999997,24.0]},"near_unique":false,"sample":["188453","1870106","299867","1366857","1847182","1301219","304573","1860793","250404","1751532","213987","2490125","1325293","1576279","407190","1108325","843395","956433","7435043","289071","500930","116445","595078","1178563","1091291","2509957","216474","1575597","1515185","2126813","1602721","1622848","311347","219828","128930","2333718","860778","1326208","2723536","1401415","446972","229683","1584441","1371350","1250032","2306377","2730377","4424083","389606","2300691"],"top_values":[["0",21],["698580",2],["1112957",2],["857606",2],["178731",2],["1087261",2],["699714",2],["1646413",2],["8791042",2],["1250032",2],["718952",2],["129588",2],["204656",2],["4250722",2],["2499735",2],["295817",2],["725559",2],["451043",2],["2777835",2],["1566439",2]],"top_words":[["0",21],["698580",2],["1112957",2],["857606",2],["178731",2],["1087261",2],["699714",2],["1646413",2],["8791042",2],["1250032",2],["718952",2],["129588",2],["204656",2],["4250722",2],["2499735",2],["295817",2],["725559",2],["451043",2],["2777835",2],["1566439",2],["696746",2],["1001917",2],["1081280",2],["1341522",2],["1333919",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1019,"null_rate":0.0,"stats":{"allcaps_rate":0.979871912168344,"boilerplate_rate":0.0,"duplicate_rate":0.0677035681610247,"emoji_rate":0.0,"len_max":24,"len_mean":6.415370539798719,"len_median":6.0,"len_min":1,"len_p95":7.0,"n_duplicates":74,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1022,"word_mean":1.0027447392497713,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"96.7% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"98.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_16","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[21,0,25,446,562,37,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.925,2.85,3.7750000000000004,4.7,5.625,6.550000000000001,7.4750000000000005,8.4,9.325000000000001,10.25,11.175,12.100000000000001,13.025,13.950000000000001,14.875,15.8,16.725,17.650000000000002,18.575,19.5,20.425,21.35,22.275000000000002,23.200000000000003,24.125,25.05,25.975,26.900000000000002,27.825000000000003,28.75,29.675,30.6,31.525000000000002,32.45,33.375,34.300000000000004,35.225,36.15,37.075,38.0]},"near_unique":true,"sample":["970","22782","1214","21199","23598","10390","1730","1356","3878","19821","4750","45271","17136","17195","4442","28052","6293","15414","114026","2274","8092","3638","5633","29460","19372","43197","4765","20649","12466","31804","18310","18341","1640","2423","1793","41477","9997","21205","48695","23888","5006","2037","22789","19199","7655","32937","45890","80029","8275","9422"],"top_values":[],"top_words":[["0",21],["1358",2],["840",2],["1380",2],["6608",2],["1957",2],["9219",2],["22850",2],["8665",2],["9132",2],["1069",2],["43238",2],["1197",2],["101512",2],["8991",2],["19372",2],["17227",2],["ssi",1],["disabled",1],["child",1],["(dc)",1],["beneficiaries*",1],["970",1],["25307",1],["13811",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1057,"null_rate":0.0,"stats":{"allcaps_rate":0.979871912168344,"boilerplate_rate":0.0,"duplicate_rate":0.03293687099725526,"emoji_rate":0.0,"len_max":38,"len_mean":4.535224153705398,"len_median":5.0,"len_min":1,"len_p95":5.0,"n_duplicates":36,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1061,"word_mean":1.0036596523330283,"word_median":1.0}},{"alerts":[],"column":"_duplicated_17","extras":{"singletons":70,"top_values":[["0.00",21],["1.68",13],["0.58",12],["1.07",12],["1.08",12],["1.24",12],["1.15",12],["0.64",12],["1.52",11],["1.42",11],["1.18",11],["1.70",11],["1.81",11],["1.20",10],["1.09",10],["1.44",10],["1.11",10],["0.94",10],["1.78",10],["1.56",10]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":272,"null_rate":0.0,"stats":{"cardinality":272,"entropy":7.671248005790159,"entropy_ratio":0.9485357962527797,"top_rate":0.0192131747483989,"top_value":"0.00"}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"98.1% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_18","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[20,0,0,0,0,0,1,0,545,525,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.625,2.25,2.875,3.5,4.125,4.75,5.375,6.0,6.625,7.25,7.875,8.5,9.125,9.75,10.375,11.0,11.625,12.25,12.875,13.5,14.125,14.75,15.375,16.0,16.625,17.25,17.875,18.5,19.125,19.75,20.375,21.0,21.625,22.25,22.875,23.5,24.125,24.75,25.375,26.0]},"near_unique":false,"sample":["187483","1847324","298653","1345658","863746","1289462","302843","1838723","246526","1731711","209237","2444854","1308157","1559084","402748","1080273","837102","941019","7311069","286797","2037419","112807","589445","1149103","206647","4101420","211709","1554948","1502719","2092651","119959","677490","1602882","743882","1799935","269577","2562936","1305003","2674841","1080554","441966","227646","1561652","674754","1241727","2271569","2684487","4344054","381331","2265493"],"top_values":[["0",20],["1546254",2],["177580",2],["1066571",2],["675912",2],["1629637",2],["8696050",2],["1241727",2],["710338",2],["126020",2],["201364",2],["4153803",2],["2456985",2],["294647",2],["717406",2],["446250",2],["2743922",2],["1544822",2],["688367",2],["978354",2]],"top_words":[["0",20],["1546254",2],["177580",2],["1066571",2],["675912",2],["1629637",2],["8696050",2],["1241727",2],["710338",2],["126020",2],["201364",2],["4153803",2],["2456985",2],["294647",2],["717406",2],["446250",2],["2743922",2],["1544822",2],["688367",2],["978354",2],["1050186",2],["1320586",2],["1315242",2],["244439",2],["2092651",2]],"vocab_skipped":null,"word_histogram":{"counts":[1091,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":1,"n_unique":1021,"null_rate":0.0009149130832570906,"stats":{"allcaps_rate":0.9807692307692307,"boilerplate_rate":0.0,"duplicate_rate":0.06501831501831502,"emoji_rate":0.0,"len_max":26,"len_mean":6.406593406593407,"len_median":6.0,"len_min":1,"len_p95":7.0,"n_duplicates":71,"n_empty":0,"one_word_rate":0.9990842490842491,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1023,"word_mean":1.0018315018315018,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"98.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_19","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[21,0,181,623,0,267,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.775,2.55,3.325,4.1,4.875,5.65,6.425,7.2,7.9750000000000005,8.75,9.525,10.3,11.075000000000001,11.85,12.625,13.4,14.175,14.950000000000001,15.725,16.5,17.275000000000002,18.05,18.825,19.6,20.375,21.150000000000002,21.925,22.7,23.475,24.25,25.025000000000002,25.8,26.575,27.35,28.125,28.900000000000002,29.675,30.45,31.225,32.0]},"near_unique":false,"sample":["416","6917","343","5908","9872","2045","541","337","914","8064","1497","20130","7383","7274","1545","11956","2656","4547","22375","880","2639","1329","2171","15631","8610","15583","1633","9145","5022","8299","5495","5320","544","916","571","17496","2005","7155","21012","8286","1332","465","9347","5191","1677","8212","19112","25899","3214","2668"],"top_values":[["0",21],["929",3],["1302",3],["1424",3],["1071",2],["372",2],["2688",2],["433",2],["2457",2],["1230",2],["444",2],["8562",2],["471",2],["2965",2],["2897",2],["756",2],["438",2],["5212",2],["8502",2],["2950",2]],"top_words":[["0",21],["929",3],["1302",3],["1424",3],["1071",2],["372",2],["2688",2],["433",2],["2457",2],["1230",2],["444",2],["8562",2],["471",2],["2965",2],["2897",2],["756",2],["438",2],["5212",2],["8502",2],["2950",2],["2171",2],["20606",2],["1455",2],["422",2],["542",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1018,"null_rate":0.0,"stats":{"allcaps_rate":0.979871912168344,"boilerplate_rate":0.0,"duplicate_rate":0.0686184812442818,"emoji_rate":0.0,"len_max":32,"len_mean":4.046660567246112,"len_median":4.0,"len_min":1,"len_p95":5.0,"n_duplicates":75,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1022,"word_mean":1.0036596523330283,"word_median":1.0}},{"alerts":[],"column":"_duplicated_20","extras":{"singletons":45,"top_values":[["0.30",28],["0.35",26],["0.33",26],["0.37",24],["0.45",24],["0.36",23],["0.61",23],["0.42",22],["0.00",21],["0.43",21],["0.38",20],["0.40",19],["0.48",19],["0.32",19],["0.41",18],["0.39",18],["0.58",18],["0.18",18],["0.57",17],["0.71",17]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":156,"null_rate":0.0,"stats":{"cardinality":156,"entropy":6.608518589287835,"entropy_ratio":0.9070904242154358,"top_rate":0.025617566331198535,"top_value":"0.30"}},{"alerts":[{"code":"long_tail","level":"info","message":"852 singleton categories"}],"column":"_duplicated_21","extras":{"singletons":852,"top_values":[["0",21],["1321",4],["352",3],["597",3],["777",3],["580",3],["1184",3],["1353",3],["710",3],["3128",3],["463",3],["227",3],["1043",2],["421",2],["2891",2],["5079",2],["3228",2],["299",2],["3337",2],["238",2]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":957,"null_rate":0.0,"stats":{"cardinality":957,"entropy":9.788691097474295,"entropy_ratio":0.9885195202466706,"top_rate":0.0192131747483989,"top_value":"0"}},{"alerts":[],"column":"_duplicated_22","extras":{"singletons":17,"top_values":[["0.18",77],["0.20",75],["0.21",64],["0.22",62],["0.25",61],["0.17",61],["0.23",49],["0.19",46],["0.24",45],["0.16",43],["0.15",41],["0.26",36],["0.14",27],["0.12",26],["0.27",26],["0.13",25],["0.11",25],["0.10",22],["0.00",21],["0.29",20]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":70,"null_rate":0.0,"stats":{"cardinality":70,"entropy":5.137924060795578,"entropy_ratio":0.8382585771600553,"top_rate":0.07044830741079597,"top_value":"0.18"}},{"alerts":[{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"98.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_23","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[21,0,182,619,270,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.9,2.8,3.7,4.6,5.5,6.4,7.3,8.2,9.1,10.0,10.9,11.8,12.700000000000001,13.6,14.5,15.4,16.3,17.2,18.1,19.0,19.900000000000002,20.8,21.7,22.6,23.5,24.400000000000002,25.3,26.2,27.1,28.0,28.900000000000002,29.8,30.7,31.6,32.5,33.4,34.300000000000004,35.2,36.1,37.0]},"near_unique":false,"sample":["404","6979","365","5876","8430","2164","510","314","939","8006","1516","21466","7334","6471","1537","11868","2600","4427","23576","802","2587","1043","2144","15813","8345","15818","1557","9213","4735","7806","5353","5389","508","817","572","17146","1775","8115","20931","8370","1286","538","9130","4936","1695","7941","19521","24486","2966","2490"],"top_values":[["0",21],["406",3],["404",2],["1057",2],["3166",2],["416",2],["1913",2],["2802",2],["736",2],["1263",2],["8942",2],["867",2],["4769",2],["3041",2],["1336",2],["500",2],["538",2],["3044",2],["6902",2],["1246",2]],"top_words":[["0",21],["406",3],["404",2],["1057",2],["3166",2],["416",2],["1913",2],["2802",2],["736",2],["1263",2],["8942",2],["867",2],["4769",2],["3041",2],["1336",2],["500",2],["538",2],["3044",2],["6902",2],["1246",2],["551",2],["3007",2],["10271",2],["802",2],["1473",2]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1028,"null_rate":0.0,"stats":{"allcaps_rate":0.979871912168344,"boilerplate_rate":0.0,"duplicate_rate":0.059469350411710885,"emoji_rate":0.0,"len_max":37,"len_mean":4.053064958828911,"len_median":4.0,"len_min":1,"len_p95":5.0,"n_duplicates":65,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1032,"word_mean":1.0036596523330283,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"756 singleton categories"}],"column":"_duplicated_24","extras":{"singletons":756,"top_values":[["0.00",15],["47.52",5],["51.82",4],["47.04",4],["54.24",4],["48.91",4],["51.90",3],["48.89",3],["37.97",3],["51.35",3],["44.18",3],["63.06",3],["40.64",3],["38.66",3],["57.98",3],["30.92",3],["53.94",3],["60.20",3],["39.15",3],["30.05",3]]},"kind":"categorical","n":1093,"n_null":6,"n_unique":900,"null_rate":0.0054894784995425435,"stats":{"cardinality":900,"entropy":9.690040675855581,"entropy_ratio":0.9873911479224543,"top_rate":0.013799448022079117,"top_value":"0.00"}},{"alerts":[{"code":"near_unique","level":"info","message":"99.5% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_25","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[248,0,712,0,0,132,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[4.0,4.35,4.7,5.05,5.4,5.75,6.1,6.449999999999999,6.8,7.15,7.5,7.85,8.2,8.55,8.899999999999999,9.25,9.6,9.95,10.3,10.649999999999999,11.0,11.35,11.7,12.049999999999999,12.399999999999999,12.75,13.1,13.45,13.799999999999999,14.149999999999999,14.5,14.85,15.2,15.549999999999999,15.899999999999999,16.25,16.6,16.95,17.299999999999997,17.65,18.0]},"near_unique":true,"sample":["3773","48556","6461","43008","54528","24447","7870","4684","9855","47615","10892","75544","43171","40007","11552","58558","21856","37666","152647","9662","17648","4636","16696","60852","50798","73126","10796","55615","42387","64299","51670","43150","5579","7347","4815","113929","17692","51873","130677","57668","10407","4710","56671","52429","17200","68850","120560","152459","25596","16102"],"top_values":[],"top_words":[["3584",2],["14860",2],["4203",2],["12221",2],["43180",2],["all",1],["determinations",1],["3773",1],["49900",1],["31294",1],["32497",1],["216147",1],["20038",1],["20362",1],["4486",1],["4960",1],["130914",1],["73112",1],["6442",1],["17011",1],["7902",1],["83205",1],["40007",1],["18139",1],["50504",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1088,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.004574565416285453,"emoji_rate":0.0,"len_max":18,"len_mean":4.90576395242452,"len_median":5.0,"len_min":4,"len_p95":6.0,"n_duplicates":5,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1089,"word_mean":1.000914913083257,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"97.8% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"_duplicated_26","extras":{"language_counts":{},"language_sample_size":1093,"length_histogram":{"counts":[4,487,0,595,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[3.0,3.625,4.25,4.875,5.5,6.125,6.75,7.375,8.0,8.625,9.25,9.875,10.5,11.125,11.75,12.375,13.0,13.625,14.25,14.875,15.5,16.125,16.75,17.375,18.0,18.625,19.25,19.875,20.5,21.125,21.75,22.375,23.0,23.625,24.25,24.875,25.5,26.125,26.75,27.375,28.0]},"near_unique":true,"sample":["1856","18872","2295","17664","22008","10304","3748","2163","3393","19458","3530","20352","14529","14188","4254","16658","8294","11043","51459","5017","6369","2361","7734","19037","14897","23191","3661","17452","17412","25299","19698","11687","3657","3051","2315","39097","7404","18180","40902","23160","4202","2121","18640","21657","5994","22769","32909","68625","8002","5449"],"top_values":[],"top_words":[["2280",3],["2086",2],["1497",2],["3958",2],["4645",2],["20368",2],["1940",2],["4331",2],["8105",2],["18301",2],["17406",2],["7887",2],["2660",2],["2583",2],["19161",2],["1310",2],["10521",2],["4775",2],["6178",2],["4202",2],["2200",2],["3247",2],["2121",2],["all",1],["favorable",1]],"vocab_skipped":null,"word_histogram":{"counts":[1092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":1093,"n_null":0,"n_unique":1069,"null_rate":0.0,"stats":{"allcaps_rate":0.9990850869167429,"boilerplate_rate":0.0,"duplicate_rate":0.021957913998170174,"emoji_rate":0.0,"len_max":28,"len_mean":4.573650503202196,"len_median":5.0,"len_min":3,"len_p95":5.0,"n_duplicates":24,"n_empty":0,"one_word_rate":0.9990850869167429,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1071,"word_mean":1.0018298261665142,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"693 singleton categories"}],"column":"_duplicated_27","extras":{"singletons":693,"top_values":[["37.60",4],["36.60",4],["41.85",4],["38.47",4],["49.19",3],["32.63",3],["42.28",3],["29.96",3],["42.14",3],["38.12",3],["33.04",3],["40.70",3],["40.45",3],["33.84",3],["30.27",3],["31.35",3],["39.43",3],["33.77",3],["30.69",3],["31.39",3]]},"kind":"categorical","n":1093,"n_null":0,"n_unique":873,"null_rate":0.0,"stats":{"cardinality":873,"entropy":9.662095617904999,"entropy_ratio":0.9889719535319927,"top_rate":0.0036596523330283625,"top_value":"37.60"}}],"insights":{"errors":[],"insights":[{"confidence":"medium","critiques":[],"evidence_keys":["_duplicated_1","_duplicated_2","_duplicated_4","_duplicated_22","_duplicated_12","_duplicated_0","_duplicated_3"],"featured_charts":[{"caption":"Row counts by SSA region code \u2014 ATL leads at 168, showing uneven regional coverage.","column":"_duplicated_1","kind":"bar"},{"caption":"Distribution across fiscal years 2001+ \u2014 note the flat 52-rows-per-year pattern indicating a balanced panel.","column":"_duplicated_4","kind":"bar"},{"caption":"State-code coverage \u2014 53 values each appearing 21 times suggests one row per state per year.","column":"_duplicated_2","kind":"bar"},{"caption":"A numeric ratio stored as text (mode 0.18, range ~0.16\u20130.25); convert to numeric to inspect its true distribution.","column":"_duplicated_22","kind":"histogram"},{"caption":"Another numeric column held as 69 string buckets clustered near 0.30\u20130.40 \u2014 recast and replot as a true histogram.","column":"_duplicated_12","kind":"histogram"}],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be the SSA-SA-FYWL dataset (Social Security Administration state/area fiscal-year workload data) with 1,093 rows and 30 columns, but the headers were not parsed correctly \u2014 most columns carry placeholder names like `_duplicated_*` and several columns hold metadata constants (file name, update date 3/13/2023, date type 'FY'). The most informative real fields are the geographic and time dimensions: `_duplicated_2` holds 53 US state codes (each appearing 21 times), `_duplicated_1` holds 11 region codes dominated by ATL (168 rows), and `_duplicated_4` holds 22 fiscal years from 2001 onward in a balanced panel. Many numeric measures (e.g. `_duplicated_22`, `_duplicated_12`, `_duplicated_10`) were ingested as text/categorical strings of decimal numbers, so they should be retyped before analysis. Start by fixing headers and dtypes, then look at the region/state/year structure to confirm the panel layout.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","top_rate","top_value","top_values","entropy","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This column is effectively a constant file-name tag (\"SSA-SA-FYWL.csv\" appears 1092 of 1093 times, top_rate 0.999) with a single stray \"File Name\" value that looks like a header row leaked into the data. The column header itself is a free-text note about 2021 data being backfilled with 2020 data, suggesting this is provenance metadata rather than a feature. Entropy is essentially zero (0.0106), so it carries no discriminative signal.","role":"metadata","scope":"column","target":"**Please note** 2021 data in columns H, K, R, and U are populated with 2020 data until current data is released.  ","treatment":"Drop; near-constant provenance field with a leaked header row."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary categorical column with 1093 rows and only 2 distinct values, but it is effectively a constant: \"2\" appears 1092 times (top_rate 0.999) while \"File Version\" appears once. The lone \"File Version\" string alongside numeric \"2\" suggests a stray header row leaked into the data. Entropy of 0.0106 confirms there is virtually no information here.","role":"metadata","scope":"column","target":"","treatment":"Drop the column and investigate the stray \"File Version\" row as a parsing artifact."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be a duplicated date column where 1092 of 1093 rows hold the single value '3/13/2023', with the lone other entry being the literal string 'Update Date' \u2014 almost certainly a header row that leaked into the data. Entropy is effectively zero (0.0106) and the top rate is 0.999, so the column carries no discriminative signal. The 'Update Date' value also confirms a parsing/ingest issue worth fixing upstream.","role":"metadata","scope":"column","target":"_duplicated_0","treatment":"Drop; constant column with a leaked header value."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Three-letter city/airport codes (ATL, DEN, BOS, PHL, CHI, DAL, SEA, SFO, KCM, NYC...) across 1093 rows with 11 unique values and no nulls. Distribution is fairly even \u2014 entropy ratio 0.947 and top value ATL only 15.4% \u2014 suggesting a balanced categorical rather than a skewed label. The column name `_duplicated_1` flags it as an auto-detected duplicate of another column in the profile.","role":"feature","scope":"column","target":"_duplicated_1","treatment":"Drop as duplicate, or one-hot encode the 11 city codes if kept."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds two-letter US state/territory abbreviations with a trailing space (e.g. 'AK ', 'AL ', 'AR '), with 53 distinct values across 1093 rows and no nulls. The distribution is almost perfectly uniform \u2014 entropy_ratio of 0.996 and the top value appearing just 21 times (1.92%) \u2014 suggesting the data is a regular grid of states repeated roughly 21 times each. The 53 categories slightly exceed the 50 states, consistent with DC and US territories, and the trailing whitespace in every value is a data-hygiene flag.","role":"feature","scope":"column","target":"_duplicated_2","treatment":"Strip trailing whitespace and treat as a categorical state code (one-hot or target-encode)."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"A binary categorical column completely dominated by the value 'FY' (1092 of 1093 rows, top_rate 0.999), with a single stray 'Date Type' entry. Entropy is effectively zero (0.0106), and the name '_duplicated_3' suggests this is a residual from a duplicated header or pivot artifact rather than a real feature. The lone 'Date Type' value looks like a header row that leaked into the data.","role":"other","scope":"column","target":"_duplicated_3","treatment":"Drop; constant column with a likely header-leak artifact."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds 22 distinct year strings ranging from at least 2001 onward, with each year appearing almost exactly 52 times across 1,093 rows and zero nulls. The near-uniform distribution (entropy ratio 0.986, top rate just 0.0476) and the count of 52 strongly suggest weekly observations stacked per year. The '_duplicated_4' name indicates saturn detected this as a duplicate of another column in the dataset.","role":"timestamp","scope":"column","target":"_duplicated_4","treatment":"Drop as a duplicate; if kept, cast to integer year and use as a time key."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.duplicate_rate","stats.n_duplicates","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as text but the values are short numeric tokens (length 6-21, mean 6.85, one word in 99.9% of rows), almost certainly some kind of numeric ID. Cardinality is near-unique (1037 distinct out of 1093) yet 56 rows duplicate (5.1% duplicate rate), which is unexpected for an identifier and worth checking. The column name '_duplicated_5' also suggests this was auto-generated from a collision during ingest.","role":"identifier","scope":"column","target":"_duplicated_5","treatment":"Cast to string id and left-join on it; investigate the 56 duplicates before assuming uniqueness."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","allcaps_rate","len_mean","len_min","len_max","n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Almost every value is a single all-caps token of 5-6 characters (len_mean 5.68, one_word_rate 0.999), with 1090 unique values across 1093 rows and only 3 duplicates. Top tokens are mostly numeric strings like '91371', '18795', '158314', suggesting this is an identifier or numeric code column rather than natural text \u2014 though a stray header-like fragment ('ssa', 'disability', 'beneficiaries', 'age', '18-64*') hints the source file had embedded header rows mixed into the data.","role":"identifier","scope":"column","target":"_duplicated_6","treatment":"Treat as a near-unique code; drop from modelling or use only as a join key after stripping the stray header rows."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Column is typed categorical but holds 511 distinct numeric strings like \"5.50\", \"5.07\", \"4.90\" across 1093 rows, suggesting a continuous measurement (price, rating, or similar) stored as text. Distribution is nearly flat: entropy ratio is 0.968 and the most common value covers only 1.01% of rows. The \"_duplicated_7\" name implies this is a redundant copy of another column produced during a join or pivot.","role":"feature","scope":"column","target":"_duplicated_7","treatment":"Cast to float and drop if it duplicates another numeric column; otherwise treat as a continuous feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.word_mean","stats.one_word_rate","stats.allcaps_rate","stats.duplicate_rate","stats.n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Single-token, all-caps short strings (length 6-26, mean 6.84, ~1 word each) that are overwhelmingly numeric \u2014 top values like '468802', '2702811', '1646445' are integers stored as text. With 1041 unique values across 1093 rows and only 52 duplicates, this looks like a near-unique numeric identifier rather than a feature. The 'allcaps' and Flesch=121.22 signals are artifacts of digit-only tokens; no URLs, emojis, or boilerplate appear.","role":"identifier","scope":"column","target":"_duplicated_8","treatment":"Drop from modelling or use as a join key; cast to integer if needed."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.one_word_rate","stats.allcaps_rate","stats.len_mean","stats.len_max","stats.n_duplicates","stats.duplicate_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Almost certainly an identifier-like code column: 1081 unique values across 1093 rows, single-token entries averaging 4.85 characters, and the top repeated values are short numeric strings like '4190' and '8630'. The 99.9% allcaps and one_word rates plus max length of 14 suggest compact alphanumeric codes rather than prose. The 12 duplicates (1.1%) are minor but worth checking given the column is otherwise near-unique.","role":"identifier","scope":"column","target":"_duplicated_9","treatment":"Treat as an identifier; drop from modelling features or use only for joins/lookups."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as a categorical but the values are numeric strings clustered tightly around 1.0 (top values include '0.97', '1.11', '1.01', '1.04', '0.92'), suggesting a ratio, multiplier, or normalised index. Distribution is highly diffuse with 199 distinct values across 1093 rows and an entropy ratio of 0.929, so no single bucket dominates (top_rate just 0.023). The '_duplicated_10' name implies this column is a redundant copy from an upstream join.","role":"feature","scope":"column","target":"_duplicated_10","treatment":"Cast to float and treat as a continuous feature; verify it isn't a duplicate of another column before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.allcaps_rate","stats.one_word_rate","stats.len_min","stats.len_max","stats.len_median","stats.duplicate_rate","stats.n_duplicates","top_words","column"],"model":"anthropic:claude-opus-4-7","narrative":"Almost certainly a short alphanumeric code column: 1062 distinct values across 1093 rows, 99.9% one-word and 99.9% all-caps, lengths between 3 and 30 characters with a median of 4. Top tokens are bare numeric strings like '6632' and '1573', each appearing only 2-3 times, suggesting ID-like codes rather than categories. The '_duplicated_11' name and 31 duplicates (2.8%) hint this is a copy of another column with minor collisions.","role":"identifier","scope":"column","target":"_duplicated_11","treatment":"Drop as near-unique identifier, or treat as a key for join/lookup rather than a feature."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","column"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds 69 distinct numeric-looking strings (e.g. '0.38', '0.34', '0.32') across 1093 rows with no nulls, suggesting a decimal ratio or rate stored as text. The distribution is fairly flat \u2014 top value '0.38' covers only 5.0% and entropy ratio is 0.905 \u2014 so no single value dominates. The '_duplicated_12' name signals it is a duplicate of another column, which is the main thing to flag.","role":"feature","scope":"column","target":"_duplicated_12","treatment":"Drop as a duplicate column after confirming it matches its source, otherwise cast to float."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.one_word_rate","stats.allcaps_rate","stats.len_min","stats.len_max","stats.len_median","stats.word_mean","top_words","stats.duplicate_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short, single-token uppercase strings that are almost entirely unique (1079 unique out of 1093), with lengths between 4 and 24 characters and a median of 5. The top-frequency tokens are all numeric strings ('17955', '5808', etc.) appearing only twice each, suggesting this is a near-unique identifier code rather than natural text. The 'allcaps' and 'one_word' rates near 99.9% confirm a structured code format, and the column name '_duplicated_13' hints it was auto-generated during a join or pivot.","role":"identifier","scope":"column","target":"_duplicated_13","treatment":"Drop or use as a join key; not suitable as a modelling feature due to near-uniqueness."},{"confidence":"high","critiques":[],"evidence_keys":["column","kind","n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column, labelled `_duplicated_14`, holds 1093 numeric-looking strings (e.g. \"31.13\", \"44.89\") with 883 unique values and no nulls \u2014 almost certainly a continuous measurement that was ingested as categorical. Entropy ratio of 0.99 and a top frequency of just 4 (0.37%) confirm near-uniqueness; the `long_tail` alert and the `_duplicated_` prefix suggest it is a redundant copy of another numeric column.","role":"feature","scope":"column","target":"_duplicated_14","treatment":"Cast to float and check for equality against the original column; drop if it is a duplicate."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","len_mean","len_max","duplicate_rate","n_duplicates","top_values","allcaps_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short single-token numeric strings (one_word_rate 0.999, len_mean 6.4, max 24) stored as text rather than integers, with 1019 unique values across 1093 rows. The value '0' appears 21 times while every other top value occurs only twice, suggesting '0' is a sentinel or default. The name '_duplicated_15' and the 6.8% duplicate rate hint this is a redundant copy of a numeric identifier column from an upstream join.","role":"identifier","scope":"column","target":"_duplicated_15","treatment":"Cast to integer and drop as a duplicate id unless it differs from the original column."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","len_mean","len_max","allcaps_rate","top_words","column"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed as text, this column is dominated by short single-token numeric strings (one_word_rate 0.999, len_mean 4.54, max 38) with 1057 unique values across 1093 rows. The top tokens are bare integers like \"0\" (21 occurrences), \"1358\", \"840\", suggesting an ID or numeric code stored as text rather than natural language. The allcaps_rate of 0.98 is an artifact of digits/non-letter content, and the column name `_duplicated_16` implies it was auto-generated during a column-name collision.","role":"identifier","scope":"column","target":"_duplicated_16","treatment":"Drop or treat as a high-cardinality ID; do not tokenize as text."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","column"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as categorical strings but the values are numeric ('0.00', '1.68', '0.58', '1.07'), suggesting a small-magnitude continuous measurement that was read as text. Cardinality is high (272 unique across 1093 rows) with very flat distribution: top value '0.00' covers only 1.92% and entropy ratio is 0.949. The '_duplicated_17' name implies this is a duplicate of another column produced during a join or concat.","role":"feature","scope":"column","target":"_duplicated_17","treatment":"Cast to float and check whether it duplicates an existing column; drop if redundant."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.word_mean","stats.len_mean","stats.len_max","stats.duplicate_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed as text, this column holds single-token numeric strings (one_word_rate 0.999, word_mean 1.00, len_mean 6.4) with 1021 unique values across 1093 rows \u2014 effectively a high-cardinality numeric ID stored as text. The value '0' appears 20 times while every other top value occurs at most twice, hinting at '0' as a sentinel/placeholder amid otherwise near-unique IDs. The 'allcaps' alert is a quirk of digit-only strings rather than meaningful casing.","role":"identifier","scope":"column","target":"_duplicated_18","treatment":"Cast to integer (treating '0' as missing) or drop as near-unique identifier before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","len_mean","len_max","top_values","allcaps_rate","duplicate_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed as text, this column is essentially short numeric tokens \u2014 99.9% are single words with mean length 4.05 characters and a max of 32. With 1018 unique values across 1093 rows and the most common entry '0' appearing only 21 times, it behaves like a high-cardinality numeric identifier stored as strings. The 'allcaps' alert (97.99%) is an artifact of digits having no lowercase form rather than a meaningful signal.","role":"identifier","scope":"column","target":"_duplicated_19","treatment":"Cast to integer and treat as an ID; drop from modelling features unless joined as a key."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed categorical, every one of the 156 distinct values is a two-decimal numeric string between 0.00 and 0.61+, suggesting a proportion or rate that was stored as text. The distribution is nearly flat (entropy ratio 0.907), with the modal value '0.30' covering only 2.6% of 1093 rows and no nulls. The column name '_duplicated_20' implies it is a copy of another column flagged during ingestion.","role":"feature","scope":"column","target":"_duplicated_20","treatment":"Cast strings to float and treat as a numeric feature; verify against the source column and drop if it is an exact duplicate."},{"confidence":"high","critiques":[],"evidence_keys":["column","kind","n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column is labelled `_duplicated_21`, suggesting saturn detected it as a duplicate of another field; values appear to be numeric strings stored as categorical. With 957 unique values across 1093 rows and an entropy ratio of 0.9885, it is nearly an identifier \u2014 the only meaningful concentration is `\"0\"` at 21 occurrences (1.92%), likely a sentinel or default. The long_tail alert and near-unique cardinality mean it carries almost no categorical signal as-is.","role":"identifier","scope":"column","target":"_duplicated_21","treatment":"Drop as a duplicated near-unique column, or reconcile against its original before any modelling."},{"confidence":"medium","critiques":[],"evidence_keys":["kind","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds 70 distinct short decimal strings clustered tightly around 0.16\u20130.25, suggesting a numeric ratio or rate (perhaps a proportion or probability) that has been stored as text. Distribution is fairly even with the top value '0.18' taking only 7.0% of rows and entropy ratio 0.84, so no single bucket dominates. The 'categorical' kind plus the '_duplicated_22' name hint that saturn detected this as a duplicate of another column and parsed it as strings rather than floats.","role":"feature","scope":"column","target":"_duplicated_22","treatment":"Cast to float and verify it is not redundant with the original column before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.len_mean","stats.len_max","stats.word_mean","stats.duplicate_rate","stats.allcaps_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Despite the text kind, every value is a single short token (word_mean 1.004, len_mean 4.05, len_max 37) and the top values are all numeric strings like \"0\", \"406\", \"404\". With 1028 unique values across 1093 rows and a 5.9% duplicate_rate dominated by \"0\" (21 occurrences), this looks like a numeric identifier or count stored as text. The allcaps_rate of 0.98 is a quirk of digit-only strings being flagged as uppercase.","role":"identifier","scope":"column","target":"_duplicated_23","treatment":"cast to integer and treat as numeric id or count rather than free text."},{"confidence":"high","critiques":[],"evidence_keys":["column","kind","n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed categorical, the values are numeric strings (e.g. '0.00', '47.52', '51.82'), suggesting a monetary or measurement field that was read as text. With 900 unique values across 1093 rows and entropy ratio 0.9874, it is nearly unique; the only meaningful concentration is '0.00' at 1.38% (15 rows). The '_duplicated_24' name implies this is a repeated copy of another column in the source.","role":"feature","scope":"column","target":"_duplicated_24","treatment":"Cast to float and treat as numeric; verify whether it duplicates another column before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.one_word_rate","stats.len_mean","stats.word_mean","stats.duplicate_rate","stats.n_duplicates","stats.vocab_size","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Almost every value is a single short ALLCAPS token (one_word_rate 0.999, allcaps_rate 0.999, len_mean 4.9, word_mean 1.0), and 1088 of 1093 rows are unique with only 5 duplicates. The top tokens are mostly numeric strings like '3584' or '14860', suggesting this is a near-unique short code rather than natural text. The column name '_duplicated_25' hints it was auto-generated from a duplicated source column during profiling.","role":"identifier","scope":"column","target":"_duplicated_25","treatment":"Drop or treat as an ID key; do not tokenize as free text."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","allcaps_rate","len_mean","len_max","top_words","duplicate_rate","vocab_size"],"model":"anthropic:claude-opus-4-7","narrative":"Single-token, all-caps strings averaging 4.57 characters with 1069 unique values across 1093 rows \u2014 almost certainly an identifier or short code column. The top values are all numeric strings (e.g., '2280', '2086') appearing 2-3 times each, suggesting these are numeric IDs stored as text rather than meaningful tokens. The 99.9% one-word and all-caps rates plus near-unique cardinality rule out free text.","role":"identifier","scope":"column","target":"_duplicated_26","treatment":"Treat as a categorical/key field; drop from modelling features or use only for joins."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","column"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as categorical strings but every observed value parses as a two-decimal number (e.g. '37.60', '41.85'), so this is almost certainly a numeric measurement \u2014 possibly a price, rate or score \u2014 that was ingested as text. With 873 unique values across 1093 rows and entropy ratio 0.989, it is near-unique; the most frequent value '37.60' appears just 4 times (top rate 0.37%). The '_duplicated_27' name suggests it is a duplicate of another column produced upstream.","role":"feature","scope":"column","target":"_duplicated_27","treatment":"Cast to float and treat as a numeric feature; verify it is not redundant with the column it duplicates."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":9414,"prompt_tokens":40539,"total_tokens":49953}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:12:33+00:00","mode":"full","row_count":1093,"sampled_rows":1093,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/cache/accessibility/ssa_sa_fywl.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"":"categorical","**Please note** 2021 data in columns H, K, R, and U are populated with 2020 data until current data is released.  ":"categorical","_duplicated_0":"categorical","_duplicated_1":"categorical","_duplicated_10":"categorical","_duplicated_11":"text","_duplicated_12":"categorical","_duplicated_13":"text","_duplicated_14":"categorical","_duplicated_15":"text","_duplicated_16":"text","_duplicated_17":"categorical","_duplicated_18":"text","_duplicated_19":"text","_duplicated_2":"categorical","_duplicated_20":"categorical","_duplicated_21":"categorical","_duplicated_22":"categorical","_duplicated_23":"text","_duplicated_24":"categorical","_duplicated_25":"text","_duplicated_26":"text","_duplicated_27":"categorical","_duplicated_3":"categorical","_duplicated_4":"categorical","_duplicated_5":"text","_duplicated_6":"text","_duplicated_7":"categorical","_duplicated_8":"text","_duplicated_9":"text"}}
