{"columns":[{"alerts":[],"column":"fips","extras":{"histogram":{"counts":[97,15,133,59,14,4,226,5,49,189,204,184,39,15,170,196,150,27,21,95,153,155,46,67,51,161,268,29,133,94,95,0,0,0,0,0,0,0,0,78],"edges":[1001.0,2779.8,4558.6,6337.4,8116.2,9895.0,11673.8,13452.6,15231.4,17010.199999999997,18789.0,20567.8,22346.6,24125.399999999998,25904.2,27683.0,29461.8,31240.6,33019.399999999994,34798.2,36577.0,38355.799999999996,40134.6,41913.4,43692.2,45471.0,47249.799999999996,49028.6,50807.4,52586.2,54365.0,56143.799999999996,57922.6,59701.4,61480.2,63259.0,65037.799999999996,66816.59999999999,68595.4,70374.2,72153.0]},"sample":[1033.0,1041.0,1059.0,1087.0,1113.0,1115.0,1131.0,2016.0,2105.0,2110.0,2188.0,4005.0,4015.0,5005.0,5019.0,5023.0,5053.0,5055.0,5111.0,5129.0,6001.0,6015.0,6047.0,6053.0,6077.0,6091.0,6095.0,6113.0,8007.0,8011.0,8015.0,8019.0,8023.0,8025.0,8029.0,8041.0,8055.0,8069.0,8075.0,8093.0,8123.0,8125.0,12007.0,12011.0,12015.0,12031.0,12047.0,12057.0,12061.0,12075.0,12079.0,12085.0,12107.0,12119.0,12121.0,13003.0,13011.0,13017.0,13051.0,13055.0,13061.0,13073.0,13081.0,13085.0,13109.0,13117.0,13125.0,13131.0,13137.0,13157.0,13159.0,13167.0,13179.0,13189.0,13191.0,13193.0,13229.0,13237.0,13245.0,13269.0,13271.0,13285.0,13293.0,13305.0,13319.0,15005.0,16003.0,16011.0,16013.0,16023.0,16053.0,16087.0,17031.0,17035.0,17047.0,17057.0,17063.0,17077.0,17149.0,17157.0,17185.0,17197.0,17201.0,18005.0,18041.0,18043.0,18083.0,18095.0,18099.0,18131.0,18133.0,18147.0,18159.0,18173.0,19003.0,19017.0,19023.0,19027.0,19035.0,19043.0,19047.0,19075.0,19107.0,19125.0,19137.0,19155.0,19169.0,19173.0,19175.0,19183.0,20003.0,20011.0,20023.0,20037.0,20047.0,20051.0,20057.0,20065.0,20069.0,20075.0,20083.0,20085.0,20113.0,20125.0,20133.0,20153.0,20157.0,20161.0,20171.0,20173.0,20181.0,20183.0,20201.0,20209.0,21011.0,21013.0,21029.0,21045.0,21053.0,21077.0,21085.0,21121.0,21151.0,21155.0,21193.0,21209.0,21223.0,21227.0,22035.0,22057.0,22061.0,22083.0,22111.0,22115.0,22127.0,23003.0,24003.0,24021.0,24025.0,25003.0,25011.0,25013.0,26003.0,26015.0,26033.0,26041.0,26049.0,26073.0,26091.0,26121.0,26125.0,26129.0,26137.0,26155.0,26159.0,26163.0,27007.0,27019.0,27021.0,27023.0,27031.0,27039.0,27041.0,27045.0,27049.0,27051.0,27083.0,27103.0,27107.0,27113.0,27129.0,27133.0,27169.0,28005.0,28007.0,28047.0,28063.0,28071.0,28075.0,28083.0,28085.0,28091.0,28095.0,28103.0,28109.0,28117.0,28133.0,28145.0,28163.0,29017.0,29031.0,29067.0,29087.0,29123.0,29153.0,29163.0,29173.0,29181.0,29187.0,29195.0,29201.0,29213.0,29227.0,30005.0,30009.0,30019.0,30021.0,30027.0,30029.0,30065.0,30077.0,30087.0,30089.0,30105.0,31027.0,31045.0,31075.0,31095.0,31105.0,31129.0,31135.0,31151.0,31161.0,31177.0,32007.0,32033.0,33003.0,33005.0,33007.0,34015.0,34033.0,34041.0,35025.0,35059.0,36003.0,36013.0,36057.0,36063.0,36071.0,36075.0,36083.0,36093.0,36097.0,36103.0,37009.0,37017.0,37019.0,37023.0,37025.0,37029.0,37047.0,37057.0,37065.0,37071.0,37089.0,37095.0,37105.0,37109.0,37141.0,37143.0,37149.0,37199.0,38011.0,38023.0,38041.0,38049.0,38053.0,38055.0,38063.0,38071.0,38095.0,39005.0,39045.0,39057.0,39067.0,39075.0,39093.0,39101.0,39109.0,39117.0,39125.0,39127.0,39159.0,39163.0,39167.0,40027.0,40039.0,40049.0,40051.0,40063.0,40095.0,40099.0,40149.0,41001.0,41005.0,41019.0,41025.0,41031.0,41047.0,41057.0,41065.0,41069.0,42005.0,42013.0,42017.0,42027.0,42033.0,42037.0,42051.0,42055.0,42073.0,42075.0,42077.0,42111.0,42113.0,42123.0,44005.0,45013.0,45025.0,45037.0,45039.0,45041.0,45045.0,45049.0,45061.0,45065.0,45079.0,45083.0,46007.0,46011.0,46013.0,46043.0,46045.0,46049.0,46093.0,46119.0,46135.0,47007.0,47009.0,47035.0,47053.0,47057.0,47083.0,47087.0,47137.0,47159.0,48001.0,48009.0,48011.0,48015.0,48021.0,48039.0,48059.0,48081.0,48085.0,48089.0,48119.0,48161.0,48165.0,48175.0,48221.0,48227.0,48261.0,48263.0,48265.0,48285.0,48299.0,48323.0,48341.0,48369.0,48373.0,48393.0,48397.0,48403.0,48439.0,48443.0,48445.0,48465.0,48475.0,48487.0,48499.0,48505.0,49019.0,49023.0,49041.0,50005.0,50011.0,50025.0,51035.0,51043.0,51047.0,51067.0,51081.0,51093.0,51101.0,51105.0,51109.0,51111.0,51115.0,51131.0,51137.0,51167.0,51179.0,51195.0,51197.0,51510.0,51595.0,51600.0,51650.0,51680.0,51730.0,51740.0,51760.0,51800.0,53003.0,53029.0,53033.0,53043.0,53047.0,53055.0,53059.0,54003.0,54009.0,54043.0,54055.0,54071.0,54087.0,54097.0,54105.0,55003.0,55011.0,55033.0,55037.0,55047.0,55053.0,55057.0,55063.0,55081.0,55085.0,55087.0,55099.0,55113.0,55133.0,56017.0,56027.0,56033.0,56035.0,72023.0,72033.0,72049.0,72054.0,72069.0,72081.0,72083.0,72085.0,72087.0,72093.0,72109.0,72147.0,72153.0]},"kind":"numeric","n":3222,"n_null":0,"n_unique":3222,"null_rate":0.0,"stats":{"iqr":27075.0,"kurtosis":-0.6313744123374883,"max":72153.0,"mean":31377.89044072005,"median":30022.0,"min":1001.0,"n_outliers":0,"outlier_rate":0.0,"q1":19029.5,"q3":46104.5,"skew":0.1573655534643171,"std":16299.548257647879,"zero_rate":0.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"}],"column":"county_name","extras":{"language_counts":{},"language_sample_size":3222,"length_histogram":{"counts":[26,72,121,190,264,407,420,363,320,240,231,152,139,165,41,28,16,10,5,0,1,1,0,1,1,0,2,0,1,1,0,0,0,0,2,1,0,0,0,1],"edges":[16.0,17.075,18.15,19.225,20.3,21.375,22.45,23.525,24.6,25.674999999999997,26.75,27.825,28.9,29.975,31.049999999999997,32.125,33.2,34.275,35.349999999999994,36.425,37.5,38.575,39.65,40.724999999999994,41.8,42.875,43.95,45.025,46.099999999999994,47.175,48.25,49.324999999999996,50.4,51.475,52.55,53.625,54.699999999999996,55.775,56.85,57.925,59.0]},"near_unique":true,"sample":["Bibb County, Alabama","Cheatham County, Tennessee","Piute County, Utah","Lamb County, Texas","Martin County, Minnesota","Sheridan County, Wyoming","Chickasaw County, Mississippi","Rockingham County, Virginia","Liberty County, Texas","Clark County, Arkansas","Moore County, North Carolina","Lancaster County, Nebraska","Chase County, Kansas","DeKalb County, Alabama","Linn County, Kansas","Washington County, Idaho","Saguache County, Colorado","Pecos County, Texas","Patillas Municipio, Puerto Rico","Ogemaw County, Michigan","Washington County, Kansas","Greene County, Arkansas","Toombs County, Georgia","Alachua County, Florida","Cambria County, Pennsylvania","Jefferson County, Tennessee","Hunterdon County, New Jersey","DuPage County, Illinois","Baca County, Colorado","Lincoln County, Wyoming","Scioto County, Ohio","Blaine County, Oklahoma","Greene County, Alabama","Iosco County, Michigan","Vernon Parish, Louisiana","Bolivar County, Mississippi","Mineral County, West Virginia","Silver Bow County, Montana","Lincoln County, Mississippi","Knox County, Nebraska","Hood County, Texas","Spokane County, Washington","Decatur County, Kansas","San Juan County, Washington","La Crosse County, Wisconsin","Aguas Buenas Municipio, Puerto Rico","Bienville Parish, Louisiana","Lapeer County, Michigan","Upson County, Georgia","Essex County, Virginia"],"top_values":[],"top_words":[["county,",2999],["texas",256],["virginia",189],["georgia",159],["north",155],["carolina",147],["new",131],["dakota",121],["kentucky",120],["missouri",115],["south",113],["kansas",105],["illinois",102],["iowa",101],["tennessee",95],["indiana",93],["nebraska",93],["ohio",91],["minnesota",87],["mississippi",84],["michigan",83],["oklahoma",78],["municipio,",78],["puerto",78],["rico",78]],"vocab_skipped":null,"word_histogram":{"counts":[2492,0,0,0,0,0,0,667,0,0,0,0,0,0,0,57,0,0,0,0,0,0,5,0,0,0,0,0,0,1],"edges":[3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.9333333333333336,4.066666666666666,4.2,4.333333333333333,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0,5.133333333333333,5.266666666666667,5.4,5.533333333333333,5.666666666666666,5.8,5.933333333333334,6.066666666666666,6.2,6.333333333333334,6.466666666666667,6.6,6.733333333333333,6.866666666666667,7.0]}},"kind":"text","n":3222,"n_null":0,"n_unique":3222,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":59,"len_mean":24.324022346368714,"len_median":24.0,"len_min":16,"len_p95":31.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":10.283900000000026,"url_rate":0.0,"vocab_size":1990,"word_mean":3.2482929857231535,"word_median":3.0}},{"alerts":[],"column":"rural","extras":{"singletons":0,"top_values":[["True",2212],["False",1010]]},"kind":"categorical","n":3222,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.8971383342699701,"entropy_ratio":0.8971383342699701,"top_rate":0.686530105524519,"top_value":"True"}},{"alerts":[],"column":"rural_category","extras":{"singletons":0,"top_values":[["Rural",2212],["Urban/Suburban",1010]]},"kind":"categorical","n":3222,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.8971383342699701,"entropy_ratio":0.8971383342699701,"top_rate":0.686530105524519,"top_value":"Rural"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.rural.top_values","columns.rural.stats.top_rate","columns.rural_category.top_values","columns.county_name.top_words","columns.fips.stats.min","columns.fips.stats.max"],"featured_charts":[{"caption":"Shows the roughly 69/31 split between Rural and Urban/Suburban counties.","column":"rural_category","kind":"donut"},{"caption":"Distribution of FIPS codes spans 1001 to 72153, indicating coverage from Alabama through Puerto Rico.","column":"fips","kind":"histogram"},{"caption":"Confirms `rural` mirrors `rural_category` exactly \u2014 one of the two columns is redundant.","column":"rural","kind":"bar"},{"caption":"County name lengths cluster tightly around 24 characters, useful for sanity-checking display widths.","column":"county_name","kind":"length"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a county-level reference table covering 3,222 U.S. counties, with each row uniquely identified by a county name and FIPS code and labeled as either rural or urban/suburban. The headline finding is the rural skew: 2,212 counties (about 68.7%) are flagged Rural versus 1,010 Urban/Suburban, and the `rural` and `rural_category` columns are perfectly redundant duplicates of each other. County names are dominated by Texas (256), Virginia (189), and Georgia (159), reflecting how many counties those states contain rather than any data quality issue.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.skew","stats.kurtosis"],"model":"anthropic:claude-opus-4-7","narrative":"This is the FIPS county/state code, with all 3222 rows unique and no nulls. Values span 1001 to 72153 with a near-symmetric distribution (skew 0.16, kurtosis -0.63), consistent with the standard 5-digit US county FIPS encoding rather than a measured quantity. Treat it as a categorical key, not a number.","role":"identifier","scope":"column","target":"fips","treatment":"Cast to zero-padded string and use as a join key to geographic reference tables."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_min","stats.len_max","stats.len_median","stats.word_median","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Each of the 3,222 rows holds a unique county-plus-state string (e.g., 'X County, Texas'), with 'county,' appearing 2,999 times and state names like Texas (256), Virginia (189), and Georgia (159) dominating the top words. Lengths are tight (16-59 chars, median 24) and there are zero nulls or duplicates, consistent with a complete US county roster. The near_unique alert is expected here rather than a data-quality issue.","role":"identifier","scope":"column","target":"county_name","treatment":"Use as a join key to county-level reference tables; do not feed raw into a model."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary boolean flag indicating whether a record is rural, fully populated across all 3222 rows. The split is roughly 69/31 in favour of True (2212 vs 1010), giving a high entropy ratio of 0.90 \u2014 imbalanced but far from degenerate.","role":"feature","scope":"column","target":"rural","treatment":"Cast to 0/1 and use directly as a binary feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary geographic classifier splitting records into 'Rural' (2212) and 'Urban/Suburban' (1010) across all 3222 rows with no nulls. The split is roughly 69/31 toward Rural, giving an entropy ratio of 0.897, so both classes are well represented despite the imbalance.","role":"feature","scope":"column","target":"rural_category","treatment":"Encode as a binary indicator (e.g., is_rural) for modelling."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":1567,"prompt_tokens":4591,"total_tokens":6158}},"language_counts":{},"meta":{"generated_at":"2026-05-01T16:52:50+00:00","mode":"full","row_count":3222,"sampled_rows":3222,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/cache/rural_urban.parquet"},"notes":[],"saturn_version":"0.2.0","schema":{"county_name":"text","fips":"numeric","rural":"boolean","rural_category":"categorical"}}
