{"columns":[{"alerts":[],"column":"STATEFP","extras":{"singletons":2,"top_values":[["48",254],["13",159],["51",133],["21",120],["29",115],["20",105],["17",102],["37",100],["19",99],["47",95],["31",93],["18",92],["39",88],["27",87],["26",83],["28",82],["72",78],["40",77],["05",75],["55",72]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":56,"null_rate":0.0,"stats":{"cardinality":56,"entropy":5.336841514314598,"entropy_ratio":0.9189797396477537,"top_rate":0.07854050711193568,"top_value":"48"}},{"alerts":[],"column":"COUNTYFP","extras":{"singletons":162,"top_values":[["003",50],["001",50],["005",50],["009",49],["007",48],["013",48],["011",47],["015",47],["019",46],["017",46],["027",45],["023",45],["021",45],["025",43],["031",42],["029",42],["033",41],["037",40],["035",40],["039",39]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":330,"null_rate":0.0,"stats":{"cardinality":330,"entropy":7.118188784963758,"entropy_ratio":0.8508145637569648,"top_rate":0.015460729746444033,"top_value":"003"}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 
chars"}],"column":"COUNTYNS","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[7.5,7.525,7.55,7.575,7.6,7.625,7.65,7.675,7.7,7.725,7.75,7.775,7.8,7.825,7.85,7.875,7.9,7.925,7.95,7.975,8.0,8.025,8.05,8.075,8.1,8.125,8.15,8.175,8.2,8.225,8.25,8.275,8.3,8.325,8.35,8.375,8.4,8.425,8.45,8.475,8.5]},"near_unique":true,"sample":["00835876","01493928","01558262","01383811","01074062","00465192","00835845","00758561","01639754","01383909","00516912","01008566","00347505","01448030","00974108","00450406","00758509","00835868","00516904","00424261","00758466","01034208","00695792","00758544","01008570","01065575","00025444","00659527","00516855","01265759","01101821","00758481","00929108","00069167","00450357","01480109","01622979","00857664","01026331","01101842","01209173","00424237","00933051","00198132","00424240","00485059","01135864","01419983","01034231","00217271"],"top_values":[],"top_words":[["00835841",1],["01513275",1],["00933054",1],["00835876",1],["00835886",1],["01804523",1],["01265772",1],["01383949",1],["00277310",1],["00516873",1],["01074044",1],["01383880",1],["00161539",1],["01383791",1],["01074015",1],["00348794",1],["01581115",1],["00069902",1],["01155135",1],["01214112",1],["00695754",1],["00758496",1],["00198170",1],["01008544",1],["01448030",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":3234,"n_null":0,"n_un
ique":3234,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":8,"len_mean":8.0,"len_median":8.0,"len_min":8,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3234,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"GEOID","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[4.5,4.525,4.55,4.575,4.6,4.625,4.65,4.675,4.7,4.725,4.75,4.775,4.8,4.825,4.85,4.875,4.9,4.925,4.95,4.975,5.0,5.025,5.05,5.075,5.1,5.125,5.15,5.175,5.2,5.225,5.25,5.275,5.3,5.325,5.35,5.375,5.4,5.425,5.45,5.475,5.5]},"near_unique":true,"sample":["31109","51135","54009","48051","39099","19005","31047","29217","47077","48247","21131","37099","13283","49033","36019","18015","29109","31093","21115","17125","29023","38085","28143","29179","37119","39013","04009","27165","21017","46091","40067","29053","35003","05095","18059","51036","26073","32007","37187","40109","42017","17071","35006","08033","17077","20197","41057","02240","38101","10001"],"top_values":[],"top_words":[["31039",1],["53069",1],["35011",1],["31109",1],["31129",1],["72085",1],["46099",1],["48327",1],["06091",1],["21053",1],["39063",1],["48189",1],["01027",1],["48011",1],["39003",1],["13189",1],["55111",1],["05137",1],["41063",1],["42007",1],["28061",1],["29083",1],["08109",1],["37037",1],["49033",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,
0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":3234,"n_null":0,"n_unique":3234,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":5,"len_mean":5.0,"len_median":5.0,"len_min":5,"len_p95":5.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3234,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"93.1% rows are a single word"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"40.5% duplicate strings"}],"column":"NAME","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[27,0,257,0,470,0,696,0,614,0,0,501,0,292,0,211,0,60,0,0,48,0,22,0,14,0,8,0,5,0,0,3,0,1,0,1,0,3,0,1],"edges":[3.0,3.45,3.9,4.35,4.8,5.25,5.7,6.15,6.6,7.05,7.5,7.95,8.4,8.850000000000001,9.3,9.75,10.2,10.65,11.1,11.55,12.0,12.450000000000001,12.9,13.35,13.8,14.25,14.700000000000001,15.15,15.6,16.05,16.5,16.950000000000003,17.4,17.85,18.3,18.75,19.2,19.650000000000002,20.1,20.55,21.0]},"near_unique":false,"sample":["Lancaster","Nottoway","Brooke","Burleson","Mahoning","Allamakee","Dawson","Vernon","Henderson","Jim Hogg","Leslie","Jackson","Treutlen","Rich","Clinton","Carroll","Lawrence","Howard","Johnson","Mason","Butler","Sioux","Tunica","Reynolds","Mecklenburg","Belmont","Graham","Watonwan","Bourbon","Marshall","Jefferson","Cooper","Catron","Monroe","Hancock","Charles 
City","Isabella","Elko","Washington","Oklahoma","Bucks","Henderson","Cibola","Dolores","Jackson","Wabaunsee","Tillamook","Southeast Fairbanks","Ward","Kent"],"top_values":[["Washington",31],["Jefferson",26],["Franklin",26],["Lincoln",24],["Jackson",24],["Madison",20],["Clay",18],["Montgomery",18],["Union",18],["Marion",17],["Monroe",17],["Wayne",16],["Grant",15],["Greene",14],["Warren",14],["Carroll",13],["Lake",12],["Polk",12],["Douglas",12],["Marshall",12]],"top_words":[["washington",31],["st.",29],["jefferson",28],["franklin",26],["lincoln",24],["jackson",24],["san",21],["madison",20],["clay",18],["lake",18],["montgomery",18],["union",18],["marion",17],["monroe",17],["wayne",16],["carroll",15],["grant",15],["greene",14],["warren",14],["clark",13],["polk",12],["douglas",12],["marshall",12],["johnson",12],["lawrence",12]],"vocab_skipped":null,"word_histogram":{"counts":[3010,0,0,0,0,0,0,0,0,0,210,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,2],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":3234,"n_null":0,"n_unique":1923,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.40538033395176254,"emoji_rate":0.0,"len_max":21,"len_mean":7.0395794681508965,"len_median":7.0,"len_min":3,"len_p95":11.0,"n_duplicates":1311,"n_empty":0,"one_word_rate":0.9307359307359307,"readability_flesch_mean":31.96645000000003,"url_rate":0.0,"vocab_size":1958,"word_mean":1.0742115027829313,"word_median":1.0}},{"alerts":[{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"39.1% duplicate 
strings"}],"column":"NAMELSAD","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[1,0,0,0,0,0,0,0,29,256,0,465,683,590,0,496,297,223,0,67,51,0,23,16,14,0,7,4,5,0,2,1,0,1,1,0,0,1,0,1],"edges":[4.0,4.725,5.45,6.175,6.9,7.625,8.35,9.075,9.8,10.524999999999999,11.25,11.975,12.7,13.424999999999999,14.15,14.875,15.6,16.325,17.049999999999997,17.775,18.5,19.225,19.95,20.675,21.4,22.125,22.849999999999998,23.575,24.3,25.025,25.75,26.474999999999998,27.2,27.925,28.65,29.375,30.099999999999998,30.825,31.55,32.275,33.0]},"near_unique":false,"sample":["Lancaster County","Nottoway County","Brooke County","Burleson County","Mahoning County","Allamakee County","Dawson County","Vernon County","Henderson County","Jim Hogg County","Leslie County","Jackson County","Treutlen County","Rich County","Clinton County","Carroll County","Lawrence County","Howard County","Johnson County","Mason County","Butler County","Sioux County","Tunica County","Reynolds County","Mecklenburg County","Belmont County","Graham County","Watonwan County","Bourbon County","Marshall County","Jefferson County","Cooper County","Catron County","Monroe County","Hancock County","Charles City County","Isabella County","Elko County","Washington County","Oklahoma County","Bucks County","Henderson County","Cibola County","Dolores County","Jackson County","Wabaunsee County","Tillamook County","Southeast Fairbanks Census Area","Ward County","Kent County"],"top_values":[["Washington County",30],["Jefferson County",25],["Franklin County",24],["Lincoln County",23],["Jackson County",23],["Madison County",19],["Clay County",18],["Montgomery County",18],["Marion County",17],["Union County",17],["Monroe County",17],["Wayne County",16],["Greene County",14],["Warren County",14],["Grant County",14],["Carroll County",13],["Lake County",12],["Polk County",12],["Douglas County",12],["Marshall 
County",12]],"top_words":[["county",3007],["municipio",78],["parish",64],["city",47],["washington",31],["st.",29],["jefferson",28],["franklin",26],["lincoln",24],["jackson",24],["san",21],["madison",20],["clay",18],["lake",18],["montgomery",18],["union",18],["marion",17],["borough",17],["monroe",17],["wayne",16],["carroll",15],["grant",15],["greene",14],["warren",14],["clark",13]],"vocab_skipped":null,"word_histogram":{"counts":[1,0,0,0,0,0,0,3001,0,0,0,0,0,0,0,212,0,0,0,0,0,0,17,0,0,0,0,0,0,3],"edges":[1.0,1.1333333333333333,1.2666666666666666,1.4,1.5333333333333332,1.6666666666666665,1.8,1.9333333333333333,2.0666666666666664,2.2,2.333333333333333,2.466666666666667,2.6,2.7333333333333334,2.8666666666666667,3.0,3.1333333333333333,3.2666666666666666,3.4,3.533333333333333,3.6666666666666665,3.8,3.933333333333333,4.066666666666666,4.2,4.333333333333334,4.466666666666667,4.6,4.733333333333333,4.866666666666667,5.0]}},"kind":"text","n":3234,"n_null":0,"n_unique":1969,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.391156462585034,"emoji_rate":0.0,"len_max":33,"len_mean":14.122758194186765,"len_median":14.0,"len_min":4,"len_p95":18.0,"n_duplicates":1265,"n_empty":0,"one_word_rate":0.00030921459492888067,"readability_flesch_mean":32.29095000000002,"url_rate":0.0,"vocab_size":1965,"word_mean":2.078540507111936,"word_median":2.0}},{"alerts":[],"column":"LSAD","extras":{"singletons":0,"top_values":[["06",3007],["13",78],["15",64],["25",40],["04",13],["05",11],["12",6],["00",5],["03",4],["10",3],["07",3]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":11,"null_rate":0.0,"stats":{"cardinality":11,"entropy":0.5393966742712151,"entropy_ratio":0.15592060596465512,"top_rate":0.9298082869511441,"top_value":"06"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 96.3% of 
rows"}],"column":"CLASSFP","extras":{"singletons":0,"top_values":[["H1",3115],["C7",41],["H6",38],["H4",29],["H5",11]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":5,"null_rate":0.0,"stats":{"cardinality":5,"entropy":0.29619663211688774,"entropy_ratio":0.12756494603303226,"top_rate":0.9632034632034632,"top_value":"H1"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"MTFCC","extras":{"singletons":0,"top_values":[["G4020",3234]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"G4020"}},{"alerts":[{"code":"null_rate","level":"warn","message":"61.2% null"}],"column":"CSAFP","extras":{"singletons":0,"top_values":[["490",48],["122",42],["548",41],["408",31],["545",22],["312",22],["378",21],["176",19],["148",19],["206",19],["178",18],["294",18],["198",17],["476",17],["170",16],["428",16],["400",16],["350",15],["184",14],["174",14]]},"kind":"categorical","n":3234,"n_null":1978,"n_unique":175,"null_rate":0.6116264687693259,"stats":{"cardinality":175,"entropy":6.977200362098987,"entropy_ratio":0.9363847376461224,"top_rate":0.03821656050955414,"top_value":"490"}},{"alerts":[{"code":"long_tail","level":"info","message":"602 singleton categories"},{"code":"null_rate","level":"warn","message":"40.8% null"}],"column":"CBSAFP","extras":{"singletons":602,"top_values":[["41980",40],["12060",29],["47900",25],["35620",23],["47260",19],["40060",17],["17140",16],["33460",15],["41180",15],["16980",14],["28140",14],["34980",13],["16740",11],["37980",11],["19100",11],["26900",11],["19740",10],["18140",10],["12940",10],["31140",10]]},"kind":"categorical","n":3234,"n_null":1318,"n_unique":939,"null_rate":0.4075448361162647,"stats":{"cardinality":939,"entropy":9.277790103154569,"entropy_ratio":0.9395248230376563,"top_rate":0.020876826722338204,"top_value":"41980"}},{"alerts":[{"code":"null_rate","level":"warn","message":"96.6% 
null"}],"column":"METDIVFP","extras":{"singletons":8,"top_values":[["47894",23],["35614",11],["19124",7],["35084",6],["16984",5],["47664",5],["23844",4],["23104",4],["35154",4],["14454",3],["15804",3],["48864",3],["20994",3],["33874",3],["35004",2],["37964",2],["29404",2],["41884",2],["23224",2],["36084",2]]},"kind":"categorical","n":3234,"n_null":3124,"n_unique":31,"null_rate":0.9659863945578231,"stats":{"cardinality":31,"entropy":4.360899252499727,"entropy_ratio":0.8802435307936319,"top_rate":0.20909090909090908,"top_value":"47894"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 96.4% of rows"}],"column":"FUNCSTAT","extras":{"singletons":1,"top_values":[["A",3116],["F",43],["C",33],["N",27],["S",11],["B",3],["G",1]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":7,"null_rate":0.0,"stats":{"cardinality":7,"entropy":0.3005204240374316,"entropy_ratio":0.10704753491488357,"top_rate":0.9635126777983921,"top_value":"A"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+27.13"},{"code":"outliers","level":"warn","message":"11.2% rows beyond 1.5 
IQR"}],"column":"ALAND","extras":{"histogram":{"counts":[3096,97,22,3,4,3,5,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[82093.0,9426052976.925,18852023860.85,28277994744.774998,37703965628.7,47129936512.625,56555907396.549995,65981878280.47499,75407849164.4,84833820048.325,94259790932.25,103685761816.17499,113111732700.09999,122537703584.025,131963674467.94998,141389645351.875,150815616235.8,160241587119.72498,169667558003.65,179093528887.57498,188519499771.5,197945470655.425,207371441539.34998,216797412423.275,226223383307.19998,235649354191.12497,245075325075.05,254501295958.97498,263927266842.89996,273353237726.82498,282779208610.75,292205179494.675,301631150378.6,311057121262.52496,320483092146.44995,329909063030.375,339335033914.3,348761004798.225,358186975682.14996,367612946566.07495,377038917450.0]},"sample":[2153686013.0,1751636189.0,17126455954.0,2271159651.0,1052469885.0,1365527344.0,1481205162.0,2307251052.0,2322112746.0,1633443915.0,1995545557.0,1419419015.0,1076417818.0,2942674729.0,4845019827.0,1845564903.0,3441795903.0,962153982.0,1031795759.0,2580958328.0,2993896403.0,2911757764.0,957415216.0,2336230360.0,1222184251.0,996739079.0,2330762295.0,750439787.0,591684405.0,1137492089.0,77448650.0,4566524477.0,2519361809.0,2858360401.0,814020543.0,665192108.0,1932231568.0,942788867.0,1875077589.0,2684924491.0,12748378765.0,1078561237.0,7080860393.0,1298967362.0,1475804204.0,1831756443.0,1291702720.0,612626871.0,1076387821.0,1240005834.0,2479944305.0,1555612626.0,1021431273.0,2373238700.0,1077461417.0,689900922.0,5681165604.0,14280774789.0,1362677265.0,964946132.0,666494453.0,1036314932.0,4243457240.0,1457099562.0,1286109795.0,2359930478.0,12228666259.0,3863118053.0,4080862341.0,417082919.0,1665848458.0,1417462629.0,1059109158.0,3184064139.0,740863433.0,995658281.0,2156776424.0,1428247558.0,1750176503.0,1979735379.0,1518269755.0,2376792063.0,1053968848.0,5213719156.0,133799721.0,4322321796.0,3644306224.0,1010491623.0,1472652734
.0,2116594503.0,429381241.0,1992126184.0,964057413.0,8040895433.0,1881324131.0,2607411018.0,1262317070.0,158316124.0,4420591349.0,1892806735.0,1475870257.0,1298175095.0,2012242914.0,1314689449.0,119637319.0,415150789.0,1306192614.0,1938032964.0,1575487976.0,2735193250.0,2155483577.0,3655871582.0,849209714.0,1688445990.0,7512659846.0,2448595161.0,8499623032.0,1534753300.0,7003406836.0,1136152511.0,1530110527.0,1854696288.0,1703461064.0,17574363572.0,4645084668.0,736569935.0,954133346.0,1621791140.0,1385765319.0,1215472436.0,21078901111.0,1118097708.0,6487154458.0,23798597680.0,267292993.0,4853487954.0,4647923092.0,1082885598.0,334684850.0,1125731578.0,2958090989.0,1360957553.0,1091289821.0,560718761.0,1052883435.0,2313937124.0,172725726.0,1070810025.0,1410354031.0,1852726651.0,1206496138.0,996272886.0,1799408335.0,1678296041.0,25061727.0,1115362166.0,1980383070.0,1741819732.0,1442967719.0,3827677060.0,1074352780.0,2295402858.0,795498346.0,2016295985.0,1125562873.0,6676158658.0,1848872592.0,1449729130.0,5255990018.0,1436123086.0,1377850210.0,947209658.0,1067154436.0,2249624779.0,1239816906.0,792425161.0,44908386.0,115558559.0,1883593926.0,5949926850.0,1180428103.0,463558849.0,2394158684.0,17214061665.0,1975244256.0,1101694783.0,9985057448.0,41484923347.0,1968660755.0,4988962636.0,518758193.0,1045344987.0,2509705676.0,879723724.0,1857036304.0,1374668727.0,1396914848.0,2438398331.0,1912292607.0,982346326.0,1098741137.0,118745080.0,1291817953.0,1632622505.0,1828539717.0,623524633.0,852827573.0,13639398051.0,2626878527.0,3257188233.0,1489930012.0,1842136053.0,1387786258.0,1881916535.0,882181709.0,1575636945.0,2126566287.0,1577902083.0,4489020248.0,4253498542.0,4827433521.0,1628067881.0,1536612362.0,2342295972.0,1464476090.0,974029895.0,897456201.0,6069358955.0,2220025613.0,12437291886.0,899791096.0,1788024160.0,1400332623.0,7857548.0,962673214.0,2073299557.0,965625489.0,2342369110.0,5465546324.0,769360733.0,1292800352.0,54816716.0,2154720636.0,6919333290.0,1478101945.0,13
831928341.0,1478137692.0,1045924429.0,65124866.0,2540730649.0,1676360343.0,1072278217.0,1268830618.0,2896993132.0,8798699726.0,695561637.0,1074806610.0,1215368212.0,1762679431.0,481902934.0,2093676309.0,2015326677.0,6335242832.0,2282835039.0,2504708861.0,1855583353.0,1836035445.0,1328792556.0,1060592683.0,1627252596.0,1649671695.0,1963751254.0,2507484145.0,1074228439.0,1236387822.0,2723485995.0,101747429.0,1549018611.0,1450666453.0,1013697487.0,5706368585.0,1966062964.0,1179845613.0,1335344502.0,1487908432.0,1300661926.0,4919826443.0,63990747114.0,978525378.0,1316574580.0,431173840.0,1734352072.0,1121230367.0,2446120246.0,861537618.0,2413515434.0,3388125876.0,1417014581.0,5534591552.0,2377554566.0,4403309851.0,7638229340.0,3098899108.0,1422173769.0,2329026445.0,779455288.0,3762130687.0,1689621009.0,2332448732.0,7815593157.0,1765788195.0,4959475392.0,1629204766.0,1364133162.0,1441308410.0,8664061384.0,1618664029.0,1418795389.0,1289344338.0,1605492716.0,1241494597.0,1068536124.0,6825534318.0,1517642896.0,573036225.0,6167113903.0,1065649888.0,1109126227.0,7679776933.0,1845313211.0,2273311866.0,1064602254.0,1576952763.0,1866683177.0,851489196.0,3000121460.0,2480587302.0,1880936102.0,1872109197.0,1250887844.0,1240141960.0,1193911893.0,2363499444.0,2271326613.0,1969826793.0,3246092085.0,1340448561.0,1180688993.0,838372770.0,2345090474.0,802949683.0,4116554962.0,624563058.0,3385957220.0,2345053180.0,1017770812.0,2830645444.0,766567357.0,1788864348.0,1829011199.0,2589024038.0,682977540.0,1025349325.0,1865429339.0,1783430092.0,2260790940.0,1276705561.0,1469139465.0,9873439554.0,3158596616.0,1964009182.0,1036773963.0,1545040113.0,476078155.0,59490634310.0,1479086713.0,848995066.0,1132621093.0,1422818204.0,1064083614.0,1112396615.0,1054988422.0,1093291103.0,3438328928.0,2090990042.0,2955524813.0,1518120276.0,1412785599.0,2304216457.0,880026860.0,1044905568.0,2646654800.0,1534538403.0,533351953.0,1340188403.0,1614586535.0,2275832535.0,865841645.0,2320518304.0,1320836472.0,15873
12625.0,1674415880.0,2913980727.0,1297230222.0,6521806795.0,3182886778.0,21332077.0,1444868084.0,481772300.0,2279809487.0,1429261300.0,1439626085.0,392766463.0,1262839457.0,1539965777.0,1796920726.0,1326483085.0,717836498.0,1021248372.0,422113138.0,1021806945.0,1330180037.0,1197464268.0,538312209.0,5827498555.0,1961343401.0,1177051414.0,1700318978.0,1778229393.0,2100810775.0,2165008742.0,1012739503.0,873352611.0,687992379.0,1427151728.0,3285862966.0,2527123155.0,2511535248.0,372299245.0,1023308791.0,1306299961.0,4614583047.0,1601945705.0,1127885578.0,2393298442.0,1484153957.0,594281632.0,2146225294.0,8621348079.0,1200218357.0,2252888818.0,3736306657.0,2485218918.0,1766966004.0,2878852142.0,1091312448.0,576012470.0,1443073934.0,1767478856.0,25493247.0,1982756253.0,2306454194.0,1415143418.0,1677352687.0,1763868718.0,1096874571.0,1473258536.0,1870380099.0,1746337768.0,1273075626.0,1836953736.0,687232241.0,1632310789.0,864739796.0,1025912543.0,1511439594.0,2107246447.0,17979556898.0,1825359640.0,719347893.0,1963975460.0,1731224399.0,4752131817.0,159895650.0,2870854403.0,2309519001.0,2405674102.0,2371228265.0,2240795377.0,2585876042.0,1066160691.0,2167033438.0,1123881121.0,16978039504.0,1117614339.0,2582051713.0,1514867108.0,678484991.0,1699600349.0,4329730563.0]},"kind":"numeric","n":3234,"n_null":0,"n_unique":3234,"null_rate":0.0,"stats":{"iqr":1289511584.25,"kurtosis":976.6581799018279,"max":377038917450.0,"mean":2832701709.0466914,"median":1563349650.5,"min":82093.0,"n_outliers":362,"outlier_rate":0.11193568336425479,"q1":1078544020.75,"q3":2368055605.0,"skew":27.12577025305673,"std":9186156810.034382,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+13.33"},{"code":"outliers","level":"warn","message":"14.1% rows beyond 1.5 
IQR"}],"column":"AWATER","extras":{"histogram":{"counts":[3040,92,30,25,14,3,3,7,1,3,1,0,1,0,0,0,1,1,0,2,0,3,2,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1],"edges":[0.0,649742380.225,1299484760.45,1949227140.6750002,2598969520.9,3248711901.125,3898454281.3500004,4548196661.575,5197939041.8,5847681422.025001,6497423802.25,7147166182.475,7796908562.700001,8446650942.925,9096393323.15,9746135703.375,10395878083.6,11045620463.825,11695362844.050001,12345105224.275,12994847604.5,13644589984.725,14294332364.95,14944074745.175001,15593817125.400002,16243559505.625,16893301885.85,17543044266.075,18192786646.3,18842529026.525,19492271406.75,20142013786.975002,20791756167.2,21441498547.425,22091240927.65,22740983307.875,23390725688.100002,24040468068.325,24690210448.55,25339952828.775,25989695209.0]},"sample":[45692969.0,3130850.0,36944199.0,73649840.0,1694038.0,46642098.0,2908633230.0,20847065.0,23146755.0,150143028.0,123688759.0,3530746.0,3369003.0,92565.0,31883427.0,2028433.0,14575999.0,143719930.0,25416479.0,2893322.0,86578552.0,94596811.0,20333974.0,262653307.0,7052759.0,16078776.0,1197085.0,4829339.0,12308449.0,754122.0,195352769.0,2476453.0,4125592.0,65734594.0,28690306.0,54648180.0,392073.0,18076118.0,94257003.0,61092093.0,42527190.0,6245396.0,2729213352.0,4305989.0,2188872.0,39305226.0,331279861.0,59070611.0,11582519.0,3174583.0,25864874.0,4054877012.0,20952876.0,10177387.0,9687647.0,2142396.0,43519329.0,13253159.0,16727973.0,1955432.0,74795284.0,4709127.0,15317423.0,3049206.0,8871408.0,3786764809.0,51380263.0,56580900.0,498084088.0,4909580.0,56005520.0,7359186.0,58397116.0,1074011171.0,7102447.0,17155846.0,336988229.0,10591170.0,92946097.0,25071495.0,5291052.0,1331778.0,6715105.0,111244414.0,53351779.0,74690378.0,246466621.0,3949309.0,6706075.0,75770744.0,72799687.0,317924583.0,7236323.0,132735376.0,36486613.0,164375674.0,30903211.0,18709762.0,621302197.0,1354955.0,9099930.0,13790234.0,18803193.0,2131061128.0,666826424.0,10405456.0,3455451.0,104262608.0,21138781.0,548248.0,81
655106.0,120356396.0,3466312.0,10370823.0,2385852278.0,20024887.0,1267793988.0,19219023.0,1426230792.0,5104692.0,1486125.0,5857896.0,41842145.0,133311815.0,5478267.0,26875120.0,7439336.0,40051870.0,14100641.0,4752710.0,568489460.0,1743628.0,1426287777.0,5112018.0,1004291414.0,97323497.0,92546791.0,268619696.0,3093435.0,3172088.0,16786781.0,4272458.0,5955974.0,11696847.0,7924128.0,7209242.0,1051789.0,42124187.0,4946188.0,6859888.0,6011473.0,51602808.0,11193869.0,9665551.0,858278.0,12999404.0,8711544.0,26010715.0,58773420.0,1460300913.0,448033952.0,34126776.0,246690.0,36695624.0,2003166.0,22780358.0,108544389.0,28020186.0,27208195.0,41484608.0,14503653.0,3790127.0,13955918.0,12561759.0,12838126.0,2769690.0,131080.0,60210081.0,464936.0,92486168.0,48875040.0,2953274.0,29093511.0,4843011.0,403564956.0,8641803.0,1773188.0,22541184317.0,42062887.0,6093304.0,17303404.0,17716575.0,23903849.0,663488658.0,2591016.0,67184110.0,62748513.0,27945135.0,12557304.0,1281011355.0,6062721.0,57827591.0,7110638.0,392508756.0,42523319.0,3917258.0,604769755.0,120413773.0,83594162.0,239912003.0,13646459.0,9157037.0,8105326.0,35348695.0,13292008.0,22673964.0,5935948.0,3001523.0,478920605.0,167300393.0,19106096.0,9885352.0,56022945.0,16503210.0,4692959.0,1704834.0,195282199.0,1041214321.0,3716675.0,11010427.0,18672319.0,95966884.0,3986292.0,1609.0,360020725.0,9766894.0,9732754.0,47038990.0,563843611.0,8320823.0,19739769.0,44549509.0,87260610.0,16954253.0,1203586.0,90969261.0,10471700.0,14490388.0,349724678.0,553147948.0,43618226.0,82757751.0,3682665.0,7008436.0,413983093.0,539363457.0,8324262.0,1118491.0,24889123.0,825726.0,29525370.0,32912693.0,18679734.0,22621093.0,9537137.0,22915103.0,24720769.0,15873993.0,6783009.0,742162038.0,32751002.0,416872439.0,73737483.0,20992254.0,7546060.0,176317.0,14607647.0,193063921.0,2352217.0,5502986.0,140294391.0,10687958.0,28813485.0,10604264.0,11567409.0,6904087.0,19104286.0,1446632980.0,11147628.0,45556080.0,13758448.0,16140411.0,46966086.0,32375524.0,1190
103013.0,159620276.0,36783591.0,11312724.0,42271836.0,1139619647.0,154293639.0,33767224.0,106217126.0,3473068812.0,22649478.0,883412483.0,4887924296.0,8246941.0,2723997.0,94707741.0,3525240.0,78025726.0,13122562.0,19666288.0,67985729.0,2316559.0,549280913.0,71705191.0,19254559.0,1674243852.0,237183770.0,13686670.0,17396672.0,21058193.0,1632150.0,35722738.0,7073996.0,6830041.0,433660.0,24410722.0,36936963.0,9507394.0,5388562.0,1567629.0,6959141.0,46620495.0,41531993.0,4482698.0,830292614.0,10984167.0,2340247.0,53230151.0,7586186.0,2361588.0,51308328.0,119733774.0,55266767.0,631299424.0,172818530.0,5436957.0,8369917.0,49752941.0,9191488.0,9219640.0,71830221.0,11691708.0,1345742564.0,13263789.0,68682286.0,9173803.0,22668882.0,7511429.0,17175565.0,2760902.0,9505389.0,12793700.0,6137411.0,13866648.0,556134.0,4995711.0,126988884.0,20701156.0,8834926.0,17582462.0,13780758093.0,5223042.0,22247645.0,17356399.0,338203448.0,17062367.0,19102189.0,8875327.0,70659566.0,462135107.0,28838468.0,13688537.0,8153423.0,12511257.0,42104537.0,12347127.0,5255279.0,2793754477.0,13489383.0,64091345.0,27503345.0,17428816.0,72745054.0,17588580.0,647342.0,10174044.0,81689126.0,25596214.0,73275528.0,3764606.0,15207397.0,38917581.0,105290.0,64851532.0,32052581.0,246409.0,7077610.0,1460444966.0,11214924.0,32153759.0,29438413.0,52260791.0,4314212.0,12306254.0,108583505.0,29822845.0,43510634.0,4792745.0,65273639.0,10786432.0,42750488.0,38007451.0,15026665.0,8942802.0,21169489.0,51385065.0,1238628.0,40071739.0,19665031.0,156144.0,4576816.0,24278846.0,191364281.0,1882216.0,1832723.0,442187072.0,10869534.0,26212115.0,99781059.0,10166468.0,346846.0,447448.0,76078926.0,18789749.0,31991710.0,14338600.0,19259026.0,64163829.0,236969801.0,10488658.0,32681784.0,11560688.0,781202.0,3757608.0,12761090.0,146287.0,78834956.0,11962259.0,14903733.0,58366943.0,39013276.0,34967238.0,5339036.0,33740130.0,22149577.0,25886753.0,10813161.0,42745198.0,36012936.0,28502982.0,124957891.0,48312243.0,572958789.0,891073714.0,37
74635.0,308635080.0,13892745.0,86410055.0,15923999.0,11563208.0,81011709.0,996418659.0,8755301.0,9760311.0,8723372.0,154371160.0,12611136.0,38030738.0,28784118.0,7804886156.0,1391966.0,31558986.0,15326897.0,5623155.0,4666513.0,16913419.0]},"kind":"numeric","n":3234,"n_null":0,"n_unique":3234,"null_rate":0.0,"stats":{"iqr":54155886.25,"kurtosis":215.85464255324104,"max":25989695209.0,"mean":220188953.10513297,"median":19505620.5,"min":0.0,"n_outliers":456,"outlier_rate":0.14100185528756956,"q1":7043835.5,"q3":61199721.75,"skew":13.326108825445784,"std":1225718213.0548978,"zero_rate":0.00030921459492888067}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"INTPTLAT","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[10.5,10.525,10.55,10.575,10.6,10.625,10.65,10.675,10.7,10.725,10.75,10.775,10.8,10.825,10.85,10.875,10.9,10.925,10.95,10.975,11.0,11.025,11.05,11.075,11.1,11.125,11.15,11.175,11.2,11.225,11.25,11.275,11.3,11.325,11.35,11.375,11.4,11.425,11.45,11.475,11.5]},"near_unique":true,"sample":["+40.7835474","+37.1411668","+40.2726454","+30.4934867","+41.0108798","+43.2749637","+40.8678400","+37.8501957","+35.6539945","+27.0532315","+37.0878462","+35.2854541","+32.4095857","+41.6275976","+44.7527120","+40.5849801","+37.1065898","+41.2168513","+37.8477561","+40.2369926","+36.7151802","+46.1106181","+34.6522011","+37.3664637","+35.2468623","+40.0176819","+32.9318277","+43.9781089","+38.2025623","+45.7370443","+34.1050824","+38.8470929","+33.9016208","+34.6795117","+39.8225286","+37.3610537","+43.6452331","+41.1411327","+35.8447130","+35.5546109","+40.33
68872","+40.8144710","+34.9282721","+37.7338227","+37.7860959","+38.9551537","+45.4558895","+63.8649972","+48.2166858","+39.0970884"],"top_values":[],"top_words":[["+41.9158651",1],["+46.2946377",1],["+34.3592729",1],["+40.7835474",1],["+40.1764918",1],["+18.1871483",1],["+43.6674723",1],["+30.8852677",1],["+39.5769252",1],["+36.7272577",1],["+41.0002170",1],["+34.0684364",1],["+33.2703999",1],["+34.9641790",1],["+40.7716274",1],["+33.4806126",1],["+43.4280010",1],["+35.8570011",1],["+45.5937530",1],["+40.6841401",1],["+32.0167482",1],["+38.3864909",1],["+38.0316514",1],["+35.7049939",1],["+41.6275976",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":3234,"n_null":0,"n_unique":3234,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":11,"len_mean":11.0,"len_median":11.0,"len_min":11,"len_p95":11.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3234,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 
chars"}],"column":"INTPTLON","extras":{"language_counts":{},"language_sample_size":3234,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[11.5,11.525,11.55,11.575,11.6,11.625,11.65,11.675,11.7,11.725,11.75,11.775,11.8,11.825,11.85,11.875,11.9,11.925,11.95,11.975,12.0,12.025,12.05,12.075,12.1,12.125,12.15,12.175,12.2,12.225,12.25,12.275,12.3,12.325,12.35,12.375,12.4,12.425,12.45,12.475,12.5]},"near_unique":true,"sample":["-096.6886584","-078.0538655","-080.5786910","-096.6220912","-080.7703956","-091.3827510","-099.8155833","-094.3415972","-088.3876742","-098.7475716","-083.3886172","-083.1239657","-082.5708819","-111.2402269","-073.7056429","-086.5651412","-093.8305507","-098.5133417","-082.8301215","-089.9135746","-090.4031300","-101.0612840","-090.3717697","-090.9722821","-080.8338317","-080.9677268","-109.8783103","-094.6137980","-084.2098602","-097.5808695","-097.8388896","-092.8100688","-108.3919284","-091.2033099","-085.7731503","-077.0541700","-084.8394245","-115.3514239","-076.5722906","-097.4094007","-075.1070600","-090.9412464","-107.9926805","-108.5062192","-089.3812119","-096.2012619","-123.7592988","-143.2186282","-101.5405369","-075.5029819"],"top_values":[],"top_words":[["-096.7885168",1],["-123.4244583",1],["-104.3686961",1],["-096.6886584",1],["-098.0468422",1],["-065.8711890",1],["-096.7957261",1],["-099.8588613",1],["-120.5219926",1],["-085.1360977",1],["-083.6659471",1],["-101.8228879",1],["-085.8635254",1],["-101.3566363",1],["-084.1061032",1],["-082.4795333",1],["-089.9433184",1],["-092.1404819",1],["-117.1855796",1],["-080.3507209",1],["-089.1191761",1],["-093.7926278",1],["-106.2346662",1],["-079.2514542",1],["-111.2402269",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3234,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666
,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":3234,"n_null":0,"n_unique":3234,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":12,"len_mean":12.0,"len_median":12.0,"len_min":12,"len_p95":12.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3234,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 98.4% of rows"}],"column":"geometry_type","extras":{"singletons":0,"top_values":[["Polygon",3181],["MultiPolygon",53]]},"kind":"categorical","n":3234,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.12065107029368592,"entropy_ratio":0.12065107029368592,"top_rate":0.9836116264687693,"top_value":"Polygon"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","NAME.n_unique","NAME.top_values","NAME.n_duplicates","ALAND.skew","ALAND.outlier_rate","AWATER.outlier_rate","CBSAFP.null_rate","STATEFP.n_unique","NAMELSAD.top_words","CLASSFP.top_rate"],"featured_charts":[{"caption":"Look for the top repeated county names \u2014 'Washington' (31), 'Jefferson' (26), and 'Franklin' (26) reveal how heavily historical naming conventions cluster across states.","column":"NAME","kind":"bar"},{"caption":"Texas (FIPS 48) dominates with 254 counties, nearly double the next largest state, highlighting how county counts vary dramatically by state.","column":"STATEFP","kind":"bar"},{"caption":"The extreme right skew (skew = 27.1) shows most counties are modest in land area while a handful of outliers are orders of magnitude larger \u2014 check 
these before any area-weighted calculation.","column":"ALAND","kind":"histogram"},{"caption":"About 41% of counties have no CBSAFP code, meaning they fall outside any core-based statistical area \u2014 a large rural segment that metro-focused analysis would miss.","column":"CBSAFP","kind":"donut"},{"caption":"LSAD code '06' (standard county) accounts for 93% of records, with smaller slices for municipios, parishes, and independent cities worth isolating for jurisdiction-type comparisons.","column":"LSAD","kind":"donut"}],"model":"anthropic:default","narrative":"This dataset is a US county-level geographic reference file containing 3,234 county (and county-equivalent) records across 56 state FIPS codes, with spatial attributes, area measurements, and metropolitan area classifications. The most notable pattern is that roughly 41% of counties share a name with at least one other county \u2014 'Washington' alone appears 31 times \u2014 reflecting the historic reuse of patriotic and presidential names across states. Two numeric columns, ALAND (land area) and AWATER (water area), show extreme right skew with over 11\u201314% outliers, meaning a small number of counties are vastly larger or wetter than the median, which warrants attention in any area-weighted analysis. Additionally, over 40% of counties have no CBSAFP code (no core-based statistical area assignment), indicating a large rural, non-metro population of counties that could easily be overlooked in urban-focused analyses.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","n","duplicate_rate","null_rate","len_min","len_max","len_mean","allcaps_rate","one_word_rate","top_words"],"model":"anthropic:default","narrative":"COUNTYNS is a FIPS-style county National Standard (ANSI/GNIS) code \u2014 an 8-character, zero-padded numeric identifier assigned by the U.S. Geological Survey to uniquely identify counties. 
Every one of the 3,234 rows carries a distinct value (duplicate_rate 0.0, n_unique 3,234) with no nulls, and all values are exactly 8 characters long (len_min = len_max = 8), consistent with the fixed-width GNIS format. The perfect uniqueness and fixed length make this a reliable surrogate key for county-level joins to official geographic reference tables.","role":"identifier","scope":"column","target":"COUNTYNS","treatment":"Use as a join key against TIGER/GNIS county reference data; do not encode or embed."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","null_rate","len_min","len_max","len_mean","allcaps_rate","vocab_size","top_words"],"model":"anthropic:default","narrative":"GEOID is a US Census geographic identifier column containing 5-digit FIPS county codes (e.g., '06091', '48327'), where the first two digits encode state and the last three encode county. Every value is exactly 5 characters long (len_min=5, len_max=5, len_mean=5.0), perfectly unique across all 3,234 rows with zero nulls or duplicates, confirming this is a primary key for county-level geographic records. The allcaps_rate of 1.0 is a classifier artifact \u2014 these are numeric strings, not alphabetic text. The vocab_size of 3,234 matching n_unique=3,234 means this dataset likely covers a near-complete set of US counties (there are ~3,243 counties/equivalents in the US).","role":"identifier","scope":"column","target":"GEOID","treatment":"Use as a primary key for joining to Census TIGER shapefiles or other county-level datasets; zero-pad-preserve when merging (already 5 chars)."},{"confidence":"high","critiques":[],"evidence_keys":["column","n_unique","n","duplicate_rate","len_min","len_max","len_mean","top_words","null_rate","allcaps_rate"],"model":"anthropic:default","narrative":"INTPTLAT is the internal point latitude coordinate for geographic entities (a standard Census Bureau field name), stored as a fixed-width text string rather than a numeric type. 
Every one of the 3,234 rows is unique, all values are exactly 11 characters long (e.g. '+41.9158651'), and the duplicate rate is 0.0, confirming these are precise geographic identifiers. The surprising signal is that a coordinate stored as text with 'allcaps' and 'one_word' alerts was profiled as a string column \u2014 it should be numeric but the leading '+' sign likely forced text treatment.","role":"feature","scope":"column","target":"INTPTLAT","treatment":"Strip leading '+', cast to float64, and use as a numeric geographic coordinate in modelling or spatial joins."},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","duplicate_rate","len_min","len_max","len_mean","null_rate","top_words"],"model":"anthropic:default","narrative":"INTPTLON is the internal point longitude field, a standard Census Bureau coordinate column storing the longitude of a representative point within each geographic entity. Every one of the 3,234 rows is unique, all values are exactly 12 characters long (mean, median, min, and max all equal 12), and the duplicate rate is 0.0 \u2014 consistent with precise decimal-degree coordinates stored as fixed-format strings. All sampled values are negative (Western Hemisphere), ranging roughly from -065 to -143 degrees, aligning with US continental and territory coverage.","role":"feature","scope":"column","target":"INTPTLON","treatment":"Parse to float64 for geospatial use; pair with INTPTLAT to form a coordinate pair for mapping or spatial joins."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","one_word_rate","len_median","vocab_size","n","n_unique"],"model":"anthropic:default","narrative":"This column contains names of U.S. counties or county-equivalent administrative divisions, dominated by patriotic/presidential surnames (Washington, Jefferson, Franklin, Lincoln, Jackson, Madison) and geographic terms (Union, Lake, Montgomery, Marion). 
The duplicate rate of 40.5% (1,311 out of 3,234 rows) is expected given that common county names repeat across states, but analysts should note this column alone cannot serve as a unique identifier. The vocabulary of 1,958 tokens across 3,234 rows and a median length of 7 characters confirms these are short, single-word labels in most cases (93.1% one-word rate).","role":"label","scope":"column","target":"NAME","treatment":"Combine with a state column to form a composite key before joining or aggregating."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","n_unique","word_mean","len_median"],"model":"anthropic:default","narrative":"NAMELSAD is a US Census Legal/Statistical Area Description field containing the full human-readable name of county-equivalent geographic units (e.g., 'Washington County', 'Jefferson Parish'). The word 'county' appears in 3,007 of 3,234 rows, with 'municipio' (78) and 'parish' (64) indicating Puerto Rico and Louisiana records respectively. The 39.1% duplicate rate (1,265 duplicates across 1,969 unique values) is fully expected \u2014 common county names like 'Washington County' (30 occurrences) repeat across different US states. The multi-word structure (mean 2.08 words) and short length (median 14 chars) are consistent with standardized geographic labels.","role":"label","scope":"column","target":"NAMELSAD","treatment":"Use as a display label; join on a GEOID/FIPS key for spatial analysis rather than matching on this name alone due to cross-state duplicates."},{"confidence":"high","critiques":[],"evidence_keys":["skew","kurtosis","median","max","min","n_outliers","outlier_rate","n","n_unique"],"model":"anthropic:default","narrative":"ALAND is a US Census land area field (measured in square metres), representing the land area of each geographic entity \u2014 likely counties or census tracts given n=3,234. 
The distribution is extremely right-skewed (skew=27.13, kurtosis=976.66): while the median is ~1.56 billion m\u00b2, the max reaches 377 billion m\u00b2, roughly 241\u00d7 the median, indicating a small number of very large geographic units (e.g., western US counties). 362 values (11.2%) are flagged as outliers, consistent with the well-known size disparity between densely subdivided eastern counties and sprawling western ones.","role":"feature","scope":"column","target":"ALAND","treatment":"Log-transform before use in any distance-based or linear model to reduce skew from 27.13; consider using as a normalisation denominator for density features."},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","null_rate","stats.skew","stats.kurtosis","stats.median","stats.mean","stats.max","stats.min","stats.zero_rate","stats.n_outliers","stats.outlier_rate","alerts"],"model":"anthropic:default","narrative":"AWATER is almost certainly a US Census TIGER/Line water area field, representing the total water surface area (in square meters) of a geographic unit such as a county or census tract. All 3,234 rows are unique and non-null, consistent with one record per geographic entity. The distribution is extremely right-skewed (skew 13.33, kurtosis 215.85): the median is ~19.5 million m\u00b2 while the mean balloons to ~220 million m\u00b2, and the maximum reaches ~26 billion m\u00b2 \u2014 about 118\u00d7 the mean \u2014 with 456 outliers (14.1% of rows) driven by large water-heavy units like coastal counties or Great Lakes-adjacent areas. 
Only 1 record has a zero value, which is plausible for fully land-locked units.","role":"feature","scope":"column","target":"AWATER","treatment":"Log-transform (log1p to handle the single zero) before regression or clustering to reduce extreme skew."},{"confidence":"high","critiques":[],"evidence_keys":["column","n_unique","null_rate","top_value","top_rate","entropy_ratio","alerts"],"model":"anthropic:default","narrative":"CBSAFP is a Core Based Statistical Area (CBSA) FIPS code, a U.S. Census geographic identifier linking records to metropolitan or micropolitan statistical areas. With 939 unique codes across 3,234 rows, the distribution is notably flat (entropy_ratio 0.94), meaning records are spread thinly across many areas rather than concentrated. The null rate of 40.75% is a significant concern \u2014 likely representing locations outside any defined CBSA (rural areas), which is a meaningful geographic signal rather than simple missingness. The most frequent value '41980' (the San Juan, PR metropolitan area) appears only 40 times (~2.1% of non-null rows), confirming no single area dominates.","role":"foreign_key","scope":"column","target":"CBSAFP","treatment":"Treat nulls as a distinct 'non-CBSA/rural' category; left-join to CBSA reference table for region labels, then encode as categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","entropy_ratio","top_values","n"],"model":"anthropic:default","narrative":"CLASSFP is the FIPS functional classification code for geographic/administrative entities, almost certainly counties and county equivalents in a US Census dataset. The distribution is severely imbalanced: 'H1' (active counties or statistically equivalent entities) accounts for 96.3% of the 3,234 rows, with the remaining four codes (C7, H6, H4, H5) collectively covering only 119 records. 
The entropy ratio of 0.128 confirms near-minimal informational content, meaning this column carries little discriminatory power as a feature in its current form.","role":"label","scope":"column","target":"CLASSFP","treatment":"Treat as a near-constant; consider dropping or collapsing into a binary flag (H1 vs. other) if used in modelling, given 96.3% dominance of a single class."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","entropy_ratio","top_values","n"],"model":"anthropic:default","narrative":"FUNCSTAT is a U.S. Census functional status code, classifying geographic or administrative entities by their operational state (e.g., 'A' = active, 'F' = fictitious, 'C' = consolidated). The distribution is severely imbalanced: 'A' accounts for 96.35% of the 3,234 records, while the remaining 6 categories together cover only 118 rows \u2014 with 'G' appearing just once. Entropy ratio of 0.107 confirms near-minimal informational diversity. Minority classes may warrant special handling but will be extremely difficult to model as targets.","role":"label","scope":"column","target":"FUNCSTAT","treatment":"One-hot encode with caution; collapse rare categories (F, C, N, S, B, G \u2014 totalling 118 rows) into an 'other' bucket or treat as a filter/stratification variable rather than a model feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","top_value","top_rate","entropy","null_rate"],"model":"anthropic:default","narrative":"MTFCC is a MAF/TIGER Feature Class Code, a U.S. Census Bureau classification code for geographic features. Every single one of the 3,234 rows carries the identical value 'G4020' (which corresponds to a county or equivalent feature), with zero nulls and an entropy of 0.0 \u2014 this column is entirely constant across the dataset. 
It carries no discriminatory signal whatsoever.","role":"label","scope":"column","target":"MTFCC","treatment":"Drop before modelling; zero-variance constant column adds no information."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","entropy","n_unique","top_values"],"model":"anthropic:default","narrative":"This column classifies the geometric representation type of spatial features, distinguishing between 'Polygon' and 'MultiPolygon' geometries across 3,234 records. The severe class imbalance is the standout signal: 'Polygon' dominates at 98.36% (3,181 records), leaving 'MultiPolygon' as a rare minority at only 53 occurrences (1.64%). The near-zero entropy (0.121) confirms this column carries almost no information variance, which limits its predictive utility.","role":"feature","scope":"column","target":"geometry_type","treatment":"Flag MultiPolygon rows for geometry normalisation (explode to single polygons) before spatial analysis; otherwise low entropy makes this near-useless as a model feature."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","n_unique","cardinality","entropy_ratio","top_rate","top_value","n"],"model":"anthropic:default","narrative":"CSAFP is likely a Combined Statistical Area FIPS (or similar geographic area code), given the numeric-string values in the hundreds range and cardinality of 175 \u2014 consistent with a US metropolitan/micropolitan area classification code. The most alarming signal is a 61.16% null rate, meaning nearly two-thirds of the 3,234 rows carry no value, which likely indicates records that do not belong to any defined statistical area. 
The distribution is nearly flat across all 175 codes (entropy ratio 0.936, top value '490' appears only 3.8% of the time), suggesting no single area dominates.","role":"feature","scope":"column","target":"CSAFP","treatment":"Impute nulls with a sentinel 'none/rural' category before use; treat as nominal categorical and one-hot or target-encode given 175 levels."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","entropy_ratio","top_values","null_rate"],"model":"anthropic:default","narrative":"COUNTYFP is a FIPS county code \u2014 a standardized 3-digit zero-padded numeric string used in US geographic identifiers. With 330 unique values across 3,234 rows and a high entropy ratio of 0.85, codes are broadly distributed with near-uniform frequency: the most common value ('003') appears only 50 times (~1.5% top_rate). The sequential odd-number pattern in top values (001, 003, 005, 007\u2026) is characteristic of FIPS county numbering conventions, confirming this is a geographic lookup key rather than a raw feature.","role":"foreign_key","scope":"column","target":"COUNTYFP","treatment":"Left-join on COUNTYFP (combined with state FIPS) to enrich with county-level attributes; do not encode ordinally."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","cardinality","entropy_ratio","n","null_rate"],"model":"anthropic:default","narrative":"LSAD (Legal/Statistical Area Description) is a Census Bureau code that classifies geographic entities by type \u2014 values like '06' (county), '13', '15', '25' are standard LSAD codes. The distribution is severely dominated by code '06', which accounts for 3,007 of 3,234 rows (92.98%), indicating this dataset is overwhelmingly composed of one entity type (most likely counties). 
With only 11 unique values and near-zero entropy ratio (0.156), this column carries very little discriminative information despite being semantically meaningful.","role":"label","scope":"column","target":"LSAD","treatment":"Use as a stratification or filter variable; consider one-hot encoding if modelling across entity types, but note extreme class imbalance driven by code '06'."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","n","top_value","top_rate","top_values","entropy_ratio","null_rate"],"model":"anthropic:default","narrative":"STATEFP is the U.S. Census Bureau FIPS state code, a two-digit numeric string identifying each U.S. state or territory. With exactly 56 unique values and 3,234 rows it likely represents one record per county or similar sub-state geographic unit. The top value '48' (Texas, 254 rows) alone accounts for 7.85% of all records \u2014 consistent with Texas having the most counties of any U.S. state \u2014 while values like '13' (Georgia, 159) and '51' (Virginia, 133) also rank high, reflecting those states' large county counts. 
The entropy ratio of 0.919 indicates a fairly even spread across states despite Texas's dominance.","role":"label","scope":"column","target":"STATEFP","treatment":"Use as a categorical grouping key or left-join with a FIPS lookup table to enrich with state names and region attributes; do not treat as numeric."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":5165,"prompt_tokens":18307,"total_tokens":23472}},"language_counts":{},"meta":{"generated_at":"2026-06-22T01:06:32+00:00","mode":"full","row_count":3234,"sampled_rows":3234,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/geographic/counties_simplified.geojson"},"notes":[],"saturn_version":"0.2.0","schema":{"ALAND":"numeric","AWATER":"numeric","CBSAFP":"categorical","CLASSFP":"categorical","COUNTYFP":"categorical","COUNTYNS":"text","CSAFP":"categorical","FUNCSTAT":"categorical","GEOID":"text","INTPTLAT":"text","INTPTLON":"text","LSAD":"categorical","METDIVFP":"categorical","MTFCC":"categorical","NAME":"text","NAMELSAD":"text","STATEFP":"categorical","geometry_type":"categorical"}}
