{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"20 singleton categories"}],"column":"state_name","extras":{"singletons":20,"top_values":[["Alabama",1],["Alaska",1],["Arizona",1],["Arkansas",1],["California",1],["Colorado",1],["Connecticut",1],["Delaware",1],["District of Columbia",1],["Florida",1],["Georgia",1],["Hawaii",1],["Idaho",1],["Illinois",1],["Indiana",1],["Iowa",1],["Kansas",1],["Kentucky",1],["Louisiana",1],["Maine",1]]},"kind":"categorical","n":20,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"cardinality":20,"entropy":4.321928094887363,"entropy_ratio":1.0,"top_rate":0.05,"top_value":"Alabama"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+2.42"},{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 IQR"}],"column":"snap_eligible_est","extras":{"histogram":{"counts":[16,2,1,0,1],"edges":[75227.0,997236.0,1919245.0,2841254.0,3763263.0,4685272.0]},"sample":[768897.0,75227.0,916876.0,475729.0,4685272.0,540105.0,355692.0,107790.0,98039.0,2725633.0,1415573.0,135941.0,198788.0,1467893.0,810702.0,343141.0,329870.0,704758.0,842038.0,145491.0]},"kind":"numeric","n":20,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"iqr":675283.75,"kurtosis":5.576534484948493,"max":4685272.0,"mean":857172.75,"median":507917.0,"min":75227.0,"n_outliers":2,"outlier_rate":0.1,"q1":185463.75,"q3":860747.5,"skew":2.420928038840654,"std":1103021.6626484615,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+2.42"},{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 
IQR"}],"column":"snap_participants_est","extras":{"histogram":{"counts":[16,2,1,0,1],"edges":[50403.0,668149.2,1285895.4,1903641.5999999999,2521387.8,3139134.0]},"sample":[515167.0,50403.0,614308.0,318738.0,3139134.0,361870.0,238315.0,72220.0,65686.0,1826177.0,948429.0,91081.0,133186.0,983489.0,543172.0,229905.0,221013.0,472189.0,564167.0,97480.0]},"kind":"numeric","n":20,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"iqr":452442.75,"kurtosis":5.576534535167227,"max":3139134.0,"mean":574306.45,"median":340304.0,"min":50403.0,"n_outliers":2,"outlier_rate":0.1,"q1":124259.5,"q3":576702.25,"skew":2.4209281026442335,"std":739024.8662252936,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+2.42"},{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 IQR"}],"column":"snap_gap","extras":{"histogram":{"counts":[16,2,1,0,1],"edges":[24824.0,329086.8,633349.6,937612.3999999999,1241875.2,1546138.0]},"sample":[253730.0,24824.0,302568.0,156991.0,1546138.0,178235.0,117377.0,35570.0,32353.0,899456.0,467144.0,44860.0,65602.0,484404.0,267530.0,113236.0,108857.0,232569.0,277871.0,48011.0]},"kind":"numeric","n":20,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"iqr":222841.0,"kurtosis":5.576534382285054,"max":1546138.0,"mean":282866.3,"median":167613.0,"min":24824.0,"n_outliers":2,"outlier_rate":0.1,"q1":61204.25,"q3":284045.25,"skew":2.4209279091606835,"std":363996.7964314058,"zero_rate":0.0}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct 
value"}],"column":"snap_gap_pct","extras":{"histogram":{"counts":[0,0,20,0,0],"edges":[32.5,32.7,32.9,33.1,33.3,33.5]},"sample":[33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0]},"kind":"numeric","n":20,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":33.0,"mean":33.0,"median":33.0,"min":33.0,"n_outliers":0,"outlier_rate":0.0,"q1":33.0,"q3":33.0,"skew":0.0,"std":0.0,"zero_rate":0.0}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct value"}],"column":"snap_enrollment_rate","extras":{"histogram":{"counts":[0,0,20,0,0],"edges":[66.5,66.7,66.9,67.1,67.3,67.5]},"sample":[67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0,67.0]},"kind":"numeric","n":20,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":67.0,"mean":67.0,"median":67.0,"min":67.0,"n_outliers":0,"outlier_rate":0.0,"q1":67.0,"q3":67.0,"skew":0.0,"std":0.0,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["snap_enrollment_rate.stats.mean","snap_gap_pct.stats.mean","snap_eligible_est.stats.max","snap_eligible_est.stats.median","snap_eligible_est.n_outliers","snap_gap.stats.max","snap_gap.stats.median","snap_participants_est.n_outliers","snap_eligible_est.stats.skew"],"featured_charts":[{"caption":"Look for the 2 outlier states whose eligible populations (up to 4,685,272) vastly exceed the median of 507,917 \u2014 these will skew any aggregate analysis.","column":"snap_eligible_est","kind":"bar"},{"caption":"The unenrolled gap ranges from 24,824 to 1,546,138 across states; identify which states carry the largest absolute burden of unenrolled eligible residents.","column":"snap_gap","kind":"bar"},{"caption":"The right-skewed distribution of actual participants shows most states cluster at lower counts, with a long tail driven by a few large 
states.","column":"snap_participants_est","kind":"histogram"},{"caption":"Use this as the index axis when comparing states \u2014 each of the 20 states appears exactly once, confirming one row per state.","column":"state_name","kind":"bar"}],"model":"anthropic:default","narrative":"This dataset tracks SNAP (food stamp) program enrollment across 20 U.S. jurisdictions (19 states plus the District of Columbia), capturing estimated eligible populations, estimated participants, and the resulting coverage gap. Two things stand out immediately: first, the enrollment rate and gap percentage are constant across all states (67% enrolled, 33% gap), suggesting these are summary-level figures rather than state-specific calculations \u2014 they should not be used for cross-state comparison. Second, the three population-count columns (eligible, participants, gap) are all heavily right-skewed with 2 outliers each, pointing to a small number of very large states \u2014 likely California and/or Florida \u2014 that dwarf the rest and will dominate any totals-based analysis.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["stats.min","stats.max","stats.median","stats.mean","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","alerts"],"model":"anthropic:default","narrative":"This column represents estimated counts of SNAP-eligible individuals at the state level (one row per state_name value). Values span a wide range from 75,227 to 4,685,272, with a median of 507,917 well below the mean of 857,172.75 \u2014 a classic signature of population-size data. 
The distribution is heavily right-skewed (skew = 2.42, kurtosis = 5.58) with 2 outliers (10% of rows), most likely the most populous states in the dataset pulling the tail sharply upward.","role":"feature","scope":"column","target":"snap_eligible_est","treatment":"Log-transform before regression or clustering to reduce right-skew and dampen outlier influence."},{"confidence":"medium","critiques":[],"evidence_keys":["skew","kurtosis","median","mean","max","min","iqr","q1","n_outliers","outlier_rate","n_unique","n"],"model":"anthropic:default","narrative":"snap_gap is a numeric column holding the SNAP coverage gap: the estimated number of eligible residents who are not enrolled, equal to snap_eligible_est minus snap_participants_est (e.g., 768,897 minus 515,167 gives 253,730). With only 20 rows and 20 unique values, every observation is distinct. The distribution is heavily right-skewed (skew = 2.42, kurtosis = 5.58): the median is 167,613 while the mean is pulled to 282,866 by a long upper tail, and 2 outliers (10% of rows) push up to a maximum of 1,546,138 \u2014 roughly 62\u00d7 the minimum of 24,824. The IQR of 222,841 spans a wide range relative to Q1 of 61,204, signalling high dispersion.","role":"feature","scope":"column","target":"snap_gap","treatment":"log-transform before regression or modelling to reduce skew; investigate the 2 outliers (1,546,138 and 899,456) for validity before inclusion."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","n","n_unique","n_outliers","outlier_rate","iqr","q1","q3"],"model":"anthropic:default","narrative":"This column represents estimated SNAP (Supplemental Nutrition Assistance Program) participant counts, likely at the state or large geographic-unit level given the scale of values ranging from 50,403 to 3,139,134. With only 20 rows and 20 unique values, each record appears to represent a distinct entity (e.g., a U.S. state or territory). 
The distribution is heavily right-skewed (skew = 2.42, kurtosis = 5.58) with 2 outliers (10% of the dataset), suggesting a small number of very large states\u2014likely California and Florida\u2014dominate the upper tail, while the median (340,304) sits well below the mean (574,306).","role":"feature","scope":"column","target":"snap_participants_est","treatment":"Log-transform before regression or clustering to reduce right skew and outlier influence."},{"confidence":"high","critiques":[],"evidence_keys":["alerts","n_unique","stats.std","stats.min","stats.max","stats.mean","stats.iqr","n"],"model":"anthropic:default","narrative":"This column represents a SNAP (Supplemental Nutrition Assistance Program) enrollment rate, likely expressed as a percentage. Every single one of the 20 rows holds the identical value of 67.0, with zero variance, zero IQR, and a standard deviation of 0.0 \u2014 the column is perfectly constant. This is a strong signal that the value was hardcoded, imputed with a single figure, or the dataset captures a single geographic/temporal stratum where this rate was applied uniformly. It carries no predictive or descriptive information across rows.","role":"feature","scope":"column","target":"snap_enrollment_rate","treatment":"Drop before modelling \u2014 zero-variance constant column provides no information and will cause issues with many estimators."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.min","stats.max","stats.mean","stats.std","stats.iqr","alerts"],"model":"anthropic:default","narrative":"This column represents the SNAP gap percentage: the share of SNAP-eligible residents who are not enrolled, i.e. the complement of snap_enrollment_rate (67%). 
Across all 20 rows the value is identically 33.0 with zero variance, zero nulls, and a single unique value \u2014 making it a degenerate constant with no discriminative power whatsoever.","role":"metadata","scope":"column","target":"snap_gap_pct","treatment":"Drop before modelling; constant column provides zero information gain and will cause issues in variance-sensitive methods."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","cardinality","entropy_ratio","top_rate","top_value","top_values"],"model":"anthropic:default","narrative":"This column contains US state (and territory) names, with exactly 20 unique values across 20 rows \u2014 every row holds a distinct state name, yielding a perfect entropy ratio of 1.0 and a top_rate of 0.05. The dataset appears to be a small, deduplicated lookup or summary table with one row per geographic unit rather than a transactional dataset. The 'long_tail' alert is a statistical artifact of the perfectly uniform distribution, not a genuine skew concern.","role":"label","scope":"column","target":"state_name","treatment":"Use as a join key or display label; consider mapping to standard FIPS codes if merging with other geographic datasets."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":2033,"prompt_tokens":5250,"total_tokens":7283}},"language_counts":{},"meta":{"generated_at":"2026-06-21T23:48:01+00:00","mode":"full","row_count":20,"sampled_rows":20,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/urban/food_deserts/snap_gap_states.json"},"notes":[],"saturn_version":"0.2.0","schema":{"snap_eligible_est":"numeric","snap_enrollment_rate":"numeric","snap_gap":"numeric","snap_gap_pct":"numeric","snap_participants_est":"numeric","state_name":"categorical"}}
