{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"92 singleton categories"}],"column":"domain","extras":{"singletons":92,"top_values":[["google.com",1],["youtube.com",1],["facebook.com",1],["wikipedia.org",1],["instagram.com",1],["bing.com",1],["reddit.com",1],["x.com",1],["chatgpt.com",1],["yandex.ru",1],["whatsapp.com",1],["amazon.com",1],["yahoo.com",1],["weather.com",1],["duckduckgo.com",1],["microsoftonline.com",1],["twitch.tv",1],["linkedin.com",1],["live.com",1],["fandom.com",1]]},"kind":"categorical","n":92,"n_null":0,"n_unique":92,"null_rate":0.0,"stats":{"cardinality":92,"entropy":6.523561956057014,"entropy_ratio":1.0000000000000002,"top_rate":0.010869565217391304,"top_value":"google.com"}},{"alerts":[],"column":"wave_rank","extras":{"histogram":{"counts":[27,17,10,5,7,6,2,4,5],"edges":[1183.0,111173.11111111111,221163.22222222222,331153.3333333333,441143.44444444444,551133.5555555555,661123.6666666666,771113.7777777778,881103.8888888889,991094.0]},"sample":[39740.0,486040.0,806801.0,149882.0,107490.0,303707.0,327331.0,49354.0,100908.0,382348.0,91633.0,113218.0,689672.0,158577.0,98949.0,127269.0,580312.0,344899.0,209361.0,63923.0,433709.0,293580.0,42596.0,211352.0,706046.0,117191.0,299600.0,258538.0,469216.0,72084.0,154786.0,558323.0,589121.0,122728.0,991094.0,75157.0,313911.0,520849.0,556068.0,47884.0,161107.0,918763.0,957606.0,39750.0,74786.0,74784.0,807514.0,448048.0,269006.0,203569.0,39757.0,602431.0,153704.0,162449.0,268296.0,389103.0,809778.0,89270.0,1183.0,6384.0,39756.0,508330.0,886805.0,259513.0,54643.0,137744.0,151489.0,39770.0,39773.0,3773.0,93417.0,915365.0,8405.0,39765.0,784439.0,446698.0,114506.0,148006.0,39762.0,227578.0,660532.0,344902.0,506805.0]},"kind":"numeric","n":92,"n_null":9,"n_unique":83,"null_rate":0.09782608695652174,"stats":{"iqr":395414.5,"kurtosis":-0.18763027535110943,"max":991094.0,"mean":301136.2771084337,"median":203569.0,"min":1183.0,"n_outliers":0,"outlier_rate":0.0,"q1":82213.5,"q3":477628.0,"skew":0.
963347110358438,"std":274562.86727518984,"zero_rate":0.0}},{"alerts":[],"column":"popularity_rank","extras":{"histogram":{"counts":[11,5,10,10,9,9,9,9,11],"edges":[1.0,12.0,23.0,34.0,45.0,56.0,67.0,78.0,89.0,100.0]},"sample":[1.0,7.0,4.0,3.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,15.0,16.0,21.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,45.0,48.0,49.0,50.0,51.0,52.0,53.0,54.0,55.0,57.0,58.0,60.0,61.0,62.0,63.0,64.0,65.0,66.0,67.0,69.0,70.0,71.0,72.0,74.0,75.0,76.0,77.0,78.0,79.0,80.0,81.0,82.0,83.0,84.0,85.0,86.0,89.0,90.0,91.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0]},"kind":"numeric","n":92,"n_null":9,"n_unique":82,"null_rate":0.09782608695652174,"stats":{"iqr":49.0,"kurtosis":-1.1919119565405933,"max":100.0,"mean":51.2289156626506,"median":52.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":27.5,"q3":76.5,"skew":-0.050464599910173547,"std":29.38778794869037,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+3.80"},{"code":"outliers","level":"warn","message":"9.6% rows beyond 1.5 
IQR"}],"column":"errors","extras":{"histogram":{"counts":[70,7,1,2,0,2,0,0,1],"edges":[0.0,40.44444444444444,80.88888888888889,121.33333333333333,161.77777777777777,202.22222222222223,242.66666666666666,283.1111111111111,323.55555555555554,364.0]},"sample":[1.0,35.0,50.0,6.0,4.0,11.0,3.0,1.0,3.0,4.0,3.0,2.0,66.0,5.0,3.0,3.0,37.0,24.0,11.0,2.0,9.0,16.0,1.0,11.0,50.0,2.0,14.0,8.0,28.0,2.0,5.0,32.0,42.0,5.0,364.0,2.0,8.0,33.0,40.0,0.0,5.0,228.0,158.0,1.0,2.0,2.0,53.0,26.0,4.0,9.0,1.0,32.0,4.0,5.0,12.0,21.0,110.0,2.0,0.0,0.0,1.0,27.0,148.0,11.0,1.0,5.0,6.0,1.0,1.0,0.0,3.0,224.0,0.0,1.0,74.0,26.0,2.0,4.0,1.0,12.0,52.0,24.0,10.0]},"kind":"numeric","n":92,"n_null":9,"n_unique":36,"null_rate":0.09782608695652174,"stats":{"iqr":24.5,"kurtosis":16.120217554093482,"max":364.0,"mean":27.16867469879518,"median":5.0,"min":0.0,"n_outliers":8,"outlier_rate":0.0963855421686747,"q1":2.0,"q3":26.5,"skew":3.8032187384371694,"std":57.198162019999025,"zero_rate":0.060240963855421686}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+3.23"},{"code":"outliers","level":"warn","message":"9.6% rows beyond 1.5 
IQR"}],"column":"error_density","extras":{"histogram":{"counts":[65,10,2,2,0,1,2,0,1],"edges":[0.0,0.033666666666666664,0.06733333333333333,0.10099999999999999,0.13466666666666666,0.16833333333333333,0.20199999999999999,0.23566666666666664,0.2693333333333333,0.303]},"sample":[0.0025,0.0137,0.2075,0.0057,0.0102,0.0174,0.1304,0.0056,0.013,0.2353,0.0025,0.0008,0.0302,0.0025,0.0026,0.0016,0.0464,0.0039,0.0051,0.0019,0.1,0.0149,0.0011,0.0082,0.0568,0.0211,0.0077,0.0288,0.0085,0.0013,0.0027,0.0334,0.0221,0.0008,0.1128,0.0041,0.0396,0.0099,0.0091,0.0,0.003,0.0534,0.0939,0.0025,0.001,0.001,0.1761,0.007,0.0588,0.0022,0.0025,0.0345,0.002,0.0024,0.0166,0.0117,0.0356,0.0005,0.0,0.0,0.0025,0.0197,0.0424,0.0062,0.0006,0.0011,0.0034,0.0025,0.0025,0.0,0.0073,0.0451,0.0,0.0025,0.0476,0.0054,0.0008,0.0025,0.0025,0.0127,0.0208,0.0039,0.303]},"kind":"numeric","n":92,"n_null":9,"n_unique":64,"null_rate":0.09782608695652174,"stats":{"iqr":0.02295,"kurtosis":10.949466654242695,"max":0.303,"mean":0.02724578313253012,"median":0.0057,"min":0.0,"n_outliers":8,"outlier_rate":0.0963855421686747,"q1":0.0025,"q3":0.02545,"skew":3.2292040089566196,"std":0.053758224104135395,"zero_rate":0.060240963855421686}},{"alerts":[{"code":"long_tail","level":"info","message":"24 singleton categories"}],"column":"notes","extras":{"singletons":24,"top_values":[["Low contrast text",12],["Missing form input label",9],["Low contrast text, missing alt text",9],["Missing alt text",5],["Asia-based",5],["No detected errors",5],["Low contrast text, empty link",4],["Low contrast text, missing alt text, empty link, empty button",4],["No data",3],["Low contrast text, missing alt text, missing labels",3],["Low contrast text, missing alt text, empty button",3],["Missing document language",2],["Missing alt text, missing document language",2],["Missing form input label, empty button",2],["Low contrast text, missing alt text, empty link, missing form labels, empty button",1],["Low contrast text, missing alt text, empty 
links",1],["Missing alt text, missing form labels, empty buttons",1],["Missing form input label, missing document language",1],["Low contrast text, missing alt text, missing language",1],["Missing alt text, missing form labels",1]]},"kind":"categorical","n":92,"n_null":0,"n_unique":38,"null_rate":0.0,"stats":{"cardinality":38,"entropy":4.6629990868785764,"entropy_ratio":0.8885410621494674,"top_rate":0.13043478260869565,"top_value":"Low contrast text"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["errors","error_density","notes","popularity_rank","wave_rank"],"featured_charts":[{"caption":"Most sites have few errors but a long right tail reaches 364 \u2014 look for the outlier sites driving the mean above the median.","column":"errors","kind":"histogram"},{"caption":"Error density is similarly skewed; check whether the same sites top both error count and density.","column":"error_density","kind":"histogram"},{"caption":"See which accessibility issues recur most often \u2014 low contrast text and missing labels lead by a clear margin.","column":"notes","kind":"bar"},{"caption":"Popularity is roughly uniform across 1\u2013100, confirming the sample spans the full top-100 range.","column":"popularity_rank","kind":"histogram"},{"caption":"Wave rank spans three orders of magnitude with a mild right skew \u2014 useful for segmenting sites by traffic tier.","column":"wave_rank","kind":"histogram"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset profiles 92 popular websites with accessibility metrics, including error counts, error density, and two ranking signals (popularity and wave rank). The error metrics are highly skewed: errors range from 0 to 364 with a median of just 5, and 8 sites (about 10%) qualify as outliers \u2014 worth flagging as the worst offenders. 
The notes field is the richest qualitative signal, with 'Low contrast text' (12 sites) and 'Missing form input label' (9 sites) dominating the issue mix. Popularity_rank is evenly spread across 1\u2013100, so it works well as a control axis when comparing error patterns across the popularity spectrum.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds web domain names (e.g., google.com, youtube.com, wikipedia.org), with one row per domain. Every one of the 92 values is unique (n_unique equals n, entropy_ratio = 1.0, top_rate \u2248 0.0109), so it functions as a row identifier rather than a categorical feature. No nulls are present, but the long_tail alert reflects the fully flat distribution.","role":"identifier","scope":"column","target":"domain","treatment":"Use as a row key or join key; do not one-hot encode."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.iqr","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"A numeric ranking-style field spanning 1,183 to 991,094 with mean 301,136 and median 203,569, suggesting position or score values rather than counts. The distribution is right-skewed (skew 0.96) with a wide IQR of 395,414, but no outliers were flagged. 
Notably, 9.78% of values are null (9 of 92 rows) and all 83 non-null values are unique, so there are no repeated ranks.","role":"feature","scope":"column","target":"wave_rank","treatment":"Impute or flag the ~10% nulls and consider a log or rank transform before modelling given the right skew."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","skew","kurtosis","std","iqr","n_outliers","null_rate","n","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"Almost certainly a 1-to-100 popularity ranking, with min 1.0, max 100.0, and a near-symmetric distribution (mean 51.23, median 52.0, skew -0.05). The spread is essentially uniform across the range (std 29.39, IQR 49.0, kurtosis -1.19, no outliers), which is consistent with rank data rather than a measured score. Note that 9.78% of rows are null, and 82 unique values across the 83 non-null rows means exactly one rank is repeated.","role":"feature","scope":"column","target":"popularity_rank","treatment":"Impute or flag the ~10% nulls; treat as ordinal and avoid log transforms given the uniform 1-100 spread."},{"confidence":"high","critiques":[],"evidence_keys":["stats.min","stats.max","stats.median","stats.mean","stats.std","stats.skew","stats.kurtosis","stats.iqr","stats.q1","stats.q3","stats.n_outliers","stats.outlier_rate","stats.zero_rate","null_rate","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric counter of errors per record, ranging from 0 to 364 with a median of just 5 but a mean of 27.17 \u2014 a classic long-tail count. Distribution is severely right-skewed (skew 3.80, kurtosis 16.12) with 8 outliers (9.6% of values) and a std of 57.20 dwarfing the IQR of 24.5. 
Note also a 9.78% null rate and 6.02% exact zeros, so missingness and zero-inflation both need handling.","role":"feature","scope":"column","target":"errors","treatment":"Impute nulls and apply a log1p transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.median","stats.mean","stats.max","stats.min","stats.skew","stats.kurtosis","stats.outlier_rate","stats.zero_rate","stats.q1","stats.q3","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Likely a per-record error density (errors per unit), bounded at 0 with a long right tail: median 0.0057 vs mean 0.0272 and max 0.303, with skew 3.23 and kurtosis 10.95. Roughly 9.6% of values are flagged outliers, 6% are exactly zero, and 9.8% are null, so a small set of high-error records dominates the distribution.","role":"feature","scope":"column","target":"error_density","treatment":"Apply a log1p transform or winsorise before modelling, and impute or flag the ~10% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Free-form QA notes describing accessibility issues found on items, dominated by recurring phrases like \"Low contrast text\" (12/92) and \"Missing form input label\" (9/92). High entropy ratio (0.89) and 38 unique values across only 92 rows indicate a long tail of compound descriptions (e.g., \"Low contrast text, missing alt text, empty link, empty button\"). 
Notable signals include sentinel-like entries \"No detected errors\" and \"No data\", plus an off-topic \"Asia-based\" tag that suggests inconsistent note conventions.","role":"free_text","scope":"column","target":"notes","treatment":"Split on commas into multi-label issue tags before aggregating; isolate sentinel values like \"No data\"."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":2339,"prompt_tokens":6815,"total_tokens":9154}},"language_counts":{},"meta":{"generated_at":"2026-05-01T16:07:45+00:00","mode":"full","row_count":92,"sampled_rows":92,"seed":42,"source":"/tmp/saturn-uploads/f55be675ada5/web_accessibility_data_top100.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"domain":"categorical","error_density":"numeric","errors":"numeric","notes":"categorical","popularity_rank":"numeric","wave_rank":"numeric"}}
