{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"10 singleton categories"}],"column":"article","extras":{"singletons":10,"top_values":[["Donald_Trump",1],["Joe_Biden",1],["Climate_change",1],["COVID-19_pandemic",1],["Artificial_intelligence",1],["Russia",1],["Israel",1],["Taylor_Swift",1],["Elon_Musk",1],["United_States",1]]},"kind":"categorical","n":10,"n_null":0,"n_unique":10,"null_rate":0.0,"stats":{"cardinality":10,"entropy":3.321928094887362,"entropy_ratio":1.0,"top_rate":0.1,"top_value":"Donald_Trump"}},{"alerts":[{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 IQR"}],"column":"avg_daily_views","extras":{"histogram":{"counts":[6,2,1,0,1],"edges":[2199.0,15134.8,28070.6,41006.399999999994,53942.2,66878.0]},"sample":[66878.0,13210.0,2199.0,6141.0,10861.0,13068.0,10896.0,20469.0,24937.0,36181.0]},"kind":"numeric","n":10,"n_null":0,"n_unique":10,"null_rate":0.0,"stats":{"iqr":12950.25,"kurtosis":1.6082937515523819,"max":66878.0,"mean":20484.0,"median":13139.0,"min":2199.0,"n_outliers":1,"outlier_rate":0.1,"q1":10869.75,"q3":23820.0,"skew":1.572568132234222,"std":19006.231019209346,"zero_rate":0.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+2.61"},{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 IQR"}],"column":"peak_views","extras":{"histogram":{"counts":[9,0,0,0,1],"edges":[3613.0,150742.0,297871.0,445000.0,592129.0,739258.0]},"sample":[739258.0,24866.0,3613.0,9456.0,15305.0,19356.0,14273.0,78136.0,60829.0,49810.0]},"kind":"numeric","n":10,"n_null":0,"n_unique":10,"null_rate":0.0,"stats":{"iqr":43543.25,"kurtosis":4.9281565782634145,"max":739258.0,"mean":101490.2,"median":22111.0,"min":3613.0,"n_outliers":1,"outlier_rate":0.1,"q1":14531.0,"q3":58074.25,"skew":2.609132736646071,"std":225418.03767706297,"zero_rate":0.0}},{"alerts":[{"code":"outliers","level":"warn","message":"10.0% rows beyond 1.5 IQR"}],"column":"total_views","extras":{"histogram":{"counts":[6,2,1,0,1],"edges":[200122.0,1377276.6,2554431.2,3731585.8000000003,4908740.4,6085895.0]},"sample":[6085895.0,1202151.0,200122.0,558788.0,988318.0,1189182.0,991521.0,1862689.0,2269222.0,3292429.0]},"kind":"numeric","n":10,"n_null":0,"n_unique":10,"null_rate":0.0,"stats":{"iqr":1178470.0,"kurtosis":1.6083504033096867,"max":6085895.0,"mean":1864031.7,"median":1195666.5,"min":200122.0,"n_outliers":1,"outlier_rate":0.1,"q1":989118.75,"q3":2167588.75,"skew":1.5725832518874203,"std":1729564.6126294357,"zero_rate":0.0}},{"alerts":[{"code":"skipped","level":"info","message":"no profiler for kind=unknown"}],"column":"timeline","extras":{},"kind":"unknown","n":10,"n_null":0,"n_unique":null,"null_rate":0.0,"stats":{}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["peak_views.stats.skew","peak_views.stats.max","peak_views.stats.median","peak_views.stats.n_outliers","avg_daily_views.stats.skew","avg_daily_views.stats.n_outliers","total_views.stats.skew","total_views.stats.n_outliers","article.stats.cardinality","row_count"],"featured_charts":[{"caption":"Look for the long right tail \u2014 one article has a peak far above the median of 22,111.","column":"peak_views","kind":"histogram"},{"caption":"Check how concentrated total attention is; mean (1.86M) sits well above the median (1.20M).","column":"total_views","kind":"histogram"},{"caption":"See whether sustained daily interest follows the same skew as peak spikes.","column":"avg_daily_views","kind":"histogram"},{"caption":"Use this as a label axis to see which specific articles drive the outliers in the view metrics.","column":"article","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This is a small dataset of 10 Wikipedia articles tracking US public attention, with view metrics (peak_views, avg_daily_views, total_views) plus an article name and a timeline field. The view metrics are heavily right-skewed \u2014 peak_views has a skew of 2.61 and a max of 739,258 against a median of just 22,111, suggesting one or two articles dominate attention. Each numeric column flags one outlier (10% outlier rate), so it's worth identifying which article is pulling the distribution. The article column has 10 unique values for 10 rows, so it functions as an identifier rather than a category to aggregate on.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Wikipedia-style article titles (e.g., Donald_Trump, COVID-19_pandemic, Taylor_Swift) using underscore-separated naming. Every one of the 10 rows is unique (n_unique=10, entropy_ratio=1.0), so it functions as a row identifier rather than a categorical feature. The mix spans people, countries, and topics, with no nulls and no repeated value (top_rate=0.1).","role":"identifier","scope":"column","target":"article","treatment":"Use as a join key to Wikipedia metadata; do not one-hot encode."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","zero_rate","stats.mean","stats.median","stats.min","stats.max","stats.std","stats.skew","stats.n_outliers","stats.outlier_rate","stats.q1","stats.q3"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric column capturing average daily views per item, with all 10 rows unique and no nulls or zeros. The distribution is right-skewed (skew 1.57) with a mean of 20484 sitting well above the median of 13139, and a max of 66878 flagged as the lone outlier (10% outlier rate) versus a min of 2199. Standard deviation (19006) nearly matches the mean, signalling high dispersion in a tiny sample.","role":"feature","scope":"column","target":"avg_daily_views","treatment":"Log-transform before regression to tame the right skew and outlier."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.skew","stats.kurtosis","stats.median","stats.mean","stats.max","stats.q3","stats.iqr","stats.std","stats.n_outliers","stats.outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be a peak view-count metric per item, with all 10 rows unique and no nulls. The distribution is heavily right-skewed (skew 2.61, kurtosis 4.93): the median is 22111 while the mean is 101490.2 and the max reaches 739258, roughly 12x the Q3 of 58074.25. One outlier (10% of rows) is dragging the standard deviation (225418) far above the IQR (43543.25).","role":"feature","scope":"column","target":"peak_views","treatment":"Log-transform before any modelling or aggregation to tame the skew and outlier."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.skew","stats.outlier_rate","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric view-count metric across just 10 rows, all distinct and non-null. Distribution is right-skewed (skew 1.57) with values ranging from 200,122 to 6,085,895 against a median of 1,195,666.5, and one row (10%) flagged as an outlier pulling the mean up to 1,864,031.7. With n=10 the shape estimates are fragile.","role":"feature","scope":"column","target":"total_views","treatment":"log-transform before any modelling to tame the right skew and outlier."},{"confidence":"low","critiques":[],"evidence_keys":["alerts","kind","n","n_unique","null_rate","stats"],"model":"anthropic:claude-opus-4-7","narrative":"This column, named 'timeline', was skipped by the profiler and has no computed statistics beyond a row count of 10 and a null rate of 0.0. Its kind is reported as 'unknown' and the unique count is missing, so nothing can be said about cardinality, types, or value distribution. The 'skipped' alert is the only substantive signal present.","role":"other","scope":"column","target":"timeline","treatment":"Re-profile with an appropriate parser before deciding on downstream use."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":1983,"prompt_tokens":5201,"total_tokens":7184}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:17:54+00:00","mode":"full","row_count":10,"sampled_rows":10,"seed":42,"source":"/home/coolhand/datasets/us-attention-data/wikipedia_event_articles.json"},"notes":[],"saturn_version":"0.2.0","schema":{"article":"categorical","avg_daily_views":"numeric","peak_views":"numeric","timeline":"unknown","total_views":"numeric"}}
