{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"}],"column":"name","extras":{"language_counts":{},"language_sample_size":2000,"length_histogram":{"counts":[5,20,18,51,103,168,175,80,160,139,151,136,87,98,92,27,66,55,55,44,29,28,21,9,22,23,16,12,12,15,7,7,14,9,15,4,6,2,3,16],"edges":[5.0,6.875,8.75,10.625,12.5,14.375,16.25,18.125,20.0,21.875,23.75,25.625,27.5,29.375,31.25,33.125,35.0,36.875,38.75,40.625,42.5,44.375,46.25,48.125,50.0,51.875,53.75,55.625,57.5,59.375,61.25,63.125,65.0,66.875,68.75,70.625,72.5,74.375,76.25,78.125,80.0]},"near_unique":true,"sample":["seeking help if needed.","failing at something.","being concerned about others.","being violent towards others.","choosing who you spend your time with.","being vain.","still having feelings for an ex.","ignoring someone's birthday.","wanting to win.","eating healthy.","exercising every day.","helping others","feeling guilty.","making mistakes.","being rude to others.","having trust in a relationship.","lying to people","being wrong at times.","looking out for yourself","having regrets.","being negative.","lashing out at someone.","expecting your team to always win their game.","having an overly pessimistic world view.","being compassionate.","practicing safe sex","ending an unhealthy relationship","lying to your friends.","being in a romantic relationship with a teacher.","losing hope.","being financially responsible.","doing your best.","seeking help when needed.","being honest","ignoring your partner.","helping people.","People keeping their word.","feeling jealous.","abusing alcohol.","being selfish","being a pedophile.","telling someone if they upset you.","doing things that make you happy.","explaining your issues with other people.","doing something you don't want to do.","keeping secrets from your family.","manipulating others.","celebrating your birthday.","being responsible for your cousin's problems.","overworking yourself"],"top_values":[],"top_words":[["your",539],["being",344],["to",316],["a",201],["with",169],["having",136],["you",136],["people.",131],["of",125],["people",115],["for",111],["in",107],["when",96],["on",91],["wanting",91],["the",81],["others.",78],["someone",73],["friends.",70],["things",69],["about",67],["not",66],["their",62],["other",59],["out",55]],"vocab_skipped":null,"word_histogram":{"counts":[43,405,0,418,0,368,0,269,0,150,0,98,0,79,0,61,35,0,22,0,22,0,19,0,4,0,3,0,3,1],"edges":[1.0,1.5333333333333332,2.0666666666666664,2.6,3.1333333333333333,3.6666666666666665,4.2,4.733333333333333,5.266666666666667,5.8,6.333333333333333,6.866666666666666,7.4,7.933333333333334,8.466666666666667,9.0,9.533333333333333,10.066666666666666,10.6,11.133333333333333,11.666666666666666,12.2,12.733333333333333,13.266666666666666,13.8,14.333333333333334,14.866666666666667,15.4,15.933333333333334,16.46666666666667,17.0]}},"kind":"text","n":2000,"n_null":0,"n_unique":2000,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":80,"len_mean":28.153,"len_median":24.0,"len_min":5,"len_p95":59.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.0215,"readability_flesch_mean":51.69200839160842,"url_rate":0.0,"vocab_size":1626,"word_mean":4.48,"word_median":4.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+6.26"}],"column":"count","extras":{"histogram":{"counts":[1268,274,111,251,40,12,12,9,4,1,1,4,3,3,1,0,0,1,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1],"edges":[8.0,19.325,30.65,41.974999999999994,53.3,64.625,75.94999999999999,87.27499999999999,98.6,109.925,121.25,132.575,143.89999999999998,155.225,166.54999999999998,177.875,189.2,200.52499999999998,211.85,223.17499999999998,234.5,245.825,257.15,268.47499999999997,279.79999999999995,291.125,302.45,313.775,325.09999999999997,336.42499999999995,347.75,359.075,370.4,381.72499999999997,393.04999999999995,404.375,415.7,427.025,438.34999999999997,449.67499999999995,461.0]},"sample":[157.0,148.0,147.0,129.0,92.0,86.0,85.0,85.0,84.0,84.0,82.0,79.0,68.0,66.0,64.0,63.0,60.0,59.0,56.0,56.0,53.0,53.0,53.0,52.0,52.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,50.0,50.0,50.0,50.0,50.0,50.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,47.0,46.0,46.0,46.0,46.0,46.0,46.0,45.0,45.0,45.0,45.0,45.0,43.0,43.0,43.0,42.0,42.0,41.0,41.0,41.0,40.0,38.0,36.0,36.0,36.0,35.0,35.0,34.0,33.0,33.0,32.0,32.0,32.0,31.0,31.0,31.0,31.0,30.0,30.0,30.0,30.0,30.0,29.0,29.0,29.0,29.0,28.0,28.0,28.0,28.0,28.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,25.0,24.0,24.0,24.0,23.0,23.0,23.0,23.0,23.0,23.0,23.0,23.0,22.0,22.0,22.0,22.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,21.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,16.0,16.0,16.0,16.0,16.0,16.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0]},"kind":"numeric","n":2000,"n_null":0,"n_unique":99,"null_rate":0.0,"stats":{"iqr":18.0,"kurtosis":76.59236087805729,"max":461.0,"mean":23.1735,"median":14.0,"min":8.0,"n_outliers":85,"outlier_rate":0.0425,"q1":10.0,"q3":28.0,"skew":6.2641228093921315,"std":24.31838995071214,"zero_rate":0.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"}],"column":"full","extras":{"language_counts":{},"language_sample_size":2000,"length_histogram":{"counts":[14,29,51,187,259,157,222,151,183,138,92,93,75,61,47,28,30,30,24,19,20,9,19,11,20,8,3,4,3,1,0,1,0,2,2,4,0,1,1,1],"edges":[5.0,7.625,10.25,12.875,15.5,18.125,20.75,23.375,26.0,28.625,31.25,33.875,36.5,39.125,41.75,44.375,47.0,49.625,52.25,54.875,57.5,60.125,62.75,65.375,68.0,70.625,73.25,75.875,78.5,81.125,83.75,86.375,89.0,91.625,94.25,96.875,99.5,102.125,104.75,107.375,110.0]},"near_unique":true,"sample":["seeking help if needed.","failing at something.","being concerned about others.","being violent towards others.","choosing who you spend your time with.","being vain.","still having feelings for an ex.","ignoring someone's birthday.","wanting to win.","eating healthy.","exercising every day.","helping others","feeling guilty.","making mistakes.","being rude to others.","having trust in a relationship.","lying to people","being wrong at times.","looking out for yourself","having regrets.","being negative.","lashing out at someone.","expecting your team to always win their game.","having an overly pessimistic world view.","being compassionate.","practicing safe sex","ending an unhealthy relationship","lying to your friends.","being in a romantic relationship with a teacher.","losing hope.","being financially responsible.","doing your best.","seeking help when needed.","being honest","ignoring your partner.","helping people.","People keeping their word.","feeling jealous.","abusing alcohol.","being selfish","being a pedophile.","telling someone if they upset you.","doing things that make you happy.","explaining your issues with other people.","doing something you don't want to do.","keeping secrets from your family.","manipulating others.","celebrating your birthday.","being responsible for your cousin's problems.","overworking yourself"],"top_values":[],"top_words":[["your",539],["being",344],["to",318],["a",201],["with",169],["you",140],["having",136],["people.",131],["of",125],["people",115],["for",111],["in",107],["when",96],["on",92],["wanting",91],["the",81],["others.",78],["someone",73],["friends.",70],["things",69],["about",67],["not",66],["their",63],["other",61],["out",55]],"vocab_skipped":null,"word_histogram":{"counts":[43,405,0,418,0,368,269,0,150,0,98,79,0,61,0,35,22,0,21,0,15,5,0,2,0,1,3,0,1,4],"edges":[1.0,1.6,2.2,2.8,3.4,4.0,4.6,5.2,5.8,6.3999999999999995,7.0,7.6,8.2,8.8,9.4,10.0,10.6,11.2,11.799999999999999,12.4,13.0,13.6,14.2,14.799999999999999,15.399999999999999,16.0,16.6,17.2,17.8,18.4,19.0]}},"kind":"text","n":2000,"n_null":0,"n_unique":2000,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":110,"len_mean":28.2645,"len_median":24.0,"len_min":5,"len_p95":59.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.0215,"readability_flesch_mean":51.63448339160841,"url_rate":0.0,"vocab_size":1628,"word_mean":4.4955,"word_median":4.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.count.stats.skew","columns.count.stats.kurtosis","columns.count.stats.median","columns.count.stats.max","columns.count.stats.n_outliers","columns.full.stats.len_mean","columns.full.stats.word_mean","columns.full.top_words","columns.name.stats.len_mean","columns.name.top_words","columns.full.stats.vocab_size","columns.name.stats.vocab_size"],"featured_charts":[{"caption":"Look for the heavy right tail \u2014 most values cluster low but a few reach into the hundreds.","column":"count","kind":"histogram"},{"caption":"Phrase-length distribution centered around 24 characters with a p95 near 59.","column":"full","kind":"length"},{"caption":"Compare to `full` \u2014 the length profile is nearly identical, hinting these columns overlap.","column":"name","kind":"length"},{"caption":"Top words like `your`, `being`, and `people` reveal the dataset's social/behavioral framing.","column":"full","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset has 2,000 rows and 3 columns: a numeric `count` and two near-identical text fields (`name` and `full`) that look like short phrases about social behavior. The `count` column is extremely right-skewed (skew 6.26, kurtosis 76.6) with a median of 14 but a max of 461 and 85 outliers \u2014 worth investigating before any averaging. The two text columns are essentially twins: same length profile (mean ~28 chars, ~4.5 words), same top words (`your`, `being`, `to`, `a`), and overlapping vocab sizes (1628 vs 1626), suggesting `full` may be a near-duplicate or light reformat of `name`. Start by inspecting the `count` distribution on a log scale and spot-checking a few rows to see how `name` and `full` actually differ.","scope":"dataset","target":"__global__"},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_max","stats.word_mean","stats.word_median","stats.readability_flesch_mean","stats.vocab_size","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Despite the column header 'name', the values are short free-text phrases averaging 4.48 words (median 4) and up to 80 characters, with top tokens like 'your', 'being', 'to', and 'people' suggesting descriptive statements rather than proper names. All 2000 rows are unique with zero nulls or duplicates, and a Flesch readability of 51.7 indicates ordinary prose rather than identifiers.","role":"free_text","scope":"column","target":"name","treatment":"Tokenize and embed before modelling; do not treat as a key despite the column name."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","zero_rate","min","median","mean","max","q1","q3","skew","kurtosis","n_outliers","outlier_rate"],"model":"anthropic:claude-opus-4-7","narrative":"A non-negative integer count with 99 distinct values across 2000 rows and no nulls or zeros. The distribution is severely right-skewed (skew 6.26, kurtosis 76.6): the median is 14 and Q3 is 28, yet the max reaches 461, producing 85 outliers (4.25%). Mean (23.17) sits well above median, confirming a heavy tail rather than a symmetric spread.","role":"feature","scope":"column","target":"count","treatment":"log1p-transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_median","stats.len_max","stats.word_mean","stats.word_median","stats.vocab_size","stats.one_word_rate","stats.readability_flesch_mean","stats.duplicate_rate","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"The `full` column holds short English phrases averaging 4.5 words (median 4) and 28 characters, with every one of the 2000 rows unique and no duplicates or empties. Top words like \"your\", \"being\", \"having\", and \"people\" suggest these are descriptive statements or prompts rather than names or codes. Flesch readability around 51 indicates fairly plain prose, and the vocabulary of 1628 distinct words across 2000 short rows points to varied but thematically related content.","role":"free_text","scope":"column","target":"full","treatment":"Tokenize and embed before modelling; do not treat as a categorical."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":1639,"prompt_tokens":4245,"total_tokens":5884}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:19:45+00:00","mode":"full","row_count":2000,"sampled_rows":2000,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/social_actions.json"},"notes":[],"saturn_version":"0.2.0","schema":{"count":"numeric","full":"text","name":"text"}}
