{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"}],"column":"keyword","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[84,531,641,374,774,696,588,268,438,395,267,132,223,172,142,61,96,62,56,26,41,24,26,14,10,9,5,3,6,3,0,3,2,1,6,0,0,0,1,1],"edges":[2.0,3.75,5.5,7.25,9.0,10.75,12.5,14.25,16.0,17.75,19.5,21.25,23.0,24.75,26.5,28.25,30.0,31.75,33.5,35.25,37.0,38.75,40.5,42.25,44.0,45.75,47.5,49.25,51.0,52.75,54.5,56.25,58.0,59.75,61.5,63.25,65.0,66.75,68.5,70.25,72.0]},"near_unique":true,"sample":["tangerine_clementine_mandarin","tandem_bicycle","drink_beverage","icing","work_of_art_art_object","drowning","homeland_nationality_loyalty_","indicator_past_participle_2_","box_cube","rickshaw","beyond_past_old_","ice_cream_old_","radio_1_","french_fries_chips_old_","tonsils","batter","support_oral_","drying_rack","italian_language_","tile","proclaim_announce_to_","physics","mammoth","magnetic_pole","danish_class_","paper_clip","dance_therapist","fence_wall","applaud_clap_to_","horseshoe","through","heart","phonology","insure_to_","fist","shepherd","breeze","vomiting_vomit_puking","kitten","perch_fish_","knocking","branch","great_experience","male_genitals_man_with_penis_","summary_abstract","moon","these","stationery_shop_stationery_store","care_protection_defence","swamp_bog_marsh"],"top_values":[],"top_words":[["up_upward",1],["bottle_flask",1],["male_friend",1],["knight",1],["digital_device",1],["passport",1],["blood",1],["tangerine_clementine_mandarin",1],["sandstorm",1],["cns_injury",1],["selfcontrol",1],["reflection_mental_consideration",1],["psychiatrist",1],["slang",1],["parrot_myna_talking_bird",1],["good_evening_bye_",1],["asparagus",1],["painful_painfully_sore",1],["shoe",1],["microwave_to_",1],["similar_like_alike",1],["french_fries_chips_old_",1],["train_station_building_",1],["ladder",1],["eternal_life_immortality",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6181,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":6181,"n_null":0,"n_unique":6181,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":72,"len_mean":14.530334897265815,"len_median":12.0,"len_min":2,"len_p95":31.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":-75.89799999999997,"url_rate":0.0,"vocab_size":6181,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"symbol_count","extras":{"singletons":0,"top_values":[["1",6181]]},"kind":"categorical","n":6181,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"1"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns[keyword].n_unique","columns[keyword].stats.one_word_rate","columns[keyword].stats.len_min","columns[keyword].stats.len_max","columns[keyword].stats.len_median","columns[keyword].stats.len_mean","columns[keyword].stats.len_p95","columns[symbol_count].n_unique","columns[symbol_count].stats.top_value","columns[symbol_count].stats.top_rate"],"featured_charts":[{"caption":"Distribution of keyword character lengths to see the spread from 2 up to 72 characters.","column":"keyword","kind":"length"},{"caption":"Histogram view of keyword lengths to spot the typical range around the median of 12.","column":"keyword","kind":"histogram"},{"caption":"Confirms that symbol_count is a constant single category ('1') with no variation.","column":"symbol_count","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset contains 6,181 rows and 2 columns drawn from blissapi.db, pairing a free-text 'keyword' field with a categorical 'symbol_count' field. Every keyword is unique (6,181 distinct values across 6,181 rows) and is exactly one word, with lengths ranging from 2 to 72 characters and a median of 12. The 'symbol_count' column is fully constant at the value '1', so it carries no information for analysis. The most useful first look is the distribution of keyword lengths, since that is essentially the only varying signal in the data.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.one_word_rate","stats.duplicate_rate","stats.len_mean","stats.len_median","stats.len_max","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column is a single-word keyword or concept tag, with every one of the 6181 rows holding a unique value (n_unique = 6181, duplicate_rate = 0.0, one_word_rate = 1.0). Tokens are short (len_mean 14.5, len_median 12) and many are compound forms joined by underscores like 'tangerine_clementine_mandarin' or 'cns_injury', suggesting a controlled vocabulary of concept labels rather than free text. The fully unique vocabulary means it behaves like an identifier for distinct concepts, not a categorical feature.","role":"identifier","scope":"column","target":"keyword","treatment":"Treat as a concept key; split underscore-joined tokens and embed if semantic similarity is needed, otherwise leave out of modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column records a symbol count, but every one of the 6181 rows holds the value \"1\" (top_rate 1.0, cardinality 1, entropy 0). It carries no information and was flagged for imbalance. There are no nulls, just a single constant.","role":"metadata","scope":"column","target":"symbol_count","treatment":"Drop; constant column with zero entropy."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":1143,"prompt_tokens":2779,"total_tokens":3922}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:07:02+00:00","mode":"full","row_count":6181,"sampled_rows":6181,"seed":42,"source":"/home/coolhand/data/blissapi.db"},"notes":[],"saturn_version":"0.2.0","schema":{"keyword":"text","symbol_count":"categorical"}}
