{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"51.3% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"emoji","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1400,0,0,0,1143,0,0,0,215,0,0,0,0,1010,0,0,0,613,0,0,0,0,99,0,0,0,427,0,0,0,0,128,0,0,0,95,0,0,0,95],"edges":[1.0,1.225,1.45,1.675,1.9,2.125,2.35,2.575,2.8,3.025,3.25,3.475,3.7,3.9250000000000003,4.15,4.375,4.6,4.825,5.05,5.275,5.5,5.7250000000000005,5.95,6.175,6.4,6.625,6.8500000000000005,7.075,7.3,7.525,7.75,7.9750000000000005,8.2,8.425,8.65,8.875,9.1,9.325000000000001,9.55,9.775,10.0]},"near_unique":true,"sample":["\ud83e\udd23","\ud83e\udd60","\ud83d\udce8","\u26f3","\ud83d\udc68\ud83c\udfff\u200d\ud83e\uddaf","\ud83c\uddec\ud83c\udde7","\ud83d\udc69\ud83c\udffd\u200d\ud83e\uddbc\u200d\u27a1","\ud83d\udeaf","\ud83c\udfb1","\ud83d\udde8","\ud83d\udc69\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83d\udc69\ud83c\udffe","\u26f9\ud83c\udffe\u200d\u2642\ufe0f","\ud83d\udc82\ud83c\udfff\u200d\u2642\ufe0f","\ud83d\ude17","\ud83d\udc73\ud83c\udffd","\ud83d\ude47\ud83c\udffb\u200d\u2640","\ud83d\udc85\ud83c\udffe","\ud83e\ude70","\ud83c\uddf7\ud83c\uddf4","\ud83e\uddce\ud83c\udffc","\ud83d\udc70\ud83c\udffc","\ud83d\udc4b\ud83c\udffe","\ud83d\ude46\u200d\u2640","\ud83e\uddb6\ud83c\udffe","\ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66","\ud83c\udfdd\ufe0f","\ud83e\udd38\u200d\u2642\ufe0f","\ud83e\udd26\ud83c\udffd\u200d\u2642\ufe0f","\ud83d\udc4e\ud83c\udffc","\ud83c\uddec\ud83c\uddf9","\ud83d\udc69\ud83c\udffe\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc69\ud83c\udfff","\ud83c\uddfb\ud83c\udde6","\ud83e\uddd1\ud83c\udffd\u200d\u2764\u200d\ud83e\uddd1\ud83c\udffb","\ud83d\udeb6\ud83c\udfff\u200d\u2640\u200d\u27a1","\ud83e\udddd\ud83c\udffd\u200d\u2642\ufe0f","\ud83d\udc68\ud83c\udffd\u200d\ud83e\uddbc\u200d\u27a1","\ud83c\udd7f","\ud83c\udfcc\ud83c\udffd\u200d\u2640","\ud83d\udc69\u200d\ud83e\uddbd","\u26f9\u200d\u2642\ufe0f","\ud83c\udfab","\u00a9\ufe0f","\ud83d\udc82\u200d\u2642","\u2733","\ud83c\udde7\ud83c\uddee","\ud83c\uddf1\ud83c\uddf0","\ud83e\uddda\ud83c\udffb\u200d\u2642","\ud83d\udeb6\ud83c\udffb\u200d\u2642\u200d\u27a1\ufe0f","\ud83d\ude45\ud83c\udfff\u200d\u2640","\ud83d\udecf"],"top_values":[],"top_words":[["\ud83d\ude00",1],["\ud83d\ude03",1],["\ud83d\ude04",1],["\ud83d\ude01",1],["\ud83d\ude06",1],["\ud83d\ude05",1],["\ud83e\udd23",1],["\ud83d\ude02",1],["\ud83d\ude42",1],["\ud83d\ude43",1],["\ud83e\udee0",1],["\ud83d\ude09",1],["\ud83d\ude0a",1],["\ud83d\ude07",1],["\ud83e\udd70",1],["\ud83d\ude0d",1],["\ud83e\udd29",1],["\ud83d\ude18",1],["\ud83d\ude17",1],["\u263a\ufe0f",1],["\u263a",1],["\ud83d\ude1a",1],["\ud83d\ude19",1],["\ud83e\udd72",1],["\ud83d\ude0b",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5225,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":5225,"n_null":0,"n_unique":5225,"null_rate":0.0,"stats":{"allcaps_rate":0.5133014354066986,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.9774162679425837,"len_max":10,"len_mean":3.415885167464115,"len_median":3.0,"len_min":1,"len_p95":8.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":1.8183000000000005,"url_rate":0.0,"vocab_size":5225,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"26.8% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"}],"column":"codepoints","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1400,0,0,0,249,894,0,0,136,79,0,0,141,544,325,0,25,553,15,20,12,42,45,0,6,60,48,105,205,33,3,95,0,0,0,0,95,0,0,95],"edges":[4.0,5.25,6.5,7.75,9.0,10.25,11.5,12.75,14.0,15.25,16.5,17.75,19.0,20.25,21.5,22.75,24.0,25.25,26.5,27.75,29.0,30.25,31.5,32.75,34.0,35.25,36.5,37.75,39.0,40.25,41.5,42.75,44.0,45.25,46.5,47.75,49.0,50.25,51.5,52.75,54.0]},"near_unique":true,"sample":["1F923","1F960","1F4E8","26F3","1F468 1F3FF 200D 1F9AF","1F1EC 1F1E7","1F469 1F3FD 200D 1F9BC 200D 27A1","1F6AF","1F3B1","1F5E8","1F469 1F3FB 200D 1F91D 200D 1F469 1F3FE","26F9 1F3FE 200D 2642 FE0F","1F482 1F3FF 200D 2642 FE0F","1F617","1F473 1F3FD","1F647 1F3FB 200D 2640","1F485 1F3FE","1FA70","1F1F7 1F1F4","1F9CE 1F3FC","1F470 1F3FC","1F44B 1F3FE","1F646 200D 2640","1F9B6 1F3FE","1F469 200D 1F469 200D 1F467 200D 1F466","1F3DD FE0F","1F938 200D 2642 FE0F","1F926 1F3FD 200D 2642 FE0F","1F44E 1F3FC","1F1EC 1F1F9","1F469 1F3FE 200D 2764 FE0F 200D 1F48B 200D 1F469 1F3FF","1F1FB 1F1E6","1F9D1 1F3FD 200D 2764 200D 1F9D1 1F3FB","1F6B6 1F3FF 200D 2640 200D 27A1","1F9DD 1F3FD 200D 2642 FE0F","1F468 1F3FD 200D 1F9BC 200D 27A1","1F17F","1F3CC 1F3FD 200D 2640","1F469 200D 1F9BD","26F9 200D 2642 FE0F","1F3AB","00A9 FE0F","1F482 200D 2642","2733","1F1E7 1F1EE","1F1F1 1F1F0","1F9DA 1F3FB 200D 2642","1F6B6 1F3FB 200D 2642 200D 27A1 FE0F","1F645 1F3FF 200D 2640","1F6CF"],"top_values":[],"top_words":[["200d",3747],["fe0f",1318],["1f3fb",703],["1f3fc",703],["1f3fd",703],["1f3fe",703],["1f3ff",703],["1f468",676],["1f469",676],["2642",674],["2640",674],["1f9d1",514],["2764",398],["27a1",290],["1f48b",197],["1f91d",92],["1f6b6",90],["1f9ce",90],["1f3c3",90],["1faef",61],["1f430",61],["1f9af",55],["1f9bc",55],["1f9bd",55],["1f1f2",40]],"vocab_skipped":null,"word_histogram":{"counts":[1400,0,0,1143,0,0,215,0,0,0,1010,0,0,613,0,0,99,0,0,0,427,0,0,128,0,0,95,0,0,95],"edges":[1.0,1.3,1.6,1.9,2.2,2.5,2.8,3.1,3.4,3.6999999999999997,4.0,4.3,4.6,4.9,5.2,5.5,5.8,6.1,6.3999999999999995,6.7,7.0,7.3,7.6,7.8999999999999995,8.2,8.5,8.8,9.1,9.4,9.7,10.0]}},"kind":"text","n":5225,"n_null":0,"n_unique":5225,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":54,"len_mean":18.04019138755981,"len_median":15.0,"len_min":4,"len_p95":43.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.2679425837320574,"readability_flesch_mean":118.80090000000001,"url_rate":0.0,"vocab_size":1451,"word_mean":3.415885167464115,"word_median":3.0}},{"alerts":[],"column":"status","extras":{"singletons":0,"top_values":[["fully-qualified",3944],["minimally-qualified",1029],["unqualified",243],["component",9]]},"kind":"categorical","n":5225,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":0.9896229177029641,"entropy_ratio":0.49481145885148203,"top_rate":0.7548325358851674,"top_value":"fully-qualified"}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"duplicates","level":"warn","message":"24.3% duplicate strings"}],"column":"name","extras":{"language_counts":{"__engine":"fasttext:4,997","ar":19,"ca":5,"ceb":1,"de":36,"en":4680,"eo":2,"es":21,"fa":33,"fi":2,"fr":17,"gl":2,"hu":3,"id":8,"it":7,"ja":16,"la":1,"lt":2,"ms":3,"nl":19,"nn":28,"pl":27,"pt":4,"ru":19,"sv":4,"ta":4,"th":7,"uk":9,"ur":4,"war":2,"zh":3},"language_sample_size":5000,"length_histogram":{"counts":[36,154,216,252,319,374,249,189,150,135,125,122,162,252,256,245,275,273,195,162,153,120,63,64,76,60,61,75,66,70,64,40,32,46,28,24,26,8,4,4],"edges":[7.0,8.975,10.95,12.925,14.9,16.875,18.85,20.825000000000003,22.8,24.775000000000002,26.75,28.725,30.700000000000003,32.675,34.650000000000006,36.625,38.6,40.575,42.550000000000004,44.525,46.5,48.475,50.45,52.425000000000004,54.400000000000006,56.375,58.35,60.325,62.300000000000004,64.275,66.25,68.225,70.2,72.175,74.15,76.125,78.10000000000001,80.075,82.05,84.025,86.0]},"near_unique":false,"sample":["E3.0 rolling on the floor laughing","E5.0 fortune cookie","E0.6 incoming envelope","E0.6 flag in hole","E12.0 man with white cane: dark skin tone","E0.6 flag: United Kingdom","E15.1 woman in motorized wheelchair facing right: medium skin tone","E1.0 no littering","E0.6 pool 8 ball","E2.0 left speech bubble","E12.1 women holding hands: light skin tone, medium-dark skin tone","E4.0 man bouncing ball: medium-dark skin tone","E4.0 man guard: dark skin tone","E1.0 kissing face","E1.0 person wearing turban: medium skin tone","E4.0 woman bowing: light skin tone","E1.0 nail polish: medium-dark skin tone","E12.0 ballet shoes","E2.0 flag: Romania","E12.0 person kneeling: medium-light skin tone","E1.0 person with veil: medium-light skin tone","E1.0 waving hand: medium-dark skin tone","E4.0 woman gesturing OK","E11.0 foot: medium-dark skin tone","E2.0 family: woman, woman, girl, boy","E0.7 desert island","E4.0 man cartwheeling","E4.0 man facepalming: medium skin tone","E1.0 thumbs down: medium-light skin tone","E2.0 flag: Guatemala","E13.1 kiss: woman, woman, medium-dark skin tone, dark skin tone","E2.0 flag: Vatican City","E13.1 couple with heart: person, person, medium skin tone, light skin tone","E15.1 woman walking facing right: dark skin tone","E5.0 man elf: medium skin tone","E15.1 man in motorized wheelchair facing right: medium skin tone","E0.6 P button","E4.0 woman golfing: medium skin tone","E12.0 woman in manual wheelchair","E4.0 man bouncing ball","E0.6 ticket","E0.6 copyright","E4.0 man guard","E0.6 eight-spoked asterisk","E2.0 flag: Burundi","E2.0 flag: Sri Lanka","E5.0 man fairy: light skin tone","E15.1 man walking facing right: light skin tone","E4.0 woman gesturing NO: dark skin tone","E0.7 bed"],"top_values":[["E2.0 eye in speech bubble",4],["E4.0 man detective",4],["E4.0 woman detective",4],["E15.1 woman walking facing right",4],["E15.1 woman walking facing right: light skin tone",4],["E15.1 woman walking facing right: medium-light skin tone",4],["E15.1 woman walking facing right: medium skin tone",4],["E15.1 woman walking facing right: medium-dark skin tone",4],["E15.1 woman walking facing right: dark skin tone",4],["E15.1 man walking facing right",4],["E15.1 man walking facing right: light skin tone",4],["E15.1 man walking facing right: medium-light skin tone",4],["E15.1 man walking facing right: medium skin tone",4],["E15.1 man walking facing right: medium-dark skin tone",4],["E15.1 man walking facing right: dark skin tone",4],["E15.1 woman kneeling facing right",4],["E15.1 woman kneeling facing right: light skin tone",4],["E15.1 woman kneeling facing right: medium-light skin tone",4],["E15.1 woman kneeling facing right: medium skin tone",4],["E15.1 woman kneeling facing right: medium-dark skin tone",4]],"top_words":[["skin",3450],["tone",2800],["e4.0",1030],["woman",821],["man",820],["e0.6",793],["light",696],["medium",695],["medium-light",690],["medium-dark",690],["dark",690],["tone,",650],["e1.0",512],["with",498],["e13.1",422],["e5.0",339],["person",333],["woman,",328],["man,",324],["e15.1",301],["e2.0",297],["facing",289],["e12.0",266],["flag:",262],["e0.7",254]],"vocab_skipped":null,"word_histogram":{"counts":[612,0,0,1056,0,0,393,0,0,419,0,0,1083,0,0,644,0,0,362,0,0,101,0,0,310,0,0,65,0,180],"edges":[2.0,2.3333333333333335,2.6666666666666665,3.0,3.333333333333333,3.6666666666666665,4.0,4.333333333333333,4.666666666666666,5.0,5.333333333333333,5.666666666666666,6.0,6.333333333333333,6.666666666666666,7.0,7.333333333333333,7.666666666666666,8.0,8.333333333333332,8.666666666666666,9.0,9.333333333333332,9.666666666666666,10.0,10.333333333333332,10.666666666666666,11.0,11.333333333333332,11.666666666666666,12.0]}},"kind":"text","n":5225,"n_null":0,"n_unique":3953,"null_rate":0.0,"stats":{"allcaps_rate":0.00019138755980861245,"boilerplate_rate":0.0,"duplicate_rate":0.24344497607655502,"emoji_rate":0.0,"len_max":86,"len_mean":33.810143540669856,"len_median":34.0,"len_min":7,"len_p95":67.0,"n_duplicates":1272,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":78.2965996753247,"url_rate":0.0,"vocab_size":1912,"word_mean":5.520574162679426,"word_median":6.0}},{"alerts":[],"column":"group","extras":{"singletons":0,"top_values":[["People & Body",3468],["Objects",316],["Symbols",305],["Flags",276],["Travel & Places",268],["Smileys & Emotion",187],["Animals & Nature",167],["Food & Drink",133],["Activities",96],["Component",9]]},"kind":"categorical","n":5225,"n_null":0,"n_unique":10,"null_rate":0.0,"stats":{"cardinality":10,"entropy":1.9076936537801823,"entropy_ratio":0.5742730123256528,"top_rate":0.663732057416268,"top_value":"People & Body"}},{"alerts":[],"column":"subgroup","extras":{"singletons":1,"top_values":[["person-activity",697],["person-role",635],["family",533],["person-sport",480],["person-gesture",300],["country-flag",259],["person-fantasy",246],["person",192],["animal-mammal",68],["hand-fingers-open",67],["sky & weather",65],["hands",62],["hand-fingers-partial",55],["transport-ground",55],["clothing",50],["body-parts",49],["alphanum",49],["hand-single-finger",43],["person-resting",42],["tool",38]]},"kind":"categorical","n":5225,"n_null":0,"n_unique":100,"null_rate":0.0,"stats":{"cardinality":100,"entropy":4.981654614581713,"entropy_ratio":0.7498137335134926,"top_rate":0.13339712918660288,"top_value":"person-activity"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.group.top_values","columns.group.stats","columns.status.top_values","columns.status.stats","columns.subgroup.top_values","columns.subgroup.stats","columns.name.stats","columns.name.top_words","columns.codepoints.stats"],"featured_charts":[{"caption":"Shows how dominant 'People & Body' is relative to all other emoji groups.","column":"group","kind":"bar"},{"caption":"Highlights that roughly three quarters of entries are fully-qualified, with smaller minimally-qualified and unqualified slices.","column":"status","kind":"donut"},{"caption":"Top subgroups (person-activity, person-role, family) reveal where the catalog packs the most variants.","column":"subgroup","kind":"bar"},{"caption":"Distribution of name lengths (median 34 chars) hints at how many descriptors carry skin-tone or gender qualifiers.","column":"name","kind":"length"},{"caption":"Codepoint-string length spans 4 to 54 characters, signalling how many emoji are multi-codepoint ZWJ sequences.","column":"codepoints","kind":"length"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a catalog of 5,225 Unicode emoji, with each row carrying the emoji glyph, its codepoint sequence, an English-leaning name, and three classification fields (group, subgroup, status). The collection is heavily skewed toward people: the 'group' field shows 'People & Body' accounts for 3,468 of 5,225 rows (about 66%), so most subsequent breakdowns will be dominated by human figures. The 'status' field is similarly lopsided, with 'fully-qualified' covering 3,944 rows versus much smaller minimally-qualified, unqualified, and component buckets. The 'subgroup' column gives a finer 100-way split worth exploring, led by person-activity (697) and person-role (635). Name-level duplication (1,272 duplicate names, ~24%) reflects skin-tone and gender variants of the same base concept, which is the other thing to keep in mind when counting.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","emoji_rate","one_word_rate","allcaps_rate","len_min","len_max","len_median","vocab_size"],"model":"anthropic:claude-opus-4-7","narrative":"This column appears to be a unique emoji identifier or catalog entry, with all 5225 values distinct (n_unique equals n) and a 97.7% emoji_rate. Each entry is a single token (one_word_rate 1.0) ranging from 1 to 10 characters with a median length of 3. The 51.3% allcaps_rate is unusual for an emoji column and suggests some entries contain ASCII letter components (e.g., regional indicators or text-based glyphs) rather than pure pictographs.","role":"identifier","scope":"column","target":"emoji","treatment":"Treat as a unique key; drop from modelling or use only as a join/lookup field."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.len_mean","stats.len_max","stats.word_median","stats.vocab_size","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Unicode codepoint sequences (likely emoji definitions), with every one of the 5225 rows unique and fully uppercase-hex. Tokens like '200d' (zero-width joiner, 3747 occurrences), 'fe0f' (variation selector, 1318), and skin-tone modifiers '1f3fb'-'1f3ff' (703 each) dominate, alongside the man/woman bases '1f468'/'1f469' (676 each). String length averages 18 characters (max 54) with a median of 3 tokens, consistent with multi-codepoint emoji ZWJ sequences.","role":"identifier","scope":"column","target":"codepoints","treatment":"Treat as a unique key per emoji; split on whitespace into codepoint tokens if structural features are needed."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical qualification status with 4 levels and no nulls across 5225 rows. Heavily dominated by 'fully-qualified' at 75.5%, with 'minimally-qualified' (1029) and 'unqualified' (243) trailing, and 'component' a rare tail at just 9 occurrences. Entropy ratio of 0.495 confirms the imbalance.","role":"label","scope":"column","target":"status","treatment":"One-hot encode; consider collapsing the rare 'component' class or stratifying splits to preserve it."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.duplicate_rate","stats.n_duplicates","stats.len_mean","stats.word_mean","stats.vocab_size","top_values","top_words","language_counts","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short descriptive labels for emoji (e.g. 'E4.0 man detective', 'E15.1 woman walking facing right: dark skin tone'), averaging 5.5 words and 33.8 characters with a versioned 'E#.#' prefix. Duplicates are heavy at 24.3% (1272 rows) because skin-tone variants share base names \u2014 'skin' (3450) and 'tone' (2800) dominate the vocabulary of 1912 tokens. Although 4680 rows are tagged English, the language detector also flags 29 other languages including German (36), Persian (33), and Polish (27), likely false positives on the short codepoint-style tokens rather than true multilingual content.","role":"label","scope":"column","target":"name","treatment":"Treat as the canonical emoji label; strip the 'E#.#' version prefix and skin-tone suffix if you need a deduplicated key."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical grouping column with 10 distinct values matching the standard Unicode emoji category taxonomy (e.g., \"People & Body\", \"Smileys & Emotion\", \"Flags\"). The distribution is heavily imbalanced: \"People & Body\" alone covers 66.4% of the 5,225 rows, while \"Component\" appears just 9 times. No nulls, and entropy ratio of 0.57 confirms the skew toward one dominant class.","role":"feature","scope":"column","target":"group","treatment":"One-hot or target-encode; consider grouping the rare \"Component\" class given its tiny support."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical taxonomy label with 100 distinct subgroups across 5225 rows and no nulls, suggesting an emoji or icon classification scheme dominated by people-related categories. The top value 'person-activity' covers 13.3% of rows, and the top eight values are all person/family/flag related, indicating a long tail where 92 remaining subgroups together account for most of the diversity (entropy ratio 0.75). No single category dominates overwhelmingly, but the person-centric concentration is notable.","role":"feature","scope":"column","target":"subgroup","treatment":"Group-encode or target-encode given the 100 levels, or collapse rare subgroups into an 'other' bucket before modelling."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":2675,"prompt_tokens":9558,"total_tokens":12233}},"language_counts":{"ar":19,"ca":5,"ceb":1,"de":36,"en":4680,"eo":2,"es":21,"fa":33,"fi":2,"fr":17,"gl":2,"hu":3,"id":8,"it":7,"ja":16,"la":1,"lt":2,"ms":3,"nl":19,"nn":28,"pl":27,"pt":4,"ru":19,"sv":4,"ta":4,"th":7,"uk":9,"ur":4,"war":2,"zh":3},"meta":{"generated_at":"2026-05-01T18:05:41+00:00","mode":"full","row_count":5225,"sampled_rows":5225,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/cache/emoji/unicode_emoji_list_20260119.json"},"notes":[],"saturn_version":"0.2.0","schema":{"codepoints":"text","emoji":"text","group":"categorical","name":"text","status":"categorical","subgroup":"categorical"}}
