{"columns":[{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"}],"column":"name","extras":{"language_counts":{"__engine":"fasttext:4,804","ca":50,"ceb":7,"cs":27,"da":9,"de":552,"el":10,"en":1847,"eo":7,"es":251,"et":13,"fi":39,"fr":1029,"he":7,"hu":7,"id":6,"it":389,"ja":12,"lt":7,"lv":7,"nl":146,"no":13,"pl":77,"pt":96,"ro":12,"ru":24,"sh":6,"sk":19,"sv":30,"tr":17,"uk":29},"language_sample_size":5000,"length_histogram":{"counts":[438,994,1446,1162,1119,774,389,341,184,107,69,44,28,17,7,2,2,6,0,1,3,1,6,0,0,3,1,0,0,0,0,0,1,0,0,0,0,0,0,1],"edges":[4.0,8.675,13.35,18.025,22.7,27.375,32.05,36.725,41.4,46.074999999999996,50.75,55.425,60.099999999999994,64.775,69.45,74.125,78.8,83.475,88.14999999999999,92.825,97.5,102.175,106.85,111.52499999999999,116.19999999999999,120.875,125.55,130.225,134.9,139.575,144.25,148.92499999999998,153.6,158.275,162.95,167.625,172.29999999999998,176.975,181.65,186.325,191.0]},"near_unique":false,"sample":["C\u0153ur Complice","Queso Feta de Grecia","Kremost Naturell","Comt\u00e9 18 mois","Oneg kosher gourmet, mozzarella cheese","Sir Gauda","Cottage Cheese small curd 4% milkfat minimum","Cheddar cheese spread squeeze","Goat's cheese","Cacouyard","Gouda semi curado","Stracciatella di burrata","Cremia - Kr\u00e4uter","Brique","Schnittlauch","Raclette Classique 400 g 16 tranches Richesmonts","Tartare  au lait d'amande","Emmental dlouh\u00e1 doba zr\u00e1n\u00ed","Classic cremiger weichk\u00e4se","Fancy Shredded, Parmesan Cheese","Buko Rosa und schwarzer Pfeffer","Mozzarella di bufala campana aop","Kiri","Fromage affin\u00e9 au lait de ch\u00e8vre","Vacherin Mont-D'Or","Gouda vieux","Skyr superberry","Comt\u00e9 15 mois","Frischk\u00e4se","Spreadable cheese","Emmenthal","cream cheese","Mozzarella di Bufala","Munster","Emmental r\u00e2p\u00e9","Country Cottage Cheese","natur cream cheese","Mozzarella di bufala campana Cirillo","Mythology Organic Feta cheese","Grana Padano grattuggiato","Mozzarella sliced","Cottage Cheese","Frischk\u00e4sezubereitung","\u041a\u0440\u0435\u043c-\u0441\u0438\u0440 \u043c'\u044f\u043a\u0438\u0439, 20% \u0436\u0438\u0440\u0443","Amul Malai Paneer","RACLETTE Swiss","K\u00e4se - Gouda gerieben 2,19  48 % Fett","heese","Fromage blanc 3,3%","Norvegia lettost, 16% fett"],"top_values":[["Mozzarella",34],["Cottage cheese",29],["Cottage Cheese",27],["Mascarpone",20],["Camembert",19],["Gouda",18],["Burrata",17],["Feta",17],["Halloumi",14],["Cream Cheese",12],["Emmentaler",12],["Ricotta",12],["Brie",11],["Emmental",11],["Mozzarella di bufala campana",9],["Mozarella",9],["Philadelphia",9],["Kiri",9],["Edam",8],["Mozzarella Cheese",8]],"top_words":[["cheese",1465],["mozzarella",604],["de",507],["cottage",410],["fromage",406],["gouda",352],["di",248],["queso",239],["-",221],["feta",214],["cheddar",209],["camembert",201],["&",192],["au",188],["cream",183],["shredded",174],["le",152],["bufala",151],["light",148],["emmental",143],["lait",140],["bio",138],["raclette",135],["brie",131],["aop",124]],"vocab_skipped":null,"word_histogram":{"counts":[744,1761,1821,1229,713,427,205,108,53,30,17,13,2,6,0,7,0,0,6,1,1,0,0,0,1,0,0,0,0,1],"edges":[1.0,1.9333333333333333,2.8666666666666667,3.8,4.733333333333333,5.666666666666667,6.6,7.533333333333333,8.466666666666667,9.4,10.333333333333334,11.266666666666667,12.2,13.133333333333333,14.066666666666666,15.0,15.933333333333334,16.866666666666667,17.8,18.733333333333334,19.666666666666668,20.6,21.533333333333335,22.46666666666667,23.4,24.333333333333332,25.266666666666666,26.2,27.133333333333333,28.066666666666666,29.0]}},"kind":"text","n":7146,"n_null":0,"n_unique":6337,"null_rate":0.0,"stats":{"allcaps_rate":0.017072488105233697,"boilerplate_rate":0.0,"duplicate_rate":0.1132101875174923,"emoji_rate":0.000559753708368318,"len_max":191,"len_mean":22.96235656311223,"len_median":21.0,"len_min":4,"len_p95":44.0,"n_duplicates":809,"n_empty":0,"one_word_rate":0.10411418975650713,"readability_flesch_mean":53.613227205882374,"url_rate":0.0,"vocab_size":4732,"word_mean":3.443324937027708,"word_median":3.0}},{"alerts":[],"column":"country","extras":{"singletons":33,"top_values":[["France",1853],["Germany",907],["United States",759],["Belgium",334],["United Kingdom",333],["Spain",319],["Italy",307],["Switzerland",209],["Poland",145],["Netherlands",134],["Austria",127],["Canada",125],["Sweden",123],["Portugal",115],["Ireland",114],["Czech Republic",103],["Australia",100],["Finland",88],["Norway",75],["Bulgaria",60]]},"kind":"categorical","n":7146,"n_null":0,"n_unique":111,"null_rate":0.0,"stats":{"cardinality":111,"entropy":4.267860136714475,"entropy_ratio":0.628142318731386,"top_rate":0.2593059054016233,"top_value":"France"}},{"alerts":[],"column":"category","extras":{"singletons":0,"top_values":[["Cream Cheese",1187],["Mozzarella",702],["Soft Cheese",637],["Grated Cheese",571],["Cottage Cheese",544],["Goat Cheese",526],["Cheese Spread",473],["Gouda",456],["Hard Cheese",340],["Feta",246],["Fresh Cheese",196],["Fromage Blanc",150],["Raclette",144],["Comt\u00e9",99],["Edam",97],["Havarti",95],["Burrata",88],["Halloumi",87],["Ricotta",85],["Dairy Products",77]]},"kind":"categorical","n":7146,"n_null":0,"n_unique":32,"null_rate":0.0,"stats":{"cardinality":32,"entropy":4.097652985722821,"entropy_ratio":0.8195305971445641,"top_rate":0.16610691295829835,"top_value":"Cream Cheese"}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct value"}],"column":"value","extras":{"histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.525,0.55,0.575,0.6,0.625,0.65,0.675,0.7,0.725,0.75,0.775,0.8,0.825,0.8500000000000001,0.875,0.9,0.925,0.95,0.9750000000000001,1.0,1.025,1.05,1.0750000000000002,1.1,1.125,1.15,1.175,1.2000000000000002,1.225,1.25,1.275,1.3,1.3250000000000002,1.35,1.375,1.4,1.425,1.4500000000000002,1.475,1.5]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":7146,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":1.0,"mean":1.0,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":1.0,"q3":1.0,"skew":0.0,"std":0.0,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.name.language_counts","columns.name.stats.duplicate_rate","columns.category.n_unique","columns.category.top_values","columns.category.stats.top_rate","columns.country.n_unique","columns.country.top_values","columns.country.stats.top_rate","columns.value.n_unique","columns.value.stats.mean"],"featured_charts":[{"caption":"France dominates origin counts at ~26%, with a long tail across 111 countries.","column":"country","kind":"bar"},{"caption":"Cream Cheese, Mozzarella, and Soft Cheese lead the 32 categories \u2014 check whether top categories are over-represented.","column":"category","kind":"bar"},{"caption":"Most cheese names are short (median 21 characters, ~3 words); look for outliers up to 191 characters.","column":"name","kind":"length"},{"caption":"Language mix of names is led by English and French \u2014 confirm this matches your expected scope.","column":"name","kind":"donut"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset is a catalogue of 7,146 cheese product entries with a name, a category, a country of origin, and a constant value field. Cheeses span 32 categories and 111 countries, with France alone accounting for 25.9% of rows and Germany and the United States rounding out the top three. Category is led by Cream Cheese (1,187 rows, 16.6%), followed by Mozzarella and Soft Cheese, suggesting some categories are far more populated than others. The name column is multilingual (predominantly English and French, with notable German, Spanish, and Italian presence) and has an 11.3% duplicate rate worth investigating before any de-duplicated analysis. Note that the value column is constant at 1.0 across all rows and carries no analytical signal.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["alerts","language_counts","n","n_unique","stats.duplicate_rate","stats.n_duplicates","stats.len_mean","stats.word_mean","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Short product names for cheeses, averaging 3.4 words / 23 characters, with top tokens 'cheese' (1465), 'fromage' (406), 'queso' (239) and varieties like Mozzarella, Cottage cheese, Gouda. Language detection spans 30 codes \u2014 predominantly en (1847), fr (1029), de (552), it (389), es (251) \u2014 confirming the 'multilingual' alert. 809 duplicates (11.3%) include casing variants ('Cottage cheese' 29 vs 'Cottage Cheese' 27), and 6337 unique values out of 7146 means it is high-cardinality but not an identifier.","role":"label","scope":"column","target":"name","treatment":"Normalize case and language before grouping; consider canonicalizing to a cheese-variety taxonomy."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a country-of-origin or location categorical with 111 distinct values across 7146 rows and no nulls. The distribution is Europe-heavy and concentrated: France alone accounts for 25.9% of records, with Germany (907) and the United States (759) trailing, giving an entropy ratio of 0.63. The long tail of 100+ smaller countries means rare-category handling will matter.","role":"feature","scope":"column","target":"country","treatment":"Group rare countries into an 'Other' bucket before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical product-type field for cheese items, with 32 distinct categories across 7146 rows and no nulls. Cream Cheese leads at 16.6% (1187 rows), followed by Mozzarella and Soft Cheese, and entropy ratio 0.82 indicates a fairly even spread rather than dominance by one value. No rare-label or drift signals are present in the evidence.","role":"feature","scope":"column","target":"category","treatment":"One-hot or target-encode for modelling; cardinality of 32 is manageable."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.min","stats.max","stats.mean","stats.std","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"The column 'value' is numeric but completely constant: all 7146 rows hold the value 1.0, with zero variance and a single unique value. It carries no information for analysis or modelling.","role":"other","scope":"column","target":"value","treatment":"Drop; constant column with no signal."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":1681,"prompt_tokens":6155,"total_tokens":7836}},"language_counts":{"ca":50,"ceb":7,"cs":27,"da":9,"de":552,"el":10,"en":1847,"eo":7,"es":251,"et":13,"fi":39,"fr":1029,"he":7,"hu":7,"id":6,"it":389,"ja":12,"lt":7,"lv":7,"nl":146,"no":13,"pl":77,"pt":96,"ro":12,"ru":24,"sh":6,"sk":19,"sv":30,"tr":17,"uk":29},"meta":{"generated_at":"2026-05-01T18:05:14+00:00","mode":"full","row_count":7146,"sampled_rows":7146,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/cheese_list.json"},"notes":[],"saturn_version":"0.2.0","schema":{"category":"categorical","country":"categorical","name":"text","value":"numeric"}}
