{"attributions":[{"component":"fastText lid.176 language identification model","license":"CC-BY-SA-3.0","note":"Language counts in this report were produced with the fastText lid.176 model, licensed CC-BY-SA-3.0. This report is a derivative work and carries the same license for those figures.","url":"https://fasttext.cc/docs/en/language-identification.html"}],"columns":[{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"}],"column":"name","extras":{"language_counts":{"__engine":"fasttext:4,804","ca":50,"ceb":7,"cs":27,"da":9,"de":552,"el":10,"en":1847,"eo":7,"es":251,"et":13,"fi":39,"fr":1029,"he":7,"hu":7,"id":6,"it":389,"ja":12,"lt":7,"lv":7,"nl":146,"no":13,"pl":77,"pt":96,"ro":12,"ru":24,"sh":6,"sk":19,"sv":30,"tr":17,"uk":29},"language_sample_size":5000,"length_histogram":{"counts":[438,994,1446,1162,1119,774,389,341,184,107,69,44,28,17,7,2,2,6,0,1,3,1,6,0,0,3,1,0,0,0,0,0,1,0,0,0,0,0,0,1],"edges":[4.0,8.675,13.35,18.025,22.7,27.375,32.05,36.725,41.4,46.074999999999996,50.75,55.425,60.099999999999994,64.775,69.45,74.125,78.8,83.475,88.14999999999999,92.825,97.5,102.175,106.85,111.52499999999999,116.19999999999999,120.875,125.55,130.225,134.9,139.575,144.25,148.92499999999998,153.6,158.275,162.95,167.625,172.29999999999998,176.975,181.65,186.325,191.0]},"near_unique":false,"sample":["C\u0153ur Complice","Queso Feta de Grecia","Kremost Naturell","Comt\u00e9 18 mois","Oneg kosher gourmet, mozzarella cheese","Sir Gauda","Cottage Cheese small curd 4% milkfat minimum","Cheddar cheese spread squeeze","Goat's cheese","Cacouyard","Gouda semi curado","Stracciatella di burrata","Cremia - Kr\u00e4uter","Brique","Schnittlauch","Raclette Classique 400 g 16 tranches Richesmonts","Tartare  au lait d'amande","Emmental dlouh\u00e1 doba zr\u00e1n\u00ed","Classic cremiger weichk\u00e4se","Fancy Shredded, Parmesan Cheese","Buko Rosa und schwarzer Pfeffer","Mozzarella di bufala campana aop","Kiri","Fromage affin\u00e9 au lait de ch\u00e8vre","Vacherin Mont-D'Or","Gouda vieux","Skyr superberry","Comt\u00e9 15 mois","Frischk\u00e4se","Spreadable cheese","Emmenthal","cream cheese","Mozzarella di Bufala","Munster","Emmental r\u00e2p\u00e9","Country Cottage Cheese","natur cream cheese","Mozzarella di bufala campana Cirillo","Mythology Organic Feta cheese","Grana Padano grattuggiato","Mozzarella sliced","Cottage Cheese","Frischk\u00e4sezubereitung","\u041a\u0440\u0435\u043c-\u0441\u0438\u0440 \u043c'\u044f\u043a\u0438\u0439, 20% \u0436\u0438\u0440\u0443","Amul Malai Paneer","RACLETTE Swiss","K\u00e4se - Gouda gerieben 2,19  48 % Fett","heese","Fromage blanc 3,3%","Norvegia lettost, 16% fett"],"top_values":[["Mozzarella",34],["Cottage cheese",29],["Cottage Cheese",27],["Mascarpone",20],["Camembert",19],["Gouda",18],["Burrata",17],["Feta",17],["Halloumi",14],["Cream Cheese",12],["Emmentaler",12],["Ricotta",12],["Brie",11],["Emmental",11],["Mozzarella di bufala campana",9],["Mozarella",9],["Philadelphia",9],["Kiri",9],["Edam",8],["Mozzarella Cheese",8]],"top_words":[["cheese",1465],["mozzarella",604],["de",507],["cottage",410],["fromage",406],["gouda",352],["di",248],["queso",239],["-",221],["feta",214],["cheddar",209],["camembert",201],["&",192],["au",188],["cream",183],["shredded",174],["le",152],["bufala",151],["light",148],["emmental",143],["lait",140],["bio",138],["raclette",135],["brie",131],["aop",124]],"vocab_skipped":null,"word_histogram":{"counts":[744,1761,1821,1229,713,427,205,108,53,30,17,13,2,6,0,7,0,0,6,1,1,0,0,0,1,0,0,0,0,1],"edges":[1.0,1.9333333333333333,2.8666666666666667,3.8,4.733333333333333,5.666666666666667,6.6,7.533333333333333,8.466666666666667,9.4,10.333333333333334,11.266666666666667,12.2,13.133333333333333,14.066666666666666,15.0,15.933333333333334,16.866666666666667,17.8,18.733333333333334,19.666666666666668,20.6,21.533333333333335,22.46666666666667,23.4,24.333333333333332,25.266666666666666,26.2,27.133333333333333,28.066666666666666,29.0]}},"kind":"text","n":7146,"n_null":0,"n_unique":6337,"null_rate":0.0,"stats":{"allcaps_rate":0.017072488105233697,"boilerplate_rate":0.0,"duplicate_rate":0.1132101875174923,"emoji_rate":0.000559753708368318,"len_max":191,"len_mean":22.96235656311223,"len_median":21.0,"len_min":4,"len_p95":44.0,"n_duplicates":809,"n_empty":0,"one_word_rate":0.10411418975650713,"readability_flesch_mean":53.613227205882374,"url_rate":0.0,"vocab_size":4732,"word_mean":3.443324937027708,"word_median":3.0}},{"alerts":[],"column":"country","extras":{"singletons":33,"top_values":[["France",1853],["Germany",907],["United States",759],["Belgium",334],["United Kingdom",333],["Spain",319],["Italy",307],["Switzerland",209],["Poland",145],["Netherlands",134],["Austria",127],["Canada",125],["Sweden",123],["Portugal",115],["Ireland",114],["Czech Republic",103],["Australia",100],["Finland",88],["Norway",75],["Bulgaria",60]]},"kind":"categorical","n":7146,"n_null":0,"n_unique":111,"null_rate":0.0,"stats":{"cardinality":111,"entropy":4.267860136714475,"entropy_ratio":0.628142318731386,"top_rate":0.2593059054016233,"top_value":"France"}},{"alerts":[],"column":"category","extras":{"singletons":0,"top_values":[["Cream Cheese",1187],["Mozzarella",702],["Soft Cheese",637],["Grated Cheese",571],["Cottage Cheese",544],["Goat Cheese",526],["Cheese Spread",473],["Gouda",456],["Hard Cheese",340],["Feta",246],["Fresh Cheese",196],["Fromage Blanc",150],["Raclette",144],["Comt\u00e9",99],["Edam",97],["Havarti",95],["Burrata",88],["Halloumi",87],["Ricotta",85],["Dairy Products",77]]},"kind":"categorical","n":7146,"n_null":0,"n_unique":32,"null_rate":0.0,"stats":{"cardinality":32,"entropy":4.097652985722821,"entropy_ratio":0.8195305971445641,"top_rate":0.16610691295829835,"top_value":"Cream Cheese"}},{"alerts":[{"code":"constant","level":"info","message":"only one distinct value"}],"column":"value","extras":{"histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.525,0.55,0.575,0.6,0.625,0.65,0.675,0.7,0.725,0.75,0.775,0.8,0.825,0.8500000000000001,0.875,0.9,0.925,0.95,0.9750000000000001,1.0,1.025,1.05,1.0750000000000002,1.1,1.125,1.15,1.175,1.2000000000000002,1.225,1.25,1.275,1.3,1.3250000000000002,1.35,1.375,1.4,1.425,1.4500000000000002,1.475,1.5]},"sample":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]},"kind":"numeric","n":7146,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"iqr":0.0,"kurtosis":0.0,"max":1.0,"mean":1.0,"median":1.0,"min":1.0,"n_outliers":0,"outlier_rate":0.0,"q1":1.0,"q3":1.0,"skew":0.0,"std":0.0,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","country.stats.top_value","country.stats.top_rate","country.n_unique","category.stats.top_value","category.stats.top_rate","category.n_unique","name.stats.duplicate_rate","name.language_counts","value.alerts"],"featured_charts":[{"caption":"Look for the sharp drop-off after France and Germany \u2014 the dataset is heavily concentrated in a handful of Western European countries.","column":"country","kind":"bar"},{"caption":"Cream Cheese, Mozzarella, and Soft Cheese together dominate the category mix, revealing which cheese types are most catalogued.","column":"category","kind":"bar"},{"caption":"The top language breakdown shows English and French names account for the majority, despite 30 languages being present overall.","column":"name","kind":"donut"},{"caption":"Most cheese names are concise (median 21 characters), but a long tail stretches to 191 characters \u2014 worth inspecting for data quality issues.","column":"name","kind":"length"}],"model":"anthropic:default","narrative":"This dataset is a multilingual catalogue of 7,146 cheese products spanning 32 categories and 111 countries of origin. The most immediately striking pattern is the geographic concentration: France alone accounts for 26% of all entries (1,853), followed by Germany and the United States, suggesting the dataset skews heavily toward Western European dairy traditions. On the category side, Cream Cheese dominates with 1,187 entries (17%), and the top 5 categories together cover over half the dataset \u2014 worth examining for potential over-representation. The 'value' column is entirely constant at 1.0 and can be safely ignored. Note also that the product names are highly multilingual (30 languages detected) with an 11% duplicate rate, indicating some cheese types are listed under multiple language variants.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","language_counts","stats.duplicate_rate","stats.n_duplicates","stats.len_mean","stats.word_median","alerts","n","n_unique"],"model":"anthropic:default","narrative":"This column contains the names of cheese products (or cheese-related food items), as evidenced by top values like 'Mozzarella', 'Cottage Cheese', and 'Gouda', and dominant words including 'cheese', 'mozzarella', 'fromage', and 'queso'. Notably, 11.3% of values are duplicates (809 out of 7146), partly due to case-inconsistent entries like 'Cottage cheese' (29) and 'Cottage Cheese' (27) being counted separately. A multilingual alert is triggered across 30 detected languages \u2014 English (1847), French (1029), German (552), Italian (389), and Spanish (251) are dominant \u2014 reflecting international product naming rather than true language mixing in a single entry. The mean name length is ~23 characters with a median of 3 words, consistent with structured product label strings rather than free text.","role":"label","scope":"column","target":"name","treatment":"Normalise case before deduplication or grouping; consider language-aware normalisation to consolidate cross-lingual synonyms (e.g. 'fromage', 'queso', 'cheese') for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["alerts","n_unique","null_rate","stats.min","stats.max","stats.std","stats.mean","n"],"model":"anthropic:default","narrative":"This column is a numeric constant: every one of its 7,146 non-null rows holds exactly the value 1.0, with zero variance, zero skew, and a single unique value. It carries no information and will contribute nothing to any model or analysis. This is likely a placeholder, a join artifact, or a weight/flag column that was never populated with real data.","role":"other","scope":"column","target":"value","treatment":"Drop immediately; zero-variance constant adds no predictive or descriptive value."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","top_value","top_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column is a product category label for what appears to be a cheese-focused retail or food dataset, with 32 distinct cheese types across 7,146 records and no nulls. The distribution is moderately uneven: 'Cream Cheese' dominates at 16.6% (1,187 rows), while the top 10 categories alone account for the vast majority of records. The entropy ratio of 0.82 suggests reasonable spread across categories but with a clear long tail beyond the top 10. No anomalies or alerts are present.","role":"label","scope":"column","target":"category","treatment":"One-hot encode or target-encode for modelling; consider grouping low-frequency tail categories if sparse classes cause issues."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","null_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column records the country of origin or residence for each record, spanning 111 distinct countries across 7,146 rows with no nulls. France dominates heavily, accounting for 25.9% of all records (1,853 rows), followed by Germany (907) and United States (759) \u2014 suggesting a strongly Europe-centric dataset, likely French-sourced. The entropy ratio of 0.628 indicates moderate distributional spread, but the long tail of 111 countries means many nations are sparsely represented beyond the top 10.","role":"feature","scope":"column","target":"country","treatment":"One-hot encode top countries and group sparse tail into an 'Other' category before modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":1552,"prompt_tokens":5096,"total_tokens":6648}},"language_counts":{"ca":50,"ceb":7,"cs":27,"da":9,"de":552,"el":10,"en":1847,"eo":7,"es":251,"et":13,"fi":39,"fr":1029,"he":7,"hu":7,"id":6,"it":389,"ja":12,"lt":7,"lv":7,"nl":146,"no":13,"pl":77,"pt":96,"ro":12,"ru":24,"sh":6,"sk":19,"sv":30,"tr":17,"uk":29},"meta":{"generated_at":"2026-06-21T23:45:17+00:00","mode":"full","row_count":7146,"sampled_rows":7146,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/cheese_list.json"},"notes":[],"saturn_version":"0.2.0","schema":{"category":"categorical","country":"categorical","name":"text","value":"numeric"}}
