{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"199 singleton categories"}],"column":"name","extras":{"singletons":199,"top_values":[["Carolina Reaper Hot Sauce",6],["Tabasco",5],["Sriracha Hot Chilli Sauce",3],["Sriracha Hot Chili Sauce",3],["Sauce de piment sriracha",3],["",3],["Ghost pepper hot sauce",3],["Carolina Reaper Sauce",3],["Carolina reaper hot sauce",3],["Carolina Reaper",3],["Salsa Picante",2],["Sriracha Sauce",2],["Sriracha",2],["Sauce sriracha",2],["Sauce de Piment Sriracha",2],["Tabasco Green Pepper Sauce",2],["Tabasco\u00ae brand pepper sauce",2],["Habanero Hot Sauce",2],["Hot Sauce Chile Habanero",2],["Ghost Pepper",2]]},"kind":"categorical","n":258,"n_null":0,"n_unique":221,"null_rate":0.0,"stats":{"cardinality":221,"entropy":7.6656516139656095,"entropy_ratio":0.9843024557005532,"top_rate":0.023255813953488372,"top_value":"Carolina Reaper Hot Sauce"}},{"alerts":[{"code":"long_tail","level":"info","message":"132 singleton categories"}],"column":"brand","extras":{"singletons":132,"top_values":[["",37],["Tabasco",12],["McIlhenny Company, Tabasco",11],["Flying Goose Brand",6],["Melinda's",5],["Lola's Fine Hot Sauce",5],["Cholula",4],["Encona",4],["El Yucateco",4],["Mrs. Renfro's",4],["Huy Fong Foods, Inc.",3],["Sauce Shop",3],["Go-Tan",2],["Vitasia",2],["Valentina",2],["Heinz",2],["sauce shop",2],["CHOLULA",2],["TABASCO",2],["Serpis",2]]},"kind":"categorical","n":258,"n_null":0,"n_unique":158,"null_rate":0.0,"stats":{"cardinality":158,"entropy":6.530367165115257,"entropy_ratio":0.8941077765436926,"top_rate":0.1434108527131783,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"99 singleton categories"}],"column":"countries","extras":{"singletons":99,"top_values":[["United States",54],["France",28],["en:us",10],["en:gb",8],["en:fr",8],["en:france",4],["en:germany",4],["United States, World",4],["en:United States",4],["United Kingdom",3],["en:United Kingdom",3],["France, United States",3],["en:Canada",3],["World",3],["France, en:morocco",2],["en:ma",2],["France,Royaume-Uni",2],["en:Germany",2],["Belgique,France",2],["Canada",2]]},"kind":"categorical","n":258,"n_null":0,"n_unique":123,"null_rate":0.0,"stats":{"cardinality":123,"entropy":5.67648469188011,"entropy_ratio":0.8176410272552587,"top_rate":0.20930232558139536,"top_value":"United States"}},{"alerts":[{"code":"long_tail","level":"info","message":"85 singleton categories"}],"column":"categories","extras":{"singletons":85,"top_values":[["",35],["Condiments, Sauces, Hot sauces, Groceries",32],["Condiments, Sauces, Groceries",23],["Condiments, Sauces, Dips, Groceries",13],["Condiments, Sauces, Sauces chili, en:groceries",9],["Condiments,Sauces",8],["Condiments, Sauces, Hot sauces",7],["Hot sauces",5],["Condiments,Sauces,Hot sauces",5],["Condiments,Sauces,Hot sauces,Groceries",5],["Condiments, Sauces, en:hot-sauces",4],["Condiments,Sauces,Sauces chili",4],["Sauces chili",3],["Condiments, Sauces, Sauces chili, Sauces sriracha, en:groceries",3],["Condiments, Sauces, Barbecue sauces, Groceries",3],["Condiments, Sauces",3],["undefined",3],["Condimentos,Salsas,Salsas de chiles,en:groceries",2],["en:hot-sauces",2],["Condiments, Sauces, Hot sauces, Sriracha sauces",2]]},"kind":"categorical","n":258,"n_null":0,"n_unique":106,"null_rate":0.0,"stats":{"cardinality":106,"entropy":5.5055810720134435,"entropy_ratio":0.8183183955867513,"top_rate":0.13565891472868216,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"203 singleton categories"}],"column":"ingredients","extras":{"singletons":203,"top_values":[["",49],["Distilled Vinegar, Red Pepper (19%), Salt.",2],["Vinaigre d'alcool, piment rouge (19%), sel.",2],["Distilled vinegar, red pepper, salt.",2],["R\u00f8d chillipepper 54%, sukker, hvitl\u00f8k, salt, vann, syre (eddiksyre, sitronsyre), smaksforsterker (mononatriumglutamat), konserveringsmiddel (natriumbenzoat).",1],["Wasser, 30% Zucker, 8% Chilischoten*, Paprika, modifizierte St\u00e4rke, Speisesalz S\u00e4uerungsmittel: Essigs\u00e4ure; Knoblauch, Zwiebeln, Verdickungsmittel: Xanthan; Konservierungsstoff: Kaliumsorbat.",1],["soybean oil [45%], chilli [25%], onion [15%], fermented soybeans [soybeans, water], flavour enhancer [e621], salt, sugar, sichuan pepper powder,",1],["Water, Chili Pepper, Vinegar, Salt, Spice, Sodium Benzoate (Preservative).",1],["Fermented Red Cayenne Peppers (35%), Spirit Vinegar, Water, Salt, Garlic Powder.",1],["Eau, piments (5%), sel, acidifiant (acide ac\u00e9tique), stabilisant (gomme xanthane), farine de riz, \u00e9pices, vinaigre de cidre, ar\u00f4mes naturels.",1],["Vineger, Louisiana type Red Chili Pepper, Salt, Thickener(Xanthan Gum), Green Pepper Natural Identical Flavor, Natural Color(E120), Antioxidant (Ascorbic Acid). May Contain Celery.",1],["chilli 61%, sugar, water, salt, garlic, flavour enhancer: monosodium glutamate, stabiliser: xanthan gum, acidity regulator: acetic acid, citric acid, preservative: potassium sorbate",1],["pickled red chilli 64% [chili, salt, acidity regulator (acetic acid)), sugar, water, garlic, salt, thickener (modified starch, xanthan gum), acidity regulator (acetic acid, citric acid), flavour enhancer (yeast extract), preservative (potassium sorbate), colour (paprika oleoresin).",1],["WATER, DRIED CHILI PEPPERS (5.0%) (ARBOL & PIQUIN), SALT, VINEGAR BLEND (SPIRIT VINEGAR CIDER VINEGAR), SPICES, STABILISER (XANTHAN GUM)",1],["Red hot pepper (87%), Garlic, Coriander, Salt, Caraway, Acidifying : E330,",1],["45% raapzaadolie, water, 20% sriracha saus (rode pepers, suiker, knoflook, zout, water, voedingszuur (azijnzuur, citroenzuur),smaakversterker (mononatriumglutamaat), conserveermiddel (natriumbenzoaat)), suiker, azijn, mosterd (water, azijn, MOSTERDZAAD,suiker, zout), zout, gemodificeerde zetmelen, voedingszuur (melkzuur), HEEL EIPOEDER, conserveermiddelen (kaliumsorbaat,natriumbenzoaat), verdikkingsmiddel (xanthaangom), antioxidant (calcium-dinatrium-EDTA).",1],["Chilis, Zucker, Knoblauch, Salz, Essigs\u00e4ure E260, Konservierungsmittel Kaliumsorbat E202, Konservierungsmittel Natriumbisulfit E222, Xanthan E415.",1],["water, 32% piri-piri pepper, salt, acidity regulators: acetic acid, lactic acid, citric acid, wine vinegar (contains sulphites), spices, thickener: xanthan gum, paprika extract, preservatives: sodium benzoate, potassium sorbate,",1],["Chili (83.23%), Sugar, Salt, Garlic (3.60%), Acetic Acid, Potassium Sorbate and Sodium Bisulfite as preservatives, Xanthan Gum. CONTAINS SULPHITE (SODIUM BISULFITE) INGR",1],["Chili 70%, Zucker, Wasser, Salz, Sauerungsmittel: Essigs\u00e4ure, Citronens\u00e4ure; Verdickungsmittel: Xanthan; Geschmacksverst\u00e4rker Mononatriumglutamat; Konservierungsstoff Kaliumsorbat",1]]},"kind":"categorical","n":258,"n_null":0,"n_unique":207,"null_rate":0.0,"stats":{"cardinality":207,"entropy":6.921611819912999,"entropy_ratio":0.8996716129044801,"top_rate":0.18992248062015504,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"62 singleton categories"}],"column":"labels","extras":{"singletons":62,"top_values":[["",145],["No gluten",9],["No GMOs, Non GMO project",9],["Sans gluten",5],["Halal",4],["en:vegan",4],["No GMOs, Non GMO project, en:no-gluten",3],["Point Vert",3],["Vegetarian, Vegan, Green Dot",2],["Triman",2],["No gluten, en:vegan",2],["Punto Verde",2],["Sans OGM,en:Non GMO project",2],["en:halal",2],["en:no-gluten",2],["Sin gluten,Punto Verde",1],["Vegetarian, Vegan, European Vegetarian Union, European Vegetarian Union Vegan, Nutriscore, Rainforest Alliance, en:green-dot",1],["Vegetarian",1],["Thai quality label, Halal, Natural colorings, Thailand Diversity & Refinement, The Central Islamic Committee of Thailand",1],["No gluten, No added MSG",1]]},"kind":"categorical","n":258,"n_null":0,"n_unique":77,"null_rate":0.0,"stats":{"cardinality":77,"entropy":3.5567122028771645,"entropy_ratio":0.5675496013436535,"top_rate":0.562015503875969,"top_value":""}},{"alerts":[{"code":"long_tail","level":"info","message":"258 singleton categories"}],"column":"url","extras":{"singletons":258,"top_values":[["https://world.openfoodfacts.org/product/8710605030051",1],["https://world.openfoodfacts.org/product/20170196",1],["https://world.openfoodfacts.org/product/6921804700269",1],["https://world.openfoodfacts.org/product/0097339000054",1],["https://world.openfoodfacts.org/product/0041500888125",1],["https://world.openfoodfacts.org/product/3166296552214",1],["https://world.openfoodfacts.org/product/6221033171107",1],["https://world.openfoodfacts.org/product/8853662056029",1],["https://world.openfoodfacts.org/product/5020580016999",1],["https://world.openfoodfacts.org/product/0049733000215",1],["https://world.openfoodfacts.org/product/6194049100044",1],["https://world.openfoodfacts.org/product/8710605030044",1],["https://world.openfoodfacts.org/product/0024463061095",1],["https://world.openfoodfacts.org/product/20026752",1],["https://world.openfoodfacts.org/product/0024463061163",1],["https://world.openfoodfacts.org/product/8853662056067",1],["https://world.openfoodfacts.org/product/0702382999100",1],["https://world.openfoodfacts.org/product/9556041131063",1],["https://world.openfoodfacts.org/product/0016229912437",1],["https://world.openfoodfacts.org/product/0633148100624",1]]},"kind":"categorical","n":258,"n_null":0,"n_unique":258,"null_rate":0.0,"stats":{"cardinality":258,"entropy":8.011227255423252,"entropy_ratio":0.9999999999999998,"top_rate":0.003875968992248062,"top_value":"https://world.openfoodfacts.org/product/8710605030051"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"source","extras":{"singletons":0,"top_values":[["OpenFoodFacts",258]]},"kind":"categorical","n":258,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"OpenFoodFacts"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"type","extras":{"singletons":0,"top_values":[["hot_sauce_product",258]]},"kind":"categorical","n":258,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"hot_sauce_product"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["brand","countries","labels","categories","name","source","type"],"featured_charts":[{"caption":"Top brands are Tabasco-related, but the long tail of 158 brands and 37 blanks dominates the field.","column":"brand","kind":"bar"},{"caption":"United States and France lead, but watch for duplicate encodings like 'en:us' that need normalization.","column":"countries","kind":"bar"},{"caption":"Over half the rows have no label at all \u2014 non-blank tags like 'No gluten' and 'Non GMO project' are rare.","column":"labels","kind":"bar"},{"caption":"Most products cluster into a few Condiments/Sauces/Hot sauces variants with inconsistent delimiters.","column":"categories","kind":"bar"},{"caption":"Product names are nearly unique (221 of 258); only a few staples like 'Carolina Reaper Hot Sauce' and 'Tabasco' repeat.","column":"name","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 258 hot sauce products sourced entirely from OpenFoodFacts, with 9 categorical columns covering brand, category, country, ingredients, labels, name, and URL. Brands are highly fragmented across 158 unique values, with Tabasco (12) and McIlhenny Company, Tabasco (11) leading but no dominant player \u2014 and 37 records have a blank brand worth investigating. Geographically, the United States (54) and France (28) account for the largest shares of the 123 country values, though inconsistent encoding (e.g., 'en:us' vs 'United States') suggests a data-cleaning task. The labels column is sparse: 145 of 258 rows are blank, so dietary tags like 'No gluten' or 'Non GMO project' apply to only a small minority. Note that source and type are constant (OpenFoodFacts / hot_sauce_product) and carry no analytical signal.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is a product name field for hot sauces, with 221 unique values across 258 rows and near-maximal entropy ratio of 0.984. The top value 'Carolina Reaper Hot Sauce' only covers 2.3% of rows, and casing/spelling variants ('Carolina Reaper Hot Sauce' vs 'Carolina reaper hot sauce', 'Sriracha Hot Chilli Sauce' vs 'Sriracha Hot Chili Sauce') plus a French entry and 3 empty strings indicate inconsistent normalization despite a 0.0 null rate.","role":"label","scope":"column","target":"name","treatment":"normalize casing and spelling variants (and treat empty strings as missing) before grouping or joining."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical brand label for what appears to be a hot sauce catalogue, with 158 distinct brands across 258 rows and very high entropy ratio (0.894) indicating a long tail. The most common value is the empty string at 37 occurrences (14.3% top rate), meaning missing-as-blank dominates over real brands like Tabasco (12) and McIlhenny Company, Tabasco (11). Note also that 'Tabasco' and 'McIlhenny Company, Tabasco' likely refer to the same maker but appear as separate categories, suggesting inconsistent normalisation.","role":"feature","scope":"column","target":"brand","treatment":"Replace empty strings with explicit nulls, normalise brand aliases (e.g. Tabasco vs McIlhenny), then group rare brands into 'Other' before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This is a country-of-origin or sale label for 258 records, with 123 distinct values and no nulls. The encoding is inconsistent: plain names ('United States', 54) coexist with Open Food Facts-style tag prefixes ('en:us', 10; 'en:United States', 4) and multi-country strings ('United States, World'), so the same country appears under several spellings. High entropy ratio (0.82) and a long tail confirm the values are fragmented well beyond the 20.9% top rate.","role":"feature","scope":"column","target":"countries","treatment":"Normalize to ISO country codes (strip 'en:' prefixes, split comma lists) before grouping or encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Comma-delimited product category tags, dominated by condiment/sauce/hot-sauce hierarchies. Cardinality is high (106 unique across 258 rows, entropy ratio 0.82) and the most common value is the empty string at 13.6% (35 rows), indicating missing labels encoded as blanks rather than nulls. Near-duplicate variants differ only by spacing, casing, or 'en:' prefixes (e.g., 'Condiments,Sauces' vs 'Condiments, Sauces, Groceries'), so raw cardinality overstates the true taxonomy.","role":"feature","scope":"column","target":"categories","treatment":"Normalise delimiters/casing, treat empty strings as missing, then split into a multi-hot tag encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Free-text ingredient lists for what appears to be hot-sauce or chili products, with 207 distinct strings across 258 rows and entropy ratio 0.90 indicating near-unique values. The dominant 'value' is an empty string at 49 rows (19% top_rate), so roughly a fifth of records have no ingredients recorded. The remaining entries mix multiple languages (English, French, Norwegian, German) and formatting conventions, so direct categorical use is not viable.","role":"free_text","scope":"column","target":"ingredients","treatment":"Treat empty strings as missing, then tokenize/normalize across languages and extract ingredient features before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Free-form product label tags (dietary, certification, packaging) with 77 distinct values across 258 rows. Over half the rows (56.2%) carry an empty string rather than a true null, so null_rate=0 is misleading. Values mix languages (English 'No gluten' vs French 'Sans gluten') and formats (raw text vs Open Food Facts taxonomy codes like 'en:vegan'), and many cells concatenate multiple labels with commas.","role":"feature","scope":"column","target":"labels","treatment":"Treat empty strings as missing, split on commas, normalise language/taxonomy variants, then multi-hot encode."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds Open Food Facts product URLs, one per row, with the trailing path segment being the product barcode. Every one of the 258 values is unique (entropy_ratio 1.0, top_rate 0.0039), so it functions as a row identifier rather than a feature.","role":"identifier","scope":"column","target":"url","treatment":"Drop from modelling; keep as a lookup link or join key on the embedded barcode."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the data provenance, with every one of the 258 rows tagged 'OpenFoodFacts'. Cardinality is 1 and entropy is 0, so it carries no information for modelling and simply documents that the entire slice came from a single source.","role":"metadata","scope":"column","target":"source","treatment":"Drop before modelling; retain only as dataset-level provenance."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column is a constant categorical tag identifying every row as 'hot_sauce_product', appearing in all 258 records with no nulls. Cardinality is 1 and entropy is 0, so it carries no discriminative information. It likely served as a type marker from an ingestion pipeline rather than a usable feature.","role":"metadata","scope":"column","target":"type","treatment":"Drop before modelling; single constant value provides no signal."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":3123,"prompt_tokens":12011,"total_tokens":15134}},"language_counts":{},"meta":{"generated_at":"2026-05-01T17:04:18+00:00","mode":"full","row_count":258,"sampled_rows":258,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/hot_sauces.json"},"notes":[],"saturn_version":"0.2.0","schema":{"brand":"categorical","categories":"categorical","countries":"categorical","ingredients":"categorical","labels":"categorical","name":"categorical","source":"categorical","type":"categorical","url":"categorical"}}
