{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"175 singleton categories"}],"column":"name","extras":{"singletons":175,"top_values":[["Bell Pepper",1],["Gypsy Pepper",1],["Purple Beauty Pepper",1],["Melrose Pepper",1],["Carmen Pepper",1],["California Wonder Pepper",1],["Peperone di Senise",1],["Fushimi Pepper",1],["Elephant Ears Pepper",1],["Habanada Pepper",1],["Tangerine Dream Pepper",1],["Chilly Chili",1],["Shishito Pepper",1],["Trinidad Perfume",1],["Banana Pepper",1],["Pepperoncini",1],["Pimento Pepper",1],["Jimmy Nardello Pepper",1],["Mariachi Pepper",1],["Santa Fe Grande Pepper",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":175,"null_rate":0.0,"stats":{"cardinality":175,"entropy":7.451211111832327,"entropy_ratio":0.9999999999999998,"top_rate":0.005714285714285714,"top_value":"Bell Pepper"}},{"alerts":[],"column":"heat","extras":{"singletons":0,"top_values":[["Medium",70],["Mild",45],["Super Hot",30],["Hot",17],["Extra Hot",13]]},"kind":"categorical","n":175,"n_null":0,"n_unique":5,"null_rate":0.0,"stats":{"cardinality":5,"entropy":2.074168237396166,"entropy_ratio":0.8932956373469373,"top_rate":0.4,"top_value":"Medium"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+10.31"},{"code":"outliers","level":"warn","message":"16.6% rows beyond 1.5 IQR"}],"column":"scoville_min","extras":{"histogram":{"counts":[164,7,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1153846.1538461538,2307692.3076923075,3461538.461538461,4615384.615384615,5769230.769230769,6923076.923076922,8076923.076923076,9230769.23076923,10384615.384615384,11538461.538461538,12692307.692307692,13846153.846153844,15000000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,50.0,0.0,0.0,100.0,100.0,0.0,500.0,500.0,700.0,0.0,1.0,100.0,100.0,100.0,500.0,500.0,500.0,0.0,500.0,500.0,0.0,1100.0,1000.0,1000.0,1000.0,1000.0,1000.0,500.0,500.0,1000.0,1000.0,1500.0,1000.0,2500.0,2500.0,500.0,500.0,1000.0,1000.0,2500.0,2500.0,2500.0,1000.0,1000.0,1000.0,4000.0,4000.0,5000.0,1000.0,2500.0,2500.0,5000.0,4000.0,2500.0,2500.0,2500.0,5000.0,5000.0,10000.0,5000.0,5000.0,10000.0,10000.0,10000.0,20000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,10000.0,10000.0,12000.0,15000.0,15000.0,15000.0,20000.0,10000.0,20000.0,20000.0,20000.0,5000.0,15000.0,30000.0,20000.0,25000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,40000.0,40000.0,30000.0,30000.0,30000.0,30000.0,40000.0,40000.0,55000.0,50000.0,50000.0,60000.0,70000.0,30000.0,50000.0,50000.0,50000.0,50000.0,70000.0,80000.0,80000.0,50000.0,50000.0,100000.0,50000.0,125000.0,125000.0,125000.0,100000.0,100000.0,100000.0,100000.0,150000.0,100000.0,125000.0,150000.0,300000.0,100000.0,350000.0,425000.0,800000.0,800000.0,800000.0,855000.0,1000000.0,1000000.0,1067286.0,800000.0,1000000.0,1000000.0,900000.0,800000.0,1000000.0,1300000.0,1000000.0,923889.0,1200000.0,1200000.0,1200000.0,1500000.0,1400000.0,1400000.0,2480000.0,2693000.0,3000000.0,15000000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":44,"null_rate":0.0,"stats":{"iqr":74000.0,"kurtosis":120.13244910165552,"max":15000000.0,"mean":289208.72571428574,"median":15000.0,"min":0.0,"n_outliers":29,"outlier_rate":0.1657142857142857,"q1":1000.0,"q3":75000.0,"skew":10.312710185840928,"std":1218458.193625058,"zero_rate":0.09714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.45"},{"code":"outliers","level":"warn","message":"24.6% rows beyond 1.5 IQR"}],"column":"scoville_max","extras":{"histogram":{"counts":[155,16,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1230769.2307692308,2461538.4615384615,3692307.692307692,4923076.923076923,6153846.153846154,7384615.384615384,8615384.615384616,9846153.846153846,11076923.076923076,12307692.307692308,13538461.538461538,14769230.769230768,16000000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,200.0,500.0,500.0,500.0,500.0,500.0,600.0,700.0,800.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1200.0,1500.0,1500.0,1500.0,2000.0,2000.0,2500.0,2500.0,2500.0,2500.0,2500.0,3000.0,3000.0,3000.0,4000.0,4000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,6000.0,6000.0,7000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,23000.0,23000.0,23000.0,25000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,32000.0,40000.0,40000.0,45000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,58000.0,60000.0,65000.0,70000.0,75000.0,80000.0,80000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,300000.0,300000.0,325000.0,325000.0,325000.0,350000.0,350000.0,350000.0,350000.0,350000.0,350000.0,400000.0,400000.0,445000.0,500000.0,577000.0,577000.0,1000000.0,1000000.0,1001304.0,1041427.0,1200000.0,1200000.0,1250000.0,1268250.0,1300000.0,1350000.0,1382118.0,1463700.0,1500000.0,1500000.0,1598227.0,1853986.0,2000000.0,2000000.0,2000000.0,2000000.0,2200000.0,2200000.0,2480000.0,2693000.0,3000000.0,16000000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":59,"null_rate":0.0,"stats":{"iqr":97250.0,"kurtosis":106.10825375947879,"max":16000000.0,"mean":384835.4971428571,"median":30000.0,"min":0.0,"n_outliers":43,"outlier_rate":0.24571428571428572,"q1":2750.0,"q3":100000.0,"skew":9.450168040806018,"std":1333100.1779862773,"zero_rate":0.05714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.79"},{"code":"outliers","level":"warn","message":"23.4% rows beyond 1.5 IQR"}],"column":"scoville_median","extras":{"histogram":{"counts":[161,10,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1192307.6923076923,2384615.3846153845,3576923.076923077,4769230.769230769,5961538.461538461,7153846.153846154,8346153.846153846,9538461.538461538,10730769.23076923,11923076.923076922,13115384.615384614,14307692.307692308,15500000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,51.0,125.0,250.0,250.0,300.0,300.0,250.0,550.0,600.0,750.0,500.0,501.0,550.0,550.0,550.0,750.0,750.0,750.0,500.0,750.0,750.0,500.0,1150.0,1250.0,1250.0,1250.0,1500.0,1500.0,1500.0,1500.0,1750.0,1750.0,2000.0,2000.0,2750.0,2750.0,2250.0,2250.0,3000.0,3000.0,3750.0,3750.0,3750.0,3000.0,3000.0,3000.0,5000.0,5000.0,6000.0,4500.0,5250.0,5250.0,6500.0,6000.0,5250.0,5250.0,6250.0,7500.0,7500.0,10000.0,7500.0,7500.0,16500.0,16500.0,16500.0,22500.0,17500.0,17500.0,17500.0,17500.0,17500.0,17500.0,20000.0,20000.0,21000.0,22500.0,22500.0,22500.0,25000.0,20000.0,25000.0,25000.0,25000.0,17500.0,22500.0,31000.0,30000.0,32500.0,37500.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,45000.0,45000.0,40000.0,40000.0,40000.0,40000.0,49000.0,50000.0,60000.0,60000.0,62500.0,70000.0,75000.0,65000.0,75000.0,75000.0,75000.0,75000.0,85000.0,90000.0,90000.0,75000.0,75000.0,200000.0,175000.0,225000.0,225000.0,225000.0,225000.0,225000.0,225000.0,225000.0,250000.0,225000.0,262500.0,275000.0,372500.0,300000.0,463500.0,501000.0,900000.0,900000.0,900652.0,948214.0,1100000.0,1100000.0,1158643.0,1034125.0,1150000.0,1175000.0,1141059.0,1131850.0,1250000.0,1400000.0,1299114.0,1388938.0,1600000.0,1600000.0,1600000.0,1750000.0,1800000.0,1800000.0,2480000.0,3180000.0,3000000.0,15500000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":80,"null_rate":0.0,"stats":{"iqr":88000.0,"kurtosis":111.46783027770147,"max":15500000.0,"mean":339804.98285714287,"median":22500.0,"min":0.0,"n_outliers":41,"outlier_rate":0.2342857142857143,"q1":2000.0,"q3":90000.0,"skew":9.794140882291954,"std":1278965.5803661651,"zero_rate":0.05714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.79"},{"code":"outliers","level":"warn","message":"23.4% rows beyond 1.5 IQR"}],"column":"jalRP","extras":{"histogram":{"counts":[161,10,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,227.10615384615386,454.2123076923077,681.3184615384616,908.4246153846154,1135.5307692307692,1362.6369230769233,1589.743076923077,1816.8492307692309,2043.9553846153847,2271.0615384615385,2498.1676923076925,2725.2738461538465,2952.38]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.05,0.05,0.06,0.06,0.05,0.1,0.11,0.14,0.1,0.1,0.1,0.1,0.1,0.14,0.14,0.14,0.1,0.14,0.14,0.1,0.22,0.24,0.24,0.24,0.29,0.29,0.29,0.29,0.33,0.33,0.38,0.38,0.52,0.52,0.43,0.43,0.57,0.57,0.71,0.71,0.71,0.57,0.57,0.57,0.95,0.95,1.14,0.86,1.0,1.0,1.24,1.14,1.0,1.0,1.19,1.43,1.43,1.9,1.4,1.42,3.14,3.14,3.14,4.29,3.33,3.33,3.33,3.33,3.33,3.33,3.81,3.81,4.0,4.29,4.29,4.29,4.76,3.81,4.76,4.76,4.76,3.33,4.29,5.9,5.71,6.19,7.14,7.62,7.62,7.62,7.62,7.62,7.62,7.62,7.62,7.62,8.57,8.57,7.62,7.62,7.61,7.62,9.33,9.52,11.43,11.43,11.9,13.33,14.29,12.38,14.29,14.29,14.29,14.29,16.19,17.14,17.14,14.29,14.29,38.1,33.33,42.86,42.86,42.86,42.86,42.86,42.86,42.86,47.62,42.86,50.0,52.38,70.95,57.14,88.29,95.43,171.43,171.43,171.55,180.61,209.52,209.5,220.69,196.98,219.05,223.81,217.34,215.59,238.1,266.6,247.45,264.56,304.76,304.76,304.76,333.33,342.86,342.86,472.38,605.71,571.0,2952.38]},"kind":"numeric","n":175,"n_null":0,"n_unique":81,"null_rate":0.0,"stats":{"iqr":16.76,"kurtosis":111.47828767143369,"max":2952.38,"mean":64.72137142857142,"median":4.29,"min":0.0,"n_outliers":41,"outlier_rate":0.2342857142857143,"q1":0.38,"q3":17.14,"skew":9.794705282442164,"std":243.60693921036346,"zero_rate":0.05714285714285714}},{"alerts":[],"column":"type","extras":{"singletons":1,"top_values":[["annuum",104],["chinense",46],["baccatum",12],["Annuum",4],["frutescens",4],["pubescens",2],["Chinense",2],["N/A",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":8,"null_rate":0.0,"stats":{"cardinality":8,"entropy":1.657216079409757,"entropy_ratio":0.5524053598032523,"top_rate":0.5942857142857143,"top_value":"annuum"}},{"alerts":[],"column":"origin","extras":{"singletons":17,"top_values":[["United States",46],["Mexico",26],["South America",11],["Peru",11],["Italy",8],["Unknown",7],["United Kingdom",7],["Trinidad",7],["Caribbean",6],["India",6],["Brazil",5],["Spain",4],["Hungary",4],["Japan",3],["Africa",3],["China",2],["Thailand",2],["Balkan Peninsula",1],["France",1],["Chile",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":34,"null_rate":0.0,"stats":{"cardinality":34,"entropy":3.9798042711983928,"entropy_ratio":0.7822768235139151,"top_rate":0.26285714285714284,"top_value":"United States"}},{"alerts":[],"column":"use","extras":{"singletons":1,"top_values":[["Culinary",141],["Ornamental",31],["Culinary, Ornamental",2],["",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":0.8097435329167687,"entropy_ratio":0.40487176645838435,"top_rate":0.8057142857142857,"top_value":"Culinary"}},{"alerts":[{"code":"long_tail","level":"info","message":"49 singleton categories"}],"column":"flavor","extras":{"singletons":49,"top_values":[["Sweet",25],["Sweet, Fruity",21],["Neutral",19],["Fruity, Sweet",6],["Bright, Sweet",4],["Sweet, Tangy",4],["Sweet, Fruity, Smoky",4],["Sweet, Fruity, Citrusy",4],["Sweet, Fruity, Earthy, Smoky",4],["Sweet, Fruity, Floral",3],["Sweet, Fruity, Citrusy, Floral",3],["Sweet, Fruity, Earthy",3],["Sweet, Tropical",3],["Bright, Grassy",3],["Sweet, Floral",2],["Sweet, Smoky",2],["Earthy",2],["Smoky, Sweet, Earthy",2],["Smoky, Earthy",2],["Sweet, Citrusy",2]]},"kind":"categorical","n":175,"n_null":0,"n_unique":73,"null_rate":0.0,"stats":{"cardinality":73,"entropy":5.23218307815636,"entropy_ratio":0.8452877829388863,"top_rate":0.14285714285714285,"top_value":"Sweet"}},{"alerts":[{"code":"long_tail","level":"info","message":"175 singleton categories"}],"column":"url","extras":{"singletons":175,"top_values":[["https://www.pepperscale.com/bell-pepper/",1],["https://www.pepperscale.com/gypsy-pepper/",1],["https://www.pepperscale.com/purple-beauty-pepper/",1],["https://www.pepperscale.com/melrose-pepper/",1],["https://www.pepperscale.com/carmen-pepper/",1],["https://www.pepperscale.com/california-wonder-pepper/",1],["https://www.pepperscale.com/peperone-di-senise/",1],["https://www.pepperscale.com/fushimi-pepper/",1],["https://www.pepperscale.com/elephant-ears-pepper/",1],["https://www.pepperscale.com/habanada-pepper/",1],["https://www.pepperscale.com/tangerine-dream-pepper/",1],["https://www.pepperscale.com/chilly-chili/",1],["https://www.pepperscale.com/shishito-pepper/",1],["https://www.pepperscale.com/trinidad-perfume/",1],["https://www.pepperscale.com/banana-pepper/",1],["https://www.pepperscale.com/pepperoncini/",1],["https://www.pepperscale.com/pimento-pepper/",1],["https://pepperscale.com/jimmy-nardello-pepper/",1],["https://www.pepperscale.com/mariachi-pepper/",1],["https://www.pepperscale.com/santa-fe-grande-pepper/",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":175,"null_rate":0.0,"stats":{"cardinality":175,"entropy":7.451211111832327,"entropy_ratio":0.9999999999999998,"top_rate":0.005714285714285714,"top_value":"https://www.pepperscale.com/bell-pepper/"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["scoville_max.stats.median","scoville_max.stats.max","scoville_max.stats.n_outliers","scoville_max.stats.outlier_rate","heat.top_values","origin.top_value","origin.stats.top_rate","type.top_value","type.stats.top_rate","flavor.top_value","flavor.stats.top_rate","use.stats.top_rate"],"featured_charts":[{"caption":"Expect a dramatically right-skewed distribution \u2014 the vast majority of peppers cluster near zero while a handful of super-hots spike toward 16 million SHU.","column":"scoville_max","kind":"histogram"},{"caption":"Look for how 'Medium' dominates at 40% of peppers, with 'Super Hot' and hotter categories making up a surprisingly large tail.","column":"heat","kind":"donut"},{"caption":"The United States (46 entries) and Mexico (26) lead by a wide margin \u2014 check how many other countries contribute only a handful of varieties each.","column":"origin","kind":"bar"},{"caption":"Capsicum annuum accounts for nearly 60% of all varieties; note the case inconsistency between 'annuum' and 'Annuum' that may need cleaning.","column":"type","kind":"bar"},{"caption":"Sweet and fruity descriptors dominate the top flavor profiles, reinforcing that most catalog entries are mild eating peppers rather than heat weapons.","column":"flavor","kind":"bar"}],"model":"anthropic:default","narrative":"This dataset catalogs 175 pepper varieties with attributes covering heat (Scoville scale min/median/max), flavor profile, botanical type, geographic origin, and culinary use. The most striking feature is the extreme right-skew in all three Scoville columns: while the median pepper sits around 15,000\u201330,000 SHU, outliers push to 15\u201316 million SHU, meaning a small cluster of 'Super Hot' varieties dwarfs the rest of the dataset. A secondary angle worth exploring is the geographic and botanical spread \u2014 the United States leads origin with 46 entries, 'annuum' dominates species type at 104 of 175, and flavor descriptions cluster heavily around 'Sweet' and 'Sweet, Fruity', suggesting most peppers in this catalog are mild and food-friendly despite the headline-grabbing extremes.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["skew","kurtosis","median","mean","max","std","iqr","q1","q3","n_outliers","outlier_rate","zero_rate"],"model":"anthropic:default","narrative":"jalRP is a non-negative numeric column with 175 rows and no nulls, likely representing a monetary amount, duration, or count that is naturally right-bounded near zero. The distribution is extremely right-skewed (skew = 9.79, kurtosis = 111.48): the median is just 4.29 while the mean is 64.72, and a maximum of 2952.38 pulls the standard deviation to 243.61 \u2014 41 rows (23.4%) are flagged as outliers. The interquartile range spans only 0.38 to 17.14, meaning 75% of values are below 17.14, yet the top end reaches nearly 3000, signalling a heavy-tailed, potentially power-law distribution.","role":"feature","scope":"column","target":"jalRP","treatment":"Log-transform (e.g., log1p) before modelling to reduce skew; investigate the 41 outliers for data-entry errors or legitimate extreme events."},{"confidence":"high","critiques":[],"evidence_keys":["max","min","mean","median","skew","kurtosis","std","n_outliers","outlier_rate","zero_rate","iqr","q1","q3"],"model":"anthropic:default","narrative":"This column represents the maximum Scoville Heat Unit (SHU) rating for chili peppers or hot sauce products \u2014 a standard measure of capsaicin-driven heat intensity. The distribution is extraordinarily right-skewed (skew=9.45, kurtosis=106.1): the median is only 30,000 SHU while the mean is 384,835 and the maximum reaches 16,000,000 \u2014 consistent with extreme outliers like pure capsaicin extracts sitting alongside mild peppers. With 43 outliers (24.6% of rows) and a standard deviation of 1,333,100 dwarfing the median, the bulk of records cluster at low heat levels while a long tail of superhot entries dominates the mean. The 5.7% zero rate likely reflects peppers with no measurable heat (e.g., bell peppers).","role":"feature","scope":"column","target":"scoville_max","treatment":"Log-transform (log1p) before modelling to compress the extreme right tail; flag zeros as a separate category if heat presence is a meaningful signal."},{"confidence":"high","critiques":[],"evidence_keys":["mean","median","max","min","skew","kurtosis","iqr","q1","q3","n_outliers","outlier_rate","zero_rate","n"],"model":"anthropic:default","narrative":"This column represents the median Scoville heat unit (SHU) rating for chili peppers or hot sauces \u2014 a standard measure of capsaicin-driven spiciness. The distribution is extraordinarily right-skewed (skew=9.79, kurtosis=111.47): the median sits at 22,500 SHU while the mean is pulled to 339,804 SHU by extreme outliers, with a maximum of 15,500,000 SHU (consistent with ultra-hot peppers like Carolina Reaper). 41 of 175 rows (23.4%) are flagged as outliers, and 5.7% of values are exactly zero, suggesting entries for non-spicy varieties (e.g., bell peppers). The IQR spans only 2,000\u201390,000 SHU, confirming that the bulk of records are mild-to-medium heat while a long tail of superhot entries distorts the scale severely.","role":"feature","scope":"column","target":"scoville_median","treatment":"Log-transform (log1p) before modelling to compress the extreme right tail; consider flagging zero-SHU entries as a separate binary indicator."},{"confidence":"high","critiques":[],"evidence_keys":["stats.skew","stats.kurtosis","stats.median","stats.mean","stats.max","stats.min","stats.n_outliers","stats.outlier_rate","stats.zero_rate","n_unique","n"],"model":"anthropic:default","narrative":"This column represents the minimum Scoville Heat Unit (SHU) value for chili peppers or hot sauces, capturing the lower bound of each item's heat range. The distribution is dramatically right-skewed (skew = 10.31, kurtosis = 120.13): the median is just 15,000 SHU while the mean is 289,208 SHU, driven by extreme outliers reaching 15,000,000 SHU (pure capsaicin territory). With 29 outliers (16.6% of rows) and only 44 unique values across 175 records, the data clusters heavily at low heat levels but has a long tail of superhot entries. The 9.7% zero rate corresponds to genuinely non-hot items (e.g., bell peppers at 0 SHU), which is domain-valid.","role":"feature","scope":"column","target":"scoville_min","treatment":"Log-transform (log1p) before modelling to compress the extreme right tail; consider pairing with scoville_max for a range-based heat feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","top_value","top_rate","entropy_ratio","alerts","top_values"],"model":"anthropic:default","narrative":"This column captures flavor descriptors for a food or beverage dataset, stored as free-form comma-separated tag strings (e.g., 'Sweet, Fruity, Smoky'). With 73 unique values across only 175 rows and an entropy ratio of 0.845, the vocabulary is highly fragmented \u2014 notably, 'Sweet' (25 occurrences) and 'Sweet, Fruity' (21) appear to be near-duplicates of ordering variants like 'Fruity, Sweet' (6), suggesting inconsistent entry order inflating cardinality artificially. The long-tail alert confirms that most combinations appear very rarely, with the top value covering only 14.3% of rows.","role":"feature","scope":"column","target":"flavor","treatment":"Split on comma, normalize order, and one-hot or multi-label encode individual flavor tokens before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","cardinality","entropy_ratio","top_rate","top_value","null_rate"],"model":"anthropic:default","narrative":"This column contains the names of individual pepper varieties \u2014 it is a human-readable label for each record in what appears to be a pepper/vegetable catalog. With 175 rows and 175 unique values, every entry is distinct (cardinality = 175, top_rate = 0.0057, i.e. 1/175), making this a perfect natural key. Entropy ratio of ~1.0 confirms near-maximum unpredictability, consistent with a unique-per-row label rather than a grouping variable.","role":"label","scope":"column","target":"name","treatment":"Treat as a unique row label or display name; drop before modelling or use as an index key \u2014 do not encode as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","cardinality","entropy_ratio","top_rate","top_value","alerts"],"model":"anthropic:default","narrative":"This column contains fully-qualified URLs pointing to individual pepper variety pages on pepperscale.com, effectively serving as a unique page identifier for each row. Cardinality is exactly 175 with 175 unique values and a null rate of 0.0, meaning every row has a distinct URL \u2014 the top_rate of 0.005714 (1/175) confirms no duplicates whatsoever. Entropy ratio of ~1.0 indicates maximum dispersion, consistent with a perfect natural key. The long_tail alert is technically triggered but is structurally expected given perfect uniqueness.","role":"identifier","scope":"column","target":"url","treatment":"Retain as a unique row key or use as a foreign key to join additional scraped metadata; do not encode or embed for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n","n_unique","cardinality","entropy_ratio","null_rate","top_values"],"model":"anthropic:default","narrative":"This column is an ordinal heat/spiciness level rating with 5 discrete categories, likely describing food or beverage intensity. 'Medium' dominates at 40% of 175 rows (top_rate 0.4), while the two hottest tiers ('Hot' and 'Extra Hot') together account for only 30 rows \u2014 a pronounced skew toward milder levels. Entropy ratio of 0.893 indicates reasonably spread distribution despite the modal imbalance. No nulls and no unexpected values are present.","role":"label","scope":"column","target":"heat","treatment":"Encode as ordered ordinal (Mild < Medium < Hot < Super Hot < Extra Hot) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","top_values","n_unique","n","null_rate"],"model":"anthropic:default","narrative":"This column records the geographic origin of individuals or items, expressed as country or regional names across 34 distinct values in 175 rows. 'United States' dominates at 26.3% (46 rows), followed by 'Mexico' (26) and several South American/Caribbean entries. Notably, the taxonomy mixes country-level specificity ('Peru', 'Italy') with broad regional labels ('South America', 'Caribbean'), and 7 rows carry the value 'Unknown', indicating inconsistent granularity in data collection.","role":"feature","scope":"column","target":"origin","treatment":"Standardize regional vs. country-level labels, encode 'Unknown' as missing, then one-hot or target-encode for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","n_unique","top_rate","top_value","null_rate","n"],"model":"anthropic:default","narrative":"This column captures Capsicum species type (botanical variety), with 8 apparent unique values across 175 records and no nulls. The dominant value 'annuum' accounts for 59.4% of rows, followed by 'chinense' (46 rows) and 'baccatum' (12 rows). A critical data quality issue exists: 'annuum' and 'Annuum' are treated as distinct values (104 vs. 4 occurrences), as are 'chinense' and 'Chinense' (46 vs. 2), indicating inconsistent capitalisation that inflates apparent cardinality. Additionally, one record holds the sentinel string 'N/A' rather than a true null.","role":"label","scope":"column","target":"type","treatment":"Normalise to lowercase, remap 'N/A' to null, then use as a categorical grouping variable or stratification key."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","n_unique","null_rate","top_values","entropy_ratio"],"model":"anthropic:default","narrative":"This column captures the primary use-case category for each record, likely plants or herbs, with four distinct values and no nulls across 175 rows. 'Culinary' dominates at 80.6% (141 of 175), making the distribution heavily skewed. One entry is an empty string rather than a true null, which effectively acts as a fifth implicit category. The dual-label value 'Culinary, Ornamental' (2 occurrences) signals inconsistent multi-value encoding that may need normalisation.","role":"label","scope":"column","target":"use","treatment":"Normalise the empty-string entry, then either one-hot encode or split multi-value strings (e.g. 'Culinary, Ornamental') into binary indicator columns before modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":3708,"prompt_tokens":10220,"total_tokens":13928}},"language_counts":{},"meta":{"generated_at":"2026-06-21T23:54:04+00:00","mode":"full","row_count":175,"sampled_rows":175,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/peppers.json"},"notes":[],"saturn_version":"0.2.0","schema":{"flavor":"categorical","heat":"categorical","jalRP":"numeric","name":"categorical","origin":"categorical","scoville_max":"numeric","scoville_median":"numeric","scoville_min":"numeric","type":"categorical","url":"categorical","use":"categorical"}}
