{"columns":[{"alerts":[{"code":"long_tail","level":"info","message":"175 singleton categories"}],"column":"name","extras":{"singletons":175,"top_values":[["Bell Pepper",1],["Gypsy Pepper",1],["Purple Beauty Pepper",1],["Melrose Pepper",1],["Carmen Pepper",1],["California Wonder Pepper",1],["Peperone di Senise",1],["Fushimi Pepper",1],["Elephant Ears Pepper",1],["Habanada Pepper",1],["Tangerine Dream Pepper",1],["Chilly Chili",1],["Shishito Pepper",1],["Trinidad Perfume",1],["Banana Pepper",1],["Pepperoncini",1],["Pimento Pepper",1],["Jimmy Nardello Pepper",1],["Mariachi Pepper",1],["Santa Fe Grande Pepper",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":175,"null_rate":0.0,"stats":{"cardinality":175,"entropy":7.451211111832327,"entropy_ratio":0.9999999999999998,"top_rate":0.005714285714285714,"top_value":"Bell Pepper"}},{"alerts":[],"column":"heat","extras":{"singletons":0,"top_values":[["Medium",70],["Mild",45],["Super Hot",30],["Hot",17],["Extra Hot",13]]},"kind":"categorical","n":175,"n_null":0,"n_unique":5,"null_rate":0.0,"stats":{"cardinality":5,"entropy":2.074168237396166,"entropy_ratio":0.8932956373469373,"top_rate":0.4,"top_value":"Medium"}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+10.31"},{"code":"outliers","level":"warn","message":"16.6% rows beyond 1.5 IQR"}],"column":"scoville_min","extras":{"histogram":{"counts":[164,7,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1153846.1538461538,2307692.3076923075,3461538.461538461,4615384.615384615,5769230.769230769,6923076.923076922,8076923.076923076,9230769.23076923,10384615.384615384,11538461.538461538,12692307.692307692,13846153.846153844,15000000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,50.0,0.0,0.0,100.0,100.0,0.0,500.0,500.0,700.0,0.0,1.0,100.0,100.0,100.0,500.0,500.0,500.0,0.0,500.0,500.0,0.0,1100.0,1000.0,1000.0,1000.0,1000.0,1000.0,500.0,500.0,1000.0,1000.0,1500.0,1000.0,2500.0,2500.0,500.0,500.0,1000.0,1000.0,2500.0,2500.0,2500.0,1000.0,1000.0,1000.0,4000.0,4000.0,5000.0,1000.0,2500.0,2500.0,5000.0,4000.0,2500.0,2500.0,2500.0,5000.0,5000.0,10000.0,5000.0,5000.0,10000.0,10000.0,10000.0,20000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,10000.0,10000.0,12000.0,15000.0,15000.0,15000.0,20000.0,10000.0,20000.0,20000.0,20000.0,5000.0,15000.0,30000.0,20000.0,25000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,40000.0,40000.0,30000.0,30000.0,30000.0,30000.0,40000.0,40000.0,55000.0,50000.0,50000.0,60000.0,70000.0,30000.0,50000.0,50000.0,50000.0,50000.0,70000.0,80000.0,80000.0,50000.0,50000.0,100000.0,50000.0,125000.0,125000.0,125000.0,100000.0,100000.0,100000.0,100000.0,150000.0,100000.0,125000.0,150000.0,300000.0,100000.0,350000.0,425000.0,800000.0,800000.0,800000.0,855000.0,1000000.0,1000000.0,1067286.0,800000.0,1000000.0,1000000.0,900000.0,800000.0,1000000.0,1300000.0,1000000.0,923889.0,1200000.0,1200000.0,1200000.0,1500000.0,1400000.0,1400000.0,2480000.0,2693000.0,3000000.0,15000000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":44,"null_rate":0.0,"stats":{"iqr":74000.0,"kurtosis":120.13244910165552,"max":15000000.0,"mean":289208.72571428574,"median":15000.0,"min":0.0,"n_outliers":29,"outlier_rate":0.1657142857142857,"q1":1000.0,"q3":75000.0,"skew":10.312710185840928,"std":1218458.193625058,"zero_rate":0.09714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.45"},{"code":"outliers","level":"warn","message":"24.6% rows beyond 1.5 IQR"}],"column":"scoville_max","extras":{"histogram":{"counts":[155,16,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1230769.2307692308,2461538.4615384615,3692307.692307692,4923076.923076923,6153846.153846154,7384615.384615384,8615384.615384616,9846153.846153846,11076923.076923076,12307692.307692308,13538461.538461538,14769230.769230768,16000000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,200.0,500.0,500.0,500.0,500.0,500.0,600.0,700.0,800.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1200.0,1500.0,1500.0,1500.0,2000.0,2000.0,2500.0,2500.0,2500.0,2500.0,2500.0,3000.0,3000.0,3000.0,4000.0,4000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,6000.0,6000.0,7000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,8000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,23000.0,23000.0,23000.0,25000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,32000.0,40000.0,40000.0,45000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,58000.0,60000.0,65000.0,70000.0,75000.0,80000.0,80000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,300000.0,300000.0,325000.0,325000.0,325000.0,350000.0,350000.0,350000.0,350000.0,350000.0,350000.0,400000.0,400000.0,445000.0,500000.0,577000.0,577000.0,1000000.0,1000000.0,1001304.0,1041427.0,1200000.0,1200000.0,1250000.0,1268250.0,1300000.0,1350000.0,1382118.0,1463700.0,1500000.0,1500000.0,1598227.0,1853986.0,2000000.0,2000000.0,2000000.0,2000000.0,2200000.0,2200000.0,2480000.0,2693000.0,3000000.0,16000000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":59,"null_rate":0.0,"stats":{"iqr":97250.0,"kurtosis":106.10825375947879,"max":16000000.0,"mean":384835.4971428571,"median":30000.0,"min":0.0,"n_outliers":43,"outlier_rate":0.24571428571428572,"q1":2750.0,"q3":100000.0,"skew":9.450168040806018,"std":1333100.1779862773,"zero_rate":0.05714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.79"},{"code":"outliers","level":"warn","message":"23.4% rows beyond 1.5 IQR"}],"column":"scoville_median","extras":{"histogram":{"counts":[161,10,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,1192307.6923076923,2384615.3846153845,3576923.076923077,4769230.769230769,5961538.461538461,7153846.153846154,8346153.846153846,9538461.538461538,10730769.23076923,11923076.923076922,13115384.615384614,14307692.307692308,15500000.0]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,51.0,125.0,250.0,250.0,300.0,300.0,250.0,550.0,600.0,750.0,500.0,501.0,550.0,550.0,550.0,750.0,750.0,750.0,500.0,750.0,750.0,500.0,1150.0,1250.0,1250.0,1250.0,1500.0,1500.0,1500.0,1500.0,1750.0,1750.0,2000.0,2000.0,2750.0,2750.0,2250.0,2250.0,3000.0,3000.0,3750.0,3750.0,3750.0,3000.0,3000.0,3000.0,5000.0,5000.0,6000.0,4500.0,5250.0,5250.0,6500.0,6000.0,5250.0,5250.0,6250.0,7500.0,7500.0,10000.0,7500.0,7500.0,16500.0,16500.0,16500.0,22500.0,17500.0,17500.0,17500.0,17500.0,17500.0,17500.0,20000.0,20000.0,21000.0,22500.0,22500.0,22500.0,25000.0,20000.0,25000.0,25000.0,25000.0,17500.0,22500.0,31000.0,30000.0,32500.0,37500.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,40000.0,45000.0,45000.0,40000.0,40000.0,40000.0,40000.0,49000.0,50000.0,60000.0,60000.0,62500.0,70000.0,75000.0,65000.0,75000.0,75000.0,75000.0,75000.0,85000.0,90000.0,90000.0,75000.0,75000.0,200000.0,175000.0,225000.0,225000.0,225000.0,225000.0,225000.0,225000.0,225000.0,250000.0,225000.0,262500.0,275000.0,372500.0,300000.0,463500.0,501000.0,900000.0,900000.0,900652.0,948214.0,1100000.0,1100000.0,1158643.0,1034125.0,1150000.0,1175000.0,1141059.0,1131850.0,1250000.0,1400000.0,1299114.0,1388938.0,1600000.0,1600000.0,1600000.0,1750000.0,1800000.0,1800000.0,2480000.0,3180000.0,3000000.0,15500000.0]},"kind":"numeric","n":175,"n_null":0,"n_unique":80,"null_rate":0.0,"stats":{"iqr":88000.0,"kurtosis":111.46783027770147,"max":15500000.0,"mean":339804.98285714287,"median":22500.0,"min":0.0,"n_outliers":41,"outlier_rate":0.2342857142857143,"q1":2000.0,"q3":90000.0,"skew":9.794140882291954,"std":1278965.5803661651,"zero_rate":0.05714285714285714}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+9.79"},{"code":"outliers","level":"warn","message":"23.4% rows beyond 1.5 IQR"}],"column":"jalRP","extras":{"histogram":{"counts":[161,10,3,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,227.10615384615386,454.2123076923077,681.3184615384616,908.4246153846154,1135.5307692307692,1362.6369230769233,1589.743076923077,1816.8492307692309,2043.9553846153847,2271.0615384615385,2498.1676923076925,2725.2738461538465,2952.38]},"sample":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.05,0.05,0.06,0.06,0.05,0.1,0.11,0.14,0.1,0.1,0.1,0.1,0.1,0.14,0.14,0.14,0.1,0.14,0.14,0.1,0.22,0.24,0.24,0.24,0.29,0.29,0.29,0.29,0.33,0.33,0.38,0.38,0.52,0.52,0.43,0.43,0.57,0.57,0.71,0.71,0.71,0.57,0.57,0.57,0.95,0.95,1.14,0.86,1.0,1.0,1.24,1.14,1.0,1.0,1.19,1.43,1.43,1.9,1.4,1.42,3.14,3.14,3.14,4.29,3.33,3.33,3.33,3.33,3.33,3.33,3.81,3.81,4.0,4.29,4.29,4.29,4.76,3.81,4.76,4.76,4.76,3.33,4.29,5.9,5.71,6.19,7.14,7.62,7.62,7.62,7.62,7.62,7.62,7.62,7.62,7.62,8.57,8.57,7.62,7.62,7.61,7.62,9.33,9.52,11.43,11.43,11.9,13.33,14.29,12.38,14.29,14.29,14.29,14.29,16.19,17.14,17.14,14.29,14.29,38.1,33.33,42.86,42.86,42.86,42.86,42.86,42.86,42.86,47.62,42.86,50.0,52.38,70.95,57.14,88.29,95.43,171.43,171.43,171.55,180.61,209.52,209.5,220.69,196.98,219.05,223.81,217.34,215.59,238.1,266.6,247.45,264.56,304.76,304.76,304.76,333.33,342.86,342.86,472.38,605.71,571.0,2952.38]},"kind":"numeric","n":175,"n_null":0,"n_unique":81,"null_rate":0.0,"stats":{"iqr":16.76,"kurtosis":111.47828767143369,"max":2952.38,"mean":64.72137142857142,"median":4.29,"min":0.0,"n_outliers":41,"outlier_rate":0.2342857142857143,"q1":0.38,"q3":17.14,"skew":9.794705282442164,"std":243.60693921036346,"zero_rate":0.05714285714285714}},{"alerts":[],"column":"type","extras":{"singletons":1,"top_values":[["annuum",104],["chinense",46],["baccatum",12],["Annuum",4],["frutescens",4],["pubescens",2],["Chinense",2],["N/A",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":8,"null_rate":0.0,"stats":{"cardinality":8,"entropy":1.657216079409757,"entropy_ratio":0.5524053598032523,"top_rate":0.5942857142857143,"top_value":"annuum"}},{"alerts":[],"column":"origin","extras":{"singletons":17,"top_values":[["United States",46],["Mexico",26],["South America",11],["Peru",11],["Italy",8],["Unknown",7],["United Kingdom",7],["Trinidad",7],["Caribbean",6],["India",6],["Brazil",5],["Spain",4],["Hungary",4],["Japan",3],["Africa",3],["China",2],["Thailand",2],["Balkan Peninsula",1],["France",1],["Chile",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":34,"null_rate":0.0,"stats":{"cardinality":34,"entropy":3.9798042711983928,"entropy_ratio":0.7822768235139151,"top_rate":0.26285714285714284,"top_value":"United States"}},{"alerts":[],"column":"use","extras":{"singletons":1,"top_values":[["Culinary",141],["Ornamental",31],["Culinary, Ornamental",2],["",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":0.8097435329167687,"entropy_ratio":0.40487176645838435,"top_rate":0.8057142857142857,"top_value":"Culinary"}},{"alerts":[{"code":"long_tail","level":"info","message":"49 singleton categories"}],"column":"flavor","extras":{"singletons":49,"top_values":[["Sweet",25],["Sweet, Fruity",21],["Neutral",19],["Fruity, Sweet",6],["Bright, Sweet",4],["Sweet, Tangy",4],["Sweet, Fruity, Smoky",4],["Sweet, Fruity, Citrusy",4],["Sweet, Fruity, Earthy, Smoky",4],["Sweet, Fruity, Floral",3],["Sweet, Fruity, Citrusy, Floral",3],["Sweet, Fruity, Earthy",3],["Sweet, Tropical",3],["Bright, Grassy",3],["Sweet, Floral",2],["Sweet, Smoky",2],["Earthy",2],["Smoky, Sweet, Earthy",2],["Smoky, Earthy",2],["Sweet, Citrusy",2]]},"kind":"categorical","n":175,"n_null":0,"n_unique":73,"null_rate":0.0,"stats":{"cardinality":73,"entropy":5.23218307815636,"entropy_ratio":0.8452877829388863,"top_rate":0.14285714285714285,"top_value":"Sweet"}},{"alerts":[{"code":"long_tail","level":"info","message":"175 singleton categories"}],"column":"url","extras":{"singletons":175,"top_values":[["https://www.pepperscale.com/bell-pepper/",1],["https://www.pepperscale.com/gypsy-pepper/",1],["https://www.pepperscale.com/purple-beauty-pepper/",1],["https://www.pepperscale.com/melrose-pepper/",1],["https://www.pepperscale.com/carmen-pepper/",1],["https://www.pepperscale.com/california-wonder-pepper/",1],["https://www.pepperscale.com/peperone-di-senise/",1],["https://www.pepperscale.com/fushimi-pepper/",1],["https://www.pepperscale.com/elephant-ears-pepper/",1],["https://www.pepperscale.com/habanada-pepper/",1],["https://www.pepperscale.com/tangerine-dream-pepper/",1],["https://www.pepperscale.com/chilly-chili/",1],["https://www.pepperscale.com/shishito-pepper/",1],["https://www.pepperscale.com/trinidad-perfume/",1],["https://www.pepperscale.com/banana-pepper/",1],["https://www.pepperscale.com/pepperoncini/",1],["https://www.pepperscale.com/pimento-pepper/",1],["https://pepperscale.com/jimmy-nardello-pepper/",1],["https://www.pepperscale.com/mariachi-pepper/",1],["https://www.pepperscale.com/santa-fe-grande-pepper/",1]]},"kind":"categorical","n":175,"n_null":0,"n_unique":175,"null_rate":0.0,"stats":{"cardinality":175,"entropy":7.451211111832327,"entropy_ratio":0.9999999999999998,"top_rate":0.005714285714285714,"top_value":"https://www.pepperscale.com/bell-pepper/"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["scoville_max","scoville_median","jalRP","heat","use","origin","type","flavor"],"featured_charts":[{"caption":"Heavy right tail \u2014 most peppers sit under 100k SHU but a few extreme varieties exceed a million.","column":"scoville_max","kind":"histogram"},{"caption":"Medium dominates at 40%; Extra Hot and Hot are the smallest buckets.","column":"heat","kind":"bar"},{"caption":"Annuum and chinense cover most peppers, but watch for duplicate casing ('Annuum', 'Chinense') needing cleanup.","column":"type","kind":"bar"},{"caption":"About 80% are culinary, with ornamental as the only meaningful secondary use.","column":"use","kind":"donut"},{"caption":"United States and Mexico together account for over 40% of varieties; the rest is a long tail of 30+ regions.","column":"origin","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 175 pepper varieties with 11 fields covering name, origin, flavor, heat category, biological type, intended use, and Scoville heat measurements (min, median, max, plus a jalape\u00f1o-relative score). The Scoville and jalRP numeric columns are extremely right-skewed (skew ~9-10, kurtosis >100) with max scoville_max reaching 16,000,000 versus a median of just 30,000 \u2014 a handful of super-hot peppers dominate the tail and 24% of rows flag as outliers. On the categorical side, 'Medium' heat accounts for 40% of peppers and 'Culinary' use covers 80%, while origin leans heavily toward the United States (26%) and Mexico (15%). Worth a closer look first: the Scoville distribution (consider a log scale) and the type column, which has casing inconsistencies ('annuum' vs 'Annuum', 'chinense' vs 'Chinense') that should be cleaned before any grouping.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","cardinality","entropy_ratio","top_rate","top_value","top_values","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"The `name` column holds 175 unique strings across 175 rows (cardinality 175, entropy_ratio ~1.0), making it a perfect per-row identifier. Sample values like \"Bell Pepper\", \"Gypsy Pepper\", and \"Peperone di Senise\" suggest this is a catalog of pepper varieties rather than a categorical feature. With every value occurring exactly once (top_rate 0.0057), there is no useful frequency signal to model on.","role":"identifier","scope":"column","target":"name","treatment":"Use as a row label or join key; drop from feature matrix, near-unique."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical heat/spice level rating with 5 ordinal tiers and no nulls across 175 rows. Medium dominates at 40% (70 rows), followed by Mild (45); the upper tiers Hot (17) and Extra Hot (13) are the rarest, while Super Hot (30) is oddly more common than Hot, breaking the expected monotonic decline up the heat scale.","role":"feature","scope":"column","target":"heat","treatment":"Encode as an ordered categorical (Mild < Medium < Hot < Super Hot < Extra Hot) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.std","stats.skew","stats.kurtosis","stats.iqr","stats.q1","stats.q3","stats.n_outliers","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric heat ratings (Scoville minimum) for 175 entries spanning 0 to 15,000,000 with a median of 15,000 \u2014 classic chili pepper data. Distribution is brutally right-skewed (skew 10.31, kurtosis 120.13) with mean 289,208 dwarfing the median, and 29 outliers (16.6% rate) plus 9.7% zeros. The std of 1,218,458 against an IQR of just 74,000 confirms a long, heavy tail.","role":"feature","scope":"column","target":"scoville_min","treatment":"Apply log1p transform before any modelling to tame the extreme skew and outliers."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.iqr","stats.skew","stats.kurtosis","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Maximum Scoville heat ratings for 175 peppers, ranging from 0 to 16,000,000 with a median of 30,000 but a mean of 384,835. Distribution is extremely right-skewed (skew 9.45, kurtosis 106) with 24.6% of values flagged as outliers and 5.7% zeros. The IQR (2,750-100,000) is dwarfed by the max, consistent with a few extreme superhot varieties dominating the tail.","role":"feature","scope":"column","target":"scoville_max","treatment":"log1p-transform before modelling to tame the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.median","stats.mean","stats.max","stats.min","stats.skew","stats.kurtosis","stats.std","stats.q1","stats.q3","stats.iqr","stats.n_outliers","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric column capturing the median Scoville heat rating across 175 entries with no nulls and 80 unique values. The distribution is extremely right-skewed (skew 9.79, kurtosis 111.5): the median is 22,500 while the mean is 339,805 and the max reaches 15,500,000, with 41 outliers (23.4%) and 5.7% zeros. The IQR (2,000 to 90,000) is tiny relative to the std of 1,278,965, confirming a heavy upper tail.","role":"feature","scope":"column","target":"scoville_median","treatment":"Apply a log1p transform before modelling to compress the heavy right tail."},{"confidence":"high","critiques":[],"evidence_keys":["stats.median","stats.q3","stats.max","stats.mean","stats.skew","stats.kurtosis","stats.outlier_rate","stats.zero_rate","n","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"Numeric feature 'jalRP' is extremely right-skewed: the median is 4.29 with Q3 at 17.14, yet the max reaches 2952.38 and the mean (64.72) sits far above the median. Skew of 9.79 and kurtosis of 111.48 confirm a heavy tail, and 23.4% of values flag as outliers with 5.7% exact zeros. Only 81 unique values across 175 rows suggests repeated discrete magnitudes rather than a smooth continuum.","role":"feature","scope":"column","target":"jalRP","treatment":"log1p-transform (or winsorize) before modelling to tame the heavy tail."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the Capsicum species (type), dominated by 'annuum' at 59.4% of 175 rows with 'chinense' second at 46. Watch out for case-inconsistent duplicates ('Annuum' 4, 'Chinense' 2 alongside their lowercase forms) and a literal 'N/A' string that isn't being counted as null (null_rate 0.0).","role":"label","scope":"column","target":"type","treatment":"Lowercase-normalize and convert 'N/A' to null before using as a categorical label."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical origin/country field with 34 distinct values across 175 rows and no nulls. Distribution is moderately concentrated: United States leads at 26.3% (46 rows), followed by Mexico (26) and South America (11), with entropy ratio 0.78 indicating fairly broad spread across the long tail. Notable quirks include a mix of country-level (United States, Italy, India) and region-level (South America, Caribbean) labels, plus 7 explicit 'Unknown' entries.","role":"feature","scope":"column","target":"origin","treatment":"Normalise country-vs-region granularity and treat 'Unknown' as missing before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a low-cardinality categorical describing the use of an item, with 4 distinct values across 175 rows and no nulls. The distribution is heavily skewed: 'Culinary' accounts for 80.6% (141 rows), 'Ornamental' for 31, plus 2 rows with a combined 'Culinary, Ornamental' label and 1 empty string that should be treated as missing. Entropy ratio of 0.40 confirms the imbalance.","role":"feature","scope":"column","target":"use","treatment":"Normalize the empty string to null and split the comma-delimited value into multi-hot flags before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_value","stats.top_rate","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical flavor descriptor field, with values that look like comma-separated tag combinations (e.g. 'Sweet, Fruity, Earthy, Smoky') rather than single labels. Cardinality is high \u2014 73 unique values across only 175 rows \u2014 and entropy_ratio of 0.845 confirms a long tail; the top value 'Sweet' covers just 14.3% of rows. The compound labels suggest the underlying data is multi-label flavor notes that have been collapsed into one string.","role":"feature","scope":"column","target":"flavor","treatment":"Split on commas and one-hot encode the individual flavor tags before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a URL column serving as a per-row identifier, with all 175 values unique and zero nulls. Every entry is a pepperscale.com pepper page (e.g., bell-pepper, gypsy-pepper, habanada-pepper), so the column is effectively a primary key for pepper varieties. Entropy ratio of ~1.0 confirms no repetition.","role":"identifier","scope":"column","target":"url","treatment":"Drop from modelling; retain as a row key or source link."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":3793,"prompt_tokens":12620,"total_tokens":16413}},"language_counts":{},"meta":{"generated_at":"2026-05-01T16:51:40+00:00","mode":"full","row_count":175,"sampled_rows":175,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/peppers.json"},"notes":[],"saturn_version":"0.2.0","schema":{"flavor":"categorical","heat":"categorical","jalRP":"numeric","name":"categorical","origin":"categorical","scoville_max":"numeric","scoville_median":"numeric","scoville_min":"numeric","type":"categorical","url":"categorical","use":"categorical"}}
