{"columns":[{"alerts":[],"column":"ref","extras":{"singletons":8,"top_values":[["414",10],["24",9],["404",9],["387",9],["1462",8],["1454",8],["431",8],["439",8],["1450",8],["552",8],["1458",8],["1466",8],["370",7],["502",7],["636",7],["572",7],["355",7],["486",7],["478",7],["377",7]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":630,"null_rate":0.0,"stats":{"cardinality":630,"entropy":9.256597030919464,"entropy_ratio":0.9954177831721195,"top_rate":0.003952569169960474,"top_value":"414"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"company","extras":{"singletons":0,"top_values":[["",2530]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[],"column":"company_location","extras":{"singletons":5,"top_values":[["U.S.A.",1136],["Canada",177],["France",176],["U.K.",133],["Italy",78],["Belgium",63],["Ecuador",58],["Australia",53],["Switzerland",44],["Germany",42],["Spain",36],["Venezuela",31],["Japan",31],["Denmark",31],["Austria",30],["Colombia",29],["New Zealand",27],["Hungary",26],["Brazil",25],["Peru",23]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":67,"null_rate":0.0,"stats":{"cardinality":67,"entropy":3.6745393644483078,"entropy_ratio":0.6057509622886063,"top_rate":0.4490118577075099,"top_value":"U.S.A."}},{"alerts":[],"column":"review_date","extras":{"histogram":{"counts":[62,0,73,0,0,92,0,0,123,0,110,0,0,163,0,0,194,0,183,0,0,247,0,0,284,0,217,0,0,105,0,0,228,0,193,0,0,81,0,175],"edges":[2006.0,2006.375,2006.75,2007.125,2007.5,2007.875,2008.25,2008.625,2009.0,2009.375,2009.75,2010.125,2010.5,2010.875,2011.25,2011.625,2012.0,2012.375,2012.75,2013.125,2013.5,2013.875,2014.25,2014.625,2015.0,2015.375,2015.75,2016.125,2016.5,2016.875,2017.25,2017.625,2018.0,2018.375,2018.75,2019.125,2019.5,2019.875,2020.25,2020.625,2021.0]},"sample":[2013.0,2013.0,2013.0,2014.0,2015.0,2019.0,2019.0,2010.0,2011.0,2013.0,2014.0,2013.0,2016.0,2008.0,2010.0,2011.0,2013.0,2015.0,2015.0,2018.0,2010.0,2015.0,2016.0,2016.0,2018.0,2009.0,2009.0,2011.0,2021.0,2007.0,2011.0,2012.0,2012.0,2012.0,2018.0,2019.0,2019.0,2015.0,2017.0,2016.0,2016.0,2016.0,2021.0,2018.0,2013.0,2011.0,2018.0,2018.0,2009.0,2009.0,2019.0,2008.0,2021.0,2014.0,2016.0,2006.0,2006.0,2006.0,2007.0,2009.0,2014.0,2021.0,2014.0,2018.0,2021.0,2013.0,2014.0,2016.0,2015.0,2013.0,2014.0,2014.0,2018.0,2014.0,2019.0,2015.0,2010.0,2007.0,2007.0,2019.0,2018.0,2016.0,2016.0,2011.0,2009.0,2010.0,2010.0,2020.0,2021.0,2008.0,2008.0,2012.0,2008.0,2021.0,2016.0,2016.0,2013.0,2013.0,2014.0,2021.0,2021.0,2016.0,2016.0,2014.0,2015.0,2007.0,2008.0,2009.0,2020.0,2019.0,2019.0,2008.0,2010.0,2021.0,2021.0,2016.0,2021.0,2021.0,2007.0,2009.0,2012.0,2006.0,2012.0,2016.0,2015.0,2018.0,2021.0,2021.0,2013.0,2018.0,2021.0,2021.0,2021.0,2011.0,2012.0,2014.0,2019.0,2015.0,2016.0,2013.0,2007.0,2009.0,2019.0,2007.0,2011.0,2011.0,2015.0,2019.0,2021.0,2019.0,2019.0,2019.0,2014.0,2014.0,2009.0,2006.0,2007.0,2008.0,2008.0,2019.0,2011.0,2011.0,2012.0,2018.0,2016.0,2009.0,2013.0,2015.0,2014.0,2018.0,2016.0,2006.0,2010.0,2014.0,2014.0,2020.0,2019.0,2016.0,2018.0,2018.0,2021.0,2019.0,2019.0,2011.0,2012.0,2012.0,2011.0,2012.0,2012.0,2014.0,2017.0,2017.0,2018.0,2018.0,2021.0,2017.0,2018.0,2018.0,2011.0,2012.0,2013.0,2014.0,2015.0,2011.0,2021.0,2018.0,2015.0,2015.0,2017.0,2016.0,2021.0,2021.0,2010.0,2009.0,2006.0,2006.0,2010.0,2011.0,2011.0,2015.0,2014.0,2014.0,2014.0,2018.0,2019.0,2019.0,2016.0,2016.0,2016.0,2019.0,2009.0,2010.0,2012.0,2014.0,2014.0,2014.0,2011.0,2011.0,2011.0,2011.0,2011.0,2013.0,2006.0,2017.0,2017.0,2017.0,2020.0,2020.0,2019.0,2012.0,2018.0,2014.0,2014.0,2014.0,2009.0,2018.0,2018.0,2018.0,2008.0,2009.0,2009.0,2012.0,2014.0,2015.0,2012.0,2012.0,2015.0,2019.0,2014.0,2014.0,2018.0,2017.0,2010.0,2020.0,2011.0,2008.0,2008.0,2021.0,2013.0,2008.0,2010.0,2010.0,2014.0,2019.0,2015.0,2015.0,2015.0,2015.0,2018.0,2013.0,2015.0,2010.0,2010.0,2011.0,2011.0,2013.0,2014.0,2016.0,2020.0,2014.0,2019.0,2019.0,2012.0,2014.0,2018.0,2015.0,2006.0,2006.0,2007.0,2007.0,2019.0,2012.0,2012.0,2015.0,2019.0,2021.0,2021.0,2017.0,2010.0,2018.0,2011.0,2015.0,2015.0,2016.0,2013.0,2014.0,2014.0,2014.0,2008.0,2018.0,2012.0,2012.0,2012.0,2015.0,2015.0,2020.0,2016.0,2019.0,2015.0,2014.0,2014.0,2020.0,2011.0,2011.0,2011.0,2021.0,2013.0,2013.0,2011.0,2009.0,2012.0,2014.0,2014.0,2018.0,2018.0,2014.0,2015.0,2016.0,2019.0,2016.0,2015.0,2019.0,2009.0,2018.0,2009.0,2010.0,2020.0,2016.0,2014.0,2020.0,2011.0,2016.0,2007.0,2019.0,2019.0,2019.0,2018.0,2014.0,2017.0,2012.0,2013.0,2021.0,2007.0,2012.0,2019.0,2020.0,2013.0,2008.0,2010.0,2011.0,2012.0,2017.0,2021.0,2010.0,2008.0,2008.0,2006.0,2011.0,2011.0,2014.0,2016.0,2016.0,2016.0,2017.0,2015.0,2016.0,2016.0,2016.0,2013.0,2013.0,2013.0,2014.0,2018.0,2009.0,2009.0,2009.0,2011.0,2012.0,2012.0,2012.0,2013.0,2013.0,2014.0,2014.0,2016.0,2016.0,2018.0,2018.0,2021.0,2017.0,2016.0,2017.0,2013.0,2014.0,2012.0,2016.0,2018.0,2012.0,2013.0,2014.0,2014.0,2015.0,2013.0,2015.0,2019.0,2016.0,2019.0,2019.0,2015.0,2007.0,2009.0,2014.0,2008.0,2012.0,2012.0,2012.0,2014.0,2015.0,2012.0,2016.0,2020.0,2019.0,2020.0,2020.0,2018.0,2020.0,2020.0,2016.0,2017.0,2017.0,2014.0,2020.0,2006.0,2006.0,2007.0,2009.0,2013.0,2009.0,2018.0,2014.0,2007.0,2018.0,2021.0,2021.0,2013.0,2019.0,2019.0,2019.0,2017.0,2021.0,2015.0,2018.0,2018.0,2010.0,2011.0,2012.0,2012.0,2017.0,2018.0,2018.0]},"kind":"numeric","n":2530,"n_null":0,"n_unique":16,"null_rate":0.0,"stats":{"iqr":6.0,"kurtosis":-0.7727169862548342,"max":2021.0,"mean":2014.3743083003953,"median":2015.0,"min":2006.0,"n_outliers":0,"outlier_rate":0.0,"q1":2012.0,"q3":2018.0,"skew":-0.1833231718264209,"std":3.968267270771448,"zero_rate":0.0}},{"alerts":[],"column":"country_of_bean_origin","extras":{"singletons":10,"top_values":[["Venezuela",253],["Peru",244],["Dominican Republic",226],["Ecuador",219],["Madagascar",177],["Blend",156],["Nicaragua",100],["Bolivia",80],["Tanzania",79],["Colombia",79],["Brazil",78],["Belize",76],["Vietnam",73],["Guatemala",62],["Mexico",55],["Papua New Guinea",50],["Costa Rica",43],["Trinidad",42],["Ghana",41],["India",35]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":62,"null_rate":0.0,"stats":{"cardinality":62,"entropy":4.716522443956182,"entropy_ratio":0.7921341853859242,"top_rate":0.1,"top_value":"Venezuela"}},{"alerts":[{"code":"one_word","level":"warn","message":"33.8% rows are a single word"},{"code":"duplicates","level":"warn","message":"36.6% duplicate strings"}],"column":"specific_bean_origin","extras":{"language_counts":{},"language_sample_size":2530,"length_histogram":{"counts":[86,106,152,211,142,306,60,71,79,54,143,73,98,65,64,119,48,57,42,45,84,37,39,35,29,58,18,23,30,21,33,13,16,14,13,23,8,12,1,2],"edges":[3.0,4.2,5.4,6.6,7.8,9.0,10.2,11.4,12.6,13.799999999999999,15.0,16.2,17.4,18.6,19.8,21.0,22.2,23.4,24.599999999999998,25.8,27.0,28.2,29.4,30.599999999999998,31.799999999999997,33.0,34.2,35.4,36.6,37.8,39.0,40.199999999999996,41.4,42.6,43.8,45.0,46.199999999999996,47.4,48.6,49.8,51.0]},"near_unique":false,"sample":["Matasawalevu, batch 1","Crayfish Bay Estate, 2014","Hawai'i Island, Big Island","Kokoa Kamili Coop","Duarte Province, El Cibao, batch 10","Maya Mountain, 2017, batch 255","Campesino w/ nibs","Amazonas","Ghana","Jamaica","Malekula P., 2013","Uranga, Chiapas, Jimenez Garcia farm","Tanzania","Cuba","Alto Beni, Wild Harvest, Limited Ed.","Fazenda Camboa, Bahia, 2018","Rio Peripa H.","Sambirano","Kerala State","Medagla, Xoco","Chuao, Hacienda San Jose","Elvesia","Peru","Bocas del Toro","Hawai'i Island, Kona Grand Cru E.","Pisa, unroasted","Nicaragua","Valle de Los Rios, batch 990","Duarte Province","Los Llanos","Maranon Canyon, Fortunato No. 4","Papua New Guinea","Absolu","Peru","Alto Beni, 2017 h.","Africa meets Latina","Sur del Lago, Merida","Belize","O'ahu Island, Maunawili, Agri Research C., 2014","Cuba","Papua New Guinea","Conacado","Ecuador","Maya Mtn.","Zorzal","Camino Verde P., Balao, Guayas, batch 1","Arriba","Johe","Bolivar","Honduras"],"top_values":[["Madagascar",55],["Ecuador",43],["Peru",41],["Dominican Republic",38],["Chuao",28],["Venezuela",21],["Kokoa Kamili",20],["Papua New Guinea",17],["Ghana",17],["Sambirano",17],["Ocumare",16],["Belize",16],["Oko Caribe",15],["Ucayali",15],["Tanzania",15],["Porcelana",13],["Vietnam",13],["Alto Beni",13],["Maya Mountain",13],["Brazil",12]],"top_words":[["batch",356],["madagascar",63],["dominican",62],["la",59],["san",54],["ecuador",52],["1",45],["peru",45],["sambirano",44],["kokoa",43],["republic",42],["maya",42],["w/",41],["los",38],["island,",37],["chuao",33],["harvest",33],["camino",31],["lot",30],["p.,",30],["alto",29],["h.",29],["p.",28],["2017",28],["venezuela",27]],"vocab_skipped":null,"word_histogram":{"counts":[854,0,0,547,0,0,0,415,0,0,0,299,0,0,0,204,0,0,126,0,0,0,63,0,0,0,21,0,0,1],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":2530,"n_null":0,"n_unique":1605,"null_rate":0.0,"stats":{"allcaps_rate":0.0015810276679841897,"boilerplate_rate":0.0,"duplicate_rate":0.36561264822134387,"emoji_rate":0.0,"len_max":51,"len_mean":17.115415019762846,"len_median":14.0,"len_min":3,"len_p95":39.0,"n_duplicates":925,"n_empty":0,"one_word_rate":0.3375494071146245,"readability_flesch_mean":28.411783928571452,"url_rate":0.0,"vocab_size":2079,"word_mean":2.6810276679841896,"word_median":2.0}},{"alerts":[{"code":"outliers","level":"warn","message":"9.3% rows beyond 1.5 IQR"}],"column":"cocoa_percent","extras":{"histogram":{"counts":[1,0,1,0,0,1,0,1,16,2,1,8,47,23,14,124,28,106,13,1046,340,72,377,35,63,2,95,18,9,40,1,9,2,12,0,0,0,0,0,23],"edges":[42.0,43.45,44.9,46.35,47.8,49.25,50.7,52.15,53.6,55.05,56.5,57.95,59.4,60.849999999999994,62.3,63.75,65.2,66.65,68.1,69.55,71.0,72.45,73.9,75.35,76.8,78.25,79.69999999999999,81.15,82.6,84.05,85.5,86.94999999999999,88.4,89.85,91.3,92.75,94.19999999999999,95.65,97.1,98.55,100.0]},"sample":[70.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,75.0,75.0,65.0,60.0,60.0,70.0,70.0,70.0,73.0,75.0,68.0,75.0,85.0,70.0,70.0,70.0,70.0,72.0,72.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,72.0,82.0,70.0,80.0,80.0,70.0,70.0,70.0,72.0,74.0,88.0,70.0,70.0,65.0,65.0,70.0,70.0,70.0,70.0,82.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,70.0,75.0,62.0,75.0,70.0,70.0,80.0,70.0,68.0,72.0,70.0,72.0,70.0,70.0,75.0,72.0,66.0,70.0,70.0,74.0,70.0,72.0,70.0,77.0,71.0,72.0,70.0,91.0,71.0,80.0,55.0,70.0,65.0,65.0,72.0,72.0,70.0,70.0,83.0,70.0,80.0,74.0,69.0,71.0,71.0,70.0,70.0,72.0,72.0,100.0,73.5,70.0,77.0,78.0,68.0,75.0,72.0,70.0,72.0,70.0,75.0,73.0,70.0,65.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,82.0,70.0,70.0,75.0,70.0,60.0,64.0,72.0,77.0,80.0,70.0,70.0,72.0,72.0,75.0,70.0,75.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,71.0,65.0,82.0,70.0,70.0,70.0,70.0,78.0,74.0,80.0,76.0,68.0,58.0,85.0,80.0,70.0,73.0,75.0,72.0,70.0,72.0,70.0,70.0,70.0,81.0,70.0,70.0,74.0,69.0,70.0,70.0,70.0,72.0,72.0,68.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,60.0,66.0,68.0,70.0,70.0,75.0,75.0,77.0,75.0,70.0,82.0,76.0,65.0,65.0,65.0,72.0,72.0,70.0,80.0,75.0,55.0,75.0,70.0,72.0,74.0,72.0,72.0,85.0,70.0,70.0,66.0,70.0,70.0,70.0,72.0,74.0,70.0,72.0,72.0,72.0,71.0,72.0,65.0,78.0,68.0,70.0,75.0,64.0,70.0,72.0,72.5,72.5,68.0,70.0,70.0,70.0,74.0,60.0,60.0,66.0,70.0,75.0,70.0,72.0,70.0,70.0,65.0,70.0,78.0,81.0,46.0,67.0,70.0,67.0,75.0,70.0,70.0,70.0,70.0,55.0,74.0,70.0,70.0,72.0,70.0,78.0,72.0,75.0,68.0,65.0,75.0,76.0,70.0,70.0,70.0,72.0,70.0,70.0,85.0,70.0,80.0,70.0,72.0,70.0,60.0,70.0,66.0,67.0,70.0,75.0,70.0,75.0,76.0,75.0,74.0,65.0,67.0,72.0,70.0,63.0,80.0,68.0,78.0,70.0,72.0,70.0,67.0,72.0,70.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,66.0,74.0,70.0,76.0,76.0,80.0,75.0,74.0,72.0,66.0,60.0,65.0,70.0,70.0,72.0,70.0,75.0,70.0,70.0,70.0,76.0,70.0,75.0,67.0,75.0,72.0,75.0,70.0,80.0,73.0,70.0,70.0,70.0,75.0,75.0,75.0,75.0,75.0,85.0,65.0,60.0,55.0,70.0,67.0,75.0,80.0,75.0,75.0,70.0,75.0,71.0,72.0,72.0,77.0,72.0,70.0,70.0,65.0,70.0,75.0,67.0,73.0,73.0,70.0,70.0,72.0,70.0,85.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,80.0,70.0,75.0,70.0,70.0,62.0,70.0,70.0,70.0,84.0,70.0,65.0,61.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,62.0,70.0,70.0,75.0,75.0,75.0,70.0,75.0,68.0,70.0,70.0,70.0,70.0,73.0,70.0,65.0,72.0,75.0,70.0,70.0,70.0,73.0,70.0,70.0,60.0,75.0,75.0,70.0,74.0,64.0,70.0,85.0,64.0,70.0,80.0,70.0,75.0,75.0,72.0,74.0,72.0,89.0,68.0,65.0,70.0,70.0,70.0,70.0,68.0,75.0,65.0,70.0,75.0,65.0,75.0,75.0,72.0]},"kind":"numeric","n":2530,"n_null":0,"n_unique":46,"null_rate":0.0,"stats":{"iqr":4.0,"kurtosis":6.541375731243388,"max":100.0,"mean":71.6397233201581,"median":70.0,"min":42.0,"n_outliers":235,"outlier_rate":0.09288537549407115,"q1":70.0,"q3":74.0,"skew":1.1982965619063584,"std":5.61672422913589,"zero_rate":0.0}},{"alerts":[],"column":"ingredients","extras":{"singletons":3,"top_values":[["3- B,S,C",999],["2- B,S",718],["4- B,S,C,L",286],["5- B,S,C,V,L",184],["4- B,S,C,V",141],["",87],["2- B,S*",31],["4- B,S*,C,Sa",20],["3- B,S*,C",12],["3- B,S,L",8],["4- B,S*,C,V",7],["5-B,S,C,V,Sa",6],["1- B",6],["4- B,S,V,L",5],["4- B,S,C,Sa",5],["6-B,S,C,V,L,Sa",4],["3- B,S,V",3],["4- B,S*,V,L",3],["4- B,S*,C,L",2],["3- B,S*,Sa",1]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":22,"null_rate":0.0,"stats":{"cardinality":22,"entropy":2.4304729003269667,"entropy_ratio":0.5450185378265011,"top_rate":0.3948616600790514,"top_value":"3- B,S,C"}},{"alerts":[{"code":"near_unique","level":"info","message":"98.3% of rows are unique strings"}],"column":"most_memorable_characteristics","extras":{"language_counts":{},"language_sample_size":2530,"length_histogram":{"counts":[1,0,4,4,3,1,0,2,3,9,39,47,52,0,29,34,69,100,145,0,206,206,156,173,179,192,0,201,179,178,121,86,66,0,23,13,3,4,1,1],"edges":[3.0,3.85,4.7,5.55,6.4,7.25,8.1,8.95,9.8,10.649999999999999,11.5,12.35,13.2,14.049999999999999,14.9,15.75,16.6,17.45,18.299999999999997,19.15,20.0,20.849999999999998,21.7,22.55,23.4,24.25,25.099999999999998,25.95,26.8,27.65,28.5,29.349999999999998,30.2,31.05,31.9,32.75,33.599999999999994,34.45,35.3,36.15,37.0]},"near_unique":true,"sample":["chewy, off, rubbery","dark berry, mild floral","intense, tannic, choco, earthy","basic cocoa, gateway","dried fruit, orange peel, cocoa","flat, molasses, creamy","XL nibs, sour, cardboard","blackberry, dirt, high roast","sweet, vanilla, cocoa, mold","sandy, woody, spicy","sticky, butterscotch, nutty","citrus,prononced orange","intense, sweet, brownie","sliglty dry, papaya","sticky, rich, cherry","sandy, intense, sweet, rich","cinamon and nutmeg","dry, molasses, sour","creamy, masculine, earthy","baked bread","subtle, caramel, sour milk","chalky, mild fruit, burnt","dried fruit, sour, nutella","intense, rum, tea","sticky, tobacco","green","multiple off flavors, metallic","complex, strawberry, floral","complex,black pepper,coffee","rich cocoa, spicy, cinamon","orange, floral, caramel","gritty, vanilla, non-descript","sweet, dairy, spice, cocoa","banana, yogurt, cocoa","honey, mild floral, mild off","fatty, nutty, earthy","intense, nutty, mild rubber","nutty, sour milk","spicy, intense, fades fast","fatty, roasty, coffee, off note","chewy, smoke, fruit, cocoa","baked, roasty, coffee","cinamon, nutmeg, coffee","honey, caramel","roasty, berry notes","intense, bourbon","fatty, rich, spice, nutty","strong cocoa, orange citrus","cloying, fatty, bitter","overly roasty, spicy"],"top_values":[],"top_words":[["sweet,",227],["cocoa",224],["mild",213],["cocoa,",182],["nutty,",175],["creamy,",175],["fruit,",162],["sandy,",160],["sour",142],["fatty,",127],["roasty,",124],["intense,",115],["earthy,",114],["floral,",111],["spicy,",107],["fruit",105],["off",103],["rich",101],["nutty",101],["roasty",100],["earthy",82],["sweet",81],["dried",78],["sticky,",75],["vanilla,",69]],"vocab_skipped":null,"word_histogram":{"counts":[18,0,0,0,0,0,268,0,0,0,0,0,1174,0,0,0,0,0,892,0,0,0,0,0,171,0,0,0,0,7],"edges":[1.0,1.1666666666666667,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333,5.0,5.166666666666666,5.333333333333333,5.5,5.666666666666666,5.833333333333333,6.0]}},"kind":"text","n":2530,"n_null":0,"n_unique":2487,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.01699604743083004,"emoji_rate":0.0,"len_max":37,"len_mean":23.062450592885376,"len_median":23.0,"len_min":3,"len_p95":30.0,"n_duplicates":43,"n_empty":0,"one_word_rate":0.0071146245059288534,"readability_flesch_mean":49.70667500000001,"url_rate":0.0,"vocab_size":868,"word_mean":3.375889328063241,"word_median":3.0}},{"alerts":[],"column":"rating","extras":{"histogram":{"counts":[4,0,0,0,0,0,10,0,0,0,3,0,0,33,0,0,17,0,0,0,166,0,0,333,0,0,523,0,0,0,464,0,0,565,0,0,300,0,0,112],"edges":[1.0,1.075,1.15,1.225,1.3,1.375,1.45,1.525,1.6,1.6749999999999998,1.75,1.825,1.9,1.975,2.05,2.125,2.2,2.275,2.3499999999999996,2.425,2.5,2.575,2.65,2.7249999999999996,2.8,2.875,2.95,3.025,3.1,3.175,3.25,3.3249999999999997,3.4,3.475,3.55,3.625,3.6999999999999997,3.775,3.85,3.925,4.0]},"sample":[3.0,3.25,3.5,3.75,3.75,2.75,3.0,2.75,3.75,2.5,2.75,2.5,2.5,2.75,3.0,4.0,3.5,3.25,3.5,3.5,2.75,3.75,3.25,3.75,3.0,2.75,3.25,4.0,3.25,3.0,3.75,2.5,2.5,3.0,3.0,2.5,2.75,3.5,3.5,2.75,2.75,2.75,3.75,3.0,3.5,3.0,3.0,3.0,3.5,3.5,2.75,3.0,3.75,2.75,3.5,3.75,3.75,4.0,2.5,3.0,4.0,3.75,2.75,3.5,3.0,3.5,4.0,3.75,2.75,3.0,2.75,3.0,3.0,3.5,3.75,2.5,2.5,2.0,3.0,3.0,3.0,3.75,3.5,3.0,4.0,3.5,3.75,2.75,4.0,1.5,3.5,3.0,3.5,3.0,3.25,3.5,3.0,3.25,2.75,4.0,3.25,2.5,2.75,2.75,3.75,2.5,3.5,3.0,3.5,3.0,3.75,1.0,3.75,3.5,3.25,2.75,3.25,3.5,3.0,3.75,3.75,1.0,3.5,3.75,3.5,2.5,3.0,3.5,3.5,3.25,2.75,2.75,4.0,3.25,3.0,3.25,3.25,3.0,3.25,2.5,3.0,3.0,3.0,3.5,2.75,3.25,3.75,4.0,4.0,2.5,2.75,3.0,3.25,3.5,3.75,3.5,3.5,3.0,3.75,2.75,3.25,3.5,2.75,4.0,3.25,3.0,3.25,3.5,3.25,3.5,2.75,4.0,3.5,2.5,3.0,3.25,3.25,3.5,2.75,3.0,3.5,3.0,3.25,3.5,2.0,2.5,3.25,4.0,4.0,3.5,3.5,3.5,3.25,3.5,3.25,3.75,3.25,3.75,3.5,3.25,3.5,3.5,3.0,3.25,3.75,3.5,3.5,3.5,3.5,3.25,3.5,3.0,3.25,3.0,2.5,3.5,3.0,2.75,3.25,3.75,2.75,3.25,2.75,3.0,3.25,3.25,3.5,3.5,4.0,3.5,2.5,2.75,3.5,3.0,3.5,3.75,3.75,4.0,4.0,4.0,4.0,3.75,2.0,3.5,3.5,3.75,3.75,3.75,3.25,2.5,3.75,2.75,2.5,3.0,3.0,3.0,3.25,3.5,3.5,3.0,3.75,3.5,2.75,3.0,3.5,3.0,3.25,3.25,3.25,3.75,3.5,3.25,2.75,3.5,1.5,3.5,3.5,2.5,2.75,3.75,2.0,3.5,2.75,3.5,2.75,3.0,3.5,3.5,3.5,3.5,3.5,3.0,3.0,3.0,3.5,3.75,3.25,3.0,3.25,3.0,2.5,3.25,2.5,3.5,3.25,3.5,2.0,3.0,3.5,3.5,3.5,2.0,2.5,3.25,3.0,2.75,3.0,3.5,2.75,2.5,2.75,3.5,3.5,2.75,3.75,3.25,2.25,2.5,3.75,3.0,3.0,2.75,3.0,3.5,3.75,3.0,3.5,3.75,4.0,2.75,3.5,3.0,2.0,2.75,2.75,3.5,2.75,3.0,3.75,3.0,3.0,3.75,4.0,3.25,3.5,2.75,3.25,2.75,3.5,3.0,3.5,2.75,3.5,3.0,3.0,3.25,3.0,2.5,3.25,3.0,2.75,3.75,4.0,3.0,3.0,3.25,3.25,2.75,3.75,3.0,2.75,3.75,3.5,3.25,3.5,3.5,3.25,3.0,3.75,2.5,2.5,2.75,2.75,3.5,3.5,3.5,3.0,2.75,3.5,3.75,3.5,3.5,3.75,2.75,3.0,3.5,2.75,3.25,3.0,3.25,3.25,3.0,3.25,3.0,3.0,3.75,3.5,3.25,4.0,4.0,3.25,3.75,3.25,3.5,3.25,3.75,4.0,4.0,3.25,3.5,3.5,2.75,2.25,3.5,2.5,2.5,2.75,3.25,3.25,3.5,3.75,3.25,2.75,3.25,2.75,3.25,3.25,4.0,3.0,3.0,2.75,3.0,2.5,2.0,2.75,2.75,3.75,2.75,3.25,3.75,3.25,2.75,3.25,3.75,3.0,2.75,3.0,3.0,2.75,3.0,3.0,3.5,2.5,3.75,1.5,3.75,3.5,2.75,3.0,3.0,3.0,3.0,3.5,3.75,2.75,3.25,2.75,3.25,2.75,3.5,3.25,3.0,3.25,3.0,3.75,3.0,3.5,3.75,3.0,3.5]},"kind":"numeric","n":2530,"n_null":0,"n_unique":12,"null_rate":0.0,"stats":{"iqr":0.5,"kurtosis":1.0534163943816193,"max":4.0,"mean":3.1963438735177867,"median":3.25,"min":1.0,"n_outliers":50,"outlier_rate":0.019762845849802372,"q1":3.0,"q3":3.5,"skew":-0.6084413776009047,"std":0.4453213042172302,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["cocoa_percent.stats.mean","cocoa_percent.stats.n_outliers","cocoa_percent.stats.outlier_rate","cocoa_percent.stats.q1","cocoa_percent.stats.q3","rating.stats.mean","rating.stats.median","rating.stats.max","country_of_bean_origin.top_values","company_location.stats.top_value","company_location.stats.top_rate","ingredients.top_values","review_date.stats.min","review_date.stats.max"],"featured_charts":[{"caption":"Look for how tightly scores cluster around 3.0\u20133.5 and whether truly low or perfect ratings are rare.","column":"rating","kind":"histogram"},{"caption":"Venezuela, Peru, Dominican Republic, and Ecuador lead \u2014 check whether certain origins consistently earn higher ratings.","column":"country_of_bean_origin","kind":"bar"},{"caption":"The bulk of bars sit at 70\u201374% cocoa; watch for the long right tail of outliers pushing toward 100%.","column":"cocoa_percent","kind":"histogram"},{"caption":"U.S.A. accounts for nearly 45% of all reviews \u2014 see how the remaining reviews spread across 66 other countries.","column":"company_location","kind":"bar"},{"caption":"Most bars use just 2\u20133 ingredients (beans, sugar, cocoa butter); see how quickly complexity drops off beyond that.","column":"ingredients","kind":"donut"}],"model":"anthropic:default","narrative":"This dataset contains 2,530 chocolate bar reviews covering bean origins, cocoa percentages, ingredients, and expert ratings across reviews dated 2006\u20132021. Two things stand out: first, cocoa percent clusters tightly between 70\u201374% but has 235 outliers (9.3%) stretching up to 100%, suggesting a small but notable group of ultra-dark bars worth investigating. Second, ratings skew modestly negative with a mean of 3.20 and median of 3.25 out of 4.0, indicating most bars are rated good-to-very-good \u2014 but the distribution of scores by bean origin (Venezuela, Peru, Dominican Republic, and Ecuador dominate) could reveal whether provenance drives quality. The 'company' column is entirely blank and should be ignored.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["column","n","n_unique","duplicate_rate","n_duplicates","one_word_rate","top_values","top_words","word_mean"],"model":"anthropic:default","narrative":"This column captures the specific geographic or farm-level origin of cacao beans used in chocolate production, ranging from country-level names (Madagascar, Ecuador, Peru) to named estates and cooperatives (Kokoa Kamili, Chuao, Sambirano). The duplicate rate of 36.6% is expected for a categorical-like origin field with 1,605 unique values out of 2,530 rows, but the top word 'batch' appearing 356 times is surprising \u2014 nearly 14% of entries reference a batch identifier, suggesting some values encode both origin and batch metadata in a single field. One-word entries account for 33.8% of values (country-level origins), while multi-word entries average ~2.7 words, reflecting finer geographic or supplier granularity.","role":"feature","scope":"column","target":"specific_bean_origin","treatment":"Normalize by extracting country-level tokens separately; flag 'batch'-containing entries for parsing or exclusion before grouping or encoding."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","mean","median","q1","q3","iqr","skew","kurtosis","n_outliers","outlier_rate","n_unique","null_rate"],"model":"anthropic:default","narrative":"This column records cocoa percentage for chocolate products, ranging from 42% to 100% across 2,530 rows with no nulls and only 46 distinct values. The distribution is tightly clustered \u2014 Q1 and median both sit at 70%, Q3 at 74%, giving an IQR of just 4 \u2014 but is right-skewed (skew 1.20) with high kurtosis (6.54), driven by 235 outliers (9.3%) that stretch toward extreme values like 100%. The narrow IQR relative to the full range (42\u2013100) suggests most chocolates fall in a standard dark-chocolate band, with a long tail of unusually high-cocoa products pulling the mean (71.64) above the median.","role":"feature","scope":"column","target":"cocoa_percent","treatment":"Use as-is or apply mild Winsorization at the upper tail to dampen the 9.3% outlier influence before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","cardinality","top_value","top_rate","entropy","null_rate"],"model":"anthropic:default","narrative":"This column is intended to capture a company name but contains a single blank string across all 2,530 rows \u2014 it is effectively empty. Cardinality is 1, entropy is 0, and the top value is an empty string with a 100% hit rate, meaning the field was never populated. This is a completely uninformative column with zero analytical value.","role":"other","scope":"column","target":"company","treatment":"Drop entirely; the column carries no information and is populated only with empty strings."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","duplicate_rate","n_duplicates","top_words","vocab_size","word_mean","word_median","len_mean","len_median","one_word_rate"],"model":"anthropic:default","narrative":"This column contains short, comma-separated flavor/texture descriptor phrases for what appears to be a chocolate or confectionery dataset \u2014 top words include 'cocoa', 'sweet', 'nutty', 'creamy', 'sandy', and 'fatty'. With 2487 unique values out of 2530 rows and a mean of ~3.4 words per entry (median 23 characters), entries are brief multi-attribute tags rather than free prose, yet near-uniqueness is triggered by the combinatorial variety of descriptors. Only 43 duplicates exist across 2530 rows (1.7% duplicate rate), and the vocabulary of 868 words suggests a constrained but richly combined descriptor lexicon.","role":"free_text","scope":"column","target":"most_memorable_characteristics","treatment":"Split on commas to explode into multi-hot flavor tags, then use as categorical features or embed for similarity modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","min","max","skew","iqr","q1","q3","n_outliers","outlier_rate","null_rate","mean","median"],"model":"anthropic:default","narrative":"This column is a discrete rating scale, almost certainly a user or product rating, with only 12 distinct values across 2,530 records and no nulls. The range is 1.0\u20134.0 (notably not the common 1\u20135 or 1\u201310 scale), suggesting a 4-point Likert or star-rating system. The distribution is left-skewed (skew = -0.608) and tightly clustered \u2014 IQR of just 0.5, with Q1=3.0 and Q3=3.5 \u2014 indicating a strong ceiling effect where most responses pile up near the top. Only 50 outliers (1.98%) exist, likely low ratings near 1.0.","role":"feature","scope":"column","target":"rating","treatment":"Treat as ordinal categorical or keep numeric; consider ceiling-effect bias before using as a target or feature in regression."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","min","max","median","iqr","skew","kurtosis","n","null_rate"],"model":"anthropic:default","narrative":"This column contains review years, stored as numeric integers spanning 2006 to 2021 \u2014 a 16-year range with only 16 distinct values, confirming it is a year-granularity timestamp rather than a full date. The distribution is nearly symmetric (skew \u22120.18, kurtosis \u22120.77) with a median of 2015 and an IQR of 6 years, suggesting fairly even coverage across the mid-2010s. Notably, 2530 rows collapse into just 16 discrete year values, meaning this field carries no finer temporal resolution and may limit time-series analyses that require month- or day-level precision.","role":"timestamp","scope":"column","target":"review_date","treatment":"Cast to integer year; use as a categorical or ordinal time feature, or bin into periods \u2014 do not treat as a continuous numeric."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","null_rate","entropy_ratio","top_values","n"],"model":"anthropic:default","narrative":"This column encodes the country of company headquarters across 2,530 records, with 67 distinct country values and zero nulls. The distribution is heavily US-dominated: 'U.S.A.' alone accounts for 44.9% of all rows (1,136 of 2,530), nearly 6.4\u00d7 the next most frequent country (Canada at 177). The entropy ratio of 0.606 confirms moderate-to-high concentration despite 67 categories, and the presence of both abbreviations ('U.S.A.', 'U.K.') and full names ('Canada', 'France') suggests inconsistent formatting that may complicate grouping or joining.","role":"feature","scope":"column","target":"company_location","treatment":"Standardise country name formats (e.g. 'U.S.A.' \u2192 'USA'), then one-hot or target-encode for modelling, noting the strong US imbalance."},{"confidence":"high","critiques":[],"evidence_keys":["cardinality","entropy_ratio","top_value","top_rate","top_values","null_rate","n"],"model":"anthropic:default","narrative":"This column records the country of origin for cacao beans used in chocolate production, covering 62 distinct origins across 2,530 rows with no nulls. The distribution is fairly broad (entropy ratio 0.79), with Venezuela leading at exactly 10% (253 rows), followed closely by Peru (244) and Dominican Republic (226) \u2014 no single origin dominates heavily. Notably, 'Blend' appears as a pseudo-origin with 156 entries, meaning ~6% of records are multi-origin mixtures rather than single-country sourced beans, which may need special handling in origin-based analyses.","role":"feature","scope":"column","target":"country_of_bean_origin","treatment":"One-hot or target-encode for modelling; isolate or flag 'Blend' records as a separate category before any geographic or single-origin analysis."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","null_rate","top_values","n"],"model":"anthropic:default","narrative":"This column encodes a structured ingredient combination label for each record, consisting of a count prefix (e.g., '3-') followed by abbreviated ingredient codes (B, S, C, L, V, Sa). With only 22 distinct values across 2,530 rows it functions as a categorical feature rather than free text. Notably, 87 rows carry an empty string despite a reported null_rate of 0.0, which is a hidden missingness issue that needs handling. The top value '3- B,S,C' dominates at ~39.5% of rows, and starred variants (e.g., 'B,S*') suggest a meaningful sub-type modifier that distinguishes at least some categories.","role":"feature","scope":"column","target":"ingredients","treatment":"Treat empty-string entries as missing; one-hot encode or ordinal-encode the 22 categories, or decompose into numeric ingredient count and individual binary ingredient flags."},{"confidence":"medium","critiques":[],"evidence_keys":["n","n_unique","entropy_ratio","top_rate","top_value","top_values","null_rate"],"model":"anthropic:default","narrative":"This column appears to be a numeric reference or ID code stored as a categorical string, likely a ticket number, document reference, or external record identifier. With 630 unique values across 2,530 rows, the average reuse rate is ~4 rows per value, and the entropy ratio of 0.9954 is nearly maximal, indicating an almost-uniform distribution with no dominant category. The most frequent value ('414') appears only 10 times (top_rate \u2248 0.004), confirming no single reference dominates\u2014but the non-unique nature rules out a pure primary key, suggesting these are foreign references that recur legitimately.","role":"foreign_key","scope":"column","target":"ref","treatment":"Left-join or group-by on this reference ID; verify the target table to confirm referential integrity."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":3359,"prompt_tokens":9983,"total_tokens":13342}},"language_counts":{},"meta":{"generated_at":"2026-06-21T23:52:45+00:00","mode":"full","row_count":2530,"sampled_rows":2530,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/chocolate_origins.json"},"notes":[],"saturn_version":"0.2.0","schema":{"cocoa_percent":"numeric","company":"categorical","company_location":"categorical","country_of_bean_origin":"categorical","ingredients":"categorical","most_memorable_characteristics":"text","rating":"numeric","ref":"categorical","review_date":"numeric","specific_bean_origin":"text"}}
