{"columns":[{"alerts":[],"column":"ref","extras":{"singletons":8,"top_values":[["414",10],["24",9],["404",9],["387",9],["1462",8],["1454",8],["431",8],["439",8],["1450",8],["552",8],["1458",8],["1466",8],["370",7],["502",7],["636",7],["572",7],["355",7],["486",7],["478",7],["377",7]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":630,"null_rate":0.0,"stats":{"cardinality":630,"entropy":9.256597030919464,"entropy_ratio":0.9954177831721195,"top_rate":0.003952569169960474,"top_value":"414"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"company","extras":{"singletons":0,"top_values":[["",2530]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":""}},{"alerts":[],"column":"company_location","extras":{"singletons":5,"top_values":[["U.S.A.",1136],["Canada",177],["France",176],["U.K.",133],["Italy",78],["Belgium",63],["Ecuador",58],["Australia",53],["Switzerland",44],["Germany",42],["Spain",36],["Venezuela",31],["Japan",31],["Denmark",31],["Austria",30],["Colombia",29],["New 
Zealand",27],["Hungary",26],["Brazil",25],["Peru",23]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":67,"null_rate":0.0,"stats":{"cardinality":67,"entropy":3.6745393644483078,"entropy_ratio":0.6057509622886063,"top_rate":0.4490118577075099,"top_value":"U.S.A."}},{"alerts":[],"column":"review_date","extras":{"histogram":{"counts":[62,0,73,0,0,92,0,0,123,0,110,0,0,163,0,0,194,0,183,0,0,247,0,0,284,0,217,0,0,105,0,0,228,0,193,0,0,81,0,175],"edges":[2006.0,2006.375,2006.75,2007.125,2007.5,2007.875,2008.25,2008.625,2009.0,2009.375,2009.75,2010.125,2010.5,2010.875,2011.25,2011.625,2012.0,2012.375,2012.75,2013.125,2013.5,2013.875,2014.25,2014.625,2015.0,2015.375,2015.75,2016.125,2016.5,2016.875,2017.25,2017.625,2018.0,2018.375,2018.75,2019.125,2019.5,2019.875,2020.25,2020.625,2021.0]},"sample":[2013.0,2013.0,2013.0,2014.0,2015.0,2019.0,2019.0,2010.0,2011.0,2013.0,2014.0,2013.0,2016.0,2008.0,2010.0,2011.0,2013.0,2015.0,2015.0,2018.0,2010.0,2015.0,2016.0,2016.0,2018.0,2009.0,2009.0,2011.0,2021.0,2007.0,2011.0,2012.0,2012.0,2012.0,2018.0,2019.0,2019.0,2015.0,2017.0,2016.0,2016.0,2016.0,2021.0,2018.0,2013.0,2011.0,2018.0,2018.0,2009.0,2009.0,2019.0,2008.0,2021.0,2014.0,2016.0,2006.0,2006.0,2006.0,2007.0,2009.0,2014.0,2021.0,2014.0,2018.0,2021.0,2013.0,2014.0,2016.0,2015.0,2013.0,2014.0,2014.0,2018.0,2014.0,2019.0,2015.0,2010.0,2007.0,2007.0,2019.0,2018.0,2016.0,2016.0,2011.0,2009.0,2010.0,2010.0,2020.0,2021.0,2008.0,2008.0,2012.0,2008.0,2021.0,2016.0,2016.0,2013.0,2013.0,2014.0,2021.0,2021.0,2016.0,2016.0,2014.0,2015.0,2007.0,2008.0,2009.0,2020.0,2019.0,2019.0,2008.0,2010.0,2021.0,2021.0,2016.0,2021.0,2021.0,2007.0,2009.0,2012.0,2006.0,2012.0,2016.0,2015.0,2018.0,2021.0,2021.0,2013.0,2018.0,2021.0,2021.0,2021.0,2011.0,2012.0,2014.0,2019.0,2015.0,2016.0,2013.0,2007.0,2009.0,2019.0,2007.0,2011.0,2011.0,2015.0,2019.0,2021.0,2019.0,2019.0,2019.0,2014.0,2014.0,2009.0,2006.0,2007.0,2008.0,2008.0,2019.0,2011.0,2011.0,2012.0,2018.0,2016.0,2009.0,2013.0,2015.0,2014.0,2018.0,2016
.0,2006.0,2010.0,2014.0,2014.0,2020.0,2019.0,2016.0,2018.0,2018.0,2021.0,2019.0,2019.0,2011.0,2012.0,2012.0,2011.0,2012.0,2012.0,2014.0,2017.0,2017.0,2018.0,2018.0,2021.0,2017.0,2018.0,2018.0,2011.0,2012.0,2013.0,2014.0,2015.0,2011.0,2021.0,2018.0,2015.0,2015.0,2017.0,2016.0,2021.0,2021.0,2010.0,2009.0,2006.0,2006.0,2010.0,2011.0,2011.0,2015.0,2014.0,2014.0,2014.0,2018.0,2019.0,2019.0,2016.0,2016.0,2016.0,2019.0,2009.0,2010.0,2012.0,2014.0,2014.0,2014.0,2011.0,2011.0,2011.0,2011.0,2011.0,2013.0,2006.0,2017.0,2017.0,2017.0,2020.0,2020.0,2019.0,2012.0,2018.0,2014.0,2014.0,2014.0,2009.0,2018.0,2018.0,2018.0,2008.0,2009.0,2009.0,2012.0,2014.0,2015.0,2012.0,2012.0,2015.0,2019.0,2014.0,2014.0,2018.0,2017.0,2010.0,2020.0,2011.0,2008.0,2008.0,2021.0,2013.0,2008.0,2010.0,2010.0,2014.0,2019.0,2015.0,2015.0,2015.0,2015.0,2018.0,2013.0,2015.0,2010.0,2010.0,2011.0,2011.0,2013.0,2014.0,2016.0,2020.0,2014.0,2019.0,2019.0,2012.0,2014.0,2018.0,2015.0,2006.0,2006.0,2007.0,2007.0,2019.0,2012.0,2012.0,2015.0,2019.0,2021.0,2021.0,2017.0,2010.0,2018.0,2011.0,2015.0,2015.0,2016.0,2013.0,2014.0,2014.0,2014.0,2008.0,2018.0,2012.0,2012.0,2012.0,2015.0,2015.0,2020.0,2016.0,2019.0,2015.0,2014.0,2014.0,2020.0,2011.0,2011.0,2011.0,2021.0,2013.0,2013.0,2011.0,2009.0,2012.0,2014.0,2014.0,2018.0,2018.0,2014.0,2015.0,2016.0,2019.0,2016.0,2015.0,2019.0,2009.0,2018.0,2009.0,2010.0,2020.0,2016.0,2014.0,2020.0,2011.0,2016.0,2007.0,2019.0,2019.0,2019.0,2018.0,2014.0,2017.0,2012.0,2013.0,2021.0,2007.0,2012.0,2019.0,2020.0,2013.0,2008.0,2010.0,2011.0,2012.0,2017.0,2021.0,2010.0,2008.0,2008.0,2006.0,2011.0,2011.0,2014.0,2016.0,2016.0,2016.0,2017.0,2015.0,2016.0,2016.0,2016.0,2013.0,2013.0,2013.0,2014.0,2018.0,2009.0,2009.0,2009.0,2011.0,2012.0,2012.0,2012.0,2013.0,2013.0,2014.0,2014.0,2016.0,2016.0,2018.0,2018.0,2021.0,2017.0,2016.0,2017.0,2013.0,2014.0,2012.0,2016.0,2018.0,2012.0,2013.0,2014.0,2014.0,2015.0,2013.0,2015.0,2019.0,2016.0,2019.0,2019.0,2015.0,2007.0,2009.0,2014.0,2008.0,2012.0,2012.0,2012.0,20
14.0,2015.0,2012.0,2016.0,2020.0,2019.0,2020.0,2020.0,2018.0,2020.0,2020.0,2016.0,2017.0,2017.0,2014.0,2020.0,2006.0,2006.0,2007.0,2009.0,2013.0,2009.0,2018.0,2014.0,2007.0,2018.0,2021.0,2021.0,2013.0,2019.0,2019.0,2019.0,2017.0,2021.0,2015.0,2018.0,2018.0,2010.0,2011.0,2012.0,2012.0,2017.0,2018.0,2018.0]},"kind":"numeric","n":2530,"n_null":0,"n_unique":16,"null_rate":0.0,"stats":{"iqr":6.0,"kurtosis":-0.7727169862548342,"max":2021.0,"mean":2014.3743083003953,"median":2015.0,"min":2006.0,"n_outliers":0,"outlier_rate":0.0,"q1":2012.0,"q3":2018.0,"skew":-0.1833231718264209,"std":3.968267270771448,"zero_rate":0.0}},{"alerts":[],"column":"country_of_bean_origin","extras":{"singletons":10,"top_values":[["Venezuela",253],["Peru",244],["Dominican Republic",226],["Ecuador",219],["Madagascar",177],["Blend",156],["Nicaragua",100],["Bolivia",80],["Tanzania",79],["Colombia",79],["Brazil",78],["Belize",76],["Vietnam",73],["Guatemala",62],["Mexico",55],["Papua New Guinea",50],["Costa Rica",43],["Trinidad",42],["Ghana",41],["India",35]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":62,"null_rate":0.0,"stats":{"cardinality":62,"entropy":4.716522443956182,"entropy_ratio":0.7921341853859242,"top_rate":0.1,"top_value":"Venezuela"}},{"alerts":[{"code":"one_word","level":"warn","message":"33.8% rows are a single word"},{"code":"duplicates","level":"warn","message":"36.6% duplicate strings"}],"column":"specific_bean_origin","extras":{"language_counts":{},"language_sample_size":2530,"length_histogram":{"counts":[86,106,152,211,142,306,60,71,79,54,143,73,98,65,64,119,48,57,42,45,84,37,39,35,29,58,18,23,30,21,33,13,16,14,13,23,8,12,1,2],"edges":[3.0,4.2,5.4,6.6,7.8,9.0,10.2,11.4,12.6,13.799999999999999,15.0,16.2,17.4,18.6,19.8,21.0,22.2,23.4,24.599999999999998,25.8,27.0,28.2,29.4,30.599999999999998,31.799999999999997,33.0,34.2,35.4,36.6,37.8,39.0,40.199999999999996,41.4,42.6,43.8,45.0,46.199999999999996,47.4,48.6,49.8,51.0]},"near_unique":false,"sample":["Matasawalevu, batch 
1","Crayfish Bay Estate, 2014","Hawai'i Island, Big Island","Kokoa Kamili Coop","Duarte Province, El Cibao, batch 10","Maya Mountain, 2017, batch 255","Campesino w/ nibs","Amazonas","Ghana","Jamaica","Malekula P., 2013","Uranga, Chiapas, Jimenez Garcia farm","Tanzania","Cuba","Alto Beni, Wild Harvest, Limited Ed.","Fazenda Camboa, Bahia, 2018","Rio Peripa H.","Sambirano","Kerala State","Medagla, Xoco","Chuao, Hacienda San Jose","Elvesia","Peru","Bocas del Toro","Hawai'i Island, Kona Grand Cru E.","Pisa, unroasted","Nicaragua","Valle de Los Rios, batch 990","Duarte Province","Los Llanos","Maranon Canyon, Fortunato No. 4","Papua New Guinea","Absolu","Peru","Alto Beni, 2017 h.","Africa meets Latina","Sur del Lago, Merida","Belize","O'ahu Island, Maunawili, Agri Research C., 2014","Cuba","Papua New Guinea","Conacado","Ecuador","Maya Mtn.","Zorzal","Camino Verde P., Balao, Guayas, batch 1","Arriba","Johe","Bolivar","Honduras"],"top_values":[["Madagascar",55],["Ecuador",43],["Peru",41],["Dominican Republic",38],["Chuao",28],["Venezuela",21],["Kokoa Kamili",20],["Papua New Guinea",17],["Ghana",17],["Sambirano",17],["Ocumare",16],["Belize",16],["Oko Caribe",15],["Ucayali",15],["Tanzania",15],["Porcelana",13],["Vietnam",13],["Alto Beni",13],["Maya 
Mountain",13],["Brazil",12]],"top_words":[["batch",356],["madagascar",63],["dominican",62],["la",59],["san",54],["ecuador",52],["1",45],["peru",45],["sambirano",44],["kokoa",43],["republic",42],["maya",42],["w/",41],["los",38],["island,",37],["chuao",33],["harvest",33],["camino",31],["lot",30],["p.,",30],["alto",29],["h.",29],["p.",28],["2017",28],["venezuela",27]],"vocab_skipped":null,"word_histogram":{"counts":[854,0,0,547,0,0,0,415,0,0,0,299,0,0,0,204,0,0,126,0,0,0,63,0,0,0,21,0,0,1],"edges":[1.0,1.2666666666666666,1.5333333333333332,1.8,2.0666666666666664,2.333333333333333,2.6,2.8666666666666667,3.1333333333333333,3.4,3.6666666666666665,3.933333333333333,4.2,4.466666666666667,4.733333333333333,5.0,5.266666666666667,5.533333333333333,5.8,6.066666666666666,6.333333333333333,6.6,6.866666666666666,7.133333333333333,7.4,7.666666666666667,7.933333333333334,8.2,8.466666666666667,8.733333333333334,9.0]}},"kind":"text","n":2530,"n_null":0,"n_unique":1605,"null_rate":0.0,"stats":{"allcaps_rate":0.0015810276679841897,"boilerplate_rate":0.0,"duplicate_rate":0.36561264822134387,"emoji_rate":0.0,"len_max":51,"len_mean":17.115415019762846,"len_median":14.0,"len_min":3,"len_p95":39.0,"n_duplicates":925,"n_empty":0,"one_word_rate":0.3375494071146245,"readability_flesch_mean":28.411783928571452,"url_rate":0.0,"vocab_size":2079,"word_mean":2.6810276679841896,"word_median":2.0}},{"alerts":[{"code":"outliers","level":"warn","message":"9.3% rows beyond 1.5 
IQR"}],"column":"cocoa_percent","extras":{"histogram":{"counts":[1,0,1,0,0,1,0,1,16,2,1,8,47,23,14,124,28,106,13,1046,340,72,377,35,63,2,95,18,9,40,1,9,2,12,0,0,0,0,0,23],"edges":[42.0,43.45,44.9,46.35,47.8,49.25,50.7,52.15,53.6,55.05,56.5,57.95,59.4,60.849999999999994,62.3,63.75,65.2,66.65,68.1,69.55,71.0,72.45,73.9,75.35,76.8,78.25,79.69999999999999,81.15,82.6,84.05,85.5,86.94999999999999,88.4,89.85,91.3,92.75,94.19999999999999,95.65,97.1,98.55,100.0]},"sample":[70.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,75.0,75.0,65.0,60.0,60.0,70.0,70.0,70.0,73.0,75.0,68.0,75.0,85.0,70.0,70.0,70.0,70.0,72.0,72.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,72.0,82.0,70.0,80.0,80.0,70.0,70.0,70.0,72.0,74.0,88.0,70.0,70.0,65.0,65.0,70.0,70.0,70.0,70.0,82.0,75.0,75.0,75.0,75.0,75.0,75.0,75.0,70.0,75.0,62.0,75.0,70.0,70.0,80.0,70.0,68.0,72.0,70.0,72.0,70.0,70.0,75.0,72.0,66.0,70.0,70.0,74.0,70.0,72.0,70.0,77.0,71.0,72.0,70.0,91.0,71.0,80.0,55.0,70.0,65.0,65.0,72.0,72.0,70.0,70.0,83.0,70.0,80.0,74.0,69.0,71.0,71.0,70.0,70.0,72.0,72.0,100.0,73.5,70.0,77.0,78.0,68.0,75.0,72.0,70.0,72.0,70.0,75.0,73.0,70.0,65.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,82.0,70.0,70.0,75.0,70.0,60.0,64.0,72.0,77.0,80.0,70.0,70.0,72.0,72.0,75.0,70.0,75.0,72.0,72.0,70.0,70.0,70.0,70.0,70.0,70.0,71.0,65.0,82.0,70.0,70.0,70.0,70.0,78.0,74.0,80.0,76.0,68.0,58.0,85.0,80.0,70.0,73.0,75.0,72.0,70.0,72.0,70.0,70.0,70.0,81.0,70.0,70.0,74.0,69.0,70.0,70.0,70.0,72.0,72.0,68.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,60.0,66.0,68.0,70.0,70.0,75.0,75.0,77.0,75.0,70.0,82.0,76.0,65.0,65.0,65.0,72.0,72.0,70.0,80.0,75.0,55.0,75.0,70.0,72.0,74.0,72.0,72.0,85.0,70.0,70.0,66.0,70.0,70.0,70.0,72.0,74.0,70.0,72.0,72.0,72.0,71.0,72.0,65.0,78.0,68.0,70.0,75.0,64.0,70.0,72.0,72.5,72.5,68.0,70.0,70.0,70.0,74.0,60.0,60.0,66.0,70.0,75.0,70.0,72.0,70.0,70.0,65.0,70.0,78.0,81.0,46.0,67.0,70.0,67.0,75.0,70.0,70.0,70.0,70.0,55.0,74.0,70.0,70.0,72.0,70.0,78.0,72.0,75.0,68.0,65.0,75.0,76.0,70.0,70.0,70.0,72.0,70.0,70.0,85.0,70.0,80.0,70.0,72.0,70.0,
60.0,70.0,66.0,67.0,70.0,75.0,70.0,75.0,76.0,75.0,74.0,65.0,67.0,72.0,70.0,63.0,80.0,68.0,78.0,70.0,72.0,70.0,67.0,72.0,70.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,75.0,66.0,74.0,70.0,76.0,76.0,80.0,75.0,74.0,72.0,66.0,60.0,65.0,70.0,70.0,72.0,70.0,75.0,70.0,70.0,70.0,76.0,70.0,75.0,67.0,75.0,72.0,75.0,70.0,80.0,73.0,70.0,70.0,70.0,75.0,75.0,75.0,75.0,75.0,85.0,65.0,60.0,55.0,70.0,67.0,75.0,80.0,75.0,75.0,70.0,75.0,71.0,72.0,72.0,77.0,72.0,70.0,70.0,65.0,70.0,75.0,67.0,73.0,73.0,70.0,70.0,72.0,70.0,85.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,80.0,70.0,75.0,70.0,70.0,62.0,70.0,70.0,70.0,84.0,70.0,65.0,61.0,75.0,70.0,70.0,70.0,70.0,70.0,70.0,62.0,70.0,70.0,75.0,75.0,75.0,70.0,75.0,68.0,70.0,70.0,70.0,70.0,73.0,70.0,65.0,72.0,75.0,70.0,70.0,70.0,73.0,70.0,70.0,60.0,75.0,75.0,70.0,74.0,64.0,70.0,85.0,64.0,70.0,80.0,70.0,75.0,75.0,72.0,74.0,72.0,89.0,68.0,65.0,70.0,70.0,70.0,70.0,68.0,75.0,65.0,70.0,75.0,65.0,75.0,75.0,72.0]},"kind":"numeric","n":2530,"n_null":0,"n_unique":46,"null_rate":0.0,"stats":{"iqr":4.0,"kurtosis":6.541375731243388,"max":100.0,"mean":71.6397233201581,"median":70.0,"min":42.0,"n_outliers":235,"outlier_rate":0.09288537549407115,"q1":70.0,"q3":74.0,"skew":1.1982965619063584,"std":5.61672422913589,"zero_rate":0.0}},{"alerts":[],"column":"ingredients","extras":{"singletons":3,"top_values":[["3- B,S,C",999],["2- B,S",718],["4- B,S,C,L",286],["5- B,S,C,V,L",184],["4- B,S,C,V",141],["",87],["2- B,S*",31],["4- B,S*,C,Sa",20],["3- B,S*,C",12],["3- B,S,L",8],["4- B,S*,C,V",7],["5-B,S,C,V,Sa",6],["1- B",6],["4- B,S,V,L",5],["4- B,S,C,Sa",5],["6-B,S,C,V,L,Sa",4],["3- B,S,V",3],["4- B,S*,V,L",3],["4- B,S*,C,L",2],["3- B,S*,Sa",1]]},"kind":"categorical","n":2530,"n_null":0,"n_unique":22,"null_rate":0.0,"stats":{"cardinality":22,"entropy":2.4304729003269667,"entropy_ratio":0.5450185378265011,"top_rate":0.3948616600790514,"top_value":"3- B,S,C"}},{"alerts":[{"code":"near_unique","level":"info","message":"98.3% of rows are 
unique strings"}],"column":"most_memorable_characteristics","extras":{"language_counts":{},"language_sample_size":2530,"length_histogram":{"counts":[1,0,4,4,3,1,0,2,3,9,39,47,52,0,29,34,69,100,145,0,206,206,156,173,179,192,0,201,179,178,121,86,66,0,23,13,3,4,1,1],"edges":[3.0,3.85,4.7,5.55,6.4,7.25,8.1,8.95,9.8,10.649999999999999,11.5,12.35,13.2,14.049999999999999,14.9,15.75,16.6,17.45,18.299999999999997,19.15,20.0,20.849999999999998,21.7,22.55,23.4,24.25,25.099999999999998,25.95,26.8,27.65,28.5,29.349999999999998,30.2,31.05,31.9,32.75,33.599999999999994,34.45,35.3,36.15,37.0]},"near_unique":true,"sample":["chewy, off, rubbery","dark berry, mild floral","intense, tannic, choco, earthy","basic cocoa, gateway","dried fruit, orange peel, cocoa","flat, molasses, creamy","XL nibs, sour, cardboard","blackberry, dirt, high roast","sweet, vanilla, cocoa, mold","sandy, woody, spicy","sticky, butterscotch, nutty","citrus,prononced orange","intense, sweet, brownie","sliglty dry, papaya","sticky, rich, cherry","sandy, intense, sweet, rich","cinamon and nutmeg","dry, molasses, sour","creamy, masculine, earthy","baked bread","subtle, caramel, sour milk","chalky, mild fruit, burnt","dried fruit, sour, nutella","intense, rum, tea","sticky, tobacco","green","multiple off flavors, metallic","complex, strawberry, floral","complex,black pepper,coffee","rich cocoa, spicy, cinamon","orange, floral, caramel","gritty, vanilla, non-descript","sweet, dairy, spice, cocoa","banana, yogurt, cocoa","honey, mild floral, mild off","fatty, nutty, earthy","intense, nutty, mild rubber","nutty, sour milk","spicy, intense, fades fast","fatty, roasty, coffee, off note","chewy, smoke, fruit, cocoa","baked, roasty, coffee","cinamon, nutmeg, coffee","honey, caramel","roasty, berry notes","intense, bourbon","fatty, rich, spice, nutty","strong cocoa, orange citrus","cloying, fatty, bitter","overly roasty, 
spicy"],"top_values":[],"top_words":[["sweet,",227],["cocoa",224],["mild",213],["cocoa,",182],["nutty,",175],["creamy,",175],["fruit,",162],["sandy,",160],["sour",142],["fatty,",127],["roasty,",124],["intense,",115],["earthy,",114],["floral,",111],["spicy,",107],["fruit",105],["off",103],["rich",101],["nutty",101],["roasty",100],["earthy",82],["sweet",81],["dried",78],["sticky,",75],["vanilla,",69]],"vocab_skipped":null,"word_histogram":{"counts":[18,0,0,0,0,0,268,0,0,0,0,0,1174,0,0,0,0,0,892,0,0,0,0,0,171,0,0,0,0,7],"edges":[1.0,1.1666666666666667,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333,5.0,5.166666666666666,5.333333333333333,5.5,5.666666666666666,5.833333333333333,6.0]}},"kind":"text","n":2530,"n_null":0,"n_unique":2487,"null_rate":0.0,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.01699604743083004,"emoji_rate":0.0,"len_max":37,"len_mean":23.062450592885376,"len_median":23.0,"len_min":3,"len_p95":30.0,"n_duplicates":43,"n_empty":0,"one_word_rate":0.0071146245059288534,"readability_flesch_mean":49.70667500000001,"url_rate":0.0,"vocab_size":868,"word_mean":3.375889328063241,"word_median":3.0}},{"alerts":[],"column":"rating","extras":{"histogram":{"counts":[4,0,0,0,0,0,10,0,0,0,3,0,0,33,0,0,17,0,0,0,166,0,0,333,0,0,523,0,0,0,464,0,0,565,0,0,300,0,0,112],"edges":[1.0,1.075,1.15,1.225,1.3,1.375,1.45,1.525,1.6,1.6749999999999998,1.75,1.825,1.9,1.975,2.05,2.125,2.2,2.275,2.3499999999999996,2.425,2.5,2.575,2.65,2.7249999999999996,2.8,2.875,2.95,3.025,3.1,3.175,3.25,3.3249999999999997,3.4,3.475,3.55,3.625,3.6999999999999997,3.775,3.85,3.925,4.0]},"sample":[3.0,3.25,3.5,3.75,3.75,2.75,3.0,2.75,3.75,2.5,2.75,2.5,2.5,2.75,3.0,4.0,3.5,3.25,3.5,3.5,2.75,3.75,3.25,3.75,3.0,2.75,3.25,4.0,3.25,3.0
,3.75,2.5,2.5,3.0,3.0,2.5,2.75,3.5,3.5,2.75,2.75,2.75,3.75,3.0,3.5,3.0,3.0,3.0,3.5,3.5,2.75,3.0,3.75,2.75,3.5,3.75,3.75,4.0,2.5,3.0,4.0,3.75,2.75,3.5,3.0,3.5,4.0,3.75,2.75,3.0,2.75,3.0,3.0,3.5,3.75,2.5,2.5,2.0,3.0,3.0,3.0,3.75,3.5,3.0,4.0,3.5,3.75,2.75,4.0,1.5,3.5,3.0,3.5,3.0,3.25,3.5,3.0,3.25,2.75,4.0,3.25,2.5,2.75,2.75,3.75,2.5,3.5,3.0,3.5,3.0,3.75,1.0,3.75,3.5,3.25,2.75,3.25,3.5,3.0,3.75,3.75,1.0,3.5,3.75,3.5,2.5,3.0,3.5,3.5,3.25,2.75,2.75,4.0,3.25,3.0,3.25,3.25,3.0,3.25,2.5,3.0,3.0,3.0,3.5,2.75,3.25,3.75,4.0,4.0,2.5,2.75,3.0,3.25,3.5,3.75,3.5,3.5,3.0,3.75,2.75,3.25,3.5,2.75,4.0,3.25,3.0,3.25,3.5,3.25,3.5,2.75,4.0,3.5,2.5,3.0,3.25,3.25,3.5,2.75,3.0,3.5,3.0,3.25,3.5,2.0,2.5,3.25,4.0,4.0,3.5,3.5,3.5,3.25,3.5,3.25,3.75,3.25,3.75,3.5,3.25,3.5,3.5,3.0,3.25,3.75,3.5,3.5,3.5,3.5,3.25,3.5,3.0,3.25,3.0,2.5,3.5,3.0,2.75,3.25,3.75,2.75,3.25,2.75,3.0,3.25,3.25,3.5,3.5,4.0,3.5,2.5,2.75,3.5,3.0,3.5,3.75,3.75,4.0,4.0,4.0,4.0,3.75,2.0,3.5,3.5,3.75,3.75,3.75,3.25,2.5,3.75,2.75,2.5,3.0,3.0,3.0,3.25,3.5,3.5,3.0,3.75,3.5,2.75,3.0,3.5,3.0,3.25,3.25,3.25,3.75,3.5,3.25,2.75,3.5,1.5,3.5,3.5,2.5,2.75,3.75,2.0,3.5,2.75,3.5,2.75,3.0,3.5,3.5,3.5,3.5,3.5,3.0,3.0,3.0,3.5,3.75,3.25,3.0,3.25,3.0,2.5,3.25,2.5,3.5,3.25,3.5,2.0,3.0,3.5,3.5,3.5,2.0,2.5,3.25,3.0,2.75,3.0,3.5,2.75,2.5,2.75,3.5,3.5,2.75,3.75,3.25,2.25,2.5,3.75,3.0,3.0,2.75,3.0,3.5,3.75,3.0,3.5,3.75,4.0,2.75,3.5,3.0,2.0,2.75,2.75,3.5,2.75,3.0,3.75,3.0,3.0,3.75,4.0,3.25,3.5,2.75,3.25,2.75,3.5,3.0,3.5,2.75,3.5,3.0,3.0,3.25,3.0,2.5,3.25,3.0,2.75,3.75,4.0,3.0,3.0,3.25,3.25,2.75,3.75,3.0,2.75,3.75,3.5,3.25,3.5,3.5,3.25,3.0,3.75,2.5,2.5,2.75,2.75,3.5,3.5,3.5,3.0,2.75,3.5,3.75,3.5,3.5,3.75,2.75,3.0,3.5,2.75,3.25,3.0,3.25,3.25,3.0,3.25,3.0,3.0,3.75,3.5,3.25,4.0,4.0,3.25,3.75,3.25,3.5,3.25,3.75,4.0,4.0,3.25,3.5,3.5,2.75,2.25,3.5,2.5,2.5,2.75,3.25,3.25,3.5,3.75,3.25,2.75,3.25,2.75,3.25,3.25,4.0,3.0,3.0,2.75,3.0,2.5,2.0,2.75,2.75,3.75,2.75,3.25,3.75,3.25,2.75,3.25,3.75,3.0,2.75,3.0,3.0,2.75,3.0,3.0,3.5,2.5,3.75,1.5,3.75,3.5,2.75,3.0,3.0,3.0,3.0,3
.5,3.75,2.75,3.25,2.75,3.25,2.75,3.5,3.25,3.0,3.25,3.0,3.75,3.0,3.5,3.75,3.0,3.5]},"kind":"numeric","n":2530,"n_null":0,"n_unique":12,"null_rate":0.0,"stats":{"iqr":0.5,"kurtosis":1.0534163943816193,"max":4.0,"mean":3.1963438735177867,"median":3.25,"min":1.0,"n_outliers":50,"outlier_rate":0.019762845849802372,"q1":3.0,"q3":3.5,"skew":-0.6084413776009047,"std":0.4453213042172302,"zero_rate":0.0}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","rating","cocoa_percent","company_location","country_of_bean_origin","ingredients","company","review_date"],"featured_charts":[{"caption":"Check how tightly ratings cluster around 3.25 and whether the left tail of low scores is meaningful.","column":"rating","kind":"histogram"},{"caption":"Look for the 70% spike and the 9% of bars flagged as outliers above or below the typical range.","column":"cocoa_percent","kind":"histogram"},{"caption":"Note how heavily U.S.A. dominates at ~45% of all reviews, dwarfing Canada and France.","column":"company_location","kind":"bar"},{"caption":"See the more balanced spread across Venezuela, Peru, Dominican Republic, and Ecuador as top bean sources.","column":"country_of_bean_origin","kind":"bar"},{"caption":"Observe that two recipes (3-ingredient B,S,C and 2-ingredient B,S) account for the majority of bars.","column":"ingredients","kind":"donut"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 2,530 chocolate bar reviews with 10 columns covering bean origins, cocoa percentages, ingredients, ratings, and review metadata. Ratings cluster tightly (median 3.25, IQR 0.5) on a 1\u20134 scale, while cocoa percent is similarly concentrated around 70% but carries 235 outliers worth investigating. Geographic skew is notable: U.S.A. dominates company locations at 44.9% of records, whereas bean origins are more diverse, led by Venezuela, Peru, and the Dominican Republic. 
Heads up that the `company` column is entirely empty (single blank value across all rows), so it should be excluded from analysis.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"`ref` is a high-cardinality categorical with 630 distinct values across 2530 rows and no nulls, with entropy ratio 0.9954 indicating a near-uniform distribution. Values are short numeric strings (e.g. \"414\", \"24\", \"1462\") and the most frequent appears only 10 times (top_rate 0.0040), so this behaves like a reference/lookup id repeated a handful of times rather than a free-form feature.","role":"foreign_key","scope":"column","target":"ref","treatment":"treat as a foreign key and left-join to its reference table rather than one-hot encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"The column is labelled 'company' but contains a single value\u2014an empty string\u2014across all 2530 rows. Cardinality is 1, entropy is 0, and top_rate is 1.0, so it carries no information. This is effectively a placeholder field that was never populated.","role":"metadata","scope":"column","target":"company","treatment":"Drop; constant empty value provides no signal."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Country of the chocolate maker, with 67 distinct locations and no nulls across 2530 rows. Heavily US-centric: 'U.S.A.' accounts for 44.9% (1136 rows), followed by Canada (177), France (176), and the U.K. (133), giving an entropy ratio of 0.61. 
Country names use abbreviated forms ('U.S.A.', 'U.K.') so any joins on canonical country lists will need normalisation.","role":"feature","scope":"column","target":"company_location","treatment":"Normalise country labels and group long-tail countries before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.std","stats.skew","stats.q1","stats.q3","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"This column stores the year a review was recorded, ranging from 2006 to 2021 with only 16 unique values across 2530 rows. The distribution is centered around 2015 (mean 2014.37, median 2015) with a modest spread (std 3.97) and is roughly symmetric (skew -0.18). No nulls or outliers are present.","role":"timestamp","scope":"column","target":"review_date","treatment":"Treat as a year-level temporal feature; bin or convert to datetime for trend analysis."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical country label identifying where the cocoa beans originated, with 62 distinct values across 2530 complete rows and no nulls. The distribution is broad rather than concentrated: the top value Venezuela accounts for only 10% of rows, and entropy ratio 0.79 confirms fairly even spread across many origins. 
Notable wrinkle: 'Blend' appears as the 6th most common value (156 rows), meaning some entries aren't a single country and will need special handling.","role":"feature","scope":"column","target":"country_of_bean_origin","treatment":"Group rare origins into 'Other', isolate 'Blend' as its own category, then one-hot or target-encode."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","top_values","top_words","stats.one_word_rate","stats.duplicate_rate","stats.word_mean"],"model":"anthropic:claude-opus-4-7","narrative":"This column captures the specific bean origin (region, estate, or country) for what appears to be a chocolate/cocoa dataset, with 1,605 unique values across 2,530 rows. Top values are dominated by countries like Madagascar (55), Ecuador (43), and Peru (41), but the high frequency of the word 'batch' (356 occurrences) suggests many entries mix origin names with batch identifiers, inflating uniqueness. Roughly 34% of values are single words and 37% are duplicates, indicating inconsistent granularity \u2014 some entries are broad countries, others are specific estates or batch-tagged labels.","role":"feature","scope":"column","target":"specific_bean_origin","treatment":"Normalize by stripping batch suffixes and standardizing to country/region before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.iqr","stats.std","stats.skew","stats.kurtosis","stats.outlier_rate","stats.n_outliers"],"model":"anthropic:claude-opus-4-7","narrative":"This is the cocoa percentage of each chocolate bar, ranging from 42 to 100 with a tight median of 70 and IQR of just 4. The distribution is right-skewed (skew 1.20, kurtosis 6.54) and 9.3% of rows flag as outliers \u2014 likely the high-cocoa tail pushing toward 100%. 
With only 46 unique values across 2530 rows, the field is effectively semi-discrete.","role":"feature","scope":"column","target":"cocoa_percent","treatment":"Use as-is or bin into cocoa-strength buckets; no transform needed given the narrow IQR."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be a coded recipe/composition field where each value lists a count followed by short ingredient codes (e.g. 'B,S,C', most plausibly beans, sugar, and cocoa butter given the chocolate-bar context). With only 22 distinct combinations across 2530 rows and a top value ('3- B,S,C') covering 39.5% of records, the field is highly concentrated \u2014 entropy_ratio is just 0.545. Notably, 87 rows carry an empty string rather than null, so null_rate=0.0 understates true missingness.","role":"feature","scope":"column","target":"ingredients","treatment":"Treat empty strings as missing, strip the leading count prefix (e.g. '3- '), then one-hot encode the ingredient tokens (split on comma) rather than the raw combined string."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.word_mean","stats.vocab_size","stats.duplicate_rate","stats.n_duplicates","top_words","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Short free-text tasting notes (mean 23 characters, ~3 words) describing flavor and texture characteristics, almost certainly from a chocolate or cocoa review dataset given top tokens like 'cocoa', 'sweet', 'nutty', 'creamy', and 'fruit'. Values are near-unique (2487 distinct of 2530) yet built from a small vocabulary of 868 words, indicating these are comma-separated descriptor combinations rather than prose. 
Only 43 exact duplicates and no empties or URLs; readability mean of 49.7 is not very meaningful at this length.","role":"free_text","scope":"column","target":"most_memorable_characteristics","treatment":"Split on commas into descriptor tags and one-hot or embed for modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.q1","stats.q3","stats.iqr","stats.std","stats.skew","stats.kurtosis","stats.n_outliers","stats.outlier_rate","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"A bounded numeric rating on a 1.0\u20134.0 scale with only 12 distinct values, suggesting half- or quarter-step increments rather than continuous scores. The distribution is tight (IQR 0.5, std 0.45) and slightly left-skewed (-0.61), centered near 3.25 with a mean of 3.20, and 50 low-end outliers (1.98%) pull the tail. No nulls or zeros, so every row carries a usable score.","role":"feature","scope":"column","target":"rating","treatment":"Use as-is as an ordinal/numeric feature; consider treating the 50 low-end outliers separately if modelling tails."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":3488,"prompt_tokens":12218,"total_tokens":15706}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:05:04+00:00","mode":"full","row_count":2530,"sampled_rows":2530,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/chocolate_origins.json"},"notes":[],"saturn_version":"0.2.0","schema":{"cocoa_percent":"numeric","company":"categorical","company_location":"categorical","country_of_bean_origin":"categorical","ingredients":"categorical","most_memorable_characteristics":"text","rating":"numeric","ref":"categorical","review_date":"numeric","specific_bean_origin":"text"}}
