{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"99.2% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"}],"column":"id","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[3503,15645,38487,31119,25119,10857,6439,4867,2424,1975,982,721,386,268,198,101,69,25,35,13,10,10,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,5.425,9.85,14.274999999999999,18.7,23.125,27.549999999999997,31.974999999999998,36.4,40.824999999999996,45.25,49.675,54.099999999999994,58.525,62.949999999999996,67.375,71.8,76.225,80.64999999999999,85.075,89.5,93.925,98.35,102.77499999999999,107.19999999999999,111.625,116.05,120.475,124.89999999999999,129.325,133.75,138.17499999999998,142.6,147.025,151.45,155.875,160.29999999999998,164.725,169.15,173.575,178.0]},"near_unique":true,"sample":["my_sassy_hubby","kattappanayile_rithwik_roshan","pinky","bethune-the-making-of-a-hero","mr_jones_2013","los_chicos_crenen_1942","mad_wednesday_1946","in_the_dark_room","clawed","starlight_hotel","brother_bear","lloyds_of_london","1205971-camille","dulhan_banoo_mein_teri_2000","nude_for_satan","endless_love_2014","the-saint-meets-the-tiger","a_holiday_homecoming","lifeline_clyfford_still","indru_netru_naalai_2015","with_friends_like_these_1998","entrusted","puzzles","al_boyout_asrar","the_lower_rooms","cinema-cinema","ugly_ones","divorce_iranian_style","meet_the_parents_1992","1057133-crusades","liebelei_1936","hitched_for_the_holidays","paar-the-crossing","further_beyond","the_theory_of_everything_2006","el_evangelista","le_bossu_2011","the_maggie","creature_of_the_walking_dead","dawns_here_are_quiet","rosa_and_the_executioner_of_the_fiend","madam_1993","snatch","the-wiz-kid-of-osbourne-street","faro_goddess_of_the_waters_2007","blood-of-the-dragon","hoboken-hollow","innocent_man","the_deflowering_of_ariella_von","im_losing_you"],"top_values":[],"top_words":[["westfront_1918_1930",2],["anandi_gopal",2]
,["a_good_dream_2018",2],["perfect_proposal",2],["still_dreaming",2],["hear_no_evil",2],["catch_me_if_you_can",2],["world_trade_center_anatomy_of_the_collapse_2002",2],["like_a_turtle_on_its_back_1978",2],["a_deadly_secret",2],["nothing-but-ghosts-nichts-als-gespenster",2],["the_face_of_love",2],["the_project_of_the_century",2],["baseketball",2],["loot_2012",2],["screen_tests",2],["dragon_in_jail",2],["shot_in_bombay",2],["which_way_to_the_front",2],["hole_in_the_sky",2],["soggadi_pellam",2],["patita",2],["love_live_sunshine_the_school_idol_movie_over_the_rainbow",2],["eye-2",2],["the-secret-six",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,143258,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":143258,"n_null":0,"n_unique":142052,"null_rate":0.0,"stats":{"allcaps_rate":0.005786762344860322,"boilerplate_rate":0.0,"duplicate_rate":0.008418378031244329,"emoji_rate":0.0,"len_max":178,"len_mean":18.15210319842522,"len_median":16.0,"len_min":1,"len_p95":37.0,"n_duplicates":1206,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":-75.47499999999995,"url_rate":0.0,"vocab_size":19976,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in 
sample"}],"column":"title","extras":{"language_counts":{"__engine":"fasttext:4,454","af":3,"ca":6,"ceb":12,"cs":2,"da":2,"de":80,"en":3946,"eo":6,"es":123,"et":3,"fi":10,"fr":72,"hr":3,"hu":3,"id":8,"it":55,"ja":3,"la":2,"lv":1,"ms":4,"nl":23,"no":5,"pl":8,"pt":20,"ru":5,"sl":5,"sr":2,"sv":16,"tl":2,"tr":12},"language_sample_size":5000,"length_histogram":{"counts":[6174,21384,38720,28652,18097,12387,5661,3782,3096,1631,1338,635,436,346,159,137,95,37,49,17,17,17,15,4,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1],"edges":[1.0,5.375,9.75,14.125,18.5,22.875,27.25,31.625,36.0,40.375,44.75,49.125,53.5,57.875,62.25,66.625,71.0,75.375,79.75,84.125,88.5,92.875,97.25,101.625,106.0,110.375,114.75,119.125,123.5,127.875,132.25,136.625,141.0,145.375,149.75,154.125,158.5,162.875,167.25,171.625,176.0]},"near_unique":false,"sample":["My Sassy Hubby","Marie for Memory","The Locals","All the Winters That Have Been","The Luck of Ginger Coffey","Bleeding Through","School Killer","Territories","Miral","Murt Ramirez Wants to Kick My Ass","Goshogaoka","Trinity and Beyond","The Peace Tree","Old: Trailer","Muhammad: The Last Prophet","Waist Deep","Brideshead Revisited","The Last Tour","Pen\u00e9lope","The Passion of the Mao","A Field in England","Deuces Wild","Robots of Brixton","Infernal Affairs III","The Answer","Family Holidays","Aliens, Clowns & Geeks","Cruel Summer","Red Pier","Heval","As Tears Go By","Hailey Dean Mystery: 2+2=4 Murders","Paint Cans","Zombie Town","The Grey Zone","The Sweetest Sound","Museum","Awaken Soul to Soul","Incredible Violence","Robbery Under Arms","My Big Bossing","The Other City","Magic Hour","Happy New Year, Colin Burstead","Voice of the Whistler","White Mama","Goth Kill","Drive - My Life In Skateboarding: Mike Vallely","12 rounds","Star Trek II: The Wrath of Khan"],"top_values":[["The Return",17],["The Stranger",17],["First Love",16],["Hero",16],["Blue",15],["Mother",15],["Alone",15],["A Christmas Carol",15],["Paradise",15],["Home",14],["The 
Crossing",14],["Hamlet",14],["Lucky",14],["Rage",13],["The Outsider",13],["Alice in Wonderland",12],["Limbo",12],["Love",12],["Treasure Island",12],["Underground",12]],"top_words":[["the",5643],["of",2129],["a",1031],["in",694],["and",665],["to",510],["love",326],["my",283],["for",274],["on",265],["&",227],["i",214],["story",193],["life",189],["man",187],["from",185],["me",172],["night",169],["is",159],["last",159],["you",159],["christmas",159],["with",156],["la",153],["-",152]],"vocab_skipped":null,"word_histogram":{"counts":[24499,40128,32700,20898,11686,5882,3062,0,1753,993,551,308,204,90,0,64,22,23,11,7,5,2,0,1,1,0,0,0,0,1],"edges":[1.0,1.8666666666666667,2.7333333333333334,3.6,4.466666666666667,5.333333333333334,6.2,7.066666666666666,7.933333333333334,8.8,9.666666666666668,10.533333333333333,11.4,12.266666666666667,13.133333333333333,14.0,14.866666666666667,15.733333333333334,16.6,17.46666666666667,18.333333333333336,19.2,20.066666666666666,20.933333333333334,21.8,22.666666666666668,23.533333333333335,24.400000000000002,25.266666666666666,26.133333333333333,27.0]}},"kind":"text","n":143258,"n_null":367,"n_unique":126403,"null_rate":0.0025618115567716984,"stats":{"allcaps_rate":0.003989054594061208,"boilerplate_rate":9.097843811016789e-05,"duplicate_rate":0.11538865288926524,"emoji_rate":0.0,"len_max":176,"len_mean":17.23315674185218,"len_median":15.0,"len_min":1,"len_p95":37.0,"n_duplicates":16488,"n_empty":0,"one_word_rate":0.17145236578930795,"readability_flesch_mean":60.61848722527475,"url_rate":0.0,"vocab_size":17597,"word_mean":3.073615553113912,"word_median":3.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"48.9% 
null"}],"column":"audienceScore","extras":{"histogram":{"counts":[1158,84,353,437,1114,814,1413,812,2208,884,1918,1406,1799,2064,1875,1756,3056,2022,2212,1176,3593,1526,3065,1583,3772,1583,3187,1692,3102,1777,2972,1877,3343,2034,2618,1791,1830,879,668,1795],"edges":[0.0,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,22.5,25.0,27.5,30.0,32.5,35.0,37.5,40.0,42.5,45.0,47.5,50.0,52.5,55.0,57.5,60.0,62.5,65.0,67.5,70.0,72.5,75.0,77.5,80.0,82.5,85.0,87.5,90.0,92.5,95.0,97.5,100.0]},"sample":[71.0,25.0,0.0,0.0,41.0,53.0,100.0,38.0,29.0,71.0,83.0,78.0,88.0,80.0,93.0,71.0,42.0,69.0,49.0,78.0,82.0,56.0,38.0,19.0,55.0,21.0,67.0,77.0,72.0,20.0,80.0,69.0,49.0,61.0,76.0,32.0,100.0,38.0,40.0,72.0,38.0,46.0,93.0,49.0,75.0,82.0,54.0,10.0,50.0,80.0,39.0,74.0,25.0,87.0,14.0,38.0,65.0,46.0,49.0,45.0,41.0,58.0,40.0,83.0,13.0,8.0,44.0,70.0,37.0,0.0,45.0,32.0,60.0,43.0,0.0,45.0,80.0,66.0,38.0,66.0,17.0,62.0,79.0,78.0,52.0,44.0,9.0,50.0,96.0,47.0,63.0,78.0,67.0,18.0,58.0,90.0,25.0,47.0,77.0,64.0,60.0,84.0,65.0,48.0,90.0,93.0,22.0,35.0,69.0,32.0,89.0,45.0,64.0,33.0,91.0,60.0,59.0,83.0,46.0,30.0,42.0,87.0,87.0,25.0,31.0,73.0,53.0,87.0,40.0,61.0,81.0,100.0,33.0,20.0,22.0,82.0,61.0,59.0,32.0,88.0,86.0,60.0,52.0,43.0,85.0,71.0,84.0,40.0,80.0,19.0,38.0,60.0,23.0,43.0,69.0,62.0,50.0,77.0,37.0,46.0,50.0,77.0,40.0,64.0,76.0,60.0,27.0,70.0,92.0,81.0,49.0,64.0,40.0,81.0,40.0,82.0,90.0,58.0,38.0,12.0,52.0,57.0,37.0,77.0,40.0,83.0,38.0,65.0,38.0,40.0,67.0,23.0,24.0,92.0,58.0,56.0,0.0,48.0,38.0,89.0,29.0,40.0,29.0,83.0,68.0,45.0,76.0,23.0,58.0,50.0,80.0,27.0,80.0,20.0,100.0,38.0,62.0,53.0,10.0,73.0,39.0,78.0,19.0,54.0,52.0,84.0,100.0,87.0,80.0,95.0,9.0,53.0,57.0,0.0,33.0,87.0,100.0,61.0,85.0,48.0,11.0,46.0,21.0,36.0,60.0,17.0,45.0,0.0,12.0,42.0,6.0,60.0,56.0,12.0,56.0,49.0,51.0,83.0,52.0,44.0,46.0,33.0,76.0,12.0,69.0,17.0,100.0,55.0,63.0,62.0,80.0,83.0,100.0,71.0,30.0,61.0,57.0,35.0,90.0,39.0,54.0,26.0,60.0,33.0,43.0,43.0,12.0,24.0,52.0,71.0,69.0,100.0,45.0,82.0,74.0,26.0,100.0,76.0,47.0,67.0,20.0,35.0,83.0,100.0
,36.0,20.0,92.0,44.0,14.0,33.0,76.0,95.0,90.0,33.0,83.0,23.0,100.0,57.0,54.0,50.0,29.0,69.0,76.0,60.0,56.0,30.0,77.0,60.0,70.0,75.0,48.0,100.0,10.0,82.0,51.0,66.0,43.0,33.0,88.0,86.0,100.0,47.0,69.0,20.0,67.0,92.0,80.0,14.0,57.0,70.0,93.0,68.0,23.0,26.0,33.0,0.0,33.0,85.0,40.0,10.0,57.0,64.0,80.0,44.0,14.0,80.0,30.0,87.0,73.0,38.0,22.0,84.0,20.0,89.0,71.0,31.0,79.0,87.0,11.0,46.0,57.0,38.0,26.0,81.0,12.0,31.0,23.0,50.0,83.0,73.0,55.0,83.0,82.0,57.0,20.0,0.0,67.0,40.0,30.0,79.0,53.0,57.0,39.0,82.0,60.0,36.0,43.0,96.0,60.0,41.0,44.0,33.0,50.0,32.0,67.0,69.0,99.0,60.0,75.0,14.0,84.0,40.0,58.0,61.0,67.0,31.0,38.0,33.0,79.0,50.0,66.0,38.0,19.0,73.0,87.0,50.0,80.0,100.0,79.0,10.0,46.0,47.0,6.0,0.0,76.0,51.0,29.0,84.0,76.0,48.0,92.0,71.0,91.0,62.0,35.0,53.0,84.0,84.0,37.0,63.0,92.0,18.0,37.0,40.0,62.0,80.0,29.0,61.0,48.0,75.0,92.0,79.0,42.0,56.0,71.0,75.0,76.0,72.0,75.0,42.0,59.0,77.0,70.0,76.0,85.0,69.0,69.0,73.0,56.0,82.0,84.0,53.0,62.0,60.0,40.0,39.0,38.0,69.0,73.0,75.0]},"kind":"numeric","n":143258,"n_null":70010,"n_unique":101,"null_rate":0.4886987114157674,"stats":{"iqr":39.0,"kurtosis":-0.8322282294065255,"max":100.0,"mean":55.67496723460026,"median":57.0,"min":0.0,"n_outliers":0,"outlier_rate":0.0,"q1":37.0,"q3":76.0,"skew":-0.22568186101438198,"std":24.553647682901225,"zero_rate":0.015604521625163827}},{"alerts":[{"code":"null_rate","level":"warn","message":"76.4% 
null"}],"column":"tomatoMeter","extras":{"histogram":{"counts":[723,77,201,224,375,458,529,262,859,203,557,433,431,610,436,436,964,651,521,228,1119,386,964,335,1163,769,1293,493,1215,614,1166,786,1951,1247,1623,1488,1782,1104,1133,4068],"edges":[0.0,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,22.5,25.0,27.5,30.0,32.5,35.0,37.5,40.0,42.5,45.0,47.5,50.0,52.5,55.0,57.5,60.0,62.5,65.0,67.5,70.0,72.5,75.0,77.5,80.0,82.5,85.0,87.5,90.0,92.5,95.0,97.5,100.0]},"sample":[92.0,86.0,88.0,75.0,43.0,64.0,100.0,93.0,83.0,71.0,100.0,57.0,21.0,32.0,83.0,76.0,70.0,42.0,55.0,48.0,0.0,100.0,100.0,90.0,17.0,86.0,89.0,33.0,94.0,100.0,78.0,28.0,47.0,85.0,77.0,38.0,91.0,96.0,90.0,38.0,100.0,68.0,61.0,43.0,88.0,14.0,83.0,90.0,35.0,0.0,2.0,72.0,71.0,26.0,22.0,86.0,24.0,56.0,73.0,80.0,92.0,44.0,93.0,67.0,17.0,69.0,100.0,54.0,55.0,93.0,80.0,79.0,36.0,83.0,17.0,100.0,58.0,80.0,89.0,71.0,91.0,85.0,42.0,20.0,100.0,63.0,62.0,0.0,93.0,24.0,10.0,10.0,33.0,91.0,81.0,100.0,96.0,100.0,71.0,82.0,43.0,36.0,20.0,87.0,50.0,78.0,31.0,21.0,25.0,0.0,80.0,87.0,100.0,91.0,72.0,88.0,41.0,81.0,51.0,61.0,72.0,4.0,88.0,32.0,50.0,67.0,91.0,57.0,60.0,83.0,14.0,55.0,73.0,93.0,50.0,63.0,64.0,100.0,80.0,94.0,27.0,44.0,89.0,87.0,90.0,80.0,79.0,67.0,96.0,100.0,83.0,13.0,81.0,28.0,94.0,60.0,49.0,94.0,50.0,100.0,67.0,98.0,55.0,0.0,57.0,55.0,100.0,30.0,45.0,68.0,33.0,100.0,80.0,75.0,80.0,83.0,72.0,86.0,100.0,83.0,100.0,64.0,100.0,88.0,28.0,88.0,88.0,94.0,0.0,88.0,80.0,58.0,50.0,10.0,99.0,100.0,81.0,14.0,78.0,69.0,39.0,75.0,40.0,56.0,68.0,70.0,71.0,100.0,93.0,40.0,33.0,82.0,80.0,55.0,73.0,33.0,80.0,97.0,69.0,100.0,28.0,77.0,97.0,33.0,76.0,64.0,57.0,95.0,88.0,100.0,92.0,36.0,95.0,100.0,81.0,75.0,100.0,70.0,80.0,50.0,43.0,88.0,50.0,94.0,60.0,80.0,88.0,78.0,87.0,100.0,46.0,27.0,20.0,0.0,48.0,63.0,80.0,63.0,75.0,100.0,29.0,17.0,41.0,89.0,60.0,59.0,100.0,34.0,100.0,26.0,94.0,84.0,37.0,73.0,98.0,42.0,68.0,100.0,63.0,83.0,75.0,85.0,83.0,21.0,67.0,13.0,81.0,100.0,0.0,89.0,55.0,82.0,71.0,63.0,63.0,34.0,7.0,73.0,88.0,52.0,90.0,93.0,83.0,91.0
,8.0,75.0,27.0,100.0,62.0,86.0,63.0,79.0,65.0,61.0,100.0,78.0,83.0,43.0,98.0,95.0,78.0,90.0,85.0,100.0,67.0,25.0,94.0,100.0,46.0,100.0,17.0,87.0,92.0,72.0,80.0,43.0,82.0,63.0,20.0,88.0,74.0,87.0,98.0,50.0,88.0,60.0,47.0,78.0,22.0,100.0,100.0,8.0,78.0,88.0,83.0,87.0,71.0,41.0,83.0,10.0,33.0,60.0,100.0,90.0,80.0,90.0,61.0,20.0,9.0,75.0,25.0,83.0,60.0,100.0,14.0,86.0,92.0,26.0,67.0,83.0,76.0,93.0,50.0,90.0,100.0,87.0,29.0,8.0,80.0,16.0,60.0,33.0,63.0,90.0,91.0,28.0,88.0,100.0,44.0,20.0,40.0,100.0,42.0,23.0,87.0,71.0,100.0,0.0,72.0,25.0,100.0,83.0,50.0,80.0,11.0,83.0,89.0,57.0,85.0,25.0,83.0,100.0,100.0,97.0,78.0,45.0,30.0,74.0,53.0,100.0,88.0,37.0,89.0,85.0,50.0,43.0,100.0,86.0,26.0,65.0,13.0,70.0,50.0,71.0,33.0,73.0,71.0,94.0,81.0,88.0,44.0,89.0,71.0,96.0,94.0,84.0,67.0,57.0,80.0,69.0,33.0,63.0,32.0,60.0,90.0,77.0,100.0,42.0,87.0,63.0,71.0,0.0,64.0,97.0,93.0,37.0,83.0,51.0,86.0,70.0,93.0,100.0,100.0,88.0,95.0,38.0,100.0,90.0,93.0,69.0,67.0,69.0,67.0,60.0,78.0,83.0,63.0,21.0,98.0,10.0]},"kind":"numeric","n":143258,"n_null":109381,"n_unique":101,"null_rate":0.7635245501123846,"stats":{"iqr":44.0,"kurtosis":-0.6657010567816708,"max":100.0,"mean":65.77034566224873,"median":73.0,"min":0.0,"n_outliers":0,"outlier_rate":0.0,"q1":45.0,"q3":89.0,"skew":-0.6467461765236463,"std":28.02320282131846,"zero_rate":0.020840098001594}},{"alerts":[{"code":"null_rate","level":"warn","message":"90.2% null"}],"column":"rating","extras":{"singletons":3,"top_values":[["R",7734],["PG-13",3446],["PG",1911],["TVPG",424],["TV14",397],["TVMA",57],["NC-17",19],["TVG",1],["TVY7",1],["G",1]]},"kind":"categorical","n":143258,"n_null":129267,"n_unique":10,"null_rate":0.90233704225942,"stats":{"cardinality":10,"entropy":1.7098682558931866,"entropy_ratio":0.5147216336575051,"top_rate":0.5527839325280538,"top_value":"R"}},{"alerts":[{"code":"multilingual","level":"info","message":"5 languages detected in sample"},{"code":"null_rate","level":"warn","message":"90.2% 
null"},{"code":"duplicates","level":"warn","message":"40.3% duplicate strings"}],"column":"ratingContents","extras":{"language_counts":{"__engine":"fasttext:443","en":425,"it":12,"km":1,"ro":5},"language_sample_size":5000,"length_histogram":{"counts":[326,791,207,529,281,915,839,592,956,702,910,757,889,819,539,633,493,532,369,398,310,246,215,158,146,96,95,70,45,43,19,26,11,11,5,3,5,6,2,2],"edges":[5.0,8.575,12.15,15.725000000000001,19.3,22.875,26.450000000000003,30.025000000000002,33.6,37.175000000000004,40.75,44.325,47.900000000000006,51.475,55.050000000000004,58.625,62.2,65.775,69.35000000000001,72.925,76.5,80.075,83.65,87.22500000000001,90.80000000000001,94.375,97.95,101.525,105.10000000000001,108.67500000000001,112.25,115.825,119.4,122.97500000000001,126.55000000000001,130.125,133.70000000000002,137.275,140.85,144.425,148.0]},"near_unique":false,"sample":["['Rude and Suggestive Material']","['Thematic Elements and Language']","['Sexual References', 'Language Throughout', 'Some Violence']","['Strong Bloody Violence', 'Language']","['Some Violent Content', 'Language']","['Language and Brief Violence']","['Some Mild Action', 'Rude Material/Language']","['Some Drug Use', 'Language Throughout', 'Strong Sexual Content']","['Thematic Material', 'Some Violent Content', 'Brief Language']","['Depiction of Killing', 'Some Aberrant Sexual Content', 'Depiction of Torture']","['Terror', 'Violence']","['Violence', 'Suggestive Material', 'Language']","['Language Throughout', 'Drug Use', 'Some Nudity', 'Strong Crude Sexual Content']","['Brief Sexuality', 'Language']","['Nudity', 'Language', 'Some Graphic Sexuality', 'Strong Bloody Violence', 'Strong Brutal Violence']","['Language', 'Strong Violence']","['Language', 'Rude Humor']","['Violence', 'Drug Content', 'Sexuality', 'Language']","['L']","['Some Violent Images', 'Sexual Content']","['Crude and Sexual Content', 'Brief Graphic Nudity', 'Drug Material', 'Pervasive Language']","['Strong Violent Content', 'Sexual Assault', 
'Language Throughout', 'Brief Nudity', 'Sexual Content', 'Some Drug Use']","['Intense Action', 'Brief Strong Language', 'Sequences of Violence', 'Sexual Content']","['Language Throughout', 'Drug Use', 'Crude Sexual Material']","['Language']","['Brief Drug Use', 'Violence', 'Language']","['Some Sexual Content', 'Language', 'Violence']","['Sexual Content', 'Brief Partial Nudity', 'Smoking']","['Brief Strong Language', 'Mature Thematic Material']","['Some Violence', 'Language', 'Brief Sexuality']","['Mild Rude Humor', 'Mild Language']","['Some Sexual References', 'Language']","['Language', 'Graphic Nudity', 'A Rape', 'Strong Sexuality']","['Some Sexual Content', 'Language']","['Sex-Related Humor', 'Language']","['Graphic Sequences of War', 'Graphic Sequences of Violence', 'Language']","['Mild Rude Humor and Action']","['V']","['Brief Strong Language', 'Brief Drug Use', 'Suggestive Material']","['Violence', 'Language', 'Some Drug Material', 'Nude Images']","['Brief Strong Language', 'Drug References']","['Sexual Content']","['Strong Sexual Content', 'Language']","['Some Violent Images']","['Sequences of Violence', 'Language', 'Action', 'Smoking']","['Some Thematic Elements', 'Rough Sports Action', 'Language']","['Smoking', 'Sexual Content']","['Language']","['Seq. 
of Martial Arts Action', 'Mild Violence']","['Some Language', 'Adventure Action', 'Suggestive Content']"],"top_values":[["['Language']",365],["['V']",155],["['Some Language']",143],["['Some Violence']",126],["['Violence']",105],["['L']",105],["['V', 'L']",103],["['L', 'V']",93],["['Violence', 'Language']",81],["['Brief Strong Language']",81],["['Sexual Content', 'Language']",80],["['Language', 'Violence']",69],["['Language', 'Some Violence']",57],["['Some Sexual Content', 'Language']",57],["['Language', 'Sexual Content']",55],["['Mild Thematic Elements']",53],["['Some Sexual Content']",52],["['Violence and Language']",50],["['Language', 'Some Sexual Content']",48],["['D']",47]],"top_words":[["'some",3738],["['some",3138],["content',",2417],["sexual",2073],["['language',",1949],["'language',",1798],["violence',",1779],["'language']",1697],["'brief",1653],["language',",1653],["violence']",1598],["content']",1436],["use',",1374],["drug",1348],["language']",1293],["'sexual",1243],["'strong",1182],["'drug",1149],["images',",1014],["'violence']",933],["['brief",917],["nudity',",877],["material',",870],["and",861],["'thematic",778]],"vocab_skipped":null,"word_histogram":{"counts":[829,1267,0,1962,0,2262,0,2081,1730,0,1293,0,955,0,654,414,0,263,0,148,0,63,31,0,26,0,7,0,5,1],"edges":[1.0,1.5666666666666667,2.1333333333333333,2.7,3.2666666666666666,3.833333333333333,4.4,4.966666666666667,5.533333333333333,6.1,6.666666666666666,7.233333333333333,7.8,8.366666666666667,8.933333333333334,9.5,10.066666666666666,10.633333333333333,11.2,11.766666666666666,12.333333333333332,12.9,13.466666666666667,14.033333333333333,14.6,15.166666666666666,15.733333333333333,16.299999999999997,16.866666666666667,17.433333333333334,18.0]}},"kind":"text","n":143258,"n_null":129267,"n_unique":8353,"null_rate":0.90233704225942,"stats":{"allcaps_rate":0.061682510185119006,"boilerplate_rate":0.0,"duplicate_rate":0.40297334000428847,"emoji_rate":0.0,"len_max":148,"len_mean":46.15974555071117,"len_median":4
4.0,"len_min":5,"len_p95":88.0,"n_duplicates":5638,"n_empty":0,"one_word_rate":0.05925237652776785,"readability_flesch_mean":15.017375487013009,"url_rate":0.0,"vocab_size":1188,"word_mean":5.169466085340576,"word_median":5.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"78.5% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"60.8% duplicate strings"}],"column":"releaseDateTheaters","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30773,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[9.5,9.525,9.55,9.575,9.6,9.625,9.65,9.675,9.7,9.725,9.75,9.775,9.8,9.825,9.85,9.875,9.9,9.925,9.95,9.975,10.0,10.025,10.05,10.075,10.1,10.125,10.15,10.175,10.2,10.225,10.25,10.275,10.3,10.325,10.35,10.375,10.4,10.425,10.45,10.475,10.5]},"near_unique":false,"sample":["1997-02-20","1980-09-10","2017-09-08","2010-05-21","2022-11-11","2021-03-12","1970-05-06","2022-07-22","2018-09-07","2011-05-06","2014-01-10","2002-02-15","2021-11-05","2018-07-20","1999-01-22","2016-02-19","1957-11-01","1985-07-10","2013-08-23","2009-03-20","1969-02-26","2015-12-25","2016-08-12","2014-10-31","1996-06-20","2015-12-14","1946-02-01","2007-10-19","2004-07-19","2006-09-01","1984-03-09","2020-08-14","2016-07-22","2013-08-14","1952-05-02","1943-06-18","2023-02-24","1980-09-09","1915-07-12","2007-12-14","2000-03-10","2020-06-19","2013-09-28","1910-08-01","1944-09-14","1996-03-20","1908-02-01","2008-09-12","2015-04-17","2014-09-26"],"top_values":[["2018-09-14",37],["2018-10-26",33],["2023-02-03",33],["2018-09-28",33],["2021-12-03",33],["2022-06-03",32],["2019-08-02",32],["2022-11-04",32],["2019-11-01",31],["2017-10-06",31],["2017-10-13",30],["2019-03-01",30],["2017-09-08",29],["2019-09-13",
29],["2022-10-07",29],["2015-12-04",28],["2020-02-14",28],["2020-03-06",28],["2019-10-04",28],["2021-08-20",28]],"top_words":[["2018-09-14",25],["2022-10-07",24],["2018-09-28",23],["2022-06-03",22],["2021-12-03",22],["2019-04-12",21],["2021-11-19",21],["2019-09-20",21],["2019-11-15",21],["2019-09-13",21],["2017-03-03",20],["2019-08-02",20],["2015-08-14",20],["2023-03-10",20],["2018-10-26",20],["2015-10-16",19],["2023-02-03",19],["2017-10-13",19],["2019-07-12",19],["2022-04-22",19],["2020-10-02",19],["2019-02-08",19],["2019-03-01",19],["2017-11-03",19],["2019-11-01",19]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30773,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":143258,"n_null":112485,"n_unique":12062,"null_rate":0.785191751944045,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.6080330159555455,"emoji_rate":0.0,"len_max":10,"len_mean":10.0,"len_median":10.0,"len_min":10,"len_p95":10.0,"n_duplicates":18711,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":9088,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"44.6% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"94.0% duplicate 
strings"}],"column":"releaseDateStreaming","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,79419],"edges":[7.0,7.075,7.15,7.225,7.3,7.375,7.45,7.525,7.6,7.675,7.75,7.825,7.9,7.975,8.05,8.125,8.2,8.275,8.35,8.425,8.5,8.575,8.65,8.725,8.8,8.875,8.95,9.025,9.1,9.175,9.25,9.325,9.4,9.475,9.55,9.625,9.7,9.775,9.85,9.925,10.0]},"near_unique":false,"sample":["2010-09-28","2012-11-27","2020-07-04","2020-12-02","2018-08-25","2012-12-04","2015-10-06","2014-03-18","2016-09-07","2019-03-26","2011-12-20","2018-02-09","2005-05-17","2007-04-24","2016-07-01","2021-02-07","2003-03-11","2020-07-18","2009-01-27","2020-09-07","2016-08-10","2016-08-10","2002-05-07","2013-05-28","2017-05-22","2020-04-17","2004-11-23","2017-03-11","2020-08-30","2012-05-08","2017-03-18","2019-06-10","2013-11-05","2020-09-20","2020-12-21","2006-08-22","2014-03-18","2017-09-04","2019-11-21","2006-08-22","2018-08-25","2017-03-11","2022-03-16","2021-10-08","2016-03-28","2009-04-14","2020-06-11","2007-11-20","2019-08-02","2019-01-22"],"top_values":[["2017-05-22",1232],["2016-11-30",633],["2016-08-10",611],["2017-05-23",594],["2020-04-17",425],["2017-03-23",389],["2020-07-26",369],["2017-03-10",363],["2019-01-03",358],["2018-08-25",318],["2017-03-11",290],["2017-04-04",277],["2016-11-18",246],["2019-01-04",245],["2017-01-25",233],["2016-09-01",222],["2019-10-21",207],["2015-10-01",204],["2012-01-01",194],["2017-03-13",186]],"top_words":[["2017-05-22",319],["2016-11-30",165],["2016-08-10",165],["2017-05-23",147],["2017-03-10",105],["2017-03-23",102],["2020-07-26",101],["2020-04-17",99],["2019-01-03",81],["2018-08-25",78],["2017-04-04",75],["2017-03-11",71],["2016-11-18",69],["2017-01-25",62],["2016-09-01",62],["2019-01-04",61],["2017-03-17",51],["2019-10-21",50],["2012-01-01",50],["2019-04-01",44],["2019-03-08",42],["2015-10-01",40],["2016-12-05",39],["2017-03-13",39],["2016-11-05",38]],"vocab_sk
ipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,79420,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":143258,"n_null":63838,"n_unique":4726,"null_rate":0.4456156026190509,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.940493578443717,"emoji_rate":0.0,"len_max":10,"len_mean":9.99996222613951,"len_median":10.0,"len_min":7,"len_p95":10.0,"n_duplicates":74694,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3514,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=+7.62"},{"code":"outliers","level":"warn","message":"11.4% rows beyond 1.5 
IQR"}],"column":"runtimeMinutes","extras":{"histogram":{"counts":[12329,110286,6482,254,37,19,9,7,3,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,68.475,135.95,203.42499999999998,270.9,338.375,405.84999999999997,473.32499999999993,540.8,608.275,675.75,743.2249999999999,810.6999999999999,878.175,945.6499999999999,1013.1249999999999,1080.6,1148.0749999999998,1215.55,1283.0249999999999,1350.5,1417.975,1485.4499999999998,1552.925,1620.3999999999999,1687.8749999999998,1755.35,1822.8249999999998,1890.2999999999997,1957.7749999999999,2025.2499999999998,2092.725,2160.2,2227.6749999999997,2295.1499999999996,2362.625,2430.1,2497.575,2565.0499999999997,2632.5249999999996,2700.0]},"sample":[105.0,83.0,93.0,104.0,84.0,90.0,169.0,95.0,107.0,90.0,88.0,97.0,67.0,95.0,107.0,87.0,89.0,93.0,57.0,54.0,116.0,84.0,125.0,91.0,86.0,75.0,106.0,66.0,90.0,55.0,96.0,125.0,90.0,80.0,21.0,180.0,84.0,113.0,97.0,149.0,112.0,107.0,81.0,92.0,83.0,84.0,116.0,80.0,90.0,99.0,78.0,105.0,90.0,104.0,93.0,102.0,263.0,70.0,108.0,103.0,97.0,13.0,163.0,47.0,86.0,90.0,152.0,90.0,111.0,96.0,79.0,76.0,103.0,96.0,80.0,96.0,95.0,103.0,113.0,97.0,113.0,111.0,90.0,58.0,97.0,98.0,93.0,105.0,89.0,92.0,89.0,95.0,81.0,23.0,112.0,102.0,55.0,103.0,76.0,90.0,90.0,150.0,145.0,70.0,83.0,23.0,139.0,109.0,106.0,92.0,100.0,80.0,100.0,90.0,54.0,111.0,93.0,84.0,95.0,90.0,82.0,75.0,92.0,90.0,108.0,94.0,94.0,95.0,93.0,75.0,124.0,63.0,80.0,93.0,107.0,101.0,79.0,84.0,79.0,102.0,112.0,81.0,122.0,101.0,97.0,90.0,89.0,160.0,109.0,101.0,89.0,61.0,120.0,100.0,85.0,94.0,87.0,39.0,145.0,238.0,68.0,150.0,84.0,100.0,62.0,13.0,111.0,90.0,105.0,90.0,97.0,112.0,130.0,124.0,84.0,146.0,138.0,73.0,103.0,90.0,99.0,95.0,134.0,60.0,100.0,85.0,76.0,84.0,94.0,121.0,96.0,90.0,111.0,180.0,77.0,64.0,90.0,110.0,80.0,92.0,141.0,90.0,17.0,45.0,37.0,84.0,102.0,90.0,95.0,82.0,95.0,134.0,79.0,83.0,88.0,83.0,97.0,90.0,90.0,99.0,85.0,85.0,90.0,145.0,82.0,80.0,85.0,103.0,69.0,109.0,82.0,82.0,119.0,112.0,120.0,48.0,90.0,103.0,100.0,146.
0,106.0,20.0,85.0,74.0,80.0,96.0,90.0,167.0,40.0,88.0,120.0,84.0,56.0,96.0,56.0,14.0,110.0,102.0,55.0,71.0,82.0,90.0,87.0,96.0,92.0,94.0,120.0,20.0,15.0,80.0,95.0,97.0,97.0,97.0,115.0,113.0,98.0,95.0,90.0,82.0,87.0,87.0,88.0,101.0,26.0,61.0,93.0,139.0,93.0,89.0,103.0,97.0,170.0,95.0,89.0,83.0,87.0,72.0,92.0,24.0,94.0,94.0,88.0,105.0,110.0,94.0,84.0,148.0,90.0,100.0,150.0,82.0,90.0,97.0,82.0,72.0,102.0,90.0,111.0,85.0,93.0,90.0,110.0,103.0,76.0,54.0,111.0,70.0,98.0,70.0,95.0,143.0,91.0,97.0,74.0,86.0,104.0,92.0,105.0,95.0,100.0,92.0,82.0,83.0,117.0,115.0,48.0,86.0,115.0,180.0,108.0,91.0,83.0,43.0,11.0,96.0,100.0,90.0,85.0,89.0,90.0,153.0,85.0,95.0,91.0,117.0,65.0,105.0,89.0,100.0,87.0,85.0,96.0,75.0,86.0,88.0,64.0,81.0,98.0,115.0,67.0,92.0,94.0,90.0,107.0,105.0,96.0,118.0,90.0,83.0,82.0,85.0,90.0,92.0,101.0,84.0,97.0,95.0,80.0,135.0,112.0,93.0,115.0,78.0,97.0,90.0,84.0,104.0,93.0,97.0,125.0,67.0,104.0,100.0,90.0,85.0,94.0,114.0,83.0,93.0,92.0,102.0,90.0,89.0,100.0,75.0,86.0,94.0,70.0,97.0,99.0,85.0,110.0,91.0,87.0,106.0,86.0,125.0,96.0,98.0,102.0,113.0,85.0,97.0,95.0,96.0,113.0,105.0,88.0,98.0,114.0,112.0,105.0,100.0,110.0,92.0,97.0,97.0,16.0,95.0,84.0,96.0,16.0,91.0,110.0,91.0,78.0,78.0,87.0,165.0,120.0,103.0,104.0,101.0,96.0,85.0,98.0,101.0,71.0,75.0,110.0,62.0,79.0,93.0,95.0,90.0,85.0,80.0,106.0,86.0,90.0,100.0,90.0,90.0,92.0,62.0,85.0,98.0,95.0,67.0]},"kind":"numeric","n":143258,"n_null":13827,"n_unique":324,"null_rate":0.09651817001493808,"stats":{"iqr":19.0,"kurtosis":598.6512866442964,"max":2700.0,"mean":93.70857831585941,"median":92.0,"min":1.0,"n_outliers":14720,"outlier_rate":0.11372855034728929,"q1":84.0,"q3":103.0,"skew":7.623126740995684,"std":28.129175060308523,"zero_rate":0.0}},{"alerts":[{"code":"one_word","level":"warn","message":"55.3% rows are a single word"},{"code":"duplicates","level":"warn","message":"97.8% duplicate 
strings"}],"column":"genre","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[28416,26877,3141,18414,18553,3196,10562,3727,4991,5352,854,2468,2166,1055,240,906,639,124,89,123,164,11,30,42,18,4,8,1,2,0,0,1,0,0,0,0,0,0,0,1],"edges":[3.0,5.45,7.9,10.350000000000001,12.8,15.25,17.700000000000003,20.150000000000002,22.6,25.05,27.5,29.950000000000003,32.400000000000006,34.85,37.300000000000004,39.75,42.2,44.650000000000006,47.1,49.550000000000004,52.0,54.45,56.900000000000006,59.35,61.800000000000004,64.25,66.7,69.15,71.60000000000001,74.05000000000001,76.5,78.95,81.4,83.85000000000001,86.30000000000001,88.75,91.2,93.65,96.10000000000001,98.55000000000001,101.0]},"near_unique":false,"sample":["Documentary","Musical, Comedy","Documentary, Music","Action, Adventure","Comedy, Drama","Mystery & thriller, Drama","Documentary","Documentary","Crime, Drama, Mystery & thriller","Drama","Sci-fi, Adventure, Action, Fantasy, Comedy","Mystery & thriller","Comedy","Drama","Drama","Comedy","Documentary","Comedy, Drama","Drama","Western","Drama","Crime, Drama","Comedy, Drama","Action","Adventure, Documentary","History, Drama","Documentary, Biography","Kids & family, Holiday, Fantasy","Horror, Mystery & thriller","Biography, Drama","Documentary","Kids & family, Fantasy, Adventure","Documentary","Adventure, Fantasy","Documentary","Comedy, Romance","Action, Mystery & thriller","Comedy","Adventure","Drama","Comedy","Documentary, Music","Kids & family, Adventure","Action, Comedy","Crime, Drama","Drama","Drama","Drama, Action","Comedy","Drama"],"top_values":[["Drama",27860],["Documentary",15162],["Comedy",11514],["Mystery & thriller",7015],["Comedy, Drama",5479],["Horror",5129],["Action",3547],["Crime, Drama",2922],["Romance, Comedy",2749],["Horror, Mystery & thriller",2238],["Western",2215],["Adventure",1962],["Drama, Romance",1746],["Romance",1583],["Sci-fi",1540],["Drama, Mystery & thriller",1471],["Action, Mystery & thriller",888],["History, 
Drama",821],["Crime, Drama, Mystery & thriller",799],["Documentary, Music",714]],"top_words":[["drama",6652],["&",3293],["mystery",2777],["comedy",2690],["thriller",2300],["documentary",2270],["comedy,",1923],["drama,",1828],["horror",997],["action,",932],["romance",892],["crime,",883],["romance,",759],["action",739],["horror,",663],["adventure,",617],["documentary,",544],["adventure",518],["kids",497],["family,",482],["thriller,",477],["animation",476],["sci-fi",420],["western",393],["fantasy,",364]],"vocab_skipped":null,"word_histogram":{"counts":[73128,0,28470,0,0,14463,0,9182,0,0,4773,0,1649,0,0,385,0,88,0,0,33,0,2,0,0,1,0,0,0,1],"edges":[1.0,1.4,1.8,2.2,2.6,3.0,3.4000000000000004,3.8000000000000003,4.2,4.6,5.0,5.4,5.800000000000001,6.2,6.6000000000000005,7.0,7.4,7.800000000000001,8.2,8.600000000000001,9.0,9.4,9.8,10.200000000000001,10.600000000000001,11.0,11.4,11.8,12.200000000000001,12.600000000000001,13.0]}},"kind":"text","n":143258,"n_null":11083,"n_unique":2912,"null_rate":0.0773639168493208,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.9779686022318895,"emoji_rate":0.0,"len_max":101,"len_mean":12.906979383393228,"len_median":11.0,"len_min":3,"len_p95":32.0,"n_duplicates":129263,"n_empty":0,"one_word_rate":0.5532665027425762,"readability_flesch_mean":-19.664524999999976,"url_rate":0.0,"vocab_size":66,"word_mean":1.8739095895592963,"word_median":1.0}},{"alerts":[],"column":"originalLanguage","extras":{"singletons":5,"top_values":[["English",85034],["Spanish",4786],["Japanese",3482],["Hindi",3309],["French (Canada)",3282],["Chinese",3166],["French (France)",2760],["English (United Kingdom)",2553],["Italian",2303],["German",2155],["Korean",1226],["Arabic",938],["Spanish (Spain)",936],["Tamil",909],["Russian",898],["Portuguese (Brazil)",867],["Telugu",774],["Malayalam",642],["Unknown 
language",528],["Dutch",482]]},"kind":"categorical","n":143258,"n_null":13858,"n_unique":112,"null_rate":0.09673456281673624,"stats":{"cardinality":112,"entropy":2.605090844568999,"entropy_ratio":0.38268767743074855,"top_rate":0.6571406491499228,"top_value":"English"}},{"alerts":[{"code":"duplicates","level":"warn","message":"55.3% duplicate strings"}],"column":"director","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[7477,109547,14942,5028,1160,412,164,87,69,41,28,24,19,17,3,3,9,13,1,6,4,2,0,1,0,2,0,0,0,1,0,0,1,0,0,0,2,0,0,1],"edges":[1.0,9.125,17.25,25.375,33.5,41.625,49.75,57.875,66.0,74.125,82.25,90.375,98.5,106.625,114.75,122.875,131.0,139.125,147.25,155.375,163.5,171.625,179.75,187.875,196.0,204.125,212.25,220.375,228.5,236.625,244.75,252.875,261.0,269.125,277.25,285.375,293.5,301.625,309.75,317.875,326.0]},"near_unique":false,"sample":["Robert Cavanah","Kinji Fukasaku","Carlo Lizzani","Sacha Polak","Michael Lei","Jim Makichuk","Larry Brand","Kevin de la Isla O'Neill","Jerry Rothwell","Masaki Tsujino","Aditya Om","William Nigh","Timothy Bond","Nasri Cheppy","Steven Schachter","Shawn Woodard","Miguel \u00c1ngel Lamata","Sofia Bohdanowicz","Paul Aaron","Solomon Onita Jr.","Martin Owen","Mona Nicoara","Brandon Cornett","Jorge Ameer","John Garwood","Mehdi Idir","\u00d3scar Urrutia Lazo","Juan Antonio de la Riva","Arnaud Lemort","Peter Brook","Franc Aleu","Francis Delia","Francesca Bertini,Gustavo Serena","Unknown Director","Anselm Chan","Humberto Sol\u00e1s","Allan Dwan","Mathilde Profit","Enzo D'Al\u00f2","Julio Medem","Bryant Mainord","Wenn V. 
Deramas","Patton Oswalt","Kim Sang-jin","Irving Cummings","Kenji Kamiyama","Martin Beck","James Nam,See-Yuen Ng","John Edginton","Unknown Director"],"top_values":[["Unknown Director",3544],["David DeCoteau",129],["Sam Newfield",124],["Fred Olen Ray",93],["Joseph Kane",85],["Michael Curtiz",82],["Lesley Selander",82],["Richard Thorpe",75],["Raoul Walsh",69],["John Ford",68],["Michael Feifer",66],["Takashi Miike",66],["Lloyd Bacon",64],["George Sherman",63],["Terry Ingram",62],["Jim Wynorski",62],["Gordon Douglas",61],["William A. Wellman",60],["William Beaudine",59],["Cheh Chang",58]],"top_words":[["unknown",513],["director",513],["john",391],["david",337],["michael",330],["robert",290],["paul",203],["peter",198],["james",193],["richard",184],["william",169],["mark",145],["de",143],["george",132],["lee",129],["j.",121],["a.",112],["thomas",111],["daniel",107],["tom",103],["scott",103],["charles",103],["andrew",100],["christopher",100],["frank",100]],"vocab_skipped":null,"word_histogram":{"counts":[1023,112781,21809,2473,619,0,159,76,42,20,15,0,16,13,6,3,2,0,1,1,1,1,1,0,0,0,0,0,1,1],"edges":[1.0,1.8333333333333335,2.666666666666667,3.5,4.333333333333334,5.166666666666667,6.0,6.833333333333334,7.666666666666667,8.5,9.333333333333334,10.166666666666668,11.0,11.833333333333334,12.666666666666668,13.5,14.333333333333334,15.166666666666668,16.0,16.833333333333336,17.666666666666668,18.5,19.333333333333336,20.166666666666668,21.0,21.833333333333336,22.666666666666668,23.5,24.333333333333336,25.166666666666668,26.0]}},"kind":"text","n":143258,"n_null":4194,"n_unique":62207,"null_rate":0.029275851959401918,"stats":{"allcaps_rate":7.190933670827821e-05,"boilerplate_rate":0.0,"duplicate_rate":0.5526735891388138,"emoji_rate":0.0,"len_max":326,"len_mean":14.80681556693321,"len_median":14.0,"len_min":1,"len_p95":26.0,"n_duplicates":76857,"n_empty":0,"one_word_rate":0.00735632514525686,"readability_flesch_mean":47.17310000000001,"url_rate":0.0,"vocab_size":16693,"word_mean":2.21318
24196053618,"word_median":2.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"37.1% null"},{"code":"duplicates","level":"warn","message":"25.3% duplicate strings"}],"column":"writer","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[565,37626,17271,12261,9944,4841,3059,1894,937,718,355,234,120,79,60,46,28,25,9,13,9,12,5,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,7.525,14.05,20.575000000000003,27.1,33.625,40.150000000000006,46.675000000000004,53.2,59.725,66.25,72.775,79.30000000000001,85.825,92.35000000000001,98.875,105.4,111.92500000000001,118.45,124.97500000000001,131.5,138.025,144.55,151.07500000000002,157.60000000000002,164.125,170.65,177.175,183.70000000000002,190.22500000000002,196.75,203.275,209.8,216.32500000000002,222.85000000000002,229.375,235.9,242.425,248.95000000000002,255.47500000000002,262.0]},"near_unique":false,"sample":["Robert Cavanah,Jon Kirby","Albert Espinosa","Miles Bellar","Suzette Couture,Pierre Sarrazin","Kelly Fullerton","Jason Richman","Raffaella Verga","Barry Massoni,Rene Perez","Michael McGovern","Guy Hibbert","Mary Walsh,Heather Conkie","Ben Daitz","Tejesh Ghadage","Diablo Cody","Scott Z. Burns","Jeff Edelstein","Kevin Costello,Mark Potts,Cole Selix","May Lau","Walter Klenhard","Jeff Buhler","Martin Owen","Nancey Silvers","Daisuke Habara","Robert Arthur Jansen","Taika Waititi","Keith Fraser","Geri Cudia Barger,Gilbert M. Shilton","Dardano Sacchetti","Taylor Ri'chard","Jonathon Kimble","Romain Berthomieu,James Huth,Hugo Jacomet","Karen DeWolf,Robert St. Claire,Wallace MacDonald","Steven Zaillian","Ted Sears,Erdman Penner,Bill Peet,Joe Rinaldi,Milt Banta,Ralph Wright,William Cottrell","Laura Paolucci,Francesca Archibugi,Francesco Piccolo","Clarence Budington Kelland,Horace McCoy","Maren Louise K\u00e4ehne,Annette K. 
Olesen","Bo Brinkman","Carl Reiner,Aaron Ruben","Wei Lu","Nicole Avril","Shyam Menon","Bernard Rose","Barrie Dunn,Douglas Bell,Patrick Graham","Pere Portabella","Houston Branch","Charlie Brown","Julia Davis","John Fasano","Marc Didden"],"top_values":[["Jing Wong",48],["Barbara Kymlicka",46],["Woody Allen",38],["Christine Conradt",37],["Kuang Ni",37],["Ingmar Bergman",36],["Jake Helgren",33],["Werner Herzog",33],["Charlie Chaplin",32],["Mark Monroe",27],["H. M. Walker",27],["Tyler Perry",27],["Rolfe Kanefsky",25],["Adam Rockoff",25],["Hong Sang-soo",25],["Michael Feifer",25],["Abbas Kiarostami",22],["Agn\u00e8s Varda",22],["Jean-Luc Godard",22],["Marcy Holland",21]],"top_words":[["michael",372],["david",342],["john",330],["robert",236],["james",194],["de",188],["paul",180],["mark",173],["j.",164],["lee",157],["scott",146],["peter",143],["a.",128],["daniel",127],["chris",126],["richard",119],["m.",117],["william",116],["andrew",114],["brian",113],["thomas",111],["christopher",99],["stephen",99],["adam",94],["charles",91]],"vocab_skipped":null,"word_histogram":{"counts":[482,49149,0,25679,9242,0,3547,1166,0,453,202,0,96,52,0,25,12,0,6,2,0,1,1,0,0,0,0,0,0,1],"edges":[1.0,1.6666666666666665,2.333333333333333,3.0,3.6666666666666665,4.333333333333333,5.0,5.666666666666666,6.333333333333333,7.0,7.666666666666666,8.333333333333332,9.0,9.666666666666666,10.333333333333332,11.0,11.666666666666666,12.333333333333332,13.0,13.666666666666666,14.333333333333332,15.0,15.666666666666666,16.333333333333332,17.0,17.666666666666664,18.333333333333332,19.0,19.666666666666664,20.333333333333332,21.0]}},"kind":"text","n":143258,"n_null":53142,"n_unique":67274,"null_rate":0.3709531055857265,"stats":{"allcaps_rate":6.658085134715255e-05,"boilerplate_rate":0.0,"duplicate_rate":0.25347330107860977,"emoji_rate":0.0,"len_max":262,"len_mean":21.344955390829597,"len_median":16.0,"len_min":1,"len_p95":47.0,"n_duplicates":22842,"n_empty":0,"one_word_rate":0.0053486617248879225,"readability_flesch_m
ean":37.7884142857143,"url_rate":0.0,"vocab_size":26458,"word_mean":2.710550845576812,"word_median":2.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"89.7% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"67.0% duplicate strings"}],"column":"boxOffice","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[2,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,107,0,0,0,0,0,0,0,4011,0,0,0,0,0,0,0,6707,0,0,0,0,0,0,3910],"edges":[2.0,2.125,2.25,2.375,2.5,2.625,2.75,2.875,3.0,3.125,3.25,3.375,3.5,3.625,3.75,3.875,4.0,4.125,4.25,4.375,4.5,4.625,4.75,4.875,5.0,5.125,5.25,5.375,5.5,5.625,5.75,5.875,6.0,6.125,6.25,6.375,6.5,6.625,6.75,6.875,7.0]},"near_unique":false,"sample":["$41.1M","$334.2K","$5.6K","$11.1K","$12.4M","$2.0M","$83.8M","$2.0M","$321.9K","$15.5M","$31.6K","$14.7K","$292.8K","$226.2K","$64.0M","$25.7M","$1.7M","$819.8K","$4.0M","$1.6M","$13.5K","$581.1K","$4.7K","$23.2K","$1.6M","$18.8K","$33.0K","$119.8K","$625.7K","$1.4M","$33.7K","$8.8M","$330.3M","$5.2M","$44.8M","$99.8K","$14.9M","$35.0K","$28.9M","$11.5M","$38.6K","$11.7K","$197.3K","$10.8K","$727.1K","$1.9M","$99.6M","$39.4M","$694.6K","$856.9K"],"top_values":[["$1.1M",118],["$1.2M",100],["$1.3M",97],["$1.0M",97],["$1.6M",70],["$1.4M",69],["$2.0M",68],["$1.8M",67],["$2.1M",63],["$1.5M",60],["$2.3M",59],["$1.7M",57],["$2.2M",52],["$2.4M",50],["$2.5M",46],["$1.9M",42],["$3.3M",41],["$4.2M",39],["$5.7M",39],["$3.0M",38]],"top_words":[["$1.1m",118],["$1.2m",100],["$1.3m",97],["$1.0m",97],["$1.6m",70],["$1.4m",69],["$2.0m",68],["$1.8m",67],["$2.1m",63],["$1.5m",60],["$2.3m",59],["$1.7m",57],["$2.2m",52],["$2.4m",50],["$2.5m",46],["$1.9m",42],["$3.3m",41],["$4.2m",39],["$5.7m",39],["$3.0m",38],["$3.4m",38],["$2.6m",37],["$4.0m",36],["$2.7m",35],["
$2.9m",34]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14743,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":143258,"n_null":128515,"n_unique":4863,"null_rate":0.8970877717125745,"stats":{"allcaps_rate":0.999864342399783,"boilerplate_rate":0.0,"duplicate_rate":0.6701485450722376,"emoji_rate":0.0,"len_max":7,"len_mean":5.9768703791629925,"len_median":6.0,"len_min":2,"len_p95":7.0,"n_duplicates":9880,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":4863,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"83.9% null"},{"code":"duplicates","level":"warn","message":"83.9% duplicate strings"}],"column":"distributor","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[4694,14064,2077,968,437,254,146,125,58,50,39,22,17,10,7,8,6,3,3,4,2,5,0,2,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1],"edges":[3.0,12.55,22.1,31.650000000000002,41.2,50.75,60.300000000000004,69.85000000000001,79.4,88.95,98.5,108.05000000000001,117.60000000000001,127.15,136.70000000000002,146.25,155.8,165.35000000000002,174.9,184.45000000000002,194.0,203.55,213.10000000000002,222.65,232.20000000000002,241.75000000000003,251.3,260.85,270.40000000000003,279.95000000000005,289.5,299.05,308.6,318.15000000000003,327.70000000000005,337.25,346.8,356.35,365.90000000000003,375.45000000000005,385.0]},"near_unique":false,"sample":["Grapevine Video","20th Century Fox","Samuel Goldwyn Company","Warner Bros. 
Pictures","Metro-Goldwyn-Mayer","Gravitas Ventures","LS Video, United Artists, Hollywood Classics, Columbia TriStar Home Video, Reel Media International [us], Madacy Entertainment Group Inc. [us]","Netflix","20th Century Fox","Fox Searchlight","Warner Bros.","20th Century Fox","Metro-Goldwyn-Mayer","Magnolia Pictures","Sprint Films","United Artists","Paramount Pictures","New Century Vista Film Company [us]","Funimation, Eleven Arts","United Artists","Fine Line Features","Metro-Goldwyn-Mayer, Loew's Inc.","October Films","Warner Bros. Pictures","Buena Vista Pictures","Warner Bros. Pictures","RKO Radio Pictures","Columbia Pictures","Cinema Guild","Mutual Film","Unified Pictures","Fox","Warner Bros. Pictures","Universal Pictures","MCA/Universal Pictures [us]","Nandar Pictures","Yash Raj Films","Santa Barbara Filmed Ent.","Universal Pictures, ArtMattan Productions, Polygram","IFC Films","Metro-Goldwyn-Mayer Distributing Corp.","ArtMattan Films","Fox","IFC Films","Walt Disney","Strand Releasing","Gravitas Ventures","Variance Films","Paramount Pictures","Open Road"],"top_values":[["Paramount Pictures",994],["20th Century Fox",745],["Universal Pictures",737],["Warner Bros. 
Pictures",668],["Metro-Goldwyn-Mayer",603],["IFC Films",550],["Columbia Pictures",471],["Sony Pictures Classics",385],["Gravitas Ventures",382],["Lionsgate Films",375],["United Artists",375],["Warner Bros.",365],["Magnolia Pictures",312],["Vertical Entertainment",292],["Miramax Films",282],["Sony Pictures Entertainment",245],["Strand Releasing",208],["RKO Radio Pictures",188],["Saban Films",182],["Kino Lorber",175]],"top_words":[["pictures",5838],["films",2996],["entertainment",1987],["warner",1223],["fox",1018],["bros.",987],["paramount",974],["universal",837],["century",820],["20th",798],["sony",761],["releasing",745],["columbia",730],["home",667],["film",666],["media",650],["metro-goldwyn-mayer",648],["ifc",557],["pictures,",528],["lionsgate",493],["united",487],["artists",443],["international",415],["classics",390],["new",388]],"vocab_skipped":null,"word_histogram":{"counts":[14285,6013,1695,320,308,110,119,27,51,11,23,6,12,1,6,1,8,2,2,1,0,0,2,1,0,0,0,0,0,1],"edges":[1.0,2.5,4.0,5.5,7.0,8.5,10.0,11.5,13.0,14.5,16.0,17.5,19.0,20.5,22.0,23.5,25.0,26.5,28.0,29.5,31.0,32.5,34.0,35.5,37.0,38.5,40.0,41.5,43.0,44.5,46.0]}},"kind":"text","n":143258,"n_null":120253,"n_unique":3694,"null_rate":0.8394155998268857,"stats":{"allcaps_rate":0.014735926972397305,"boilerplate_rate":0.0,"duplicate_rate":0.8394262116931102,"emoji_rate":0.0,"len_max":385,"len_mean":19.88802434253423,"len_median":17.0,"len_min":3,"len_p95":43.0,"n_duplicates":19311,"n_empty":0,"one_word_rate":0.09376222560312976,"readability_flesch_mean":16.84629642857145,"url_rate":4.346881112801565e-05,"vocab_size":2650,"word_mean":2.6491632253857857,"word_median":2.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"88.9% null"}],"column":"soundMix","extras":{"singletons":265,"top_values":[["Surround",4075],["Dolby Digital",2375],["Stereo",2082],["Mono",1246],["Stereo, Surround",473],["Surround, Stereo",451],["Dolby",411],["Dolby SRD, DTS, SDDS",253],["Dolby Atmos",241],["Dolby SR",198],["Dolby SR, DTS, 
Dolby Stereo, Surround, SDDS, Dolby A, Dolby Digital",192],["Dolby Stereo, Dolby Digital, Dolby A, Surround, Dolby SR",167],["Surround, Dolby Digital",133],["Dolby, Surround",119],["SDDS, Dolby Digital, DTS",118],["Surround, Dolby SRD, DTS, SDDS",118],["Dolby SRD",107],["Surround, Dolby SR, Dolby Digital, Dolby A, Dolby Stereo",101],["Dolby Atmos, Dolby Digital",93],["Datasat, Dolby Digital",84]]},"kind":"categorical","n":143258,"n_null":127341,"n_unique":551,"null_rate":0.8888927668960895,"stats":{"cardinality":551,"entropy":4.663040705665351,"entropy_ratio":0.5120895626478291,"top_rate":0.25601558082553244,"top_value":"Surround"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.boxOffice.null_rate","columns.rating.null_rate","columns.tomatoMeter.null_rate","columns.releaseDateTheaters.null_rate","columns.audienceScore.null_rate","columns.genre.top_values","columns.runtimeMinutes.stats","columns.tomatoMeter.stats","columns.audienceScore.stats","columns.originalLanguage.top_rate","columns.originalLanguage.top_values"],"featured_charts":[{"caption":"Drama, Documentary, and Comedy dominate; note how long-tail combo genres fragment the rest.","column":"genre","kind":"bar"},{"caption":"Most films cluster around 84\u2013103 minutes, but extreme outliers stretch the tail to 2,700 minutes.","column":"runtimeMinutes","kind":"histogram"},{"caption":"Critic scores skew positive with a median of 73 \u2014 look for the left tail of poorly reviewed films.","column":"tomatoMeter","kind":"histogram"},{"caption":"Audience scores are flatter and more centered (median 57) than critic scores \u2014 compare the two distributions.","column":"audienceScore","kind":"histogram"},{"caption":"English accounts for ~66% of titles; the remaining 111 languages form a long tail worth segmenting.","column":"originalLanguage","kind":"donut"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 143,258 
movies from Rotten Tomatoes across 16 columns covering metadata (title, director, writer, distributor), release info, runtime, genre, language, ratings, and critic/audience scores. Coverage is highly uneven \u2014 fields like boxOffice (89.7% null), rating (90.2% null), tomatoMeter (76.4% null), and releaseDateTheaters (78.5% null) are sparse, while audienceScore is missing in roughly half the rows. Worth a closer look first: the genre distribution, which is dominated by Drama (27,860), Documentary (15,162), and Comedy (11,514), and runtimeMinutes, which is heavily right-skewed (skew 7.6, max 2,700 minutes) with ~11.4% flagged as outliers despite a tight IQR of 84\u2013103 minutes. The tomatoMeter and audienceScore distributions also tell a clear story \u2014 critics skew positive (median 73) while audiences are more middling (median 57). English dominates originalLanguage at 65.7% of titles, so any language-based analysis will be lopsided.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","one_word_rate","word_mean","len_mean","duplicate_rate","n_duplicates","top_words","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Slug-style identifier column: every value is a single token (one_word_rate 1.0, word_mean 1.0) with mean length ~18 chars and 142052 uniques out of 143258 rows. The 1206 duplicates (0.84%) are surprising for an id field \u2014 top repeats like 'catch_me_if_you_can' and 'hear_no_evil' suggest these are title-derived slugs rather than guaranteed-unique keys. 
Readability score is meaningless here (\u221275.5) because the tokens are underscore-joined phrases, not prose.","role":"identifier","scope":"column","target":"id","treatment":"Use as a join key but deduplicate first \u2014 it is not strictly unique."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","language_counts","stats.len_mean","stats.word_median","stats.duplicate_rate","stats.n_duplicates","stats.one_word_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Short titles (mean 17 chars, median 3 words) of what look like films or works \u2014 top values include 'The Return', 'A Christmas Carol', 'Hero', 'Blue'. Predominantly English (3946) but 29 other languages are detected, with Spanish (123), German (80), and French (72) most common. Notable duplication: 16,488 repeats (11.5% duplicate rate) across 126,403 unique values out of 143,258 rows, and 17% are single-word titles.","role":"label","scope":"column","target":"title","treatment":"Normalise case and tokenize for embedding; do not treat as a unique key given the 11.5% duplicate rate."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.mean","stats.median","stats.std","stats.iqr","stats.skew","stats.kurtosis","stats.n_outliers","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is an audience rating score on a 0-100 scale with 101 unique integer values, mean 55.67 and median 57. The distribution is wide (std 24.55, IQR 39) and slightly left-skewed (skew -0.23, kurtosis -0.83) with no outliers flagged. 
The dominant concern is missingness: 48.87% of rows are null, so nearly half the dataset lacks this score.","role":"feature","scope":"column","target":"audienceScore","treatment":"Impute or add a missing-indicator before modelling given the ~49% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.mean","stats.median","stats.min","stats.max","stats.q1","stats.q3","stats.skew","stats.zero_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This is the Rotten Tomatoes critic score (tomatoMeter), a 0-100 percentage with 101 unique integer values, mean 65.77 and median 73. The distribution is left-skewed (skew -0.65) with Q1 at 45 and Q3 at 89, indicating most rated titles lean favorable. The dominant concern is coverage: 76.35% of rows are null, so the field is only populated for a minority of records.","role":"feature","scope":"column","target":"tomatoMeter","treatment":"Impute or add a missingness indicator before modelling given the 76% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.top_value","stats.top_rate","stats.cardinality","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a content rating field mixing theatrical (R, PG-13, PG, NC-17, G) and television (TVPG, TV14, TVMA, TVY7, TVG) classifications across 10 distinct values. The column is 90.23% null, so only ~9.77% of the 143,258 rows carry a rating, and within those R alone accounts for 55.28% of values. 
The mixed rating systems and the long tail (TVG, TVY7, G each appearing once) suggest inconsistent sourcing rather than a clean controlled vocabulary.","role":"feature","scope":"column","target":"rating","treatment":"Normalize TV vs MPAA codes into a unified scheme and add an explicit 'missing' category before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.duplicate_rate","stats.n_duplicates","stats.vocab_size","top_values","top_words","language_counts"],"model":"anthropic:claude-opus-4-7","narrative":"This column stores content-rating descriptors (e.g. 'Language', 'Violence', 'Some Sexual Content') serialised as Python-style list literals rather than clean arrays. It is 90.23% null and, among the 14k populated rows, 40.3% are duplicates with only 8,353 unique values across 143,258 records. A handful of non-English entries (12 it, 5 ro, 1 km) appear despite the vocabulary being tiny (1,188 words), and the bracket/quote artefacts in top_words confirm the values were never parsed out of their string representation.","role":"feature","scope":"column","target":"ratingContents","treatment":"Parse the list-literal strings into a multi-hot encoding of rating tags before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.duplicate_rate","stats.len_min","stats.len_max","stats.one_word_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a theatrical release date stored as an ISO-format string (every value is exactly 10 characters and a single token, e.g. '2018-09-14'). It is sparsely populated \u2014 78.52% null \u2014 and the non-null values are heavily repeated, with a 60.8% duplicate rate across 12,062 distinct dates. 
The 'allcaps' alert is a false positive driven by digits-only strings; there's no actual text content to mine.","role":"timestamp","scope":"column","target":"releaseDateTheaters","treatment":"Parse to date and impute or flag the 78.52% missing before any time-based feature engineering."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.duplicate_rate","stats.one_word_rate","stats.len_median","stats.len_min","stats.len_max","top_values","n_unique"],"model":"anthropic:claude-opus-4-7","narrative":"This is a streaming-release date stored as ISO-8601 text (len_median 10, one_word_rate 1.0, all top values match YYYY-MM-DD). Roughly 44.56% of rows are null and the duplicate_rate is 0.94, with a single date 2017-05-22 appearing 1232 times \u2014 heavy clustering on a few release days. The text-style alerts (allcaps, one_word, short_text) are artifacts of the date format, not a quality issue.","role":"timestamp","scope":"column","target":"releaseDateStreaming","treatment":"Parse to date dtype and treat missingness explicitly before any temporal feature engineering."},{"confidence":"high","critiques":[],"evidence_keys":["stats.median","stats.q1","stats.q3","stats.max","stats.skew","stats.kurtosis","stats.outlier_rate","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"Movie or episode runtime in minutes, with a typical feature-length distribution (median 92, IQR 84-103). The tail is extreme: max 2700, skew 7.62, kurtosis 598.65, and 11.37% of rows flagged as outliers, suggesting a mix of shorts, multi-part specials, or full series totals alongside standard films. 
Roughly 9.65% of rows are null.","role":"feature","scope":"column","target":"runtimeMinutes","treatment":"Cap or log-transform before modelling and impute the ~10% nulls."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.one_word_rate","stats.vocab_size","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical genre label for films, often a single word like 'Drama' (27,860 rows) or 'Documentary' (15,162) but sometimes a comma-separated combo such as 'Comedy, Drama'. With only 66 distinct vocabulary tokens but 2,912 unique strings and a 97.8% duplicate rate, the cardinality comes entirely from how genres are concatenated. Note the 7.74% null rate and that 55% of values are single-word \u2014 multi-genre rows are the minority.","role":"label","scope":"column","target":"genre","treatment":"Split on comma and one-hot encode into a small set of genre flags."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical language label with 112 distinct values, dominated by English at 65.7% of non-null rows (85,034 of 129,400). The long tail spans regional variants (e.g., 'French (Canada)' vs 'French (France)', 'English (United Kingdom)') alongside bare language names like 'English' and 'French', suggesting inconsistent locale tagging that will fragment counts. 
Null rate is 9.67%, and entropy ratio of 0.38 confirms heavy concentration in a few categories.","role":"feature","scope":"column","target":"originalLanguage","treatment":"Normalize locale variants to base language codes, then one-hot encode the top categories and bucket the rest as 'Other'."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.n_duplicates","stats.word_mean","stats.len_mean","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Holds a film director's name, averaging 2.2 words and 14.8 characters with 62,207 unique values across 143,258 rows. The duplicate rate is 55.3% (76,857 rows), inflated by a 'Unknown Director' sentinel that occurs 3,544 times and should not be treated as a real name. Null rate is 2.93%, and the long tail (David DeCoteau at 129, Sam Newfield at 124) reflects prolific B-movie directors rather than data quality issues.","role":"feature","scope":"column","target":"director","treatment":"Replace 'Unknown Director' with null and use as a high-cardinality categorical (target/frequency encode)."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.duplicate_rate","n_unique","stats.word_mean","stats.len_mean","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Holds writer credits, typically one or two personal names averaging 2.7 words and 21 characters, with familiar figures like Jing Wong, Woody Allen, and Ingmar Bergman topping the list. Coverage is weak: 37.1% of rows are null and 25.3% are duplicates across 67,274 unique values, so a single column likely concatenates multiple co-writers per title. 
Top tokens (michael, david, john) confirm Western personal names dominate, though 'de' hints at multi-name strings or non-English credits mixed in.","role":"feature","scope":"column","target":"writer","treatment":"Split on delimiters into individual writers and explode for any per-person analysis; impute or flag the 37% missing before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.one_word_rate","stats.allcaps_rate","stats.duplicate_rate","stats.len_min","stats.len_max","n_unique","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Box office gross stored as a short currency string like \"$1.1M\" \u2014 every value is one token, 99.99% allcaps, and lengths range from 2 to 7 characters, with nearly all values 5 to 7 characters long. The column is 89.71% null and only 4,863 distinct values cover the 14,743 populated rows, with a 67.01% duplicate rate concentrated on low single-digit million-dollar figures. Note this is a coarse, pre-formatted string (rounded to one decimal place with a K or M suffix), not a precise revenue number.","role":"feature","scope":"column","target":"boxOffice","treatment":"Parse the \"$X.XK\" / \"$X.XM\" strings into a numeric dollar amount, scaling by the K or M suffix, and decide whether to impute or drop given the 89.71% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_mean","stats.word_median","stats.vocab_size","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column lists film distributor names, dominated by major studios like Paramount Pictures (994), 20th Century Fox (745), and Universal Pictures (737). It is overwhelmingly sparse with an 83.94% null rate and an 83.94% duplicate rate across 3,694 unique values, suggesting most rows lack distributor data while a small set of studios accounts for the populated entries. 
Names are short (mean length 19.9 chars, median 2 words) and vocabulary is concentrated around terms like 'pictures', 'films', and 'entertainment'.","role":"feature","scope":"column","target":"distributor","treatment":"Normalize studio name variants and treat as a high-cardinality categorical with an explicit 'missing' bucket given the 83.94% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Catalogues the audio mix format of each title (Surround, Dolby Digital, Stereo, Mono, etc.), with 551 distinct labels across 143,258 rows. The dominant issue is sparsity: 88.89% of values are null, and even among populated rows 'Surround' covers only 25.6%. Free-form combinations like 'Stereo, Surround' vs 'Surround, Stereo' and overlapping Dolby variants suggest the field is unnormalised multi-label text rather than a clean taxonomy.","role":"feature","scope":"column","target":"soundMix","treatment":"Split on commas, normalise Dolby/Surround variants, and treat as multi-hot; consider dropping if downstream task can't tolerate 89% 
missingness."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":5728,"prompt_tokens":26963,"total_tokens":32691}},"language_counts":{"af":3,"ca":6,"ceb":12,"cs":2,"da":2,"de":80,"en":4371,"eo":6,"es":123,"et":3,"fi":10,"fr":72,"hr":3,"hu":3,"id":8,"it":67,"ja":3,"km":1,"la":2,"lv":1,"ms":4,"nl":23,"no":5,"pl":8,"pt":20,"ro":5,"ru":5,"sl":5,"sr":2,"sv":16,"tl":2,"tr":12},"meta":{"generated_at":"2026-05-01T23:13:52+00:00","mode":"full","row_count":143258,"sampled_rows":143258,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/entertainment/movies/rotten_tomatoes/rotten_tomatoes_movies.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"audienceScore":"numeric","boxOffice":"text","director":"text","distributor":"text","genre":"text","id":"text","originalLanguage":"categorical","rating":"categorical","ratingContents":"text","releaseDateStreaming":"text","releaseDateTheaters":"text","runtimeMinutes":"numeric","soundMix":"categorical","title":"text","tomatoMeter":"numeric","writer":"text"}}
