{"attributions":[{"component":"fastText lid.176 language identification model","license":"CC-BY-SA-3.0","note":"Language counts in this report were produced with the fastText lid.176 model, licensed CC-BY-SA-3.0. This report is a derivative work and carries the same license for those figures.","url":"https://fasttext.cc/docs/en/language-identification.html"}],"columns":[{"alerts":[],"column":"index","extras":{"histogram":{"counts":[162,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,161,162,162],"edges":[0.0,161.525,323.05,484.57500000000005,646.1,807.625,969.1500000000001,1130.675,1292.2,1453.7250000000001,1615.25,1776.775,1938.3000000000002,2099.8250000000003,2261.35,2422.875,2584.4,2745.925,2907.4500000000003,3068.975,3230.5,3392.025,3553.55,3715.0750000000003,3876.6000000000004,4038.125,4199.650000000001,4361.175,4522.7,4684.225,4845.75,5007.275000000001,5168.8,5330.325,5491.85,5653.375,5814.900000000001,5976.425,6137.95,6299.475,6461.0]},"sample":[33.0,41.0,44.0,62.0,88.0,114.0,120.0,135.0,139.0,140.0,142.0,159.0,160.0,187.0,202.0,211.0,226.0,235.0,258.0,263.0,283.0,284.0,356.0,382.0,405.0,407.0,429.0,460.0,482.0,489.0,513.0,516.0,517.0,524.0,532.0,539.0,552.0,562.0,574.0,587.0,588.0,595.0,598.0,643.0,650.0,677.0,680.0,696.0,703.0,726.0,733.0,745.0,766.0,767.0,772.0,777.0,779.0,784.0,788.0,791.0,838.0,840.0,846.0,847.0,849.0,869.0,879.0,888.0,898.0,901.0,916.0,928.0,935.0,944.0,951.0,957.0,971.0,973.0,990.0,991.0,1006.0,1022.0,1023.0,1039.0,1061.0,1067.0,1089.0,1092.0,1094.0,1110.0,1114.0,1139.0,1140.0,1143.0,1167.0,1169.0,1170.0,1202.0,1212.0,1249.0,1251.0,1276.0,1277.0,1309.0,1362.0,1370.0,1413.0,1429.0,1447.0,1451.0,1463.0,1468.0,1487.0,1492.0,1500.0,1529.0,1530.0,1612.0,1614.0,1626.0,1639.0,1660.0,1664.0,1669.0,1681.0,1682.0,1688.0,1720.0,1722.0,1746.0,1765.0,1780.0,1781.0,1784.0,1795.0,1833.0,1836.0,1837.0,1853.0,1854.0,1855.0,1867.0,1871.0,1886.0,1889.0,1892.0,1893.0,1894.0,1914.0,1927.0,1951.0,1964.0,1968.0,1974.0,1979.0,1994.0,1997.0,1999.0,2010.0,2014.0,2039.0,2049.0,2050.0,2074.0,2088.0,2110.0,2120.0,2125.0,2129.0,2155.0,2160.0,2188.0,2209.0,2218.0,2227.0,2233.0,2257.0,2277.0,2333.0,2334.0,2343.0,2346.0,2367.0,2409.0,2410.0,2417.0,2476.0,2489.0,2494.0,2506.0,2545.0,2560.0,2582.0,2583.0,2598.0,2608.0,2618.0,2634.0,2642.0,2656.0,2663.0,2664.0,2674.0,2678.0,2693.0,2694.0,2699.0,2742.0,2747.0,2748.0,2752.0,2755.0,2762.0,2763.0,2775.0,2780.0,2784.0,2788.0,2793.0,2807.0,2818.0,2820.0,2832.0,2835.0,2862.0,2867.0,2868.0,2872.0,2896.0,2914.0,2960.0,2971.0,2985.0,2993.0,2995.0,3008.0,3038.0,3046.0,3052.0,3053.0,3056.0,3064.0,3068.0,3069.0,3072.0,3077.0,3091.0,3129.0,3144.0,3179.0,3188.0,3202.0,3231.0,3247.0,3257.0,3258.0,3267.0,3283.0,3289.0,3325.0,3340.0,3365.0,3373.0,3375.0,3385.0,3389.0,3401.0,3408.0,3409.0,3410.0,3423.0,3441.0,3444.0,3456.0,3473.0,3491.0,3499.0,3563.0,3573.0,3585.0,3618.0,3623.0,3630.0,3656.0,3676.0,3682.0,3711.0,3737.0,3791.0,3808.0,3809.0,3816.0,3830.0,3832.0,3855.0,3859.0,3863.0,3866.0,3878.0,3890.0,3893.0,3902.0,3904.0,3910.0,3920.0,3982.0,4001.0,4032.0,4035.0,4047.0,4049.0,4066.0,4068.0,4070.0,4074.0,4082.0,4106.0,4111.0,4113.0,4135.0,4139.0,4144.0,4163.0,4177.0,4190.0,4191.0,4206.0,4214.0,4217.0,4235.0,4247.0,4273.0,4276.0,4279.0,4281.0,4288.0,4314.0,4322.0,4354.0,4396.0,4402.0,4426.0,4432.0,4459.0,4464.0,4475.0,4486.0,4548.0,4551.0,4565.0,4569.0,4572.0,4576.0,4585.0,4615.0,4645.0,4646.0,4658.0,4669.0,4679.0,4699.0,4703.0,4714.0,4725.0,4732.0,4733.0,4734.0,4736.0,4742.0,4744.0,4749.0,4775.0,4799.0,4800.0,4806.0,4811.0,4828.0,4834.0,4846.0,4862.0,4872.0,4881.0,4884.0,4887.0,4888.0,4892.0,4928.0,4935.0,4937.0,4947.0,4965.0,4988.0,5004.0,5021.0,5022.0,5034.0,5036.0,5048.0,5049.0,5052.0,5090.0,5096.0,5124.0,5136.0,5138.0,5148.0,5175.0,5204.0,5212.0,5224.0,5230.0,5277.0,5281.0,5305.0,5318.0,5324.0,5344.0,5346.0,5367.0,5398.0,5439.0,5453.0,5459.0,5522.0,5530.0,5547.0,5556.0,5560.0,5579.0,5600.0,5611.0,5615.0,5616.0,5633.0,5635.0,5651.0,5664.0,5682.0,5711.0,5758.0,5759.0,5762.0,5828.0,5830.0,5831.0,5859.0,5896.0,5906.0,5908.0,5920.0,5927.0,5931.0,5936.0,5943.0,5952.0,5965.0,5977.0,5979.0,5985.0,5997.0,6001.0,6005.0,6011.0,6013.0,6022.0,6062.0,6067.0,6094.0,6099.0,6116.0,6123.0,6125.0,6130.0,6133.0,6152.0,6158.0,6162.0,6174.0,6176.0,6183.0,6186.0,6195.0,6199.0,6210.0,6230.0,6254.0,6256.0,6269.0,6292.0,6350.0,6383.0,6392.0,6396.0,6421.0,6433.0,6435.0,6439.0,6444.0,6453.0,6457.0]},"kind":"numeric","n":6462,"n_null":0,"n_unique":6462,"null_rate":0.0,"stats":{"iqr":3230.5,"kurtosis":-1.2000000574747833,"max":6461.0,"mean":3230.5,"median":3230.5,"min":0.0,"n_outliers":0,"outlier_rate":0.0,"q1":1615.25,"q3":4845.75,"skew":0.0,"std":1865.56305173532,"zero_rate":0.00015475085112968121}},{"alerts":[{"code":"near_unique","level":"info","message":"99.7% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.9% rows are a single word"},{"code":"allcaps","level":"info","message":"74.8% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Case Number","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[4,0,0,122,0,0,0,0,0,0,0,0,0,4150,0,0,14,0,0,0,2128,0,0,14,0,0,24,0,0,0,2,0,0,1,0,0,0,0,0,1],"edges":[6.0,6.3,6.6,6.9,7.2,7.5,7.8,8.1,8.4,8.7,9.0,9.3,9.6,9.9,10.2,10.5,10.8,11.1,11.399999999999999,11.7,12.0,12.3,12.6,12.899999999999999,13.2,13.5,13.8,14.1,14.4,14.7,15.0,15.299999999999999,15.6,15.9,16.2,16.5,16.799999999999997,17.1,17.4,17.7,18.0]},"near_unique":true,"sample":["2020.01.05","1942.11.01","1910.03.08","1926.08.24","1992.06.17","1830.07.26","1990.00.00","1896.12.17.R","1925.11.22","2017.10.21","1963.11.12","1975.01.19","2004.12.16","2019.11.23","2004.01.03.b","2010.07.17.a","2015.06.11","1921.08.28","ND.0087","1994.12.09.a","2003.02.27","2017.09.16.a","2011.09.30","2015.02.09","1954.07.04.R","1940.01.07","1970.06.22","2010.02.19","2016.04.07.b","1826.12.00","1959.09.27","ND.0018","1959.04.09..a","1995.09.28","1998.12.20.R","1990.09.08","1874.06.00","1979.03.24","1989.01.26.a","1975.04.25","1928.05.17","1880.10.10","2005.03.10","1881.08.12","1853.07.13.R","1767.00.00","2000.07.16.a","1995.09.03.b","2011.12.23","1901.07.30"],"top_values":[],"top_words":[["2019.10.08",2],["2019.07.22",2],["2014.08.02",2],["2013.10.05",2],["2012.09.02.b",2],["2009.12.18",2],["2006.09.02",2],["2005.04.06",2],["1990.05.10",2],["1983.06.15",2],["1980.07.00",2],["1966.12.26",2],["1962.06.11.b",2],["&",2],["b",2],["1954.00.00",2],["g",2],["1923.00.00.a",2],["1920.00.00.b",2],["1915.07.06.a.r",2],["1913.08.27.r",2],["1907.10.16.r",2],["2020.02.05",1],["2020.01.30.r",1],["2020.01.17",1]],"vocab_skipped":null,"word_histogram":{"counts":[6455,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":6462,"n_null":2,"n_unique":6442,"null_rate":0.00030950170225936243,"stats":{"allcaps_rate":0.7479876160990712,"boilerplate_rate":0.0,"duplicate_rate":0.0027863777089783283,"emoji_rate":0.0,"len_max":18,"len_mean":10.626934984520124,"len_median":10.0,"len_min":6,"len_p95":12.0,"n_duplicates":18,"n_empty":0,"one_word_rate":0.9992260061919505,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":6445,"word_mean":1.0010835913312695,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"87.3% rows are a single word"}],"column":"Date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[308,0,347,31,5088,33,43,4,7,5,552,5,5,7,8,0,1,0,3,1,2,1,2,0,1,1,1,0,0,1,0,0,0,2,1,0,0,0,0,1],"edges":[4.0,5.5,7.0,8.5,10.0,11.5,13.0,14.5,16.0,17.5,19.0,20.5,22.0,23.5,25.0,26.5,28.0,29.5,31.0,32.5,34.0,35.5,37.0,38.5,40.0,41.5,43.0,44.5,46.0,47.5,49.0,50.5,52.0,53.5,55.0,56.5,58.0,59.5,61.0,62.5,64.0]},"near_unique":false,"sample":["05-Jan-2020","Nov-1942","Mar-1910","23-Jul-1926","17-Jun-1992","26-Jul-1830","19-Dec-1989","Reported 17-Dec-1896","22-Nov-1925","21-Oct-2017","Reported 05-Nov-1963","19-Jan-1975","16-Dec-2004","23-Nov-2019","03-Jan-2004","17-Jul-2010","11-Jun-2015","21-Jul-1921","No date, Before 1975","09-Dec-1994","27-Feb-2003","16-Sep-2017","30-Sep-2011","09-Feb-2015","03-Jul-1954","01-Jan-1940","15-Jun-1970","190Feb-2010","07-Apr-2016","Dec-1826","26-Sep-1959","05-Apr-1959","09-Apr-1959","28-Sep-1995","Reported 20-Dec-1998","08-Sep-1990","June 1874","11-Mar-1979","26-Jan-1989","25-Apr-1975","Reported 14-Apr-1928","10-Oct-1880","10-Mar-2005","12-Aug-1881","Reported 13-Jul-1853","1767","16-Jul-2000","03-Sep-1995","21-Dec-2011","30-Jul-1901"],"top_values":[["1957",11],["1942",9],["1956",8],["1958",7],["1950",7],["1941",7],["1949",6],["No date",6],["Before 1958",6],["05-Oct-2003",5],["12-Apr-2001",5],["28-Jul-1995",5],["1970s",5],["Oct-1960",5],["1959",5],["Aug-1956",5],["1955",5],["1954",5],["1940",5],["No date, Before 1963",5]],"top_words":[["reported",559],["before",85],["ca.",33],["no",26],["may",20],["date,",19],["summer",17],["late",15],["1950",15],["1958",14],["1957",14],["early",13],["1956",11],["1942",11],["of",10],["or",9],["1940",9],["to",8],["between",8],["1963",8],["1960",8],["1954",8],["&",7],["1965",7],["1955",7]],"vocab_skipped":null,"word_histogram":{"counts":[5639,0,0,734,0,0,45,0,0,24,0,0,7,0,0,4,0,0,4,0,0,2,0,0,1,0,0,0,0,1],"edges":[1.0,1.3333333333333333,1.6666666666666665,2.0,2.333333333333333,2.6666666666666665,3.0,3.333333333333333,3.6666666666666665,4.0,4.333333333333333,4.666666666666666,5.0,5.333333333333333,5.666666666666666,6.0,6.333333333333333,6.666666666666666,7.0,7.333333333333333,7.666666666666666,8.0,8.333333333333332,8.666666666666666,9.0,9.333333333333332,9.666666666666666,10.0,10.333333333333332,10.666666666666666,11.0]}},"kind":"text","n":6462,"n_null":1,"n_unique":5552,"null_rate":0.00015475085112968121,"stats":{"allcaps_rate":0.050920910075839654,"boilerplate_rate":0.0,"duplicate_rate":0.1406902956198731,"emoji_rate":0.0,"len_max":64,"len_mean":11.425475932518186,"len_median":11.0,"len_min":4,"len_p95":20.0,"n_duplicates":909,"n_empty":0,"one_word_rate":0.872775112211732,"readability_flesch_mean":89.92677500000003,"url_rate":0.0,"vocab_size":5496,"word_mean":1.1547748026621265,"word_median":1.0}},{"alerts":[{"code":"high_skew","level":"info","message":"skew=-6.55"}],"column":"Year","extras":{"histogram":{"counts":[126,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4,6,7,37,371,1975,3930,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[0.0,75.475,150.95,226.42499999999998,301.9,377.375,452.84999999999997,528.3249999999999,603.8,679.275,754.75,830.2249999999999,905.6999999999999,981.175,1056.6499999999999,1132.125,1207.6,1283.0749999999998,1358.55,1434.0249999999999,1509.5,1584.975,1660.4499999999998,1735.925,1811.3999999999999,1886.8749999999998,1962.35,2037.8249999999998,2113.2999999999997,2188.7749999999996,2264.25,2339.725,2415.2,2490.6749999999997,2566.1499999999996,2641.625,2717.1,2792.575,2868.0499999999997,2943.5249999999996,3019.0]},"sample":[2019.0,2019.0,2019.0,2019.0,2019.0,2018.0,2018.0,2018.0,2018.0,2018.0,2018.0,2018.0,2018.0,2018.0,2017.0,2017.0,2017.0,2017.0,2017.0,2017.0,2017.0,2016.0,2016.0,2016.0,2016.0,2016.0,2016.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2015.0,2014.0,2014.0,2014.0,2014.0,2014.0,2014.0,2014.0,2014.0,2014.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2013.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2012.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2011.0,2010.0,2010.0,2010.0,2010.0,2010.0,2010.0,2010.0,2010.0,2009.0,2009.0,2009.0,2009.0,2009.0,2008.0,2008.0,2008.0,2008.0,2008.0,2008.0,2007.0,2007.0,2007.0,2007.0,2007.0,2007.0,2007.0,2006.0,2006.0,2006.0,2006.0,2006.0,2006.0,2006.0,2005.0,2005.0,2005.0,2005.0,2005.0,2005.0,2005.0,2004.0,2004.0,2004.0,2004.0,2004.0,2004.0,2004.0,2004.0,2004.0,2004.0,2003.0,2003.0,2003.0,2003.0,2003.0,2003.0,2003.0,2003.0,2003.0,2003.0,2002.0,2002.0,2002.0,2002.0,2002.0,2002.0,2002.0,2002.0,2002.0,2002.0,2001.0,2001.0,2001.0,2001.0,2001.0,2001.0,2001.0,2001.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,1999.0,1999.0,1998.0,1998.0,1998.0,1998.0,1997.0,1997.0,1997.0,1996.0,1995.0,1995.0,1995.0,1995.0,1994.0,1994.0,1994.0,1994.0,1994.0,1993.0,1993.0,1993.0,1993.0,1993.0,1993.0,1992.0,1992.0,1992.0,1992.0,1992.0,1992.0,1991.0,1991.0,1991.0,1991.0,1991.0,1991.0,1990.0,1990.0,1990.0,1990.0,1990.0,1989.0,1989.0,1989.0,1989.0,1989.0,1988.0,1988.0,1988.0,1988.0,1988.0,1987.0,1986.0,1986.0,1986.0,1985.0,1985.0,1985.0,1985.0,1984.0,1984.0,1984.0,1984.0,1984.0,1983.0,1983.0,1983.0,1983.0,1983.0,1983.0,1982.0,1982.0,1981.0,1981.0,1981.0,1980.0,1979.0,1979.0,1979.0,1978.0,1978.0,1977.0,1976.0,1976.0,1975.0,1975.0,1975.0,1975.0,1975.0,1975.0,1974.0,1974.0,1974.0,1973.0,1973.0,1973.0,1972.0,1972.0,1972.0,1970.0,1969.0,1969.0,1968.0,1968.0,1968.0,1967.0,1967.0,1967.0,1966.0,1966.0,1965.0,1964.0,1964.0,1964.0,1964.0,1964.0,1964.0,1964.0,1964.0,1964.0,1963.0,1963.0,1963.0,1963.0,1963.0,1963.0,1963.0,1962.0,1962.0,1961.0,1961.0,1961.0,1961.0,1961.0,1961.0,1961.0,1961.0,1961.0,1960.0,1960.0,1960.0,1960.0,1960.0,1960.0,1960.0,1960.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1959.0,1958.0,1958.0,1958.0,1957.0,1956.0,1956.0,1956.0,1955.0,1955.0,1955.0,1954.0,1954.0,1953.0,1953.0,1952.0,1952.0,1952.0,1952.0,1951.0,1950.0,1950.0,1949.0,1949.0,1949.0,1949.0,1948.0,1948.0,1948.0,1948.0,1947.0,1947.0,1947.0,1947.0,1947.0,1947.0,1947.0,1946.0,1945.0,1944.0,1944.0,1944.0,1944.0,1943.0,1943.0,1943.0,1942.0,1942.0,1942.0,1942.0,1942.0,1942.0,1941.0,1940.0,1940.0,1940.0,1939.0,1938.0,1938.0,1937.0,1936.0,1936.0,1936.0,1936.0,1936.0,1936.0,1935.0,1934.0,1934.0,1933.0,1932.0,1932.0,1932.0,1931.0,1931.0,1930.0,1930.0,1929.0,1929.0,1928.0,1927.0,1926.0,1925.0,1925.0,1924.0,1924.0,1923.0,1921.0,1918.0,1916.0,1916.0,1913.0,1912.0,1911.0,1910.0,1910.0,1909.0,1907.0,1907.0,1907.0,1907.0,1906.0,1906.0,1905.0,1904.0,1903.0,1900.0,1898.0,1898.0,1898.0,1893.0,1893.0,1893.0,1890.0,1887.0,1887.0,1887.0,1886.0,1885.0,1885.0,1884.0,1883.0,1883.0,1882.0,1880.0,1880.0,1880.0,1879.0,1878.0,1878.0,1877.0,1877.0,1872.0,1871.0,1864.0,1863.0,1863.0,1863.0,1862.0,1862.0,1862.0,1858.0,1856.0,1855.0,1853.0,1852.0,1852.0,1851.0,1847.0,1846.0,1845.0,1840.0,1829.0,1828.0,1822.0,1800.0,1742.0,1580.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},"kind":"numeric","n":6462,"n_null":3,"n_unique":252,"null_rate":0.00046425255338904364,"stats":{"iqr":63.0,"kurtosis":42.54106469471457,"max":3019.0,"mean":1929.619445734634,"median":1980.0,"min":0.0,"n_outliers":266,"outlier_rate":0.041182845641740205,"q1":1943.0,"q3":2006.0,"skew":-6.554408357322348,"std":278.3163402088628,"zero_rate":0.01935284099705837}},{"alerts":[],"column":"Type","extras":{"singletons":4,"top_values":[["Unprovoked",4716],["Provoked",593],["Invalid",552],["Sea Disaster",239],["Watercraft",142],["Boat",109],["Boating",92],["Questionable",10],["Unconfirmed",1],["Unverified",1],["Under investigation",1],["Boatomg",1]]},"kind":"categorical","n":6462,"n_null":5,"n_unique":12,"null_rate":0.0007737542556484061,"stats":{"cardinality":12,"entropy":1.4569836422036362,"entropy_ratio":0.4064153089217941,"top_rate":0.7303701409323216,"top_value":"Unprovoked"}},{"alerts":[],"column":"Country","extras":{"singletons":78,"top_values":[["USA",2310],["AUSTRALIA",1374],["SOUTH AFRICA",585],["NEW ZEALAND",135],["PAPUA NEW GUINEA",135],["BAHAMAS",115],["BRAZIL",113],["MEXICO",95],["ITALY",71],["PHILIPPINES",62],["FIJI",62],["REUNION",60],["NEW CALEDONIA",56],["CUBA",46],["SPAIN",44],["MOZAMBIQUE",44],["EGYPT",42],["INDIA",40],["JAPAN",34],["CROATIA",34]]},"kind":"categorical","n":6462,"n_null":51,"n_unique":205,"null_rate":0.007892293407613741,"stats":{"cardinality":205,"entropy":3.9090510160751624,"entropy_ratio":0.5090254763895935,"top_rate":0.36031820308844176,"top_value":"USA"}},{"alerts":[{"code":"long_tail","level":"info","message":"540 singleton categories"}],"column":"Area","extras":{"singletons":540,"top_values":[["Florida",1076],["New South Wales",498],["Queensland",325],["Hawaii",312],["California",294],["KwaZulu-Natal",215],["Western Australia",197],["Western Cape Province",195],["Eastern Cape Province",165],["South Carolina",163],["North Carolina",111],["South Australia",104],["Victoria",92],["Texas",75],["Pernambuco",74],["Torres Strait",72],["North Island",70],["New Jersey",55],["Tasmania",42],["South Island",41]]},"kind":"categorical","n":6462,"n_null":463,"n_unique":810,"null_rate":0.0716496440730424,"stats":{"cardinality":810,"entropy":6.162974258712775,"entropy_ratio":0.637871641880697,"top_rate":0.1793632272045341,"top_value":"Florida"}},{"alerts":[{"code":"multilingual","level":"info","message":"31 languages detected in sample"},{"code":"duplicates","level":"warn","message":"29.9% duplicate strings"}],"column":"Location","extras":{"language_counts":{"__engine":"fasttext:4,247","af":5,"ca":11,"ceb":13,"cs":2,"cy":10,"de":61,"en":3746,"eo":5,"es":79,"fi":8,"fr":66,"hr":4,"hu":4,"id":11,"it":58,"jbo":4,"lv":3,"ms":7,"nl":29,"pl":5,"pt":65,"ro":6,"ru":5,"sh":2,"sv":9,"sw":2,"tl":1,"tr":4,"vi":5,"war":4},"language_sample_size":5000,"length_histogram":{"counts":[106,542,758,720,445,358,352,393,524,293,505,203,152,124,124,95,54,52,26,17,21,14,8,5,2,4,6,1,2,1,1,0,1,1,2,0,3,1,0,1],"edges":[3.0,5.9,8.8,11.7,14.6,17.5,20.4,23.3,26.2,29.099999999999998,32.0,34.9,37.8,40.699999999999996,43.6,46.5,49.4,52.3,55.199999999999996,58.1,61.0,63.9,66.8,69.7,72.6,75.5,78.39999999999999,81.3,84.2,87.1,90.0,92.89999999999999,95.8,98.7,101.6,104.5,107.39999999999999,110.3,113.2,116.1,119.0]},"near_unique":false,"sample":["Esperance","Mornington Island, Gulf of Carpentaria","Tripoli","Rosebud","Boa Viagem, Recife","Sydney Harbor","North Jetty Park, Fort Pierce, St Lucie County","Caibarien Harbor","Middle Brighton, Port Phillip","Gars Garabulli","Mocambo","Bribie Island","Folly Beach","Off Barra Vieja","Pismo Beach, San Luis Obispo County","Ras Nasrani, Sharm el-Sheikh","Tyrendarra Beach near Portland","15 miles up the Cataract River","In a river feeding into the Bay of Bengal","Truro (Cape Cod), Barnstable County","Perth? (Margaret River District)","Ocean Reef Park, Singer Island, Palm Beach County","Cat Island","Folette","Cojimar","Country Club Beach, Durban","Daytona Beach, Volusia County","New Smyrna Beach, Volusia County","Ain Sokhna","Off the harbor wall at Mandrakina","Thiaroye Guedi","Symi Island","Little River Beach, Horry County","New Smyrna Beach, Volusia County","Coco Beach, Dar-es-Salaam","Paiva","In the bay near the naval yard at Pensacola, Escambia County","Amanzimtoti","Kanazawa?","Fort Walton Beach, Okaloosa County","1 mile off Mala Wharf, Lahaina, Maui","Hawkesbury Bridge, Sydney","Ponce Inlet, Volusia County","Fernandina, Nassau County","St. Helena Bay","Tombo Island in the Sierra Leone River","Isle of Palms, Charleston County","Santa Rosa, Cozumel","Indialantic, Brevard County","Tazacorte, La Palma"],"top_values":[["New Smyrna Beach, Volusia County",181],["Daytona Beach, Volusia County",34],["Cocoa Beach, Brevard County",26],["Ponce Inlet, Volusia County",21],["Melbourne Beach, Brevard County",19],["Durban",18],["Myrtle Beach, Horry County",17],["Isle of Palms, Charleston County",16],["Boa Viagem, Recife",14],["Ponce Inlet, New Smyrna Beach, Volusia County",13],["Jacksonville Beach, Duval County",12],["Piedade",12],["Ormond Beach, Volusia County",11],["Palm Beach, Palm Beach County",11],["Jensen Beach, Martin County",11],["Mossel Bay",11],["Ahvaz, on the Karun River",11],["Boa Viagem Beach, Recife",10],["Singer Island, Riviera Beach, Palm Beach County",10],["Nahoon",10]],"top_words":[["county",1532],["beach,",1144],["beach",463],["island",412],["bay",379],["of",341],["volusia",323],["off",319],["new",259],["near",246],["smyrna",221],["river",219],["island,",216],["miles",192],["port",179],["north",161],["the",160],["palm",146],["san",141],["st.",139],["south",139],["bay,",128],["brevard",110],["point,",103],["sydney",99]],"vocab_skipped":null,"word_histogram":{"counts":[879,1497,0,885,987,0,796,347,0,242,0,115,64,0,50,20,0,14,7,0,4,0,1,0,0,1,2,0,5,1],"edges":[1.0,1.6333333333333333,2.2666666666666666,2.9,3.533333333333333,4.166666666666666,4.8,5.433333333333334,6.066666666666666,6.699999999999999,7.333333333333333,7.966666666666667,8.6,9.233333333333333,9.866666666666667,10.5,11.133333333333333,11.766666666666666,12.399999999999999,13.033333333333333,13.666666666666666,14.299999999999999,14.933333333333334,15.566666666666666,16.2,16.833333333333332,17.466666666666665,18.099999999999998,18.733333333333334,19.366666666666667,20.0]}},"kind":"text","n":6462,"n_null":545,"n_unique":4148,"null_rate":0.08433921386567626,"stats":{"allcaps_rate":0.000507013689369613,"boilerplate_rate":0.0,"duplicate_rate":0.29896907216494845,"emoji_rate":0.0,"len_max":119,"len_mean":22.75477437890823,"len_median":21.0,"len_min":3,"len_p95":47.0,"n_duplicates":1769,"n_empty":0,"one_word_rate":0.1485550109852966,"readability_flesch_mean":53.39891428571431,"url_rate":0.0,"vocab_size":4483,"word_mean":3.5399695791786376,"word_median":3.0}},{"alerts":[{"code":"one_word","level":"warn","message":"62.9% rows are a single word"},{"code":"duplicates","level":"warn","message":"74.3% duplicate strings"}],"column":"Activity","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[2033,2150,468,376,228,177,134,84,42,51,35,25,20,10,9,14,10,6,5,1,1,7,4,4,3,0,2,1,0,2,3,0,0,1,0,0,2,1,0,1],"edges":[1.0,7.325,13.65,19.975,26.3,32.625,38.95,45.275,51.6,57.925000000000004,64.25,70.575,76.9,83.22500000000001,89.55,95.875,102.2,108.525,114.85000000000001,121.175,127.5,133.82500000000002,140.15,146.475,152.8,159.125,165.45000000000002,171.775,178.1,184.425,190.75,197.07500000000002,203.4,209.725,216.05,222.375,228.70000000000002,235.025,241.35,247.675,254.0]},"near_unique":false,"sample":["Scuba diving","Swimming","Swimming","Launching rowboat through the surf","Surfing","Wreck of the USS Somers","Swimming","Sea Disaster","Fishing","Spearfishing","Collecting marine specimens","Scuba diving","Standing / Surfing","Snorkeling","Spearfishing","Body boarding","Kayak Fishing","Swimming","Kitesurfing","Swimming","Spearfishing","Surfing","Attempting to Kite surf from Egypt to Saudi Arabia","Surfing","Free diving, working on U/W scenes for motion picture","The Pacquebot Laconia, enroute to Liverpool with 600 Italian prisoners onboard, was torpedoed by the German submarine U-156 and only 2 rafts were launched before the ship went down. Unable to board an overcrowded raft, he was swimming.","Diving for abalone","Snorkeling","Swimming","Harassing a shark","Aircraft crashed into sea","Swimming in pool formed by construction of a wharf","Spearfishing","Surfing","Standing","Surfing","Swimming","Spearfishing","Finning the shark that bit him","Surfing","Swimming","Swimming","Surfing","Bathing","Bathing","Swimming","Surfing","Floating on raft","Boogie Boarding","Swimming"],"top_values":[["Surfing",1025],["Swimming",932],["Fishing",459],["Spearfishing",350],["Bathing",166],["Wading",158],["Diving",134],["Standing",103],["Snorkeling",100],["Scuba diving",80],["Body boarding",62],["Body surfing",50],["Kayaking",37],["Boogie boarding",36],["Fell overboard",33],["Treading water",32],["Pearl diving",32],["Free diving",31],["Surf skiing",20],["Windsurfing",19]],"top_words":[["surfing",1137],["swimming",1108],["fishing",704],["diving",525],["spearfishing",415],["the",359],["in",248],["a",233],["for",211],["on",208],["bathing",191],["water",191],["wading",183],["to",181],["&",175],["shark",171],["of",169],["from",162],["scuba",147],["boarding",145],["standing",144],["body",138],["boat",134],["snorkeling",109],["fell",105]],"vocab_skipped":null,"word_histogram":{"counts":[4436,343,515,158,95,134,38,62,28,15,21,8,14,2,6,5,2,5,7,1,5,0,1,0,2,4,1,1,0,1],"edges":[1.0,2.4,3.8,5.199999999999999,6.6,8.0,9.399999999999999,10.799999999999999,12.2,13.6,15.0,16.4,17.799999999999997,19.2,20.599999999999998,22.0,23.4,24.799999999999997,26.2,27.599999999999998,29.0,30.4,31.799999999999997,33.199999999999996,34.599999999999994,36.0,37.4,38.8,40.199999999999996,41.599999999999994,43.0]}},"kind":"text","n":6462,"n_null":552,"n_unique":1516,"null_rate":0.08542246982358404,"stats":{"allcaps_rate":0.0005076142131979696,"boilerplate_rate":0.0,"duplicate_rate":0.7434856175972927,"emoji_rate":0.0,"len_max":254,"len_mean":16.207952622673435,"len_median":8.0,"len_min":1,"len_p95":49.0,"n_duplicates":4394,"n_empty":0,"one_word_rate":0.6289340101522842,"readability_flesch_mean":39.558370242992325,"url_rate":0.0,"vocab_size":2244,"word_mean":2.4967851099830796,"word_median":1.0}},{"alerts":[],"column":"Name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[939,1355,2774,488,223,128,94,47,66,34,25,26,20,6,3,8,2,1,1,2,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,6.5,12.0,17.5,23.0,28.5,34.0,39.5,45.0,50.5,56.0,61.5,67.0,72.5,78.0,83.5,89.0,94.5,100.0,105.5,111.0,116.5,122.0,127.5,133.0,138.5,144.0,149.5,155.0,160.5,166.0,171.5,177.0,182.5,188.0,193.5,199.0,204.5,210.0,215.5,221.0]},"near_unique":false,"sample":["Peter ___","sailor","Jules Antoine","Mrs. Hoskin","Siale Sime","male","Todd R. Wenke","male","male","Susan Peteka","O.D.","Danson Nakaima","Jay Catherall","Hanan Shaul","Wolfgang Leander","Jason Whitworth","Patrick Thornton","Master Hurley","Carl Bruster","male","James Ingram","male","Dave Fordson","Diego Gomes Mota","Noel Langford","Colonel B. & Sub-Lieutenant D.","Robert Richard","male","Hoata Iotua","Ned & Pawn","Kenneth William Murray","crewman","12 m fishing boat. Occupant: Henry Tervo","Beth Shannon","G.C.","John Ferrerira","boy","Gerald Correria","Michael Preston","Francisco Pelle","schoolboy, a Torres Strait Islander","a native diver","Jamie Marie Daigle","Arthur Evans","sailor","male","Ken Crew","Lee Kwan-seok","Jan Lisewski","Frederick Wiseman"],"top_values":[["male",579],["female",106],["boy",23],["2 males",19],["boat",14],["child",12],["Anonymous",11],["sailor",11],["males",10],["a sailor",8],["girl",7],["Unidentified",6],["fisherman",6],["a pearl diver",6],["a soldier",5],["a native",5],["Unknown",4],["black male",4],["2 fishermen",4],["soldier",4]],"top_words":[["male",639],["a",269],["&",218],["john",164],["occupants:",118],["female",117],["the",104],["william",94],["james",90],["of",87],["2",80],["robert",79],["from",75],["mr.",73],["boat",70],["boat,",68],["david",65],["michael",64],["richard",53],["boy",51],["charles",50],["peter",48],["thomas",48],["george",47],["paul",44]],"vocab_skipped":null,"word_histogram":{"counts":[4845,657,249,244,63,51,47,56,10,11,4,2,2,0,2,0,0,2,1,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,2.2666666666666666,3.533333333333333,4.8,6.066666666666666,7.333333333333333,8.6,9.866666666666667,11.133333333333333,12.399999999999999,13.666666666666666,14.933333333333334,16.2,17.466666666666665,18.733333333333334,20.0,21.266666666666666,22.53333333333333,23.799999999999997,25.066666666666666,26.333333333333332,27.599999999999998,28.866666666666667,30.133333333333333,31.4,32.666666666666664,33.93333333333333,35.199999999999996,36.46666666666667,37.733333333333334,39.0]}},"kind":"text","n":6462,"n_null":215,"n_unique":5339,"null_rate":0.03327143299288146,"stats":{"allcaps_rate":0.0068833039859132385,"boilerplate_rate":0.0,"duplicate_rate":0.14534976788858653,"emoji_rate":0.0,"len_max":221,"len_mean":14.830158476068513,"len_median":13.0,"len_min":1,"len_p95":35.0,"n_duplicates":908,"n_empty":0,"one_word_rate":0.16567952617256282,"readability_flesch_mean":51.73407689810191,"url_rate":0.0,"vocab_size":6536,"word_mean":2.4528573715383386,"word_median":2.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"99.6% null"}],"column":"Unnamed: 9","extras":{"singletons":0,"top_values":[["M",24],["F",4]]},"kind":"categorical","n":6462,"n_null":6434,"n_unique":2,"null_rate":0.995666976168369,"stats":{"cardinality":2,"entropy":0.5916727785823275,"entropy_ratio":0.5916727785823275,"top_rate":0.8571428571428571,"top_value":"M"}},{"alerts":[{"code":"null_rate","level":"warn","message":"44.4% null"}],"column":"Age","extras":{"singletons":70,"top_values":[["17",160],["18",155],["20",146],["19",145],["16",140],["15",140],["21",122],["22",118],["25",111],["24",109],["14",104],["13",97],["26",85],["23",83],["28",82],["30",80],["29",80],["27",79],["12",75],["32",72]]},"kind":"categorical","n":6462,"n_null":2871,"n_unique":154,"null_rate":0.44428969359331477,"stats":{"cardinality":154,"entropy":5.8266705973699455,"entropy_ratio":0.8018221761076745,"top_rate":0.04455583402951824,"top_value":"17"}},{"alerts":[{"code":"multilingual","level":"info","message":"10 languages detected in sample"},{"code":"allcaps","level":"info","message":"13.1% rows are all-caps"},{"code":"duplicates","level":"warn","message":"41.9% duplicate strings"}],"column":"Injury","extras":{"language_counts":{"__engine":"fasttext:4,337","de":496,"en":3812,"es":6,"fr":11,"ja":1,"nl":8,"pl":1,"ro":1,"th":1},"language_sample_size":5000,"length_histogram":{"counts":[1206,740,851,788,636,429,382,251,238,200,132,128,100,73,55,47,45,31,13,22,15,6,9,10,4,2,2,0,3,3,3,1,0,2,2,1,0,1,0,2],"edges":[5.0,10.725,16.45,22.174999999999997,27.9,33.625,39.349999999999994,45.074999999999996,50.8,56.525,62.25,67.975,73.69999999999999,79.425,85.14999999999999,90.875,96.6,102.32499999999999,108.05,113.77499999999999,119.5,125.225,130.95,136.67499999999998,142.39999999999998,148.125,153.85,159.575,165.29999999999998,171.02499999999998,176.75,182.475,188.2,193.92499999999998,199.64999999999998,205.375,211.1,216.825,222.54999999999998,228.27499999999998,234.0]},"near_unique":false,"sample":["FATAL","Hip bitten","Right hand severely bitten by netted shark PROVOKED INCIDENT","Arm injured","No details","FATAL","No injury, board broken in two","FATAL","Thigh lacerated","Minor injuries","Elbow bitten","FATAL, of 190 passengers & crew thrown into the water, 50 people were said to have been killed by sharks","Left foot severed","Believed to have drowned. Partial remains washed ashore at North Beach, Mylestom, New South Wales on 17 December. Shark involvement prior to death could not be confirmed","No injury, swim fin damaged","Lacerations to forearm","Left arm amputated at elbow & severe injury to leg","Right hip, buttock, elbow, arm & wrist bitten","Foot severed","No details","2 lacerations on left thigh","Lacerations to right foot","No injury","Thigh bitten","FATAL, abdomen, buttock, right thigh & hands bitten","No injury. Shark grazed canoe, snapped at a piece of mast being trailed in the water & followed it into 2' of water","Another diver shot shark, shark bit his left foream PROVOKED INCIDENT","Arm bitten PROVOKED INCIDENT","FATAL","FATAL","Ankle twisted, swim fin bitten","Foot bitten","Thigh lacerated","Survived","No Injury, ski bitten","Hand & wrist bitten, tooth fragments in wound","FATAL","FATAL, but shark involvement prior to death was not determined","Board rammed by shark, skegs knocked loose & 5' strip of fiberglass torn off, thigh scratched by shark\u2019s teeth","Lacerations to right foot","Knee bitten","FATAL","Laceration on siide of calf, small laceration on thigh, large bruise on other leg inside the knee, knuckle of hand abraded","No injury, sack rammed by shark & shark harassed him when he surfaced","FATAL","Human remains recovered from shark","Non-fatal","Leg & ankle bitten","No injury, punctures to swim fin","FATAL"],"top_values":[["FATAL",823],["Survived",97],["Foot bitten",93],["No injury",88],["Leg bitten",74],["Left foot bitten",52],["No details",43],["Right foot bitten",39],["Hand bitten",32],["No injury, board bitten",31],["Thigh bitten",27],["Lacerations to foot",25],["FATAL, body not recovered",25],["Calf bitten",23],["Arm bitten",23],["Minor injury",23],["Right leg bitten",22],["Lacerations to left foot",22],["Lacerations to right foot",22],["Foot lacerated",22]],"top_words":[["to",1708],["bitten",1428],["shark",1208],["&",1011],["no",926],["fatal",913],["leg",875],["right",873],["left",867],["foot",788],["lacerations",629],["provoked",588],["by",583],["incident",582],["injury",557],["lacerated",439],["hand",436],["fatal,",426],["injury,",425],["of",403],["the",401],["arm",395],["thigh",390],["on",361],["and",267]],"vocab_skipped":null,"word_histogram":{"counts":[1780,724,921,1140,441,288,420,145,117,86,135,35,46,72,11,11,19,7,3,4,12,2,2,2,4,0,2,0,1,3],"edges":[1.0,2.3,3.6,4.9,6.2,7.5,8.8,10.1,11.4,12.700000000000001,14.0,15.3,16.6,17.900000000000002,19.2,20.5,21.8,23.1,24.400000000000002,25.7,27.0,28.3,29.6,30.900000000000002,32.2,33.5,34.800000000000004,36.1,37.4,38.7,40.0]}},"kind":"text","n":6462,"n_null":29,"n_unique":3738,"null_rate":0.004487774682760755,"stats":{"allcaps_rate":0.13073216228820145,"boilerplate_rate":0.0,"duplicate_rate":0.41893362350380847,"emoji_rate":0.0,"len_max":234,"len_mean":31.52868024249961,"len_median":25.0,"len_min":5,"len_p95":82.0,"n_duplicates":2695,"n_empty":0,"one_word_rate":0.14891963314161355,"readability_flesch_mean":53.74188095895593,"url_rate":0.0,"vocab_size":2550,"word_mean":5.414114720969998,"word_median":4.0}},{"alerts":[],"column":"Fatal (Y/N)","extras":{"singletons":3,"top_values":[["N",4439],["Y",1400],["UNKNOWN",71],["F",2],["M",1],["2017",1],["y",1]]},"kind":"categorical","n":6462,"n_null":547,"n_unique":7,"null_rate":0.08464871556793563,"stats":{"cardinality":7,"entropy":0.8896999150326991,"entropy_ratio":0.31691750410404407,"top_rate":0.750464919695689,"top_value":"N"}},{"alerts":[{"code":"long_tail","level":"info","message":"199 singleton categories"},{"code":"null_rate","level":"warn","message":"52.5% null"}],"column":"Time","extras":{"singletons":199,"top_values":[["Afternoon",193],["11h00",131],["Morning",126],["12h00",113],["15h00",111],["16h00",106],["14h00",102],["16h30",79],["17h30",77],["13h00",75],["17h00",74],["14h30",73],["18h00",72],["15h30",67],["11h30",65],["13h30",64],["10h00",63],["Night",63],["09h00",55],["10h30",51]]},"kind":"categorical","n":6462,"n_null":3392,"n_unique":366,"null_rate":0.5249148870318787,"stats":{"cardinality":366,"entropy":6.558804921716635,"entropy_ratio":0.7702015155853905,"top_rate":0.06286644951140065,"top_value":"Afternoon"}},{"alerts":[{"code":"multilingual","level":"info","message":"15 languages detected in sample"},{"code":"null_rate","level":"warn","message":"45.2% null"},{"code":"duplicates","level":"warn","message":"58.6% duplicate strings"}],"column":"Species ","extras":{"language_counts":{"__engine":"fasttext:2,671","de":14,"en":2582,"eu":13,"fi":18,"it":3,"ja":1,"nl":2,"ro":2,"sh":1,"sq":7,"sv":2,"tr":8,"vi":7,"zh":11},"language_sample_size":5000,"length_histogram":{"counts":[105,855,833,448,324,285,120,127,115,170,32,25,21,13,17,11,7,4,6,4,2,0,1,1,0,2,1,1,1,1,0,2,1,1,0,0,0,1,0,1],"edges":[3.0,7.775,12.55,17.325000000000003,22.1,26.875,31.650000000000002,36.425000000000004,41.2,45.975,50.75,55.525000000000006,60.300000000000004,65.075,69.85000000000001,74.625,79.4,84.17500000000001,88.95,93.72500000000001,98.5,103.275,108.05000000000001,112.825,117.60000000000001,122.37500000000001,127.15,131.925,136.70000000000002,141.47500000000002,146.25,151.025,155.8,160.57500000000002,165.35000000000002,170.125,174.9,179.675,184.45000000000002,189.22500000000002,194.0]},"near_unique":false,"sample":["White shark","Questionable incident","Questionable incident","Carpet shark, 5'","Bull shark","Invalid","1 m shark","Shark involvement prior to death unconfirmed","\"Attacked by a number of sharks\"","2 m shark","1.5 to 2 m [5' to 6.75'] shark","Sand shark?","6' to 8' shark, possibly a tiger shark","6.5' shark","Tiger shark","Grey reef shark","White shark","Shark involvement prior to death not confirmed","Bull shark","Nurse shark, 1.2 m [4']","Shark involvement prior to death was not confirmed","5' shark","White shark, 5m","White shark","White shark, 1.5 m [5'] k","Blue shark, 1.5 m [5']","Invalid","1' to 2' shark","Tiger shark, 14'","Shark involvement questionable","3 m [10'] bull shark","Grey nurse shark","shark pup","1.2 m to 1.8 m [4' to 6'] shark","Tiger shark, 1.5 m [5']k","small blacktip shark","Bull shark","5 m to 6 m [16.5' to 20'] white shark","6' shark","1.5 m [5'] Caribbean reef shark","Bull shark suspected due to freshwater habitat","Reported to involve a 3.7 m [12'] shark, possibly a white shark","3' to 3.5' shark","Thought to involve a 2.6 m [8.5'] white shark","Shark involvement prior to death unconfirmed","3' shark","3.5 m [11.5'] white shark","White shark, 4.3 m [14']","a small reef shark","Invalid"],"top_values":[["White shark",166],["Shark involvement prior to death was not confirmed",105],["Invalid",102],["Shark involvement not confirmed",89],["Tiger shark",81],["Shark involvement prior to death unconfirmed",68],["Bull shark",53],["4' shark",41],["6' shark",40],["1.8 m [6'] shark",38],["Questionable incident",35],["Questionable",34],["1.5 m [5'] shark",33],["1.2 m [4'] shark",29],["3' shark",28],["5' shark",27],["4' to 5' shark",25],["2 m shark",25],["Wobbegong shark",24],["3 m [10'] shark",22]],"top_words":[["shark",2317],["m",1421],["to",958],["shark,",844],["white",648],["involvement",333],["tiger",275],["not",234],["confirmed",228],["a",213],["prior",199],["death",194],["bull",192],["3",172],["1.8",152],["1.5",150],["was",139],["4'",131],["2",114],["5'",110],["blacktip",106],["nurse",103],["invalid",103],["6'",101],["2.4",91]],"vocab_skipped":null,"word_histogram":{"counts":[1049,548,668,465,207,90,263,126,27,19,25,14,13,5,6,1,0,0,2,3,0,0,1,0,3,1,1,0,0,1],"edges":[1.0,2.166666666666667,3.3333333333333335,4.5,5.666666666666667,6.833333333333334,8.0,9.166666666666668,10.333333333333334,11.5,12.666666666666668,13.833333333333334,15.0,16.166666666666668,17.333333333333336,18.5,19.666666666666668,20.833333333333336,22.0,23.166666666666668,24.333333333333336,25.5,26.666666666666668,27.833333333333336,29.0,30.166666666666668,31.333333333333336,32.5,33.66666666666667,34.833333333333336,36.0]}},"kind":"text","n":6462,"n_null":2924,"n_unique":1466,"null_rate":0.4524914887031879,"stats":{"allcaps_rate":0.0002826455624646693,"boilerplate_rate":0.0,"duplicate_rate":0.5856416054267948,"emoji_rate":0.0,"len_max":194,"len_mean":22.95110231769361,"len_median":17.0,"len_min":3,"len_p95":50.0,"n_duplicates":2072,"n_empty":0,"one_word_rate":0.040983606557377046,"readability_flesch_mean":88.63096000323206,"url_rate":0.0,"vocab_size":1105,"word_mean":4.445449406444319,"word_median":4.0}},{"alerts":[{"code":"multilingual","level":"info","message":"23 languages detected in sample"},{"code":"duplicates","level":"warn","message":"22.7% duplicate strings"}],"column":"Investigator or Source","extras":{"language_counts":{"__engine":"fasttext:4,922","ca":3,"cs":2,"de":56,"en":4457,"eo":2,"es":134,"fr":100,"hu":1,"id":4,"it":62,"ja":14,"ms":1,"nl":6,"no":1,"pl":12,"pt":15,"ru":26,"sl":1,"sv":12,"tr":1,"uk":1,"zh":11},"language_sample_size":5000,"length_histogram":{"counts":[152,461,1164,844,1098,816,400,315,203,174,150,142,116,59,61,45,39,40,30,23,23,18,14,14,9,8,5,5,4,3,4,0,0,1,0,1,1,0,0,1],"edges":[3.0,8.175,13.35,18.525,23.7,28.875,34.05,39.225,44.4,49.574999999999996,54.75,59.925,65.1,70.27499999999999,75.45,80.625,85.8,90.975,96.14999999999999,101.325,106.5,111.675,116.85,122.02499999999999,127.19999999999999,132.375,137.54999999999998,142.725,147.9,153.075,158.25,163.42499999999998,168.6,173.775,178.95,184.125,189.29999999999998,194.475,199.65,204.825,210.0]},"near_unique":false,"sample":["B. Myatt, GSAF","M. Murphy; V.M. Coppleson (1962), pp.207-208","The Sun, 4/3/1910; Authenticity questioned by G.H. Balazs in J. Borg, p.70","NY Herald Tribune, 7/25/1926; A. De Maddalena; Anon. (1926a), Anon. (1926b); C. Moore, GSAF","Charlotte Observer, 6/24/1992, p.1C & 8/8/1992, p.2C","C. Black, GSAF; Sydney Gazette, 1/22/1831","Courier-Mail, 11/24/1989, p.3; J. West, ASAF","The Star, 12/17/1896","V.M. Coppleson.W2, (1933); V.M. Coppleson (1958), pp.111 & 241; West Australia, 1/5/1967; A. Sharpe, pp.129-130; H. Edwards, pp.131-133","New Zealand Herald, 10/22/2017","H.D. Baldridge, p.109","Reunion Marine Observatory","J. Eager","B. Myatt, GSAF","J. Carlsen","NY Post, 7/16/2010","Clincanoo, 6/1/2015","New York Times, 9/1/1921","R. Skocik, p.176","A. Gifford, GSAF","J. Eager, scubaradio.com","Orlando Sentinel, 9/16/2017","News 24, 9/29/2011","The Telegraph, 1/6/2015","C. Moore, GSAF","A. De Maddalena; M. Zuffa (pers. Comm.)","E. Pace, FSAF","Die Burger, 2/18/2010","Maui Now, 3/31/2016","Edinburgh Advertiser. 9/12/1828","R. Collier, p.172","Daily Kennebec Journal, 3/27/1911","R.P.L. Straughan; R.F. Hutton; T. Helm, p.241;","S. Petersohn, GSAF; Orlando Sentinel, 9/18/1995, p.C.3","E. Ritter. GSAF","R. Collier, pp.116-118","Daily Southern Cross, 6/15/1874","W. Leander","Courier Mail, 1/4/1989, p.1; Herald, 1-4/1989, p.1; Miami Herald, 1/5/1989; A. Sharpe, p.87","Cape Times, 4/7/1975","The Western Champion, 4/14/1928","Sydney Morning Herald, 10/26/1880","Solomon Star, 3/9/2005","Providence Journal 8/15/1881","C. Creswell, GSAF; Washington Post, 10/7/1883, p.2","C. Moore, GSAF","A. Gifford, GSAF","Fort Pierce Tribune, 9/1/1995","Daily Examiner, 12/11/2011","M. Levine, GSAF"],"top_values":[["C. Moore, GSAF",106],["C. Creswell, GSAF",97],["S. Petersohn, GSAF",88],["R. Collier",55],["R. Collier, GSAF",54],["K. McMurray, TrackingSharks.com",52],["M. Levine, GSAF",51],["T. Peake, GSAF",49],["B. Myatt, GSAF",35],["A. Gifford, GSAF",30],["C. Moore. GSAF",27],["GSAF",23],["E. Ritter, GSAF",21],["E. Pace, FSAF",21],["M. Vorenberg, GSAF",21],["G. Van Grevelynghe",19],["JCOnline",16],["C. Johansson, GSAF",14],["W. Leander",14],["J. Green, p.36",14]],"top_words":[["gsaf",983],["m.",538],["v.m.",516],["&",513],["coppleson",487],["r.",464],["c.",441],["the",423],["j.",414],["a.",389],["(1958),",303],["times,",295],["levine,",275],["herald,",268],["s.",266],["gsaf;",260],["daily",242],["news,",221],["collier,",212],["p.",203],["l.",202],["sentinel,",200],["d.",199],["orlando",190],["t.",184]],"vocab_skipped":null,"word_histogram":{"counts":[932,2356,978,492,382,321,235,167,133,81,70,86,45,36,25,21,27,16,9,7,3,9,5,1,1,0,2,0,0,3],"edges":[1.0,2.033333333333333,3.066666666666667,4.1000000000000005,5.133333333333334,6.166666666666667,7.200000000000001,8.233333333333334,9.266666666666667,10.3,11.333333333333334,12.366666666666667,13.400000000000002,14.433333333333335,15.466666666666669,16.5,17.533333333333335,18.56666666666667,19.6,20.633333333333336,21.666666666666668,22.700000000000003,23.733333333333334,24.76666666666667,25.800000000000004,26.833333333333336,27.86666666666667,28.900000000000002,29.933333333333337,30.96666666666667,32.0]}},"kind":"text","n":6462,"n_null":19,"n_unique":4979,"null_rate":0.002940266171463943,"stats":{"allcaps_rate":0.018469656992084433,"boilerplate_rate":0.0,"duplicate_rate":0.22722334316312276,"emoji_rate":0.0,"len_max":210,"len_mean":32.23731181126804,"len_median":26.0,"len_min":3,"len_p95":77.0,"n_duplicates":1464,"n_empty":0,"one_word_rate":0.025919602669563866,"readability_flesch_mean":73.62297899207347,"url_rate":0.0023281080242123233,"vocab_size":7898,"word_mean":4.792177557038647,"word_median":3.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.6% of rows are unique strings"},{"code":"one_word","level":"warn","message":"98.7% rows are a single word"},{"code":"null_rate","level":"warn","message":"52.6% null"}],"column":"pdf","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[2,0,0,1,0,4,0,4,0,14,49,139,277,0,438,473,391,0,275,229,159,134,0,112,86,72,0,60,50,31,27,0,13,8,6,0,3,2,4,3],"edges":[10.0,10.775,11.55,12.325,13.1,13.875,14.65,15.425,16.2,16.975,17.75,18.525,19.3,20.075000000000003,20.85,21.625,22.4,23.175,23.950000000000003,24.725,25.5,26.275000000000002,27.05,27.825,28.6,29.375,30.150000000000002,30.925,31.7,32.475,33.25,34.025000000000006,34.8,35.575,36.35,37.125,37.900000000000006,38.675,39.45,40.225,41.0]},"near_unique":true,"sample":["2020.01.12-Malten.pdf","1900.09.05-Hartman.pdf","1872.11.30.R-MalayPirates.pdf","1885.04.16.R-GermanShip.pdf","1948.12.26-Keys.pdf","ND-0094-HaeNyeo.pdf","1947.05.13.R-Kenya.pdf","1857.05.05-Dunn.pdf","1884.08.18-Rylor.pdf","1971.11.25.R-Chan.pdf","1928.11.18-SeaBright.pdf","1936.12.15-boat-Ayerst.pdf","1959.06.14.b-Topete.pdf","2019.11.14-Tiago.pdf","1958.11.14-FalseBayFisherman.pdf","1962.08.00.b-Mattacchione.pdf","1967.07.29-NV-McGonigal.pdf","1881.00.00.a-Italy.pdf","ND-0023-Dalton.pdf","1950.07.09-Galveston.pdf","1958.00.00.c-Korem.pdf","1971.04.00-Angoche.pdf","1963.10.15-Nakata.pdf","1966.12.26-NV-Hussain-Ali.pdf","1911.10.25-Toby.pdf","1898.07.26.R-Gunner.pdf","1934.08.26-Steeke.pdf","1962.05.00-Suva-Fiji.pdf","1968.05.00-NV-Gilaba.pdf","ND-0102-Bahrein.pdf","1920.11.04-Philippines.pdf","1919.03.16-Cadets.pdf","1974.05.26-Hancock_Collier.pdf","1951.07.11-Sierks.pdf","1954.00.00.d-Vieux.pdf","1947.06.16-fishermen-Coogee.pdf","1818.05.22.R-Norway.pdf","1939.00.00.e-WREN.pdf","1946.01.01-Redfern.pdf","1936.12.19-Slaughter.pdf","1887.07.30-TheSara.pdf","1837.07.00-Barony.pdf","1959.04.05-McAuley.pdf","1839.00.00.b-Wishart.pdf","1637.00.00.R-Manrique.pdf","ND-0083-Fleming.pdf","1955.02.05-Rautenberg.pdf","1951.02.03-Pride.pdf","1963.07.15-Driscoll.pdf","1864.04.00-Mauritius.pdf"],"top_values":[],"top_words":[["-",13],["fisherman.pdf",3],["k.",2],["mcmurray,",2],["tracking",2],["sharks.com",2],["bay.pdf",2],["crew.pdf",2],["midway.pdf",2],["1935.06.05.r-solomonislands.pdf",2],["1934.12.23.a-b-inman.pdf",2],["1931.09.21.a-b-holaday-barrows.pdf",2],["1929.03.04.a-b.roads-aldridge.pdf",2],["harbor.pdf",2],["1923.00.00.a-nj",2],["1921.11.27.a-b-jack.pdf",2],["1916.12.08.a-b-german.pdf",2],["1916.07.12.a-b-stillwell-fisher.pdf",2],["1907.10.16.r-hongkong.pdf",2],["1906.09.27.r.a&b-munich-swede.pdf",2],["1898.00.00.r-syria.pdf",2],["2020.01.17-schroeter.pdf",1],["2020.01.16-king.pdf",1],["2020.01.13-horne.pdf",1],["2020.01.12-malten.pdf",1]],"vocab_skipped":null,"word_histogram":{"counts":[3025,0,0,0,0,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,7],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":6462,"n_null":3396,"n_unique":3054,"null_rate":0.5255338904363974,"stats":{"allcaps_rate":0.00032615786040443573,"boilerplate_rate":0.0,"duplicate_rate":0.003913894324853229,"emoji_rate":0.0,"len_max":41,"len_mean":23.73091976516634,"len_median":23.0,"len_min":10,"len_p95":31.0,"n_duplicates":12,"n_empty":0,"one_word_rate":0.9866275277234181,"readability_flesch_mean":-66.80857499999996,"url_rate":0.0,"vocab_size":3098,"word_mean":1.0215264187866928,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.6% of rows are unique strings"},{"code":"one_word","level":"warn","message":"98.8% rows are a single word"},{"code":"url_heavy","level":"info","message":"100.0% rows contain a URL"},{"code":"null_rate","level":"warn","message":"52.6% null"}],"column":"href formula","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,0,1,0,4,0,5,0,14,49,139,276,0,437,473,392,0,275,228,159,134,0,112,86,72,0,60,48,31,27,0,13,8,6,0,3,2,4,3],"edges":[64.0,64.775,65.55,66.325,67.1,67.875,68.65,69.425,70.2,70.975,71.75,72.525,73.3,74.075,74.85,75.625,76.4,77.175,77.95,78.725,79.5,80.275,81.05,81.825,82.6,83.375,84.15,84.925,85.7,86.475,87.25,88.025,88.8,89.575,90.35,91.125,91.9,92.675,93.45,94.225,95.0]},"near_unique":true,"sample":["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.12-Malten.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1900.08.21-Burriss.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1872.11.30.R-MalayPirates.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1885.04.16.R-GermanShip.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1948.12.14.a-Jeppeson.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0094-HaeNyeo.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1947.04.06-Watt.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1856.11.25.R-Fiji.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1884.08.18-Rylor.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1971.09.25-Horner.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1928.11.15.R-Wright.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1936.12.01-Swan.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1959.05.30-TonyDicks.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/2019.11.14-Tiago.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1958.10.12-Allman_Collier.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1962.07.20-Masnori.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1967.06.00-Messina.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1880.11.25-AlexeyDrury.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0023-Dalton.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1950.06.25-Salango.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1957.12.31-Evans.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1971.00.00.b-Nagib.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1963.09.13-Barron.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1966.11.00-Maineri.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1911.09.20-Ashe.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1898.07.15-SomaliBoatman.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1934.08.21-Novak.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1962.04.07-Blane.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1968.04.11.R-Laiokeke.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0102-Bahrein.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1920.07.14-McCann.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1919.01.15-Tahuna-Beach.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1974.04.12-Alexander.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1951.05.22-NV-Lanse.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1954.00.00.b-Severino.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1947.04.20-Neilson.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1818.05.22.R-Norway.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1939.00.00.c-Haberdasher.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1946.00.00.a-Arabwoman.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1936.12.12-Lundberg.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1887.03.12-dinghy-Black-Point.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1837.01.17-Howe.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1959.03.00-Straughan.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1839.00.00.b-Wishart.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1637.00.00.R-Manrique.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0083-Fleming.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1955.02.01-Racing-scull.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1951.01.03-HawkesburyRiver.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1963.06.01.b-Pogl.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1864.01.27.R-SailorSolent.pdf"],"top_values":[],"top_words":[["-",13],["fisherman.pdf",3],["bay.pdf",2],["crew.pdf",2],["midway.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1935.06.05.r-solomonislands.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1934.12.23.a-b-inman.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1931.09.21.a-b-holaday-barrows.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1929.03.04.a-b.roads-aldridge.pdf",2],["harbor.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1923.00.00.a-nj",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1921.11.27.a-b-jack.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1916.12.08.a-b-german.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1916.07.12.a-b-stillwell-fisher.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1907.10.16.r-hongkong.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1906.09.27.r.a&b-munich-swede.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1898.00.00.r-syria.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.17-schroeter.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.16-king.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.13-horne.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.12-malten.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.07-holmes.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.05-johnson.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.03-canala.pdf",1],["harkattackfile.net/spreadsheets/pdf_directory/2020.01.02-biighetti.pdf",1]],"vocab_skipped":null,"word_histogram":{"counts":[3025,0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,5],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":6462,"n_null":3400,"n_unique":3051,"null_rate":0.5261528938409161,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0035924232527759633,"emoji_rate":0.0,"len_max":95,"len_mean":77.72893533638145,"len_median":77.0,"len_min":64,"len_p95":85.0,"n_duplicates":11,"n_empty":0,"one_word_rate":0.98791639451339,"readability_flesch_mean":-820.1766499999998,"url_rate":0.9996734160679295,"vocab_size":3089,"word_mean":1.018615284128021,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.6% of rows are unique strings"},{"code":"one_word","level":"warn","message":"98.8% rows are a single word"},{"code":"url_heavy","level":"info","message":"100.0% rows contain a URL"},{"code":"null_rate","level":"warn","message":"52.6% null"}],"column":"href","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,1,4,19,49,415,908,666,224,290,199,131,80,27,21,9,6,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,2,2],"edges":[64.0,65.775,67.55,69.325,71.1,72.875,74.65,76.425,78.2,79.975,81.75,83.525,85.3,87.075,88.85,90.625,92.4,94.175,95.95,97.725,99.5,101.275,103.05,104.82499999999999,106.6,108.375,110.15,111.925,113.69999999999999,115.475,117.25,119.025,120.8,122.57499999999999,124.35,126.125,127.9,129.675,131.45,133.225,135.0]},"near_unique":true,"sample":["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.12-Malten.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1900.08.21-Burriss.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1872.11.30.R-MalayPirates.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1885.04.16.R-GermanShip.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1948.12.14.a-Jeppeson.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0094-HaeNyeo.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1947.04.06-Watt.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1856.11.25.R-Fiji.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1884.08.18-Rylor.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1971.09.25-Horner.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1928.11.15.R-Wright.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1936.12.01-Swan.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1959.05.30-TonyDicks.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/2019.11.14-Tiago.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1958.10.12-Allman_Collier.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1962.07.20-Masnori.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1967.06.00-Messina.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1880.11.25-AlexeyDrury.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0023-Dalton.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1950.06.25-Salango.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1957.12.31-Evans.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1971.00.00.b-Nagib.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1963.09.13-Barron.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1966.11.00-Maineri.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1911.09.20-Ashe.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1898.07.15-SomaliBoatman.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1934.08.21-Novak.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1962.04.07-Blane.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1968.04.11.R-Laiokeke.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0102-Bahrein.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1920.07.14-McCann.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1919.01.15-Tahuna-Beach.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1974.04.12-Alexander.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1951.05.22-NV-Lanse.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1954.00.00.b-Severino.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1947.04.20-Neilson.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1818.05.22.R-Norway.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1939.00.00.c-Haberdasher.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1946.00.00.a-Arabwoman.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1936.12.12-Lundberg.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1887.03.12-dinghy-Black-Point.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1837.01.17-Howe.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1959.03.00-Straughan.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1839.00.00.b-Wishart.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1637.00.00-Manrique.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/ND-0083-Fleming.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1955.02.01-Racing-scull.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1951.01.03-HawkesburyRiver.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1963.06.01.b-Pogl.pdf","http://sharkattackfile.net/spreadsheets/pdf_directory/1864.01.27.R-SailorSolent.pdf"],"top_values":[],"top_words":[["-",15],["fisherman.pdf",3],["bay.pdf",2],["crew.pdf",2],["midway.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1935.06.05.r-solomonislands.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1934.12.23.a-b-inman.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1931.09.21.a-b-holaday-barrows.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1929.03.04.a-b.roads-aldridge.pdf",2],["harbor.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1923.00.00.a-nj",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1921.11.27.a-b-jack.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1916.12.08.a-b-german.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1916.07.12.a-b-stillwell-fisher.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1907.10.16.r-hongkong.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1906.09.27.r.a&b-munich-swede.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/1898.00.00.r-syria.pdf",2],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.17-schroeter.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.16-king.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.13-horne.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.12-malten.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.07-holmes.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.05-johnson.pdf",1],["http://sharkattackfile.net/spreadsheets/pdf_directory/2020.01.03-canala.pdf",1],["harkattackfile.net/spreadsheets/pdf_directory/2020.01.02-biighetti.pdf",1]],"vocab_skipped":null,"word_histogram":{"counts":[3025,0,0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,6],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0]}},"kind":"text","n":6462,"n_null":3400,"n_unique":3051,"null_rate":0.5261528938409161,"stats":{"allcaps_rate":0.0,"boilerplate_rate":0.0,"duplicate_rate":0.0035924232527759633,"emoji_rate":0.0,"len_max":135,"len_mean":77.88667537557153,"len_median":77.0,"len_min":64,"len_p95":86.0,"n_duplicates":11,"n_empty":0,"one_word_rate":0.98791639451339,"readability_flesch_mean":-824.4066499999998,"url_rate":0.9996734160679295,"vocab_size":3091,"word_mean":1.0195950359242325,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.7% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.8% rows are a single word"},{"code":"allcaps","level":"info","message":"79.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"52.6% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Case Number.1","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[120,0,0,0,0,0,0,4,0,0,1884,0,0,0,7,0,0,0,1011,0,0,8,0,0,0,24,0,0,0,2,0,0,1,0,0,0,0,0,0,1],"edges":[7.0,7.275,7.55,7.825,8.1,8.375,8.65,8.925,9.2,9.475,9.75,10.025,10.3,10.575,10.850000000000001,11.125,11.4,11.675,11.95,12.225000000000001,12.5,12.775,13.05,13.325,13.600000000000001,13.875,14.15,14.425,14.700000000000001,14.975000000000001,15.25,15.525,15.8,16.075000000000003,16.35,16.625,16.9,17.175,17.450000000000003,17.725,18.0]},"near_unique":true,"sample":["2020.01.12","1900.08.21","1872.11.30.R","1885.04.16.R","1948.12.14.a","ND.0094","1947.04.06","1856.11.25.R","1884.08.18","1971.09.25","1928.11.15.R","1936.12.01","1959.05.30","2019.11.14","1958.10.12","1962.07.20","1967.06.00","1880.11.25","ND.0023","1950.06.25","1957.12.31","1971.00.00.b","1963.09.13","1966.11.00","1911.09.20","1898.07.15","1934.08.21","1962.04.07","1968.04.11.R","ND.0102","1920.07.14","1919.01.15","1974.04.12","1951.05.22","1954.00.00.b","1947.04.20","1818.05.22.R","1939.00.00.c","1946.00.00.a","1936.12.12","1887.03.12","1837.01.17","1959.03.00","1839.00.00.b","1637.00.00.R","ND.0083","1955.02.01.","1951.01.03","1963.06.01.b","1864.01.27.R"],"top_values":[],"top_words":[["1966.12.26",2],["1962.06.11.b",2],["&",2],["b",2],["1954.00.00",2],["g",2],["1952.08.04",2],["1923.00.00.a",2],["1920.00.00.b",2],["1915.07.06.a.r",2],["1913.08.27.r",2],["1907.10.16.r",2],["2020.01.17",1],["2020.01.16",1],["2020.01.13",1],["2020.01.12",1],["2020.01.07",1],["2020.01.05",1],["2020.01.03",1],["2020.01.02",1],["2019.12.30",1],["2019.12.28",1],["2019.12.26.b",1],["2019.12.26.a",1],["2019.12.24",1]],"vocab_skipped":null,"word_histogram":{"counts":[3057,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":6462,"n_null":3400,"n_unique":3054,"null_rate":0.5261528938409161,"stats":{"allcaps_rate":0.7896799477465709,"boilerplate_rate":0.0,"duplicate_rate":0.002612671456564337,"emoji_rate":0.0,"len_max":18,"len_mean":10.59079033311561,"len_median":10.0,"len_min":7,"len_p95":12.0,"n_duplicates":8,"n_empty":0,"one_word_rate":0.9983670803396473,"readability_flesch_mean":121.21492500000002,"url_rate":0.0,"vocab_size":3057,"word_mean":1.0022860875244939,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"99.8% of rows are unique strings"},{"code":"one_word","level":"warn","message":"99.8% rows are a single word"},{"code":"allcaps","level":"info","message":"79.0% rows are all-caps"},{"code":"null_rate","level":"warn","message":"52.6% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"Case Number.2","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[120,0,0,0,0,0,0,4,0,0,1884,0,0,0,7,0,0,0,1011,0,0,8,0,0,0,24,0,0,0,2,0,0,1,0,0,0,0,0,0,1],"edges":[7.0,7.275,7.55,7.825,8.1,8.375,8.65,8.925,9.2,9.475,9.75,10.025,10.3,10.575,10.850000000000001,11.125,11.4,11.675,11.95,12.225000000000001,12.5,12.775,13.05,13.325,13.600000000000001,13.875,14.15,14.425,14.700000000000001,14.975000000000001,15.25,15.525,15.8,16.075000000000003,16.35,16.625,16.9,17.175,17.450000000000003,17.725,18.0]},"near_unique":true,"sample":["2020.01.12","1900.08.21","1872.11.30.R","1885.04.16.R","1948.12.14.a","ND.0094","1947.04.06","1856.11.25.R","1884.08.18","1971.09.25","1928.11.15.R","1936.12.01","1959.05.30","2019.11.14","1958.10.12","1962.07.20","1967.06.00","1880.11.25","ND.0023","1950.06.25","1957.12.31","1971.00.00.b","1963.09.13","1966.11.00","1911.09.20","1898.07.15","1934.08.21","1962.04.07","1968.04.11.R","ND.0102","1920.07.14","1919.01.15","1974.04.12","1951.05.22","1954.00.00.b","1947.04.20","1818.05.22.R","1939.00.00.c","1946.00.00.a","1936.12.12","1887.03.12","1837.01.17","1959.03.00","1839.00.00.b","1637.00.00.R","ND.0083","1955.02.01.","1951.01.03","1963.06.01.b","1864.01.27.R"],"top_values":[],"top_words":[["1966.12.26",2],["1962.06.11.b",2],["&",2],["b",2],["1954.00.00",2],["g",2],["1923.00.00.a",2],["1920.00.00.b",2],["1915.07.06.a.r",2],["1913.08.27.r",2],["1907.10.16.r",2],["2020.01.17",1],["2020.01.16",1],["2020.01.13",1],["2020.01.12",1],["2020.01.07",1],["2020.01.05",1],["2020.01.03",1],["2020.01.02",1],["2019.12.30",1],["2019.12.28",1],["2019.12.26.b",1],["2019.12.26.a",1],["2019.12.24",1],["2019.12.21",1]],"vocab_skipped":null,"word_histogram":{"counts":[3057,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2],"edges":[1.0,1.0666666666666667,1.1333333333333333,1.2,1.2666666666666666,1.3333333333333333,1.4,1.4666666666666668,1.5333333333333332,1.6,1.6666666666666665,1.7333333333333334,1.8,1.8666666666666667,1.9333333333333333,2.0,2.0666666666666664,2.1333333333333333,2.2,2.2666666666666666,2.333333333333333,2.4,2.466666666666667,2.533333333333333,2.6,2.666666666666667,2.7333333333333334,2.8,2.8666666666666667,2.9333333333333336,3.0]}},"kind":"text","n":6462,"n_null":3400,"n_unique":3055,"null_rate":0.5261528938409161,"stats":{"allcaps_rate":0.7896799477465709,"boilerplate_rate":0.0,"duplicate_rate":0.0022860875244937948,"emoji_rate":0.0,"len_max":18,"len_mean":10.59079033311561,"len_median":10.0,"len_min":7,"len_p95":12.0,"n_duplicates":7,"n_empty":0,"one_word_rate":0.9983670803396473,"readability_flesch_mean":121.21492500000002,"url_rate":0.0,"vocab_size":3058,"word_mean":1.0022860875244939,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"52.6% null"}],"column":"original order","extras":{"histogram":{"counts":[163,162,163,162,163,162,163,162,163,162,163,162,163,162,163,162,163,162,110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,27],"edges":[3.0,165.475,327.95,490.42499999999995,652.9,815.375,977.8499999999999,1140.325,1302.8,1465.2749999999999,1627.75,1790.225,1952.6999999999998,2115.1749999999997,2277.65,2440.125,2602.6,2765.075,2927.5499999999997,3090.025,3252.5,3414.975,3577.45,3739.9249999999997,3902.3999999999996,4064.875,4227.349999999999,4389.825,4552.3,4714.775,4877.25,5039.724999999999,5202.2,5364.675,5527.15,5689.625,5852.099999999999,6014.575,6177.05,6339.525,6502.0]},"sample":[6487.0,6483.0,3036.0,3023.0,3011.0,3010.0,3003.0,3002.0,3000.0,2990.0,2989.0,2981.0,2971.0,2970.0,2965.0,2956.0,2950.0,2947.0,2934.0,2932.0,2906.0,2899.0,2887.0,2880.0,2865.0,2864.0,2852.0,2844.0,2836.0,2829.0,2827.0,2825.0,2823.0,2822.0,2820.0,2818.0,2812.0,2807.0,2799.0,2797.0,2787.0,2774.0,2773.0,2757.0,2754.0,2753.0,2744.0,2739.0,2734.0,2732.0,2725.0,2723.0,2722.0,2721.0,2709.0,2703.0,2701.0,2693.0,2692.0,2690.0,2672.0,2671.0,2667.0,2662.0,2660.0,2657.0,2656.0,2647.0,2640.0,2639.0,2637.0,2635.0,2634.0,2623.0,2622.0,2617.0,2612.0,2609.0,2608.0,2606.0,2593.0,2585.0,2583.0,2571.0,2570.0,2565.0,2562.0,2560.0,2556.0,2547.0,2546.0,2538.0,2535.0,2532.0,2514.0,2501.0,2484.0,2483.0,2476.0,2475.0,2468.0,2465.0,2431.0,2428.0,2411.0,2408.0,2406.0,2400.0,2382.0,2381.0,2363.0,2355.0,2345.0,2340.0,2334.0,2329.0,2324.0,2320.0,2316.0,2308.0,2303.0,2302.0,2301.0,2298.0,2292.0,2280.0,2267.0,2256.0,2250.0,2244.0,2237.0,2235.0,2234.0,2229.0,2220.0,2217.0,2211.0,2208.0,2202.0,2200.0,2198.0,2194.0,2193.0,2188.0,2181.0,2178.0,2167.0,2160.0,2158.0,2152.0,2145.0,2141.0,2140.0,2139.0,2136.0,2129.0,2126.0,2120.0,2119.0,2118.0,2114.0,2113.0,2106.0,2102.0,2090.0,2083.0,2080.0,2067.0,2053.0,2045.0,2035.0,2023.0,2017.0,1991.0,1985.0,1979.0,1967.0,1959.0,1957.0,1953.0,1949.0,1947.0,1938.0,1925.0,1919.0,1915.0,1910.0,1909.0,1902.0,1896.0,1885.0,1884.0,1878.0,1871.0,1861.0,1855.0,1845.0,1844.0,1838.0,1836.0,1829.0,1821.0,1819.0,1817.0,1810.0,1809.0,1808.0,1805.0,1803.0,1800.0,1799.0,1796.0,1777.0,1772.0,1771.0,1764.0,1762.0,1760.0,1753.0,1741.0,1738.0,1732.0,1714.0,1711.0,1710.0,1703.0,1699.0,1696.0,1691.0,1690.0,1689.0,1686.0,1680.0,1674.0,1668.0,1654.0,1651.0,1646.0,1633.0,1625.0,1596.0,1583.0,1578.0,1576.0,1574.0,1573.0,1568.0,1560.0,1557.0,1555.0,1549.0,1545.0,1542.0,1541.0,1538.0,1536.0,1519.0,1518.0,1511.0,1508.0,1507.0,1496.0,1483.0,1474.0,1460.0,1448.0,1446.0,1437.0,1435.0,1427.0,1421.0,1406.0,1398.0,1392.0,1391.0,1389.0,1386.0,1370.0,1363.0,1356.0,1331.0,1329.0,1307.0,1300.0,1295.0,1294.0,1293.0,1291.0,1289.0,1274.0,1272.0,1271.0,1265.0,1264.0,1258.0,1255.0,1254.0,1249.0,1241.0,1240.0,1222.0,1221.0,1216.0,1215.0,1210.0,1208.0,1204.0,1182.0,1180.0,1170.0,1167.0,1157.0,1155.0,1154.0,1152.0,1151.0,1150.0,1138.0,1132.0,1129.0,1123.0,1116.0,1110.0,1104.0,1100.0,1090.0,1088.0,1080.0,1079.0,1076.0,1071.0,1053.0,1051.0,1047.0,1046.0,1043.0,1038.0,1031.0,1029.0,1018.0,1007.0,993.0,982.0,977.0,971.0,969.0,965.0,962.0,954.0,948.0,944.0,939.0,937.0,936.0,934.0,931.0,929.0,924.0,921.0,919.0,913.0,906.0,900.0,897.0,878.0,877.0,875.0,869.0,861.0,860.0,859.0,857.0,853.0,852.0,849.0,847.0,844.0,842.0,837.0,834.0,830.0,825.0,824.0,809.0,801.0,799.0,777.0,767.0,763.0,760.0,759.0,753.0,743.0,733.0,730.0,722.0,698.0,684.0,680.0,678.0,671.0,668.0,661.0,660.0,645.0,637.0,635.0,624.0,613.0,596.0,593.0,591.0,579.0,573.0,553.0,540.0,533.0,524.0,496.0,491.0,481.0,478.0,461.0,460.0,456.0,450.0,432.0,421.0,419.0,418.0,414.0,408.0,394.0,380.0,377.0,360.0,352.0,348.0,346.0,341.0,307.0,303.0,301.0,299.0,298.0,288.0,286.0,283.0,282.0,280.0,277.0,273.0,272.0,267.0,263.0,255.0,249.0,237.0,236.0,229.0,227.0,215.0,202.0,188.0,182.0,181.0,177.0,171.0,169.0,155.0,149.0,148.0,144.0,142.0,139.0,137.0,134.0,128.0,114.0,111.0,107.0,103.0,101.0,97.0,90.0,82.0,79.0,69.0,64.0,44.0,39.0,38.0,37.0,35.0,33.0,26.0,25.0,22.0,17.0,6.0]},"kind":"numeric","n":6462,"n_null":3400,"n_unique":3061,"null_rate":0.5261528938409161,"stats":{"iqr":1530.5,"kurtosis":3.551229735272676,"max":6502.0,"mean":1563.814826910516,"median":1533.5,"min":3.0,"n_outliers":27,"outlier_rate":0.008817766165904637,"q1":768.25,"q3":2298.75,"skew":0.9878130829974056,"std":988.4096720719749,"zero_rate":0.0}},{"alerts":[{"code":"long_tail","level":"info","message":"2 singleton categories"},{"code":"null_rate","level":"warn","message":"100.0% null"}],"column":"Unnamed: 23","extras":{"singletons":2,"top_values":[["Teramo",1],["change filename",1]]},"kind":"categorical","n":6462,"n_null":6460,"n_unique":2,"null_rate":0.9996904982977406,"stats":{"cardinality":2,"entropy":1.0,"entropy_ratio":1.0,"top_rate":0.5,"top_value":"Teramo"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["Fatal (Y/N).top_values","Fatal (Y/N).stats.top_rate","Injury.top_values","Country.top_values","Area.top_values","Activity.top_values","Year.stats.max","Year.stats.kurtosis","Year.stats.n_outliers","row_count"],"featured_charts":[{"caption":"Look at the dominant 'N' slice versus fatal 'Y' and check for dirty values like 'M', 'F', and '2017' that indicate data entry errors.","column":"Fatal (Y/N)","kind":"donut"},{"caption":"Surfing and swimming together account for the majority of attacks \u2014 compare their counts to lower-risk activities like diving and fishing.","column":"Activity","kind":"bar"},{"caption":"The USA, Australia, and South Africa together dominate the record count; note how sharply the frequency drops after the top three.","column":"Country","kind":"bar"},{"caption":"Unprovoked attacks make up nearly 73% of all incidents \u2014 compare against provoked, invalid, and sea disaster categories.","column":"Type","kind":"donut"},{"caption":"Most records cluster in the modern era, but look for the extreme outliers (including a year value of 3019) that skew the distribution.","column":"Year","kind":"histogram"}],"model":"anthropic:default","narrative":"This dataset is the Global Shark Attack File (GSAF), containing 6,462 records of shark attack incidents spanning centuries of documented cases. The most important thing to examine first is the attack outcome: roughly 75% of incidents are non-fatal ('N'), but 1,400 are recorded as fatal ('Y'), and the 'Injury' column reveals 823 entries simply marked 'FATAL' \u2014 worth cross-checking for data consistency. A second priority is the geographic and activity breakdown: the USA dominates with 2,310 cases (36%), Florida alone accounts for 1,076, and surfing (1,025) and swimming (932) are by far the most dangerous activities. The 'Year' column carries a data quality warning \u2014 a maximum value of 3019 and high kurtosis signal outliers that should be cleaned before any time-series analysis.","scope":"dataset","target":"__global__"},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","n_unique","n","n_duplicates","allcaps_rate","top_words","one_word_rate","alerts"],"model":"anthropic:default","narrative":"This column appears to be a duplicate or alternate version of a case number field, with values formatted as date-like codes (e.g., '1966.12.26', '1923.00.00.a') suggesting archival case identifiers tied to dates with alphabetic suffixes for disambiguation. A striking 52.62% null rate makes this column unreliable for most analyses, and the near-unique flag (3,054 unique values across 6,462 rows) combined with only 8 true duplicates confirms it functions as a quasi-identifier. The allcaps rate of 78.97% is notable given that values appear to be alphanumeric codes rather than natural language, and the '.1' suffix in the column name strongly suggests this is a duplicated column from a merge or pivot operation.","role":"identifier","scope":"column","target":"Case Number.1","treatment":"Investigate overlap with the original 'Case Number' column; if redundant, drop; otherwise impute nulls cautiously or use as a secondary join key."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","n","allcaps_rate","one_word_rate","top_words","n_duplicates","vocab_size","duplicate_rate"],"model":"anthropic:default","narrative":"This column appears to be a structured case number identifier, likely encoding a date-based reference system (e.g., '1966.12.26', '1915.07.06.a.r') typical of archival, legal, or historical record catalogues. With 52.62% null rate across 6,462 rows and only 3,055 unique values out of 3,058 vocabulary size, the column is near-unique but severely incomplete. The 78.97% all-caps rate combined with date-like tokens and alphabetic suffixes (a, b, r) suggests a custom alphanumeric coding scheme rather than free text. Only 7 duplicate values exist, making this effectively an identifier where present.","role":"identifier","scope":"column","target":"Case Number.2","treatment":"Retain as a join/lookup key; impute or flag nulls separately; do not encode numerically."},{"confidence":"high","critiques":[],"evidence_keys":["url_rate","null_rate","one_word_rate","n_unique","n_duplicates","top_words","alerts"],"model":"anthropic:default","narrative":"This column contains URLs linking to PDF source documents in a shark attack file directory (sharkattackfile.net/spreadsheets/pdf_directory/), serving as citation or evidence links for individual incident records. Over half the rows are null (52.62%), indicating many records lack a linked source document. Nearly all non-null values are single-token URLs (one_word_rate 0.988, url_rate 0.9997), with very few duplicates (11 duplicates out of 3,051 unique values), consistent with per-incident citation links. The high null rate is the key analyst concern \u2014 roughly half of incidents have no associated PDF reference.","role":"metadata","scope":"column","target":"href","treatment":"Exclude from predictive modelling; retain as a provenance/citation field, or engineer a binary 'has_source_pdf' indicator from non-null presence."},{"confidence":"high","critiques":[],"evidence_keys":["url_rate","one_word_rate","null_rate","n_unique","n_duplicates","readability_flesch_mean","top_words"],"model":"anthropic:default","narrative":"This column contains hyperlink formulas pointing to PDF source documents on sharkattackfile.net, each URL referencing a dated incident report (e.g., '1935.06.05.r-solomonislands.pdf'). Over half the rows (52.62%) are null, meaning many records lack a linked source document. Values are nearly all single-token URLs (one_word_rate 0.9879, url_rate 0.9997), and the extremely negative Flesch readability score (-820.18) confirms these are machine-generated URL strings, not natural text. Only 11 duplicate values exist across 3,051 unique entries, suggesting most cited PDFs are distinct incident references.","role":"metadata","scope":"column","target":"href formula","treatment":"Extract raw URL string from formula syntax before use; treat as a source-citation reference field and consider joining or flagging unsourced rows (52.62% null) separately."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","duplicate_rate","n_duplicates","allcaps_rate","top_words","len_mean","len_max"],"model":"anthropic:default","narrative":"This column is a case identifier, with values that appear to encode dates in YYYY.MM.DD format (e.g., '2019.10.08'), suggesting case numbers tied to filing or incident dates. With 6,442 unique values out of 6,462 rows and a null rate of 0.0003, it is near-unique and functions as a primary key. The 18 duplicate values (duplicate_rate 0.0028) are a mild anomaly worth investigating \u2014 one value '2012.09.02.b' hints that suffixes are used to disambiguate same-date cases, implying the deduplication logic is not fully consistent. The allcaps_rate of 0.748 suggests a mix of formatting styles across records.","role":"identifier","scope":"column","target":"Case Number","treatment":"Use as a case-level join key; flag the 18 duplicates for deduplication before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","null_rate","stats.duplicate_rate","stats.n_duplicates","n_unique","language_counts","alerts"],"model":"anthropic:default","narrative":"This column records shark species (and incident validity notes) from what appears to be a shark attack dataset, with values ranging from specific species ('White shark', 'Tiger shark', 'Bull shark') to free-text qualifiers like 'Shark involvement not confirmed' and 'Invalid'. The null rate is severe at 45.25%, and 58.56% of non-null values are duplicates \u2014 expected for a species label with only 1,466 unique values across 6,462 rows. More surprising is the multilingual alert: while 2,582 values are classified as English, 14 are German, 18 Finnish, 11 Chinese, and 8 Turkish among others, suggesting some records were entered in non-English locales or scraped from multilingual sources. The mix of species names, size descriptions ('4\\' shark', '1.8 m [6\\'] shark'), and incident-status phrases means this column is semantically heterogeneous and will require parsing or splitting before use.","role":"label","scope":"column","target":"Species ","treatment":"Split into a normalized species category and a separate incident-validity flag; impute or exclude the 45.25% nulls based on task context."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","one_word_rate","top_words","readability_flesch_mean","n_duplicates","word_mean"],"model":"anthropic:default","narrative":"This column contains PDF filenames or partial file paths, evidenced by the '.pdf' suffixes in the top words and a mean token count of ~1 word per value. Over half the rows (52.55%) are null, and with 3,054 unique values out of 3,054 non-null distinct tokens the column is near-unique, functioning more like a document reference key than a descriptive field. The extremely negative Flesch readability score (\u221266.81) is consistent with structured filename strings rather than natural language. A small number of duplicates (12) suggest some documents are referenced by multiple records.","role":"foreign_key","scope":"column","target":"pdf","treatment":"Use as a document reference key to join or retrieve associated PDF files; impute or flag nulls before any join."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","allcaps_rate","language_counts","n","n_unique","null_rate"],"model":"anthropic:default","narrative":"This column describes the outcome or nature of injuries in what appears to be a shark attack dataset, containing free-text descriptions ranging from 'FATAL' to specific anatomical bite locations (e.g., 'Left foot bitten', 'Leg bitten'). The dominant value is 'FATAL' appearing 823 times, making it by far the most frequent entry. Two signals stand out: a high duplicate rate of 41.9% (2,695 duplicates across 6,462 rows) driven by repetitive categorical-style phrases, and an all-caps rate of 13.1% suggesting inconsistent data entry conventions. Additionally, 496 German-language entries co-exist with 3,812 English ones, indicating multilingual sourcing that will complicate any text-based analysis.","role":"label","scope":"column","target":"Injury","treatment":"Normalize case, map high-frequency values to a structured severity/outcome taxonomy, and handle German entries separately or translate before any text embedding or categorical encoding."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","duplicate_rate","one_word_rate","n_unique","n","null_rate","len_median","len_max","word_median"],"model":"anthropic:default","narrative":"This column captures the water-based activity a person was engaged in at the time of an incident (likely a shark attack or drowning registry), dominated by Surfing (1,025) and Swimming (932) with a small tail of descriptive phrases. Despite being labelled 'text', it behaves largely as a categorical label: 62.9% of values are single words, only 1,516 unique values exist across 6,462 rows, and the duplicate rate is 74.3%, indicating a loosely controlled vocabulary rather than a strict enum. The median string length of 8 characters versus a max of 254 suggests a mix of clean category entries and occasional free-text annotations, which may require normalisation before use.","role":"label","scope":"column","target":"Activity","treatment":"Standardise to a controlled vocabulary by clustering near-duplicates (e.g. 'Diving' vs 'Scuba diving'), then encode as a categorical feature; impute or flag the 8.54% nulls separately."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","stats.duplicate_rate","stats.n_duplicates","n_unique","n","language_counts","alerts"],"model":"anthropic:default","narrative":"This column records the investigator or data source credited for each shark attack incident, typically formatted as an abbreviated name plus an organizational affiliation (e.g., 'C. Moore, GSAF'). GSAF (Global Shark Attack File) dominates the top entries and appears in 983 word tokens, making it the primary contributing organization. The duplicate rate of 22.7% (1,464 duplicates across 6,462 rows) is expected given a finite set of investigators filing multiple reports, but the multilingual alert across 22 detected languages is notable \u2014 'en' accounts for 4,457 entries while 'es' (134), 'fr' (100), 'de' (56), and 'it' (62) reflect international source attribution or non-English investigator names. The near-unique cardinality (4,979 unique values out of 6,462) suggests many entries are one-off source combinations rather than standardized identifiers.","role":"metadata","scope":"column","target":"Investigator or Source","treatment":"Normalize organization affiliations (e.g., extract 'GSAF' tag) and standardize investigator name formats before using as a categorical grouping variable."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","stats.duplicate_rate","stats.n_duplicates","language_counts","null_rate","n","n_unique"],"model":"anthropic:default","narrative":"This column captures geographic incident locations, predominantly structured as 'City, County' strings\u2014strongly associated with shark attack or ocean incident records given top values like 'New Smyrna Beach, Volusia County' (181 occurrences) and dominant words 'county', 'beach', 'island', 'bay'. The duplicate rate is notably high at ~29.9% (1,769 duplicates out of 6,462 rows), reflecting repeated incidents at the same hotspot locations rather than data error. The multilingual alert is triggered by automated language detection misclassifying short geographic names (e.g. 'Durban', 'Boa Viagem, Recife') as non-English\u20143,746 of 6,462 values are detected as English, with the remainder split across 29 other 'languages' due to short-string ambiguity. Null rate is 8.43%, which may represent unknown or offshore incident locations.","role":"label","scope":"column","target":"Location","treatment":"Normalize to canonical 'City, County, Country' format, then use as a categorical grouping variable or geocode for spatial analysis."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_value","top_rate","entropy_ratio","alerts","top_values"],"model":"anthropic:default","narrative":"This column captures time-of-day information, but it is stored inconsistently: values mix coarse labels ('Afternoon', 'Morning') with specific clock times in 'HhMM' format ('11h00', '16h30'), yielding 366 unique values across 6,462 rows. The null rate is severe at 52.49%, meaning over half of all records are missing a time entirely. The top value 'Afternoon' accounts for only 6.29% of rows, and entropy ratio is 0.77, indicating a long tail of rarely-seen time strings \u2014 likely data entry inconsistency across sources or time periods.","role":"feature","scope":"column","target":"Time","treatment":"Standardise to 24h numeric minutes-since-midnight, map label categories ('Morning', 'Afternoon') to representative values or a separate flag, then impute or model missingness explicitly given 52.49% null rate."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","n_unique","len_max","len_mean","one_word_rate","word_mean"],"model":"anthropic:default","narrative":"This column contains free-form date annotations for what appears to be a historical or archival dataset, storing dates in highly inconsistent formats: bare years (e.g., '1957', '1942'), structured dates ('05-Oct-2003'), vague phrases ('Before 1958', 'No date', 'ca.', 'summer', 'late'). The top word 'reported' appearing 559 times suggests many values follow a pattern like 'reported [date]', which is a red flag for downstream parsing. With 14.1% duplicate rate (909 duplicates across 5,552 unique values) and a max length of 64 characters, this column cannot be safely cast to a datetime type without substantial normalization work.","role":"metadata","scope":"column","target":"Date","treatment":"Parse and normalize into structured date fields using regex rules per format pattern; flag unparseable values ('No date', 'ca.', 'reported \u2026') as null or uncertain."},{"confidence":"medium","critiques":[],"evidence_keys":["null_rate","n_unique","n","skew","kurtosis","n_outliers","min","max","zero_rate"],"model":"anthropic:default","narrative":"This column appears to be a positional or sequence index assigned to records, likely reflecting the original sort order of items in a source dataset. The most striking issue is a 52.62% null rate \u2014 over half the rows carry no value, which is highly anomalous for an ordering field and suggests either a join that left many rows unmatched or that ordering was only recorded for a subset of records. With only 3,061 unique values across 6,462 rows (and ~3,061 non-null rows expected given the null rate), duplicates exist even among non-null entries, undermining uniqueness as a sequence key. The distribution is mildly right-skewed (skew 0.99) with notable leptokurtosis (3.55) and 27 outliers, hinting at a few unusually large order values relative to the bulk.","role":"metadata","scope":"column","target":"original order","treatment":"Investigate source of 52.62% nulls before use; if retaining, treat as an optional sort key and do not use as a unique identifier given duplicate values."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","top_value","top_rate","n_unique","cardinality","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column represents age stored as a categorical (string) type rather than a numeric type, covering 154 distinct values across 6,462 rows. The most striking issue is a 44.43% null rate, flagged as an alert, meaning nearly half the records lack an age value. The distribution skews young \u2014 the top values cluster tightly between ages 15\u201325, with '17' being most frequent at only 4.46% of rows, suggesting a youth-focused population (e.g., students or a juvenile-related dataset). The high entropy ratio of 0.80 confirms values are spread broadly across the 154 categories despite the youth concentration.","role":"feature","scope":"column","target":"Age","treatment":"Cast to integer, impute or flag nulls (44.43% missing requires explicit strategy), then treat as ordinal or numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["min","max","zero_rate","median","q1","q3","iqr","skew","kurtosis","n_outliers","outlier_rate"],"model":"anthropic:default","narrative":"This column represents a publication or production year for 6,462 records, with values spanning a plausible modern range centered around a median of 1980 and IQR of 1943\u20132006. Two signals are highly surprising: a minimum of 0.0 (nearly 2% zero rate, almost certainly sentinel/missing-year placeholders) and a maximum of 3019.0, which is a data-entry error (likely a typo for a 4-digit year such as 2019). These outliers (266 records, ~4.1%) drive extreme negative skew (\u22126.55) and extraordinary kurtosis (42.54), masking what is otherwise a fairly clean temporal distribution.","role":"feature","scope":"column","target":"Year","treatment":"Null-code zeros and values outside a valid range (e.g., < 1800 or > 2100), correct obvious typos like 3019, then use as an ordinal or numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_rate","n_unique","n","null_rate","alerts","entropy_ratio"],"model":"anthropic:default","narrative":"This column represents geographic sub-national regions (states, provinces) drawn from multiple countries \u2014 the US (Florida, Hawaii, California, South Carolina), Australia (New South Wales, Queensland, Western Australia), and South Africa (KwaZulu-Natal, Western Cape Province, Eastern Cape Province). Florida dominates at 17.9% of records, while 810 unique values against 6,462 rows signals a severe long-tail distribution where the vast majority of areas appear only rarely. The 7.16% null rate and high geographic diversity across at least three countries suggest this dataset is multinational in scope.","role":"feature","scope":"column","target":"Area","treatment":"Encode with frequency or target encoding; consider grouping rare areas (long-tail) into an 'Other' bucket or rolling up to country level to reduce cardinality from 810 classes."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","duplicate_rate","n_duplicates","null_rate","n_unique","n"],"model":"anthropic:default","narrative":"This column is a 'Name' field from what appears to be a historical incident or casualty dataset (likely maritime, given top values like 'boat', 'sailor', 'a sailor'). It is heavily contaminated with non-name entries: the most frequent value is 'male' (579 occurrences), followed by 'female' (106), 'boy' (23), '2 males' (19), and 'boat' (14), indicating that gender/role descriptors were freely mixed with actual proper names. The duplicate rate of 14.5% (908 duplicates across 5,339 unique values) and a 3.33% null rate further confirm this column is inconsistently populated and not a clean identifier.","role":"label","scope":"column","target":"Name","treatment":"Split into two derived columns\u2014one for proper names, one for role/gender descriptors\u2014before any modelling or grouping."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.min","stats.max","stats.skew","stats.kurtosis","stats.mean","stats.median","stats.n_outliers","stats.zero_rate"],"model":"anthropic:default","narrative":"This column is a row index \u2014 a sequential integer identifier running from 0 to 6461 with 6462 unique values and no nulls, perfectly matching the row count. Its distribution is exactly uniform (skew = 0.0, kurtosis \u2248 \u22121.2, mean = median = 3230.5, zero outliers), confirming it was generated as a positional index rather than carrying any domain meaning. The single 'zero' (zero_rate \u2248 0.00015) is simply row 0. There is no analytical signal here.","role":"identifier","scope":"column","target":"index","treatment":"Drop before modelling; carries no predictive information."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","cardinality","top_values","n","null_rate"],"model":"anthropic:default","narrative":"This column classifies shark attack incidents by the nature of the encounter, with 12 distinct categories across 6,462 records. It is heavily dominated by 'Unprovoked' at 73% of all records, creating notable class imbalance. A surprising data quality issue is the fragmentation of watercraft-related incidents across three near-synonymous labels \u2014 'Watercraft' (142), 'Boat' (109), and 'Boating' (92) \u2014 which are almost certainly the same category and should be consolidated. The 'Invalid' category (552 records, ~8.5%) also warrants attention as it may represent records that should be excluded from incident analysis.","role":"label","scope":"column","target":"Type","treatment":"Consolidate 'Boat', 'Boating', and 'Watercraft' into a single category; consider excluding or flagging 'Invalid' records; one-hot encode for modelling given severe class imbalance."},{"confidence":"high","critiques":[],"evidence_keys":["top_value","top_rate","n_unique","n","null_rate","entropy_ratio","top_values"],"model":"anthropic:default","narrative":"This column records the country of origin or occurrence for each record, with 205 distinct country values across 6,462 rows. The distribution is heavily skewed: USA alone accounts for 36% of records (2,310), followed by AUSTRALIA at 21% (1,374) and SOUTH AFRICA at 9% (585), meaning these three countries together represent roughly two-thirds of the dataset. The entropy ratio of 0.51 confirms moderate concentration despite 205 unique values, and the near-zero null rate (0.79%) means coverage is excellent.","role":"feature","scope":"column","target":"Country","treatment":"One-hot encode top countries and group tail countries into a residual 'OTHER' category before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_rate","null_rate","cardinality","n"],"model":"anthropic:default","narrative":"This column is a binary fatality flag for incidents, expected to hold only 'Y' or 'N' values. The dominant value is 'N' (4,439 occurrences, 75% of records), with 'Y' accounting for 1,400 cases (~21.7%). Surprising data quality issues exist: 5 rows contain clearly erroneous values ('F', 'M', '2017', lowercase 'y') suggesting data entry errors or row misalignment, and 71 rows are labeled 'UNKNOWN'. The 8.46% null rate adds further incompleteness.","role":"label","scope":"column","target":"Fatal (Y/N)","treatment":"Standardise 'y' \u2192 'Y', investigate and recode/drop 'F', 'M', '2017' entries, decide on treatment of 'UNKNOWN' and nulls, then binarise (Y=1, N=0) for modelling."}],"providers":["anthropic:default"],"total_usage":{"completion_tokens":6967,"prompt_tokens":30187,"total_tokens":37154}},"language_counts":{"af":5,"ca":14,"ceb":13,"cs":4,"cy":10,"de":627,"en":14597,"eo":7,"es":219,"eu":13,"fi":26,"fr":177,"hr":4,"hu":5,"id":15,"it":123,"ja":16,"jbo":4,"lv":3,"ms":8,"nl":45,"no":1,"pl":18,"pt":80,"ro":9,"ru":31,"sh":3,"sl":1,"sq":7,"sv":23,"sw":2,"th":1,"tl":1,"tr":13,"uk":1,"vi":12,"war":4,"zh":22},"meta":{"generated_at":"2026-06-21T23:26:09+00:00","mode":"full","row_count":6462,"sampled_rows":6462,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/wild/animal_attacks/shark_attacks_gsaf.csv"},"notes":[],"saturn_version":"0.2.0","schema":{"Activity":"text","Age":"categorical","Area":"categorical","Case Number":"text","Case Number.1":"text","Case Number.2":"text","Country":"categorical","Date":"text","Fatal (Y/N)":"categorical","Injury":"text","Investigator or Source":"text","Location":"text","Name":"text","Species ":"text","Time":"categorical","Type":"categorical","Unnamed: 23":"categorical","Unnamed: 9":"categorical","Year":"numeric","href":"text","href formula":"text","index":"numeric","original order":"numeric","pdf":"text"}}
