{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"allcaps","level":"info","message":"99.6% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"full_name","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[6070,0,21295,0,10737,0,2544,0,4,0,0,20,0,17,0,28,0,29,0,0,29,0,17,0,10,0,8,0,9,0,0,4,0,3,0,1,0,1,0,1],"edges":[16.0,16.45,16.9,17.35,17.8,18.25,18.7,19.15,19.6,20.05,20.5,20.95,21.4,21.85,22.3,22.75,23.2,23.65,24.1,24.55,25.0,25.450000000000003,25.9,26.35,26.8,27.25,27.700000000000003,28.15,28.6,29.05,29.5,29.950000000000003,30.4,30.85,31.3,31.75,32.2,32.650000000000006,33.1,33.55,34.0]},"near_unique":true,"sample":["  3553 Mera (1985 JA)","       (2022 WG2)","       (2024 MB1)","       (2023 VK3)","       (2017 UT)","       (2025 UL3)","       (2018 AL3)","       (2024 WD)","       (2023 VB7)","358471 (2007 NS4)","       (2020 WT4)","       (2019 US12)","       (2014 GK45)","  8013 Gordonmoore (1990 KA)","       (2014 QX363)","       (2009 TM8)","859716 (2013 RZ73)","       (2024 AE3)","       (2025 YY8)","       (2017 FN127)","       (2014 YY43)","369296 (2009 SU19)","       (2008 DY)","       (1998 VS)","       (2022 CY4)","       (2022 YY5)","       (2020 CZ2)","       (2009 WM105)","524603 (2003 QA31)","       (2025 UJ1)","       (2021 PL40)","       (2021 RB10)","       (2021 RV9)","       (2017 AP20)","       (2016 HN)","       (2017 WX28)","       (2025 HV2)","       (2019 ME1)","       (2018 CH14)","       (2019 UT1)","       (2023 TM2)","       (2025 DH15)","       (2014 DM112)","       (2025 DN6)","       (2025 QF3)","       (2025 UX107)","       (2015 YL)","       (2017 AG21)","       (2007 VO84)","       (2024 SS2)"],"top_values":[],"top_words":[["(2024",1607],["(2025",1594],["(2022",1578],["(2021",1487],["(2023",1423],["(2020",1415],["(2019",1232],["(2017",993],["(2016",979],["(2018",901],["(2015",793],["(2014",720],["(2013",516],["(2012",491],["(2010",448],["(2011",433],["(2008",389],["(2009",371],["(2007",336],["(2005",316],["(2006",312],["(2004",275],["(2002",245],["(2001",209],["(2003",198]],"vocab_skipped":null,"word_histogram":{"counts":[3230,0,0,0,0,252,0,0,0,0,79,0,0,0,0,108,0,0,0,0,4,0,0,0,0,0,0,0,0,37154],"edges":[3.0,3.2,3.4,3.6,3.8,4.0,4.2,4.4,4.6,4.8,5.0,5.2,5.4,5.6,5.800000000000001,6.0,6.2,6.4,6.6,6.800000000000001,7.0,7.2,7.4,7.6000000000000005,7.800000000000001,8.0,8.2,8.4,8.600000000000001,8.8,9.0]}},"kind":"text","n":40827,"n_null":0,"n_unique":40827,"null_rate":0.0,"stats":{"allcaps_rate":0.9955911529135131,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":34,"len_mean":17.27251573713474,"len_median":17.0,"len_min":16,"len_p95":19.0,"n_duplicates":0,"n_empty":0,"one_word_rate":0.0,"readability_flesch_mean":119.48422500000001,"url_rate":0.0,"vocab_size":12613,"word_mean":8.47858035123815,"word_median":9.0}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"neo","extras":{"singletons":0,"top_values":[["Y",40827]]},"kind":"categorical","n":40827,"n_null":0,"n_unique":1,"null_rate":0.0,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"Y"}},{"alerts":[],"column":"pha","extras":{"singletons":0,"top_values":[["N",38162],["Y",2534]]},"kind":"categorical","n":40827,"n_null":131,"n_unique":2,"null_rate":0.003208660935165454,"stats":{"cardinality":2,"entropy":0.33637737755463615,"entropy_ratio":0.33637737755463615,"top_rate":0.9377334381757421,"top_value":"N"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"80.8% duplicate strings"}],"column":"e","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40827,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[5.5,5.525,5.55,5.575,5.6,5.625,5.65,5.675,5.7,5.725,5.75,5.775,5.8,5.825,5.85,5.875,5.9,5.925,5.95,5.975,6.0,6.025,6.05,6.075,6.1,6.125,6.15,6.175,6.2,6.225,6.25,6.275,6.3,6.325,6.35,6.375,6.4,6.425,6.45,6.475,6.5]},"near_unique":false,"sample":["0.3200","0.4223","0.2275","0.1865","0.5716","0.1836","0.2470","0.0406","0.7137","0.5972","0.1722","0.4944","0.3488","0.4321","0.5073","0.4061","0.5339","0.5046","0.4205","0.2824","0.4901","0.9000","0.6130","0.2773","0.3330","0.7973","0.4199","0.1567","0.5471","0.5948","0.3736","0.3821","0.4227","0.7482","0.3688","0.2924","0.6060","0.6153","0.5773","0.3083","0.2215","0.6990","0.3441","0.2898","0.5742","0.4138","0.6060","0.1127","0.1850","0.2619"],"top_values":[["0.5298",21],["0.5964",18],["0.4826",18],["0.5544",18],["0.5325",18],["0.4647",18],["0.4545",17],["0.4766",17],["0.5479",17],["0.4609",17],["0.5656",17],["0.5131",17],["0.3803",17],["0.5724",17],["0.4656",17],["0.4880",17],["0.5292",17],["0.5485",17],["0.5024",16],["0.5570",16]],"top_words":[["0.4771",11],["0.5404",11],["0.4726",11],["0.6075",11],["0.5479",11],["0.5569",11],["0.5357",10],["0.5292",10],["0.2687",10],["0.5724",10],["0.5120",10],["0.4864",10],["0.5135",10],["0.5830",10],["0.4949",10],["0.4724",10],["0.5196",10],["0.5240",10],["0.5351",10],["0.2729",10],["0.5544",10],["0.4251",10],["0.3378",10],["0.4367",10],["0.4928",10]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40827,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":40827,"n_null":0,"n_unique":7849,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.8077497734342469,"emoji_rate":0.0,"len_max":6,"len_mean":6.0,"len_median":6.0,"len_min":6,"len_p95":6.0,"n_duplicates":32978,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":6736,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"99.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"89.8% duplicate strings"}],"column":"a","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,378,0,0,0,0,0,0,0,3428,0,0,0,0,0,0,0,33988,0,0,0,0,0,0,3011],"edges":[1.0,1.125,1.25,1.375,1.5,1.625,1.75,1.875,2.0,2.125,2.25,2.375,2.5,2.625,2.75,2.875,3.0,3.125,3.25,3.375,3.5,3.625,3.75,3.875,4.0,4.125,4.25,4.375,4.5,4.625,4.75,4.875,5.0,5.125,5.25,5.375,5.5,5.625,5.75,5.875,6.0]},"near_unique":false,"sample":["1.645","2.17","0.8691","0.8551","2.449","1.018","1.148","1.093","2.471","1.875","0.8686","1.722","1.618","2.2","2.606","1.552","1.369","2.011","1.709","0.9373","1.272","2.079","2.688","1.4","1.813","2.538","1.051","0.9","1.737","1.829","1.316","1.629","1.707","2.879","1.424","1.507","2.045","2.245","2.413","1.15","1.276","2.235","1.654","1.351","2.395","1.086","2.618","1.08","1.308","1.128"],"top_values":[["1.299",46],["1.424",45],["1.698",42],["1.263",42],["1.414",41],["1.495",40],["1.555",40],["1.448",40],["1.243",40],["1.427",39],["1.321",39],["1.707",39],["1.66",39],["1.344",39],["1.473",39],["1.193",39],["1.291",39],["1.576",38],["1.562",38],["1.329",38]],"top_words":[["1.291",29],["1.299",26],["1.297",25],["1.263",23],["1.329",23],["1.473",23],["1.66",23],["1.243",23],["1.279",23],["2.133",22],["1.495",22],["1.29",21],["1.512",21],["1.562",21],["1.671",21],["1.344",21],["1.414",21],["1.654",21],["1.301",20],["1.482",20],["1.707",20],["1.193",20],["1.285",20],["1.576",20],["1.253",20]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40827,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":40827,"n_null":0,"n_unique":4170,"null_rate":0.0,"stats":{"allcaps_rate":0.9994611409116516,"boilerplate_rate":0.0,"duplicate_rate":0.8978617091630539,"emoji_rate":0.0,"len_max":6,"len_mean":4.969113576799667,"len_median":5.0,"len_min":1,"len_p95":6.0,"n_duplicates":36657,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3344,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"89.0% duplicate strings"}],"column":"i","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[23372,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17448,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7],"edges":[4.0,4.05,4.1,4.15,4.2,4.25,4.3,4.35,4.4,4.45,4.5,4.55,4.6,4.65,4.7,4.75,4.8,4.85,4.9,4.95,5.0,5.05,5.1,5.15,5.2,5.25,5.3,5.35,5.4,5.45,5.5,5.55,5.6,5.65,5.7,5.75,5.8,5.85,5.9,5.95,6.0]},"near_unique":false,"sample":["36.77","25.17","1.21","2.81","14.36","6.06","6.09","16.35","1.06","5.78","0.80","11.24","4.86","7.60","2.06","2.47","25.12","6.53","3.32","9.56","44.20","14.62","8.80","6.83","9.74","10.19","14.96","9.24","8.34","0.68","9.52","36.18","7.19","13.86","12.31","26.26","2.12","7.68","1.86","5.36","8.55","1.25","16.71","2.92","0.54","25.99","1.93","12.99","28.32","2.83"],"top_values":[["6.07",43],["2.12",42],["2.26",41],["2.76",40],["4.35",39],["2.99",39],["5.16",39],["5.72",38],["4.85",38],["3.83",38],["3.40",38],["5.17",38],["3.85",37],["1.97",37],["3.50",37],["5.71",37],["5.51",37],["6.02",37],["6.53",37],["4.77",36]],"top_words":[["6.24",25],["6.22",24],["3.83",24],["4.46",23],["6.73",23],["4.85",23],["3.26",22],["2.76",22],["2.99",21],["2.13",21],["3.11",21],["4.00",21],["1.97",21],["5.17",20],["3.65",20],["4.49",20],["3.04",20],["8.66",20],["5.50",19],["4.76",19],["6.87",19],["4.22",19],["3.40",19],["5.41",19],["7.14",19]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40827,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":40827,"n_null":0,"n_unique":4489,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.8900482523820021,"emoji_rate":0.0,"len_max":6,"len_mean":4.427707154579077,"len_median":4.0,"len_min":4,"len_p95":5.0,"n_duplicates":36338,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":3827,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"64.2% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"97.5% duplicate strings"}],"column":"per","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[26231,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1230,0,0,0,0,0,0,13220],"edges":[3.0,3.125,3.25,3.375,3.5,3.625,3.75,3.875,4.0,4.125,4.25,4.375,4.5,4.625,4.75,4.875,5.0,5.125,5.25,5.375,5.5,5.625,5.75,5.875,6.0,6.125,6.25,6.375,6.5,6.625,6.75,6.875,7.0,7.125,7.25,7.375,7.5,7.625,7.75,7.875,8.0]},"near_unique":false,"sample":["770","1.17e+03","296","289","1.4e+03","375","449","418","1.42e+03","938","296","825","752","1.19e+03","1.54e+03","706","585","1.04e+03","816","331","524","1.09e+03","1.61e+03","605","892","1.48e+03","394","312","836","904","551","759","815","1.78e+03","621","676","1.07e+03","1.23e+03","1.37e+03","450","526","1.22e+03","777","574","1.35e+03","413","1.55e+03","410","546","438"],"top_values":[["1.13e+03",312],["1.12e+03",311],["1.15e+03",306],["1.14e+03",296],["1.07e+03",288],["1.17e+03",278],["1.02e+03",278],["1.06e+03",275],["1.11e+03",274],["1.16e+03",274],["1.04e+03",273],["1.2e+03",270],["1.05e+03",266],["1.01e+03",257],["1.19e+03",253],["1.03e+03",246],["1.25e+03",245],["1.21e+03",243],["1.24e+03",242],["1.26e+03",242]],"top_words":[["1.14e+03",158],["1.15e+03",156],["1.12e+03",147],["1.02e+03",143],["1.2e+03",142],["1.11e+03",141],["1.17e+03",141],["1.13e+03",139],["1.04e+03",135],["1.06e+03",133],["1.07e+03",130],["1.42e+03",130],["1.05e+03",130],["1.01e+03",127],["1.24e+03",126],["1.19e+03",125],["1.16e+03",124],["1.26e+03",122],["1.25e+03",120],["1.22e+03",119],["1.36e+03",119],["1.03e+03",118],["1.08e+03",117],["1.41e+03",115],["1.23e+03",114]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40827,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":40827,"n_null":0,"n_unique":1025,"null_rate":0.0,"stats":{"allcaps_rate":0.6424914884757635,"boilerplate_rate":0.0,"duplicate_rate":0.9748940652019497,"emoji_rate":0.0,"len_max":8,"len_mean":4.746687241286404,"len_median":3.0,"len_min":3,"len_p95":8.0,"n_duplicates":39802,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":985,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"95.9% duplicate strings"}],"column":"H","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40823],"edges":[4.0,4.025,4.05,4.075,4.1,4.125,4.15,4.175,4.2,4.225,4.25,4.275,4.3,4.325,4.35,4.375,4.4,4.425,4.45,4.475,4.5,4.525,4.55,4.575,4.6,4.625,4.65,4.675,4.7,4.725,4.75,4.775,4.8,4.825,4.85,4.875,4.9,4.925,4.95,4.975,5.0]},"near_unique":false,"sample":["16.40","26.03","24.07","21.73","25.30","25.30","25.10","28.18","20.91","19.83","24.50","22.30","24.40","17.09","27.20","28.40","19.05","26.96","24.89","26.80","20.20","18.08","20.90","22.45","23.97","20.65","25.20","23.50","19.49","27.64","23.05","23.84","20.99","27.16","26.10","20.95","23.92","23.60","19.43","24.09","28.10","26.12","23.20","30.17","28.52","28.24","24.60","24.50","23.60","28.98"],"top_values":[["24.80",304],["25.40",299],["24.60",298],["24.20",296],["25.50",295],["24.50",291],["25.30",289],["24.30",286],["25.00",286],["24.40",280],["24.70",266],["25.80",265],["24.90",264],["25.10",262],["25.20",260],["23.80",258],["26.10",255],["24.00",251],["26.20",250],["26.00",247]],"top_words":[["24.80",167],["25.00",155],["24.50",149],["24.60",142],["24.40",142],["25.40",142],["25.50",139],["25.20",139],["24.20",135],["25.30",135],["24.70",129],["24.30",129],["24.90",126],["23.30",122],["25.70",122],["25.60",120],["24.10",120],["23.90",120],["25.80",119],["26.20",119],["23.80",118],["23.20",117],["24.00",117],["23.70",113],["26.10",111]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,40824,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":40827,"n_null":3,"n_unique":1656,"null_rate":7.34807847747814e-05,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.9594356261022927,"emoji_rate":0.0,"len_max":5,"len_mean":4.999975504605135,"len_median":5.0,"len_min":4,"len_p95":5.0,"n_duplicates":39168,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1522,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"684 singleton categories"},{"code":"null_rate","level":"warn","message":"96.9% null"}],"column":"diameter","extras":{"singletons":684,"top_values":[["0.4",7],["2.3",6],["1.0",5],["1.4",5],["0.451",5],["0.3",4],["0.9",4],["0.432",4],["0.413",4],["0.344",4],["0.654",4],["0.374",4],["0.228",4],["0.143",4],["0.066",4],["1.5",3],["4.3",3],["0.6",3],["1.8",3],["0.7",3]]},"kind":"categorical","n":40827,"n_null":39579,"n_unique":924,"null_rate":0.9694319935336909,"stats":{"cardinality":924,"entropy":9.703098528171894,"entropy_ratio":0.9849112566084207,"top_rate":0.005608974358974359,"top_value":"0.4"}},{"alerts":[{"code":"null_rate","level":"warn","message":"97.1% null"}],"column":"albedo","extras":{"singletons":166,"top_values":[["0.037",15],["0.020",15],["0.031",14],["0.019",12],["0.023",12],["0.018",11],["0.022",10],["0.030",10],["0.025",10],["0.034",10],["0.028",9],["0.042",9],["0.048",9],["0.026",9],["0.137",9],["0.040",9],["0.024",9],["0.046",8],["0.033",8],["0.039",8]]},"kind":"categorical","n":40827,"n_null":39623,"n_unique":437,"null_rate":0.9705097117103877,"stats":{"cardinality":437,"entropy":8.365949562404179,"entropy_ratio":0.953766129628665,"top_rate":0.012458471760797342,"top_value":"0.037"}},{"alerts":[],"column":"class","extras":{"singletons":0,"top_values":[["APO",23175],["AMO",14321],["ATE",3293],["IEO",38]]},"kind":"categorical","n":40827,"n_null":0,"n_unique":4,"null_rate":0.0,"stats":{"cardinality":4,"entropy":1.296213800169641,"entropy_ratio":0.6481069000848205,"top_rate":0.5676390623851862,"top_value":"APO"}}],"insights":{"errors":[],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["row_count","column_count","columns.class.top_values","columns.pha.top_values","columns.neo.top_values","columns.diameter.null_rate","columns.albedo.null_rate","columns.full_name.top_words","columns.H.stats","columns.a.stats"],"featured_charts":[{"caption":"Orbit class distribution: APO and AMO together account for over 90% of the catalog.","column":"class","kind":"donut"},{"caption":"Potentially hazardous flag \u2014 about 6% of asteroids are marked 'Y'.","column":"pha","kind":"bar"},{"caption":"Absolute magnitude (H) values cluster tightly around 24\u201325.5; cast to numeric to see the full shape.","column":"H","kind":"histogram"},{"caption":"Name length is fairly uniform (16\u201319 chars), reflecting the standard '(YYYY XX)' designation format.","column":"full_name","kind":"length"},{"caption":"Top albedo values among the ~3% of rows that have one \u2014 useful for spotting dark vs reflective bodies.","column":"albedo","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This dataset catalogs 40,827 Near-Earth Objects (asteroids) across 11 columns mixing orbital parameters (H, a, e, i, per), physical properties (diameter, albedo), and classification flags (neo, pha, class). Every record has neo='Y', so that column carries no information and can be ignored. The most analytically interesting fields are 'class', where APO dominates at 56.8% followed by AMO at 35.1%, and 'pha' (potentially hazardous), which flags 2,534 objects (about 6.2%) as 'Y'. Note that 'diameter' and 'albedo' are ~97% null, so any size/reflectivity analysis will be limited to roughly 1,200 rows. The orbital-parameter columns are stored as short text rather than numbers \u2014 they will need to be cast to floats before any quantitative work.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.len_min","stats.len_max","stats.len_mean","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Despite the name 'full_name', this column appears to be title-with-year strings (e.g. ending in '(2024'), not personal names \u2014 top tokens are all parenthesised years from 2016-2025. Every one of the 40,827 rows is unique with zero nulls, and 99.56% are all-caps, with lengths tightly bounded between 16 and 34 characters. The year distribution skews recent, with 2024 (1607) and 2025 (1594) leading.","role":"identifier","scope":"column","target":"full_name","treatment":"Treat as a unique title key; parse the trailing year into a separate numeric feature rather than embedding the raw string."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"The `neo` column is a categorical flag that takes a single value 'Y' across all 40,827 rows, with zero nulls and entropy of 0.0. Because cardinality is 1 and top_rate is 1.0, this column carries no information and cannot discriminate between records.","role":"metadata","scope":"column","target":"neo","treatment":"Drop, constant column."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Binary Y/N flag, almost certainly a 'potentially hazardous asteroid' indicator given the column name 'pha'. The class is heavily imbalanced: 'N' covers 93.77% of rows versus 2,534 'Y' values, with a 0.32% null rate. Entropy ratio of 0.34 confirms the skew.","role":"label","scope":"column","target":"pha","treatment":"Encode as binary target and use class-imbalance handling (stratified splits, class weights, or resampling)."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","len_min","len_max","len_mean","one_word_rate","duplicate_rate","top_values","top_words","vocab_size"],"model":"anthropic:claude-opus-4-7","narrative":"Column 'e' is stored as text but every value is a fixed 6-character single token, and the top values ('0.5298', '0.5964', '0.4826', ...) are all numeric strings between 0 and 1. This is almost certainly a numeric feature (likely a probability, ratio, or normalized score) that has been serialized as text. With 7849 unique values across 40827 rows and a duplicate_rate of 0.808, repetition is heavy but not pathological for a discretized score.","role":"feature","scope":"column","target":"e","treatment":"Cast to float and use as a numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","stats.duplicate_rate","stats.one_word_rate","stats.len_max","stats.len_mean","stats.allcaps_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Column 'a' is stored as text but the values are short numeric strings (e.g., '1.299', '1.424'), all single tokens with length 1-6. With 4170 unique values across 40827 rows and an 89.8% duplicate rate, it behaves like a low-precision numeric feature mistakenly typed as string. The 99.9% allcaps flag is a quirk of digit-only strings tripping the case detector and can be ignored.","role":"feature","scope":"column","target":"a","treatment":"Cast to float and treat as a numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","duplicate_rate","len_min","len_max","len_mean","one_word_rate","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed as text, column 'i' holds short numeric tokens (length 4-6, all single-word) like '6.07', '2.12', '2.26' \u2014 almost certainly a decimal numeric feature stored as strings. With 40,827 rows but only 4,489 unique values and an 89% duplicate rate, the value space is heavily concentrated. The 'allcaps' flag and Flesch score of 121.22 are artefacts of treating numeric strings as prose and can be ignored.","role":"feature","scope":"column","target":"i","treatment":"Cast to float and treat as a numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.duplicate_rate","stats.one_word_rate","stats.len_max","stats.allcaps_rate","stats.word_mean","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Despite being typed as text, every value in `per` is a single short token (word_mean 1.0, len_max 8) and the top values are all numeric strings in scientific notation like '1.13e+03', suggesting this is a numeric measurement that was stringified during export. With 40,827 rows but only 1,025 unique values and a 97.5% duplicate rate, the field takes on a small set of repeating numeric codes. The 64% allcaps rate is an artefact of the 'e+03' exponent characters rather than genuine casing.","role":"feature","scope":"column","target":"per","treatment":"Cast back to numeric (parse the scientific-notation strings to float) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["kind","n","n_unique","stats.len_mean","stats.len_min","stats.len_max","stats.one_word_rate","stats.duplicate_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Column H is stored as text but the values are uniformly short numeric tokens (len_mean 4.999, one_word_rate 1.0) clustered tightly around 24-25 (top values 24.20-25.50). With 1656 uniques across 40827 rows and a 95.9% duplicate_rate, this looks like a quantised numeric measurement (price, weight, or similar) miscast as a string. The allcaps flag is a false positive driven by digits.","role":"feature","scope":"column","target":"H","treatment":"Cast to float and treat as a continuous numeric feature."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is almost certainly an asteroid/object diameter measurement stored as strings (e.g. '0.4', '2.3', '0.451'), miscoded as categorical. It is overwhelmingly missing \u2014 96.94% null \u2014 and among the 40,827 rows only 924 distinct values appear, with the most common ('0.4') occurring just 7 times (top_rate 0.0056) and entropy_ratio 0.985 indicating a near-uniform long tail. The mix of one-decimal and three-decimal strings hints at heterogeneous measurement precision across sources.","role":"feature","scope":"column","target":"diameter","treatment":"Cast to numeric and either drop given 96.94% nulls or impute with a missingness indicator before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Likely a geometric/Bond albedo measurement (reflectivity, 0-1 range) stored as a string rather than parsed numeric, given top values like '0.037', '0.020', '0.031'. Coverage is extremely sparse: 97.05% null with only 437 distinct values across 40,827 rows, and the modal value appears just 15 times (1.25%). Entropy ratio of 0.954 shows the few populated values are spread almost uniformly across the 437 levels.","role":"feature","scope":"column","target":"albedo","treatment":"Cast to float and treat as numeric; given 97% nulls, use only as a sparse feature with missingness indicator or drop."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical label with 4 classes across 40,827 rows and no nulls. Distribution is heavily imbalanced: APO accounts for 56.8% and AMO for most of the remainder, while IEO appears only 38 times \u2014 a near-absent class that will be hard to learn or evaluate.","role":"label","scope":"column","target":"class","treatment":"Use as classification target with class-weighting or resampling to handle the IEO minority class."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":3593,"prompt_tokens":16391,"total_tokens":19984}},"language_counts":{},"meta":{"generated_at":"2026-05-01T18:38:20+00:00","mode":"full","row_count":40827,"sampled_rows":40827,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/data/quirky/asteroids.json"},"notes":[],"saturn_version":"0.2.0","schema":{"H":"text","a":"text","albedo":"categorical","class":"categorical","diameter":"categorical","e":"text","full_name":"text","i":"text","neo":"categorical","per":"text","pha":"categorical"}}
