{"columns":[{"alerts":[{"code":"near_unique","level":"info","message":"100.0% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"}],"column":"summons_number","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[9.5,9.525,9.55,9.575,9.6,9.625,9.65,9.675,9.7,9.725,9.75,9.775,9.8,9.825,9.85,9.875,9.9,9.925,9.95,9.975,10.0,10.025,10.05,10.075,10.1,10.125,10.15,10.175,10.2,10.225,10.25,10.275,10.3,10.325,10.35,10.375,10.4,10.425,10.45,10.475,10.5]},"near_unique":true,"sample":["1499918446","4976273052","4976262716","4976263125","9252903896","4976252231","9253525745","4976257137","4976263332","1499713034","4976288067","8985766004","2027686915","1496553809","2014432867","2028476746","1498296282","4976264762","4976249840","9253560411","2025571021","1501981675","2028548472","1500966563","4976276211","4976269693","8862422350","2028408364","1474469085","4976252280","4976284104","4976280871","4976280860","9254066904","9254930325","9253500967","4976253340","9242904922","9251772605","9248309288","4976266930","4976256133","2027638325","4976256030","4976250763","4976248460","2027418490","9254066965","2028586035","4976260227"],"top_values":[],"top_words":[["1500913558",1],["1500906438",1],["1499277933",1],["1502369229",1],["1490900561",1],["1499708300",1],["1495829625",1],["1499006470",1],["1495808464",1],["1499052900",1],["1496035150",1],["1503389613",1],["1499918446",1],["1500353954",1],["1495750590",1],["1498600189",1],["1499240776",1],["1492773840",1],["1495760637",1],["1497879541",1],["1489921187",1],["1489921199",1],["1492370253",1],["1498779451",1],["1494575176",1]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":10000,"n_null":0,"n_unique":10000,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.0,"emoji_rate":0.0,"len_max":10,"len_mean":10.0,"len_median":10.0,"len_min":10,"len_p95":10.0,"n_duplicates":0,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":10000,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"near_unique","level":"info","message":"95.2% of rows are unique strings"},{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 
chars"}],"column":"plate_id","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[1,0,0,0,0,5,0,0,0,0,12,0,0,0,0,58,0,0,0,0,1578,0,0,0,0,7876,0,0,0,0,432,0,0,0,0,3,0,0,0,35],"edges":[2.0,2.2,2.4,2.6,2.8,3.0,3.2,3.4000000000000004,3.6,3.8,4.0,4.2,4.4,4.6,4.800000000000001,5.0,5.2,5.4,5.6,5.800000000000001,6.0,6.2,6.4,6.6000000000000005,6.800000000000001,7.0,7.2,7.4,7.6000000000000005,7.800000000000001,8.0,8.2,8.4,8.600000000000001,8.8,9.0,9.2,9.4,9.600000000000001,9.8,10.0]},"near_unique":true,"sample":["LPG5377","NSXB18","BL54422","LKN2862","MWJM2015","M15UEZ","MHC7554","GJK2986","JMT5468","85997MD","HCZ1427","LJA1091","LZW1314","70709MK","63BSKF","LPW5467","HDE5974","KTN6591","T682047C","GVR7071","JRB6346","LZA4830","LGK2190","HLS1139","BJ00645","JGA6211","LZW1658","LZN1511","983BU7","LXG4436","CDY8048","LNC7984","FDG1000","16785NG","V27SEN","4VNN88","MAX1028","MLV3921","ZYT8081","HCS2969","JMJ5078","GWD7510","LCM5130","JHT9909","C52NSM","LLP4760","CJ53500","CLU2991","BH14609","KLP5966"],"top_values":[],"top_words":[["blankplate",34],["u772849",12],["48151nf",8],["5ell15",8],["5168751",7],["u416492",7],["2822239",5],["213951d",4],["3ttl51",4],["2572162",4],["lnl4888",4],["5070061",4],["2663241",4],["cj53500",4],["fnduel",3],["xrxf30",3],["azk275",3],["51668pf",3],["u442102",3],["52448pf",3],["5543322",3],["t850228",3],["e143aa",3],["txl73e",3],["186068b",3]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":10000,"n_null"
:0,"n_unique":9519,"null_rate":0.0,"stats":{"allcaps_rate":0.9999,"boilerplate_rate":0.0,"duplicate_rate":0.0481,"emoji_rate":0.0,"len_max":10,"len_mean":6.8788,"len_median":7.0,"len_min":2,"len_p95":7.0,"n_duplicates":481,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":100.49300000000002,"url_rate":0.0,"vocab_size":9519,"word_mean":1.0,"word_median":1.0}},{"alerts":[],"column":"registration_state","extras":{"singletons":4,"top_values":[["NY",6935],["NJ",917],["PA",470],["CT",247],["FL",235],["VA",220],["ME",125],["MA",124],["99",88],["GA",65],["NC",64],["TN",56],["MD",53],["TX",36],["OH",31],["IN",30],["IL",28],["CA",27],["SC",26],["MI",24]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":50,"null_rate":0.0,"stats":{"cardinality":50,"entropy":2.0337466116798257,"entropy_ratio":0.3603469938451786,"top_rate":0.6935,"top_value":"NY"}},{"alerts":[],"column":"plate_type","extras":{"singletons":6,"top_values":[["PAS",9072],["OMT",330],["COM",245],["SRF",103],["OMS",84],["999",77],["ORG",17],["TRL",11],["MED",8],["RGL",8],["MOT",7],["NYS",6],["PSD",6],["SPO",4],["VAS",3],["ITP",3],["OMR",2],["TRC",2],["TOW",2],["APP",2]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":27,"null_rate":0.0,"stats":{"cardinality":27,"entropy":0.6958389883641399,"entropy_ratio":0.14634184048466636,"top_rate":0.9072,"top_value":"PAS"}},{"alerts":[{"code":"long_tail","level":"info","message":"368 singleton 
categories"}],"column":"issue_date","extras":{"singletons":368,"top_values":[["2025-12-28T00:00:00.000",6542],["2025-12-30T00:00:00.000",1594],["2025-12-29T00:00:00.000",356],["2026-06-26T00:00:00.000",14],["2026-09-27T00:00:00.000",13],["2026-09-25T00:00:00.000",12],["2025-12-31T00:00:00.000",12],["2026-06-27T00:00:00.000",11],["2026-10-25T00:00:00.000",10],["2026-08-31T00:00:00.000",10],["2026-08-27T00:00:00.000",10],["2026-07-26T00:00:00.000",10],["2026-06-30T00:00:00.000",10],["2026-08-25T00:00:00.000",9],["2026-10-29T00:00:00.000",8],["2026-10-17T00:00:00.000",8],["2026-10-05T00:00:00.000",8],["2026-09-23T00:00:00.000",8],["2026-07-27T00:00:00.000",8],["2026-07-24T00:00:00.000",8]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":687,"null_rate":0.0,"stats":{"cardinality":687,"entropy":2.764998092524379,"entropy_ratio":0.2933944507966807,"top_rate":0.6542,"top_value":"2025-12-28T00:00:00.000"}},{"alerts":[],"column":"violation_code","extras":{"singletons":8,"top_values":[["36",4416],["21",1463],["40",862],["14",858],["46",360],["20",244],["98",242],["19",168],["16",133],["66",131],["71",116],["74",114],["70",104],["50",96],["17",79],["78",75],["51",66],["80",56],["67",47],["13",38]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":62,"null_rate":0.0,"stats":{"cardinality":62,"entropy":3.0999740400678384,"entropy_ratio":0.5206368548279217,"top_rate":0.4416,"top_value":"36"}},{"alerts":[],"column":"vehicle_body_type","extras":{"singletons":32,"top_values":[["SUBN",5120],["4DSD",2067],["SDN",670],["VAN",275],["SPOR",233],["PICK",194],["TRLR",175],["SEDA",115],["UT",111],["SW",110],["2DSD",94],["4D",82],["DELV",77],["SEMI",53],["TAXI",52],["SU",52],["SD",51],["P-U",46],["TRAC",41],["CONV",26]]},"kind":"categorical","n":10000,"n_null":131,"n_unique":81,"null_rate":0.0131,"stats":{"cardinality":81,"entropy":2.5993686237245446,"entropy_ratio":0.4100047513019764,"top_rate":0.5187962306211369,"top_value":"SUBN"}},{"alerts":[],"column":"vehicle_make","extras":{
"singletons":45,"top_values":[["HONDA",1331],["TOYOT",1302],["NISSA",770],["FORD",603],["BMW",559],["ME/BE",521],["JEEP",450],["CHEVR",449],["HYUND",365],["SUBAR",273],["KIA",268],["LEXUS",268],["MAZDA",257],["AUDI",242],["ACURA",221],["VOLKS",199],["DODGE",175],["TESLA",152],["INFIN",151],["GMC",134]]},"kind":"categorical","n":10000,"n_null":78,"n_unique":126,"null_rate":0.0078,"stats":{"cardinality":126,"entropy":4.660590063008295,"entropy_ratio":0.6679666165193021,"top_rate":0.13414634146341464,"top_value":"HONDA"}},{"alerts":[],"column":"issuing_agency","extras":{"singletons":1,"top_values":[["V",4416],["T",2131],["S",1946],["P",1325],["K",41],["N",38],["A",23],["Y",17],["M",13],["O",9],["C",9],["8",8],["3",6],["X",5],["9",3],["W",3],["L",2],["R",2],["F",2],["U",1]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":20,"null_rate":0.0,"stats":{"cardinality":20,"entropy":2.0073162502379707,"entropy_ratio":0.46444924722660963,"top_rate":0.4416,"top_value":"V"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"51.7% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"85.8% duplicate 
strings"}],"column":"street_code1","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[4817,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,426,0,0,0,0,0,0,0,0,4739],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0,4.1,4.2,4.300000000000001,4.4,4.5,4.6,4.7,4.800000000000001,4.9,5.0]},"near_unique":false,"sample":["35115","0","0","0","34610","0","11010","0","0","18070","0","38590","29450","58760","18730","38690","0","0","0","24890","16490","36090","72730","27540","0","0","27090","18390","60810","0","0","0","0","31272","90585","20740","0","28440","19530","13590","0","0","17370","0","0","0","56990","20770","8890","0"],"top_values":[["0",4817],["13610",52],["10510",40],["34610",35],["38430",34],["10610",32],["51790",32],["61090",31],["67730",31],["10210",30],["31190",28],["41030",28],["25645",27],["10880",27],["31830",26],["10810",24],["10410",24],["34330",22],["25390",22],["10910",22]],"top_words":[["0",4817],["13610",52],["10510",40],["34610",35],["38430",34],["10610",32],["51790",32],["61090",31],["67730",31],["10210",30],["31190",28],["41030",28],["25645",27],["10880",27],["31830",26],["10810",24],["10410",24],["34330",22],["25390",22],["10910",22],["11010",21],["33390",21],["34910",21],["21240",21],["81330",20]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666
666666666668,1.5]}},"kind":"text","n":10000,"n_null":0,"n_unique":1420,"null_rate":0.0,"stats":{"allcaps_rate":0.5168,"boilerplate_rate":0.0,"duplicate_rate":0.858,"emoji_rate":0.0,"len_max":5,"len_mean":3.0255,"len_median":4.0,"len_min":1,"len_p95":5.0,"n_duplicates":8580,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1420,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"49.6% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"86.5% duplicate strings"}],"column":"street_code2","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[5005,0,0,0,0,0,0,0,0,0,35,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,490,0,0,0,0,0,0,0,0,4467],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0,4.1,4.2,4.300000000000001,4.4,4.5,4.6,4.7,4.800000000000001,4.9,5.0]},"near_unique":false,"sample":["35290","0","0","0","10410","0","34390","0","0","40404","0","50150","30620","15340","10110","56590","0","0","0","18190","8440","24690","86030","10020","0","0","0","7990","21340","0","0","0","0","18510","0","0","0","10620","0","25900","0","0","10210","0","0","0","81725","25908","13740","0"],"top_values":[["0",5005],["40404",359],["10410",83],["13610",71],["10910",50],["8790",49],["14510",39],["8590",37],["11710",36],["10610",35],["10510",34],["15710",33],["35780",28],["10210",28],["23230",27],["10110",26],["13495",26],["10810",25],["8190",25],["75430",25]],"top_words":[["0",5005],["40404",359],["10410",83],["13610",71],["10910",50],["8790",49],["14510",39],["8590",37],["11710",36],["10610",35],["10510",34],["157
10",33],["35780",28],["10210",28],["23230",27],["10110",26],["13495",26],["10810",25],["8190",25],["75430",25],["20190",25],["24690",24],["51090",24],["12550",24],["5010",22]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":10000,"n_null":0,"n_unique":1349,"null_rate":0.0,"stats":{"allcaps_rate":0.496,"boilerplate_rate":0.0,"duplicate_rate":0.8651,"emoji_rate":0.0,"len_max":5,"len_mean":2.9379,"len_median":1.0,"len_min":1,"len_p95":5.0,"n_duplicates":8651,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1349,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"47.5% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"86.8% duplicate 
strings"}],"column":"street_code3","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[5235,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,522,0,0,0,0,0,0,0,0,4228],"edges":[1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7000000000000002,1.8,1.9,2.0,2.1,2.2,2.3,2.4000000000000004,2.5,2.6,2.7,2.8,2.9000000000000004,3.0,3.1,3.2,3.3000000000000003,3.4000000000000004,3.5,3.6,3.7,3.8000000000000003,3.9000000000000004,4.0,4.1,4.2,4.300000000000001,4.4,4.5,4.6,4.7,4.800000000000001,4.9,5.0]},"near_unique":false,"sample":["0","0","0","0","10510","0","34430","0","0","40404","0","52290","13113","59225","10010","29690","0","0","0","18210","8590","10610","75430","49420","0","0","0","8190","21390","0","0","0","0","49876","0","0","0","15620","0","30650","0","0","10110","0","0","0","43040","29150","14440","0"],"top_values":[["0",5235],["40404",359],["10610",62],["10510",53],["15710",45],["11710",44],["8790",43],["13610",42],["10810",34],["8990",34],["10010",31],["33390",31],["12550",30],["10110",27],["10910",26],["5280",26],["8440",26],["86630",26],["23930",26],["44430",25]],"top_words":[["0",5235],["40404",359],["10610",62],["10510",53],["15710",45],["11710",44],["8790",43],["13610",42],["10810",34],["8990",34],["10010",31],["33390",31],["12550",30],["10110",27],["10910",26],["5280",26],["8440",26],["86630",26],["23930",26],["44430",25],["20390",25],["5130",22],["8590",21],["75430",21],["25390",21]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"t
ext","n":10000,"n_null":0,"n_unique":1317,"null_rate":0.0,"stats":{"allcaps_rate":0.4753,"boilerplate_rate":0.0,"duplicate_rate":0.8683,"emoji_rate":0.0,"len_max":5,"len_mean":2.8496,"len_median":1.0,"len_min":1,"len_p95":5.0,"n_duplicates":8683,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1317,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"89.6% duplicate strings"}],"column":"vehicle_expiration_date","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[4508,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5492],"edges":[8.0,8.05,8.1,8.15,8.2,8.25,8.3,8.35,8.4,8.45,8.5,8.55,8.6,8.65,8.7,8.75,8.8,8.85,8.9,8.95,9.0,9.05,9.1,9.15,9.2,9.25,9.3,9.35,9.4,9.45,9.5,9.55,9.6,9.65,9.7,9.75,9.8,9.85,9.9,9.95,10.0]},"near_unique":false,"sample":["20260730","0.00000000","0.00000000","0.00000000","20260888","0.00000000","88888888","0.00000000","0.00000000","0.00000000","0.00000000","20271210","20270825","20250930","0.00000000","20270225","20260317","0.00000000","0.00000000","20270407","20270729","20270812","20260115","0.00000000","0.00000000","0.00000000","20270922","20270724","0.00000000","0.00000000","0.00000000","0.00000000","0.00000000","88888888","88888888","20270888","0.00000000","88888888","20258888","20271030","0.00000000","0.00000000","0.00000000","0.00000000","0.00000000","0.00000000","20251231","20190731","0.00000000","0.00000000"],"top_values":[["0.00000000",5492],["88888888",556],["20258888",126],["20260930",67],["20260228",56],["20260630",52],["20261031",48],["20260430",44],["20260731",44],["20260131",42],["20260531",41],["20260831",37],["20260331",36],["20261130",27],
["20261231",18],["20251231",18],["20260288",18],["20270831",17],["20260188",16],["20250930",15]],"top_words":[["0.00000000",5492],["88888888",556],["20258888",126],["20260930",67],["20260228",56],["20260630",52],["20261031",48],["20260430",44],["20260731",44],["20260131",42],["20260531",41],["20260831",37],["20260331",36],["20261130",27],["20261231",18],["20251231",18],["20260288",18],["20270831",17],["20260188",16],["20250930",15],["20271130",15],["20270331",14],["20260988",14],["20270531",13],["20261014",13]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","n":10000,"n_null":0,"n_unique":1040,"null_rate":0.0,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.896,"emoji_rate":0.0,"len_max":10,"len_mean":9.0984,"len_median":10.0,"len_min":8,"len_p95":10.0,"n_duplicates":8960,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1040,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"44.9% 
null"}],"column":"violation_location","extras":{"singletons":7,"top_values":[["0018",284],["115",220],["114",178],["103",162],["0047",140],["0075",139],["0077",135],["0062",130],["0072",129],["110",127],["0001",125],["112",124],["0061",120],["0084",106],["0034",105],["0020",104],["109",102],["0067",96],["0073",96],["108",93]]},"kind":"categorical","n":10000,"n_null":4485,"n_unique":87,"null_rate":0.4485,"stats":{"cardinality":87,"entropy":5.98838050792608,"entropy_ratio":0.9294479319560185,"top_rate":0.05149592021758839,"top_value":"0018"}},{"alerts":[],"column":"violation_precinct","extras":{"singletons":7,"top_values":[["0",4485],["18",284],["115",220],["114",178],["103",162],["47",140],["75",139],["77",135],["62",130],["72",129],["110",127],["1",125],["112",124],["61",120],["84",106],["34",105],["20",104],["109",102],["67",96],["73",96]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":88,"null_rate":0.0,"stats":{"cardinality":88,"entropy":4.294925485143383,"entropy_ratio":0.6649076480276224,"top_rate":0.4485,"top_value":"0"}},{"alerts":[],"column":"issuer_precinct","extras":{"singletons":21,"top_values":[["0",6418],["18",282],["115",161],["103",139],["61",115],["62",113],["1",104],["109",90],["70",87],["114",81],["19",80],["14",80],["20",78],["110",75],["63",73],["102",68],["6",65],["60",59],["75",57],["10",57]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":121,"null_rate":0.0,"stats":{"cardinality":121,"entropy":3.088131322322706,"entropy_ratio":0.4463350721670212,"top_rate":0.6418,"top_value":"0"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"46.9% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"85.8% duplicate 
strings"}],"column":"issuer_code","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[5314,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,24,0,0,0,0,0,0,4654],"edges":[1.0,1.125,1.25,1.375,1.5,1.625,1.75,1.875,2.0,2.125,2.25,2.375,2.5,2.625,2.75,2.875,3.0,3.125,3.25,3.375,3.5,3.625,3.75,3.875,4.0,4.125,4.25,4.375,4.5,4.625,4.75,4.875,5.0,5.125,5.25,5.375,5.5,5.625,5.75,5.875,6.0]},"near_unique":false,"sample":["976628","0","0","0","365882","0","376939","0","0","972724","0","351220","0","960859","0","0","966657","0","0","373703","612312","973748","611159","978413","0","0","357734","0","967587","0","0","0","0","363921","377820","354088","0","379409","370736","370747","0","0","686320","0","0","0","662505","363921","0","0"],"top_values":[["0",5314],["355542",92],["611159",66],["366548",56],["635831",53],["355076",53],["392714",52],["375306",50],["376951",43],["376939",42],["685742",41],["668664",38],["365928",38],["377820",36],["368930",35],["373279",35],["378708",35],["378654",35],["373245",34],["365332",33]],"top_words":[["0",5314],["355542",92],["611159",66],["366548",56],["635831",53],["355076",53],["392714",52],["375306",50],["376951",43],["376939",42],["685742",41],["668664",38],["365928",38],["377820",36],["368930",35],["373279",35],["378708",35],["378654",35],["373245",34],["365332",33],["377824",32],["372261",32],["366552",31],["358580",30],["377879",30]],"vocab_skipped":null,"word_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10000,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[0.5,0.5333333333333333,0.5666666666666667,0.6,0.6333333333333333,0.6666666666666666,0.7,0.7333333333333334,0.7666666666666666,0.8,0.8333333333333333,0.8666666666666667,0.9,0.9333333333333333,0.9666666666666667,1.0,1.0333333333333332,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666665,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333335,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5]}},"kind":"text","
n":10000,"n_null":0,"n_unique":1420,"null_rate":0.0,"stats":{"allcaps_rate":0.4686,"boilerplate_rate":0.0,"duplicate_rate":0.858,"emoji_rate":0.0,"len_max":6,"len_mean":3.3383,"len_median":1.0,"len_min":1,"len_p95":6.0,"n_duplicates":8580,"n_empty":0,"one_word_rate":1.0,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1420,"word_mean":1.0,"word_median":1.0}},{"alerts":[{"code":"null_rate","level":"warn","message":"44.2% null"}],"column":"issuer_command","extras":{"singletons":46,"top_values":[["T302",374],["T401",301],["T103",230],["T402",223],["T106",187],["T301",169],["T102",145],["EPIU",142],["MTTF",133],["KN02",105],["T105",104],["KN08",95],["QW01",95],["MN12",88],["BX12",86],["T303",85],["T201",84],["MN07",83],["QW02",63],["KN04",63]]},"kind":"categorical","n":10000,"n_null":4416,"n_unique":228,"null_rate":0.4416,"stats":{"cardinality":228,"entropy":6.522657974172706,"entropy_ratio":0.8327268686752074,"top_rate":0.06697707736389685,"top_value":"T302"}},{"alerts":[{"code":"null_rate","level":"warn","message":"63.6% null"}],"column":"issuer_squad","extras":{"singletons":0,"top_values":[["0000",1513],["F",331],["N",245],["A",214],["L",180],["J",122],["M",120],["Q",110],["R",100],["H",99],["E",99],["X",95],["Y",85],["S",71],["B",70],["P",44],["U",29],["D",28],["C",25],["G",22]]},"kind":"categorical","n":10000,"n_null":6361,"n_unique":23,"null_rate":0.6361,"stats":{"cardinality":23,"entropy":3.308900004573545,"entropy_ratio":0.7314810843129836,"top_rate":0.41577356416597966,"top_value":"0000"}},{"alerts":[{"code":"one_word","level":"warn","message":"100.0% rows are a single word"},{"code":"allcaps","level":"info","message":"100.0% rows are all-caps"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"85.7% duplicate 
strings"}],"column":"violation_time","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9996,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"edges":[4.5,4.525,4.55,4.575,4.6,4.625,4.65,4.675,4.7,4.725,4.75,4.775,4.8,4.825,4.85,4.875,4.9,4.925,4.95,4.975,5.0,5.025,5.05,5.075,5.1,5.125,5.15,5.175,5.2,5.225,5.25,5.275,5.3,5.325,5.35,5.375,5.4,5.425,5.45,5.475,5.5]},"near_unique":false,"sample":["0840P","0259P","1127A","1135A","0622P","0725A","0453P","0939A","1139A","1230P","0826P","1249P","0928A","0245A","0852A","0743A","1232P","1034A","0334A","0131P","0947A","1124P","1148A","0235A","0421P","0156P","0107A","0102A","0230A","0728A","0808P","0556P","0556P","0821A","0849A","0717A","0812A","1238P","0309P","0139P","1232P","0921A","0958A","0919A","0446A","0117A","0120A","1158A","0915A","1040A"],"top_values":[["0839A",29],["0911A",23],["0915A",22],["0845A",22],["1200P",22],["1138A",22],["1151A",22],["0956A",22],["1203P",22],["0248P",21],["0847A",21],["0936A",21],["0941A",21],["1029A",21],["1136A",21],["1142A",21],["0317P",21],["0935A",20],["1001A",20],["1130A",20]],"top_words":[["0839a",29],["0911a",23],["0915a",22],["0845a",22],["1200p",22],["1138a",22],["1151a",22],["0956a",22],["1203p",22],["0248p",21],["0847a",21],["0936a",21],["0941a",21],["1029a",21],["1136a",21],["1142a",21],["0317p",21],["0935a",20],["1001a",20],["1130a",20],["0955a",20],["1015a",20],["1149a",20],["1013a",20],["1147a",20]],"vocab_skipped":null,"word_histogram":{"counts":[9995,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.93333333333333
33,1.9666666666666668,2.0]}},"kind":"text","n":10000,"n_null":4,"n_unique":1432,"null_rate":0.0004,"stats":{"allcaps_rate":1.0,"boilerplate_rate":0.0,"duplicate_rate":0.8567426970788315,"emoji_rate":0.0,"len_max":5,"len_mean":5.0,"len_median":5.0,"len_min":5,"len_p95":5.0,"n_duplicates":8564,"n_empty":0,"one_word_rate":0.9998999599839936,"readability_flesch_mean":121.22000000000004,"url_rate":0.0,"vocab_size":1433,"word_mean":1.0001000400160065,"word_median":1.0}},{"alerts":[],"column":"violation_county","extras":{"singletons":0,"top_values":[["QN",1773],["NY",1472],["BK",1233],["BX",1027],["K",936],["Q",902],["Kings",737],["Qns",414],["ST",403],["Bronx",395],["MN",314],["R",116],["Rich",7]]},"kind":"categorical","n":10000,"n_null":271,"n_unique":13,"null_rate":0.0271,"stats":{"cardinality":13,"entropy":3.320309527266242,"entropy_ratio":0.8972743187758757,"top_rate":0.1822386679000925,"top_value":"QN"}},{"alerts":[{"code":"null_rate","level":"warn","message":"46.9% null"}],"column":"violation_in_front_of_or_opposite","extras":{"singletons":1,"top_values":[["F",3553],["O",1140],["I",621],["R",1]]},"kind":"categorical","n":10000,"n_null":4685,"n_unique":4,"null_rate":0.4685,"stats":{"cardinality":4,"entropy":1.2290219483619946,"entropy_ratio":0.6145109741809973,"top_rate":0.6684854186265287,"top_value":"F"}},{"alerts":[{"code":"multilingual","level":"info","message":"28 languages detected in sample"},{"code":"allcaps","level":"info","message":"77.3% rows are all-caps"},{"code":"duplicates","level":"warn","message":"68.8% duplicate 
strings"}],"column":"street_name","extras":{"language_counts":{"__engine":"fasttext:4,612","ar":14,"ca":14,"cs":10,"da":1,"de":56,"en":3596,"es":86,"eu":5,"fr":39,"gl":1,"id":4,"it":21,"ja":614,"ko":14,"lt":1,"mk":1,"ms":1,"nl":25,"no":9,"pl":3,"pt":15,"ro":1,"ru":12,"sv":1,"te":1,"uk":8,"zh":59},"language_sample_size":5000,"length_histogram":{"counts":[1,0,9,0,15,0,150,0,105,0,0,533,0,727,0,1009,0,396,0,0,415,0,374,0,489,0,404,0,344,0,0,155,0,90,0,111,0,1013,0,3656],"edges":[2.0,2.45,2.9,3.35,3.8,4.25,4.7,5.15,5.6,6.05,6.5,6.95,7.4,7.8500000000000005,8.3,8.75,9.2,9.65,10.1,10.55,11.0,11.450000000000001,11.9,12.35,12.8,13.25,13.700000000000001,14.15,14.6,15.05,15.5,15.950000000000001,16.4,16.85,17.3,17.75,18.2,18.650000000000002,19.1,19.55,20.0]},"near_unique":false,"sample":["BEACH 145 ST","WB HYLAN BLVD @ LUTE","NB KNAPP ST @ ALLEN","EB NORTHERN BLVD @ A","E 45th St","EB FOREST AVE @ CRYS","10th Ave","SB WATERS PL @ BRONX","WB SEAGIRT BLVD @ B","S/W C/O 173 ST","SB WINCHESTER BLVD @","Queens Blvd","RECTOR PLACE","PENELOPE AVE","DEAN STREET","MOTT AVENUE","SHORE FRONT PKY","EB FOUR CORNERS RD @","WB GOETHALS RD N @ J","E 60th St","79 STREET","FOWLER AVE","PROSPECT PLACE","MARION AVE","NB LITTLE NECK PKWY","WB SHORE PKWY @ KNAP","9th Ave","ATLANTIC AVENUE","VERMONT AVE","WB SHORE PKWY @ BROW","EB CROSS BRONX EXPWY","SB BRUCKNER BLVD @ W","WB UNION TRPK @ 213T","Gulf Ave","Sea Breeze Ave","43rd Ave","WB NORTHERN BLVD @ 8","E 223rd St","Lexington Ave","Broad St","NB BAILEY AVE @ W 19","WB QUEENS BLVD @ IRE","EAST 19 STREET","NB OCEAN PKWY @ ELMW","EB E TREMONT AVE @ C","NB WOODHAVEN BLVD @","WEST 137 STREET","Nicholas Ave","65 STREET","SB MAIN ST @ 82ND DR"],"top_values":[["SB CROSS BAY BLVD @",115],["WB N CONDUIT AVE @ 8",76],["BROADWAY",49],["WB ASTORIA BLVD N @",47],["NB WOODHAVEN BLVD @",47],["NB SPRINGFIELD BLVD",46],["EB CROSS BRONX EXPWY",44],["WB N CONDUIT AVE @ 1",44],["NB FRANCIS LEWIS BLV",44],["NB SOUTHERN BLVD @ C",43],["SB HAMILTON AVE @ BU",43],["EB 
BRUCKNER BLVD @ W",42],["NB CROSS BAY BLVD @",41],["6th Ave",39],["EB NORTHERN BLVD @ A",39],["EB SHORE PKWY @ OCEA",39],["SB FRANCIS LEWIS BLV",38],["W 42nd St",34],["EB S CONDUIT AVE @ M",34],["EASTERN PARKWAY",33]],"top_words":[["@",3809],["ave",2506],["st",2287],["blvd",1255],["sb",1180],["wb",1137],["nb",1118],["street",1002],["eb",982],["avenue",760],["w",749],["e",720],["pkwy",409],["rd",364],["west",320],["s",283],["conduit",283],["n",256],["bay",229],["cross",227],["park",180],["ocean",173],["pl",168],["queens",160],["east",143]],"vocab_skipped":null,"word_histogram":{"counts":[137,0,0,0,0,3855,0,0,0,0,1547,0,0,0,0,810,0,0,0,0,2564,0,0,0,0,1014,0,0,0,69],"edges":[1.0,1.2,1.4,1.6,1.8,2.0,2.2,2.4000000000000004,2.6,2.8,3.0,3.2,3.4000000000000004,3.6,3.8000000000000003,4.0,4.2,4.4,4.6,4.800000000000001,5.0,5.2,5.4,5.6000000000000005,5.800000000000001,6.0,6.2,6.4,6.6000000000000005,6.800000000000001,7.0]}},"kind":"text","n":10000,"n_null":4,"n_unique":3115,"null_rate":0.0004,"stats":{"allcaps_rate":0.7732092837134854,"boilerplate_rate":0.0,"duplicate_rate":0.688375350140056,"emoji_rate":0.0,"len_max":20,"len_mean":14.871748699479792,"len_median":16.0,"len_min":2,"len_p95":20.0,"n_duplicates":6881,"n_empty":0,"one_word_rate":0.013705482192877151,"readability_flesch_mean":62.54637500000002,"url_rate":0.0,"vocab_size":1760,"word_mean":3.512905162064826,"word_median":3.0}},{"alerts":[{"code":"allcaps","level":"info","message":"80.9% rows are all-caps"},{"code":"null_rate","level":"warn","message":"48.1% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"72.8% duplicate 
strings"}],"column":"intersecting_street","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[192,0,253,0,130,0,407,0,308,0,455,0,646,0,241,0,357,0,508,0,0,210,0,164,0,265,0,125,0,88,0,78,0,110,0,149,0,253,0,248],"edges":[1.0,1.475,1.95,2.425,2.9,3.375,3.8499999999999996,4.324999999999999,4.8,5.2749999999999995,5.75,6.225,6.699999999999999,7.175,7.6499999999999995,8.125,8.6,9.075,9.549999999999999,10.025,10.5,10.975,11.45,11.924999999999999,12.399999999999999,12.875,13.35,13.825,14.299999999999999,14.774999999999999,15.25,15.725,16.2,16.674999999999997,17.15,17.625,18.099999999999998,18.575,19.05,19.525,20.0]},"near_unique":false,"sample":["42 ST","I","N ST","OOD RD","SHAD CREEK RD","@ 27TH ST","GERBOARD RD","ALL AVE","LAMEDA AVE","STEINWAY","E @ 175TH ST","OD PARK DRIVE","RISWOLD AVE","8 AVE","N ST","10ft N/of Academy St","5ft E/of W 2nd St","N ST","ST","@ SCHLEY AVE","53RD ST","124 ST","10ft E/of E 56th St","10ft N/of John St","E ST","NAH AVE","29TH ST","20ft N/of Avenue R","10' E/O LINWOOD STR","42ND ST","ST","AVE","44TH ST","IC AVE","WOOD ST","VE","28TH ST","ST","ER AVE","E Z","8TH ST","ARNES AVE","Y @ BILLS PL","ALL AVE","76TH ST","N AVE","@ B 139TH ST","AVE","40ft N/of W 59th St","ARNEY ST"],"top_values":[["ST",189],["SHAD CREEK RD",98],["8TH ST",87],["AVE",84],["H ST",79],["T",75],["HITE PLAINS RD",60],["E",52],["N ST",49],["42ND ST",47],["EXT. 
@ LAFAYETTE AV",44],["27TH ST",44],["ROTONA AVE",43],["SH ST",43],["PRINGFIELD BLVD",41],["LAMEDA AVE",39],["TH ST",39],["159TH AVE",38],["N PKWY",38],["EMPHIS AVE",34]],"top_words":[["st",1848],["ave",1545],["@",607],["rd",456],["e",255],["d",178],["pl",174],["w/of",159],["h",155],["n",154],["s/of",144],["e/of",122],["n/of",114],["t",111],["w",108],["shad",98],["creek",98],["blvd",95],["8th",95],["av",83],["pkwy",78],["ln",76],["dr",73],["5ft",73],["ct",66]],"vocab_skipped":null,"word_histogram":{"counts":[591,0,0,0,0,0,2959,0,0,0,0,0,691,0,0,0,0,0,754,0,0,0,0,0,183,0,0,0,0,9],"edges":[1.0,1.1666666666666667,1.3333333333333333,1.5,1.6666666666666665,1.8333333333333333,2.0,2.1666666666666665,2.333333333333333,2.5,2.6666666666666665,2.833333333333333,3.0,3.1666666666666665,3.333333333333333,3.5,3.6666666666666665,3.833333333333333,4.0,4.166666666666666,4.333333333333333,4.5,4.666666666666666,4.833333333333333,5.0,5.166666666666666,5.333333333333333,5.5,5.666666666666666,5.833333333333333,6.0]}},"kind":"text","n":10000,"n_null":4813,"n_unique":1413,"null_rate":0.4813,"stats":{"allcaps_rate":0.8091382301908617,"boilerplate_rate":0.0,"duplicate_rate":0.7275882012724117,"emoji_rate":0.0,"len_max":20,"len_mean":9.271062271062272,"len_median":8.0,"len_min":1,"len_p95":19.0,"n_duplicates":3774,"n_empty":0,"one_word_rate":0.1139386928860613,"readability_flesch_mean":83.03520000000002,"url_rate":0.0,"vocab_size":1053,"word_mean":2.4227877385772123,"word_median":2.0}},{"alerts":[{"code":"long_tail","level":"info","message":"125 singleton categories"},{"code":"imbalance","level":"warn","message":"top value is 98.3% of 
rows"}],"column":"date_first_observed","extras":{"singletons":125,"top_values":[["0",9827],["20251229",6],["20261006",3],["20260901",3],["20261106",2],["20261101",2],["20260927",2],["20260925",2],["20260919",2],["20260911",2],["20260907",2],["20260829",2],["20250828",2],["20260629",2],["20240601",2],["20260520",2],["20260426",2],["20260328",2],["20260327",2],["20260314",2]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":147,"null_rate":0.0,"stats":{"cardinality":147,"entropy":0.24851694541340616,"entropy_ratio":0.03451781324349344,"top_rate":0.9827,"top_value":"0"}},{"alerts":[],"column":"law_section","extras":{"singletons":0,"top_values":[["408",5584],["1180",4416]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":2,"null_rate":0.0,"stats":{"cardinality":2,"entropy":0.9901367059207401,"entropy_ratio":0.9901367059207401,"top_rate":0.5584,"top_value":"408"}},{"alerts":[],"column":"sub_division","extras":{"singletons":7,"top_values":[["B",4444],["d1",1428],["C",781],["E2",779],["F1",314],["F2",240],["D",222],["C3",183],["K2",124],["J2",123],["J6",122],["k4",120],["J3",109],["e2",94],["C4",84],["E5",81],["E3",61],["c",61],["K6",57],["n8",56]]},"kind":"categorical","n":10000,"n_null":2,"n_unique":76,"null_rate":0.0002,"stats":{"cardinality":76,"entropy":3.2274000161123535,"entropy_ratio":0.5165552912014422,"top_rate":0.4444888977795559,"top_value":"B"}},{"alerts":[{"code":"null_rate","level":"warn","message":"50.6% null"}],"column":"days_parking_in_effect","extras":{"singletons":2,"top_values":[["YYYYYYY",2094],["BBBBBBB",1449],["Y  Y",771],["Y",495],["Y Y Y",25],["YYYYY",23],["YYYYYY",21],["YYYYYBB",19],["YYYYYYB",8],["Y     Y",7],["YY YY",6],["BYBBYBB",4],["BBBBBYB",3],["YBBYBBB",3],["Y 
Y",3],["BYBBBBB",2],["BBYBBBB",2],["YBBBBBB",2],["BBBYBBB",2],["BYBYBYB",1]]},"kind":"categorical","n":10000,"n_null":5059,"n_unique":21,"null_rate":0.5059,"stats":{"cardinality":21,"entropy":2.024773593150015,"entropy_ratio":0.460980707507487,"top_rate":0.4238008500303582,"top_value":"YYYYYYY"}},{"alerts":[{"code":"null_rate","level":"warn","message":"68.3% null"}],"column":"from_hours_in_effect","extras":{"singletons":5,"top_values":[["ALL",1445],["1130A",303],["0830A",242],["0930A",220],["0900A",161],["0800A",156],["0730A",119],["0700A",116],["1200A",102],["1100A",83],["0300A",61],["0200P",51],["0900P",31],["1000A",17],["1200P",14],["0600A",13],["0400P",8],["1130P",8],["0200A",8],["0100P",5]]},"kind":"categorical","n":10000,"n_null":6830,"n_unique":26,"null_rate":0.683,"stats":{"cardinality":26,"entropy":2.9559521263332664,"entropy_ratio":0.6288671493700748,"top_rate":0.4558359621451104,"top_value":"ALL"}},{"alerts":[{"code":"null_rate","level":"warn","message":"68.3% null"}],"column":"to_hours_in_effect","extras":{"singletons":6,"top_values":[["ALL",1445],["0100P",299],["1100A",219],["1000A",212],["1030A",154],["0800A",118],["0600A",117],["0300A",97],["0930A",95],["0700P",85],["1230P",85],["0830A",45],["0500A",35],["0900A",27],["1200A",23],["0600P",22],["0130P",19],["0400P",15],["0500P",11],["1000P",11]]},"kind":"categorical","n":10000,"n_null":6830,"n_unique":31,"null_rate":0.683,"stats":{"cardinality":31,"entropy":3.0788931967795543,"entropy_ratio":0.6214717794537945,"top_rate":0.4558359621451104,"top_value":"ALL"}},{"alerts":[],"column":"vehicle_color","extras":{"singletons":36,"top_values":[["GY",2079],["BK",1784],["WH",1579],["BL",631],["RD",348],["WHITE",347],["BLK",275],["BLACK",273],["GREY",239],["GRY",167],["GR",148],["BLUE",131],["RED",124],["GRAY",113],["SILVE",99],["WHT",65],["YW",62],["BR",60],["WHI",57],["BLU",46]]},"kind":"categorical","n":10000,"n_null":943,"n_unique":99,"null_rate":0.0943,"stats":{"cardinality":99,"entropy":3.675742078300628,"en
tropy_ratio":0.5544643754971937,"top_rate":0.2295462073534283,"top_value":"GY"}},{"alerts":[{"code":"null_rate","level":"warn","message":"84.9% null"},{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"unregistered_vehicle","extras":{"singletons":0,"top_values":[["0",1513]]},"kind":"categorical","n":10000,"n_null":8487,"n_unique":1,"null_rate":0.8487,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"0"}},{"alerts":[],"column":"vehicle_year","extras":{"singletons":5,"top_values":[["0",2329],["2024",759],["2025",696],["2023",569],["2019",512],["2022",489],["2021",472],["2018",448],["2020",442],["2017",431],["2016",372],["2015",358],["2014",285],["2013",283],["2012",250],["2011",199],["2010",178],["2008",168],["2026",136],["2007",119]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":39,"null_rate":0.0,"stats":{"cardinality":39,"entropy":4.134045666690086,"entropy_ratio":0.7821629263969229,"top_rate":0.2329,"top_value":"0"}},{"alerts":[{"code":"long_tail","level":"info","message":"4 singleton categories"},{"code":"null_rate","level":"warn","message":"84.8% null"},{"code":"imbalance","level":"warn","message":"top value is 99.5% of rows"}],"column":"meter_number","extras":{"singletons":4,"top_values":[["-",1513],["309485",2],["103553",2],["109720",1],["424475",1],["106506",1],["101339",1]]},"kind":"categorical","n":10000,"n_null":8479,"n_unique":7,"null_rate":0.8479,"stats":{"cardinality":7,"entropy":0.06053753446090475,"entropy_ratio":0.021563904864773838,"top_rate":0.9947403024326101,"top_value":"-"}},{"alerts":[{"code":"imbalance","level":"warn","message":"top value is 97.8% of 
rows"}],"column":"feet_from_curb","extras":{"singletons":0,"top_values":[["0",9783],["1",42],["2",40],["3",35],["5",30],["4",18],["6",15],["10",12],["8",12],["7",10],["9",3]]},"kind":"categorical","n":10000,"n_null":0,"n_unique":11,"null_rate":0.0,"stats":{"cardinality":11,"entropy":0.2169315460500221,"entropy_ratio":0.06270727968182054,"top_rate":0.9783,"top_value":"0"}},{"alerts":[{"code":"one_word","level":"warn","message":"99.8% rows are a single word"},{"code":"allcaps","level":"info","message":"78.9% rows are all-caps"},{"code":"null_rate","level":"warn","message":"47.7% null"},{"code":"short_text","level":"info","message":"95th-percentile length under 20 chars"},{"code":"duplicates","level":"warn","message":"55.1% duplicate strings"}],"column":"house_number","extras":{"language_counts":{},"language_sample_size":5000,"length_histogram":{"counts":[626,0,0,0,0,0,479,0,0,0,0,0,0,1797,0,0,0,0,0,0,1299,0,0,0,0,0,777,0,0,0,0,0,0,246,0,0,0,0,0,5],"edges":[1.0,1.15,1.3,1.45,1.6,1.75,1.9,2.05,2.2,2.3499999999999996,2.5,2.65,2.8,2.95,3.1,3.25,3.4,3.55,3.6999999999999997,3.85,4.0,4.15,4.3,4.449999999999999,4.6,4.75,4.9,5.05,5.2,5.35,5.5,5.6499999999999995,5.8,5.95,6.1,6.25,6.3999999999999995,6.55,6.7,6.85,7.0]},"near_unique":false,"sample":["25-94","1615","2250","37-12","71-17","9224","18","227","20","900","W","38","5302","102-06","992","339","3700-06","224","W","316","34-03","3240","2126","519","1020","368","139","234","251-12","4309","279","449","186","1440","6029","440","E","3129","2830","14-16","37-24","N","132","N","W","89-14","1401","364","1027","280"],"top_values":[["N",161],["E",136],["W",122],["S",120],["1",22],["150",21],["180",19],["111",19],["11",18],["60",17],["40",17],["400",16],["30",16],["16",15],["50",15],["101",15],["25",15],["10",15],["885",15],["151",13]],"top_words":[["n",161],["e",137],["w",129],["s",120],["1",22],["150",21],["180",19],["111",19],["11",18],["60",17],["40",17],["25",16],["400",16],["30",16],["16",15],["50",15],["101",15],["10",15],["
885",15],["151",13],["20",13],["136",13],["14",13],["605",13],["15",12]],"vocab_skipped":null,"word_histogram":{"counts":[5217,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12],"edges":[1.0,1.0333333333333334,1.0666666666666667,1.1,1.1333333333333333,1.1666666666666667,1.2,1.2333333333333334,1.2666666666666666,1.3,1.3333333333333333,1.3666666666666667,1.4,1.4333333333333333,1.4666666666666668,1.5,1.5333333333333332,1.5666666666666667,1.6,1.6333333333333333,1.6666666666666665,1.7,1.7333333333333334,1.7666666666666666,1.8,1.8333333333333335,1.8666666666666667,1.9,1.9333333333333333,1.9666666666666668,2.0]}},"kind":"text","n":10000,"n_null":4771,"n_unique":2350,"null_rate":0.4771,"stats":{"allcaps_rate":0.7886785236182826,"boilerplate_rate":0.0,"duplicate_rate":0.5505832855230446,"emoji_rate":0.0,"len_max":7,"len_mean":3.3595333715815645,"len_median":3.0,"len_min":1,"len_p95":5.0,"n_duplicates":2879,"n_empty":0,"one_word_rate":0.9977051061388411,"readability_flesch_mean":116.14400000000003,"url_rate":0.0,"vocab_size":2345,"word_mean":1.0022948938611589,"word_median":1.0}},{"alerts":[{"code":"long_tail","level":"info","message":"187 singleton categories"},{"code":"null_rate","level":"warn","message":"78.5% null"}],"column":"time_first_observed","extras":{"singletons":187,"top_values":[["00000",1926],["0230A",3],["0930P",3],["0505P",3],["1115P",3],["0900P",2],["1150P",2],["0920A",2],["1000A",2],["0235A",2],["0645P",2],["0252A",2],["0110A",2],["0955A",2],["0905P",2],["0456P",2],["1149A",2],["1251A",2],["1030P",2],["0119A",2]]},"kind":"categorical","n":10000,"n_null":7845,"n_unique":207,"null_rate":0.7845,"stats":{"cardinality":207,"entropy":1.2988269017281282,"entropy_ratio":0.16882161611544425,"top_rate":0.8937354988399072,"top_value":"00000"}},{"alerts":[],"column":"violation_description","extras":{"singletons":10,"top_values":[["PHTO SCHOOL ZN SPEED VIOLATION",4416],["No Parking Street Cleaning",1428],["14-No Standing",598],["40-Fire Hydrant",463],["20A-No 
Parking (Non-COM)",131],["19-No Stand (bus stop)",123],["16A-No Std (Com Veh) Non-COM",117],["46A-Double Parking (Non-COM)",106],["Detached Trailer",105],["Fire Hydrant",94],["71A-Insp Sticker Expired (NYS)",77],["70A-Reg. Sticker Expired (NYS)",66],["No Standing",61],["Missing Equipment",56],["50-Crosswalk",53],["74-Missing Display Plate",39],["13-No Stand (taxi stand)",38],["17-No Stand (exc auth veh)",38],["Double Parking",32],["98-Obstructing Driveway",31]]},"kind":"categorical","n":10000,"n_null":1513,"n_unique":74,"null_rate":0.1513,"stats":{"cardinality":74,"entropy":2.807939732895211,"entropy_ratio":0.4522040133899608,"top_rate":0.5203252032520326,"top_value":"PHTO SCHOOL ZN SPEED VIOLATION"}},{"alerts":[{"code":"null_rate","level":"warn","message":"78.7% null"}],"column":"violation_post_code","extras":{"singletons":0,"top_values":[["99",309],["01",207],["311",135],["SPCL",85],["06",71],["10",66],["B",57],["17",56],["U",53],["15",49],["04",48],["05",48],["16",45],["11",42],["A",40],["I",35],["10-P",35],["38",34],["03-A",34],["12",33]]},"kind":"categorical","n":10000,"n_null":7874,"n_unique":53,"null_rate":0.7874,"stats":{"cardinality":53,"entropy":5.089352409063814,"entropy_ratio":0.8885166002976413,"top_rate":0.145343367826905,"top_value":"99"}},{"alerts":[{"code":"null_rate","level":"warn","message":"55.8% null"},{"code":"imbalance","level":"warn","message":"top value is 100.0% of rows"}],"column":"violation_legal_code","extras":{"singletons":0,"top_values":[["T",4416]]},"kind":"categorical","n":10000,"n_null":5584,"n_unique":1,"null_rate":0.5584,"stats":{"cardinality":1,"entropy":-0.0,"entropy_ratio":0.0,"top_rate":1.0,"top_value":"T"}}],"insights":{"errors":[{"message":"Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of concurrent connections has exceeded your rate limit. 
Please try again later or contact sales at https://claude.com/contact-sales to discuss your options for a rate limit increase.'}, 'request_id': 'req_011Cacguy5aoU3RjynzW1sTb'}","type":"ProviderRateLimitError","where":"column:feet_from_curb:anthropic:claude-opus-4-7"}],"insights":[{"confidence":"high","critiques":[],"evidence_keys":["violation_description","issuing_agency","registration_state","violation_county","vehicle_make","vehicle_body_type","vehicle_color","plate_type","law_section","meter_number","unregistered_vehicle","violation_post_code","feet_from_curb","issuer_precinct"],"featured_charts":[{"caption":"Shows how heavily school-zone speed violations dominate the ticket mix versus traditional parking offenses.","column":"violation_description","kind":"bar"},{"caption":"Confirms NY plates make up the bulk, with NJ and PA as the main out-of-state contributors.","column":"registration_state","kind":"bar"},{"caption":"Highlights the most-ticketed manufacturers, led by Honda and Toyota.","column":"vehicle_make","kind":"bar"},{"caption":"Useful for spotting both the borough distribution and the inconsistent county code spellings that need normalizing.","column":"violation_county","kind":"bar"},{"caption":"Reveals the long tail of color codes and duplicate labels (BK/BLK/BLACK) that should be consolidated.","column":"vehicle_color","kind":"bar"}],"model":"anthropic:claude-opus-4-7","narrative":"This is a 10,000-row sample of NYC parking violations with 40 fields covering ticket metadata, vehicle attributes, and location/precinct codes. The violation mix is dominated by one category \u2014 'PHTO SCHOOL ZN SPEED VIOLATION' accounts for 4,416 of the issued tickets (about 52% of non-null descriptions) \u2014 which also drives the issuing_agency skew toward 'V' and matches the 4,416 rows with law_section '1180' (the remaining 5,584 rows are '408'). 
Geographically, registration_state is heavily NY (6,935) with NJ and PA trailing, and violation_county splits across Queens, Manhattan, Brooklyn, and the Bronx but with inconsistent codes (e.g., 'QN' vs 'Qns', 'BK' vs 'Kings'). Watch out for heavy nulls and placeholder zeros: meter_number, unregistered_vehicle, time_first_observed, and violation_post_code are >70% null, while issuer_precinct, feet_from_curb, and street_code* are dominated by '0' sentinel values. Vehicle_color also has unnormalized variants ('BK'/'BLK'/'BLACK', 'GY'/'GREY'/'GRY') that will need cleanup before any analysis.","scope":"dataset","target":"__global__"},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_min","stats.len_max","stats.len_mean","stats.word_mean","stats.duplicate_rate","stats.one_word_rate","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is a unique 10-digit numeric identifier per row (likely a parking/traffic summons number), with all 10,000 values distinct and uniformly 10 characters long. There are no nulls, no duplicates, and every value is a single token, consistent with a primary key rather than a feature. The allcaps flag is a quirk of the detector treating digit-only strings as uppercase.","role":"identifier","scope":"column","target":"summons_number","treatment":"Drop from modelling; retain as a join key."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.allcaps_rate","stats.one_word_rate","stats.len_min","stats.len_max","stats.len_mean","stats.duplicate_rate","stats.n_duplicates","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This is almost certainly a license/plate identifier: 9,519 unique values across 10,000 rows, all single-token, 99.99% uppercase, with lengths between 2 and 10 characters (mean 6.88). 
Notably, 'blankplate' appears 34 times and the duplicate rate is 4.81% (481 records), suggesting placeholder values and a small amount of legitimate plate reuse. No nulls or empties, but the placeholder token will pollute any join or uniqueness assumption.","role":"identifier","scope":"column","target":"plate_id","treatment":"Treat as an identifier key; normalize case, map 'BLANKPLATE' to null, then left-join on this id."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","stats.cardinality","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Two-letter US state codes identifying where a vehicle is registered, dominated by NY at 69.35% of 10,000 rows with NJ, PA, CT, and FL trailing \u2014 consistent with an NYC-area enforcement dataset. Cardinality is 50 with entropy ratio 0.36, and one suspicious non-state token \"99\" appears 88 times, likely a sentinel for unknown/out-of-country plates.","role":"feature","scope":"column","target":"registration_state","treatment":"One-hot encode top states, bucket the long tail into 'OTHER', and recode '99' as missing."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","stats.cardinality","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column encodes vehicle plate type codes (e.g., PAS, OMT, COM), almost certainly from a parking or traffic dataset. The distribution is heavily dominated by passenger plates, with PAS accounting for 9072 of 10000 rows (top_rate 0.9072) and entropy_ratio of just 0.146 across 27 categories. 
A small but notable 77 rows carry the placeholder code '999', which likely represents missing or unknown plate types despite null_rate being 0.","role":"feature","scope":"column","target":"plate_type","treatment":"Collapse rare codes into 'other' and treat '999' as missing before one-hot encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This is an issue_date column stored as ISO-8601 timestamps but profiled as categorical, with 687 distinct dates across 10,000 rows and no nulls. The distribution is severely concentrated: 65.42% of rows fall on 2025-12-28, with 2025-12-30 (1,594) and 2025-12-29 (356) accounting for most of the rest, leaving a long tail of 2026 dates with single- or low-double-digit counts. The clustering at end-of-2025 suggests a bulk-issue or backfill event rather than organic daily issuance.","role":"timestamp","scope":"column","target":"issue_date","treatment":"Parse as datetime and derive features (day, month, days-since-epoch); investigate the 2025-12-28 spike before using as a model feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical violation_code with 62 distinct codes stored as strings and zero nulls across 10,000 rows. Distribution is heavily concentrated: code '36' alone accounts for 44.16% of records, followed by '21' at 14.63%, giving an entropy ratio of 0.52 \u2014 roughly half the maximum for this cardinality. 
The top 10 codes cover the bulk of traffic, leaving a long tail of rare codes.","role":"feature","scope":"column","target":"violation_code","treatment":"Group rare codes into an 'other' bucket, then one-hot or target-encode before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Column holds short vehicle body-type codes (SUBN, 4DSD, SDN, VAN, PICK...) typical of DMV/parking-violation feeds. Distribution is heavily concentrated: SUBN alone covers 51.9% of rows and the top two codes account for roughly 72%, yet there are 81 distinct codes producing a long tail. Entropy ratio of 0.41 confirms the lopsided spread, and nulls are minor at 1.31%.","role":"feature","scope":"column","target":"vehicle_body_type","treatment":"Group rare codes into an 'other' bucket and one-hot encode the top categories."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical vehicle make field with 126 distinct values across 10,000 rows and a low 0.78% null rate. Values appear truncated to 5 characters (TOYOT, NISSA, ME/BE, CHEVR, HYUND, SUBAR), which will collide brands and complicate joins. 
HONDA leads at 13.4% followed closely by TOYOT at 13.0%, and entropy ratio of 0.67 indicates a long tail beyond the top 10.","role":"feature","scope":"column","target":"vehicle_make","treatment":"Normalize truncated codes to canonical make names, then group rare levels before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Single-letter codes for the agency that issued each record, with 20 distinct values and no nulls across 10,000 rows. Distribution is heavily concentrated: 'V' alone accounts for 44.16% and the top four codes (V, T, S, P) cover the vast majority, while codes like M, O fall to single or low double digits. Entropy ratio of 0.46 confirms the imbalance.","role":"feature","scope":"column","target":"issuing_agency","treatment":"One-hot encode the top categories and bucket the long tail into 'other'."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_max","stats.one_word_rate","stats.duplicate_rate","stats.n_duplicates","stats.vocab_size","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is `street_code1`, a short numeric street identifier stored as text (len_max 5, one_word_rate 1.0, 1420 distinct codes across 10000 rows). The dominant surprise is that 4817 rows \u2014 nearly half \u2014 carry the placeholder value \"0\", driving the 0.858 duplicate_rate; the next most frequent code (\"13610\") only appears 52 times. 
No nulls, but the \"0\" sentinel effectively functions as a missing/unknown marker.","role":"foreign_key","scope":"column","target":"street_code1","treatment":"Treat as a categorical code, recode \"0\" to missing, then left-join to a street reference table."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_max","stats.len_mean","stats.one_word_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This looks like a secondary street code stored as text, with 1349 unique short tokens (max length 5, mean 2.94 chars) and every value a single word. The column is dominated by '0', which accounts for 5005 of 10000 rows, with another 359 rows holding '40404'; combined with an 86.51% duplicate rate this leaves very little discriminating signal. No nulls, but the heavy '0' mass likely encodes a missing/sentinel state rather than a real code.","role":"feature","scope":"column","target":"street_code2","treatment":"Treat '0' as a missing sentinel and use as a low-cardinality categorical, or drop given the extreme mode dominance."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","stats.len_max","stats.len_mean","stats.one_word_rate","stats.duplicate_rate","stats.allcaps_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as text but the values are short numeric codes (len_max 5, one_word_rate 1.0, 1317 unique tokens), consistent with a street or address code lookup. The distribution is dominated by '0' (5235/10000) with '40404' a distant second at 359, driving an 86.83% duplicate_rate. 
The allcaps_rate of 0.4753 is an artifact of digit-only strings being counted as uppercase.","role":"feature","scope":"column","target":"street_code3","treatment":"Treat as a categorical code: keep as string, collapse rare levels, and consider a binary flag for the dominant '0' value."},{"confidence":"high","critiques":[],"evidence_keys":["stats.len_min","stats.len_max","stats.len_mean","stats.one_word_rate","stats.duplicate_rate","stats.n_duplicates","n_unique","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as text but functionally a YYYYMMDD vehicle expiration date, with lengths clustered at 8-10 characters and one token per row. Over half the rows (5,492) are the sentinel '0.00000000' and another 556 are '88888888', plus 126 entries like '20258888' that mix a real year with a placeholder month/day \u2014 so roughly 61% of values are not real dates. Genuine dates such as 20260930 and 20260228 appear only in the dozens, and the column is 89.6% duplicates across just 1,040 unique values.","role":"timestamp","scope":"column","target":"vehicle_expiration_date","treatment":"Parse as YYYYMMDD after mapping '0.00000000', '88888888', and any *8888 tails to null."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be a violation_location code, likely a precinct or zone identifier stored as a zero-padded string (e.g., '0018', '0047') mixed with non-padded variants ('115', '114'), suggesting inconsistent formatting across sources. Cardinality is 87 with very high entropy ratio (0.929), so values are spread evenly with no dominant location \u2014 the top code only accounts for 5.1% of rows. 
Most striking: 44.85% of rows are null, which is flagged as an alert and severely limits usability.","role":"feature","scope":"column","target":"violation_location","treatment":"Normalize the zero-padding inconsistency and treat nulls as a separate category before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","top_value","top_rate","top_values","entropy_ratio","null_rate"],"model":"anthropic:claude-opus-4-7","narrative":"This column encodes the NYPD precinct where the violation occurred, stored as a string with 88 distinct codes. The dominant surprise is that 44.85% of rows carry the value '0', which is almost certainly a sentinel for missing/non-applicable precinct rather than a real precinct number; legitimate precincts like 18, 115, and 114 follow far behind. Entropy ratio of 0.66 reflects this heavy concentration on the sentinel.","role":"feature","scope":"column","target":"violation_precinct","treatment":"Recode '0' as missing before using as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is the precinct number of the issuing officer, with 121 distinct codes across 10,000 rows and no nulls. The distribution is dominated by the value \"0\" at 64.18% of rows, which is almost certainly a sentinel/placeholder rather than a real precinct, leaving genuine precincts (e.g., 18, 115, 103) as long-tail minorities. 
Entropy ratio of 0.446 confirms the heavy concentration on that single code.","role":"feature","scope":"column","target":"issuer_precinct","treatment":"Treat \"0\" as missing/sentinel and group rare precincts before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.duplicate_rate","stats.len_max","stats.len_median","stats.len_p95","stats.one_word_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical issuer identifier stored as text, with 1,420 distinct codes across 10,000 rows and an 85.8% duplicate rate. The dominant value '0' accounts for 5,314 rows (over half the column), suggesting it is a sentinel or 'unknown issuer' placeholder rather than a real code. Remaining values look like 6-digit numeric IDs (len_max 6, len_p95 6), but the median length of 1 confirms how heavily the '0' bucket skews the distribution.","role":"foreign_key","scope":"column","target":"issuer_code","treatment":"Treat '0' as missing and join the remaining codes to an issuer reference table."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short alphanumeric codes (e.g., T302, T401, EPIU, MTTF) that look like issuer-side command or instruction tokens, with 228 distinct values across 10,000 rows. It is sparsely populated \u2014 44.16% null \u2014 yet entropy_ratio of 0.83 over the non-null portion shows the codes spread fairly evenly, with the most common value T302 covering only 6.7%. 
No single code dominates, so the missingness is the more striking signal than concentration.","role":"feature","scope":"column","target":"issuer_command","treatment":"Treat as a categorical feature with an explicit 'missing' level, then target- or frequency-encode before modelling."},{"confidence":"medium","critiques":[],"evidence_keys":["n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"A categorical code labelled issuer_squad with 23 distinct values, dominated by the placeholder-looking '0000' which accounts for 41.6% of non-nulls (1513 rows) while the rest are single-letter tags like F, N, A, L. Most striking: 63.6% of rows are null, and entropy_ratio of 0.73 suggests the remaining values are reasonably spread but anchored by that '0000' bucket which may represent unassigned issuers. The mix of a numeric-looking code with letter codes hints at inconsistent encoding conventions.","role":"feature","scope":"column","target":"issuer_squad","treatment":"Treat '0000' as a missing sentinel, impute or bucket rare letters, then one-hot encode."},{"confidence":"high","critiques":[],"evidence_keys":["len_min","len_max","len_mean","allcaps_rate","one_word_rate","n_unique","duplicate_rate","null_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column encodes the time of a violation as a fixed 5-character token like '0839A' or '1200P' (HHMM plus A/P meridiem indicator), with every value uppercase and exactly one word. Across 10,000 rows there are only 1,432 distinct values and an 85.7% duplicate rate, which is expected for clock times rather than a data quality issue. 
Null rate is negligible (0.0004) and the format is strikingly uniform \u2014 len_min, len_median, and len_max all equal 5.","role":"timestamp","scope":"column","target":"violation_time","treatment":"Parse the HHMM+A/P token into a proper time-of-day feature (e.g., minutes since midnight) before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a categorical county/borough code for NYC violations, with 13 distinct values across 10,000 rows and a 2.71% null rate. The top value 'QN' (Queens) covers 18.2% of rows, but the codes are inconsistent: Queens appears as both 'QN' and 'Q' (and 'Qns'), Brooklyn as 'BK', 'K', and 'Kings', and Bronx as 'BX' and 'Bronx'\u2014so the true cardinality is lower than 13. Entropy ratio of 0.897 reflects this fragmentation rather than genuine diversity.","role":"feature","scope":"column","target":"violation_county","treatment":"Normalize aliases (QN/Q/Qns\u2192Queens, BK/K/Kings\u2192Brooklyn, BX/Bronx\u2192Bronx) before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","top_values","stats.top_value","stats.top_rate","stats.entropy_ratio"],"model":"anthropic:claude-opus-4-7","narrative":"A categorical position code indicating where a violation occurred relative to a reference point \u2014 almost certainly 'F' (front), 'O' (opposite), 'I' (inside?), and a single 'R'. Nearly half the rows (46.85%) are null, and among the populated rows 'F' dominates at 66.85%. 
The 'R' code appears just once out of 10000, suggesting either a data-entry error or a rare legitimate category worth investigating.","role":"feature","scope":"column","target":"violation_in_front_of_or_opposite","treatment":"Impute or add an explicit 'missing' level, collapse the singleton 'R', then one-hot encode."},{"confidence":"high","critiques":[],"evidence_keys":["top_values","top_words","stats.allcaps_rate","stats.duplicate_rate","n_unique","stats.len_max","language_counts","alerts"],"model":"anthropic:claude-opus-4-7","narrative":"Street/intersection labels, mostly directional roadway descriptors like 'SB CROSS BAY BLVD @' with cardinal prefixes (sb/wb/nb/eb) and '@' separators indicating cross-streets \u2014 consistent with NYC traffic or transit data. Values are heavily duplicated (6881 of 10000, 3115 unique) and 77% all-caps, with strings capped at 20 chars suggesting upstream truncation. Language detection flags Japanese (614) and Chinese (59), but these are almost certainly false positives from short uppercase abbreviations rather than true multilingual content.","role":"feature","scope":"column","target":"street_name","treatment":"Normalize case and parse into direction/street/cross-street components before using as a categorical feature."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.allcaps_rate","stats.duplicate_rate","stats.len_mean","stats.word_mean","stats.vocab_size","top_values","top_words"],"model":"anthropic:claude-opus-4-7","narrative":"This column holds short uppercase street-name fragments (mean length 9.3 chars, ~2.4 words) describing an intersecting street, likely paired with a primary address elsewhere. Nearly half the rows are null (48.1%) and 72.8% of the non-null values are duplicates, with common tokens like 'ST' (189), 'AVE' (84), and 'RD' dominating \u2014 many top values are bare suffixes ('ST', 'AVE', 'T', 'E') suggesting truncation or partial captures. 
Vocabulary is small (1,053 words across 1,413 unique values) and 80.9% of entries are all-caps, consistent with a municipal data-entry convention.","role":"metadata","scope":"column","target":"intersecting_street","treatment":"Normalize case and standardize street-suffix abbreviations, then use as a categorical join key with the primary street column."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Stored as a categorical YYYYMMDD string capturing the date a record was first observed, with 147 distinct values across 10,000 rows. The column is overwhelmingly dominated by the sentinel '0' (98.27%), leaving only ~173 rows with real dates clustered in late 2025 through 2026. Entropy ratio of 0.035 confirms almost no information content as-is.","role":"metadata","scope":"column","target":"date_first_observed","treatment":"Replace '0' with null, parse remainder as date, and consider dropping unless the rare observed dates matter."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.cardinality","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a binary categorical field encoding a law/statute section, taking only two codes ('408' and '1180') across all 10,000 rows with no nulls. The split is fairly balanced at 55.84%/44.16%, yielding near-maximal entropy (0.99 of the possible 1.0). 
Despite the numeric-looking values, with only 2 distinct levels it behaves as a flag rather than a continuous code.","role":"feature","scope":"column","target":"law_section","treatment":"One-hot or treat as a binary indicator before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Categorical sub-division code with 76 distinct values across 10,000 rows and almost no nulls (0.02%). The distribution is heavily concentrated: 'B' alone covers 44.4% of rows and the top two values ('B' and 'd1') together exceed 58%, giving an entropy ratio of just 0.52. The mix of single-letter codes ('B','C','D') with letter-digit codes ('d1','E2','C3') \u2014 including a lowercase 'd1' alongside uppercase letters \u2014 suggests inconsistent coding conventions worth normalising.","role":"feature","scope":"column","target":"sub_division","treatment":"Normalise case and group rare levels before one-hot or target encoding."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a 7-character day-of-week mask indicating which days parking rules are in effect, with each position likely corresponding to Sun-Sat (Y=in effect, B=likely a holiday/exempt marker, space=off). Over half the rows are null (50.59%) and among the populated values 'YYYYYYY' (every day) dominates at 42.38% followed by 'BBBBBBB' at 1449 occurrences, giving low entropy ratio of 0.46 across 21 distinct patterns. 
The mix of Y, B, and spaces in the same field is unusual and suggests an encoded multi-flag rather than a clean categorical.","role":"feature","scope":"column","target":"days_parking_in_effect","treatment":"Split into seven per-day indicator columns and add a missingness flag before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Start time of a parking/traffic regulation's effective window, encoded as a clock string like '0830A' with a sentinel 'ALL' meaning 24-hour applicability. The column is null 68.3% of the time, and among the 3,170 populated rows 'ALL' alone accounts for 45.6% (1,445), so actual start times are a minority signal. Cardinality is just 26 with entropy ratio 0.63, and the populated values cluster heavily in morning hours (0700A\u20131130A).","role":"feature","scope":"column","target":"from_hours_in_effect","treatment":"Parse to minutes-since-midnight with a separate 'ALL'/missing flag before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be the end-time of a parking or traffic regulation's effective window, encoded as a clock string (e.g., '0100P', '1100A') with 'ALL' meaning the rule applies all day. Two-thirds of rows are null (null_rate 0.683), and among the 3,170 populated values 'ALL' dominates at 45.6%, leaving the 30 actual time codes thinly spread (entropy_ratio 0.62). 
The mix of sentinel ('ALL') and time-of-day strings in the same field is the main gotcha.","role":"feature","scope":"column","target":"to_hours_in_effect","treatment":"Split into a boolean 'all_day' flag and a parsed time-of-day, and impute or mask the 68% nulls before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This is a vehicle color code field, almost certainly from a parking or traffic citation feed. The encoding is inconsistent: short codes (GY, BK, WH) dominate but overlapping long forms (WHITE, BLACK, GREY) and alternate abbreviations (BLK, GRY) appear as separate categories, inflating cardinality to 99 across only 10000 rows. About 9.4% of values are null and the top code GY covers 22.95% of rows.","role":"feature","scope":"column","target":"vehicle_color","treatment":"Normalize synonyms (e.g. map BK/BLK/BLACK to one code) before encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column appears to be a binary flag for whether a vehicle was unregistered, but it carries no information: 84.87% of rows are null and the remaining 1513 rows all hold the single value \"0\". Cardinality is 1 and entropy is 0, so the field is effectively constant where populated.","role":"feature","scope":"column","target":"unregistered_vehicle","treatment":"Drop; constant value with majority nulls offers no modelling signal."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Vehicle model year stored as a categorical string with 39 distinct values across 10000 rows and no nulls. 
The dominant value is \"0\" at 23.29% of rows, which almost certainly encodes missing/unknown rather than a real year; legitimate years span recent ones with 2024 (759) and 2025 (696) leading. Entropy ratio of 0.78 reflects a fairly even spread across the remaining year codes.","role":"feature","scope":"column","target":"vehicle_year","treatment":"Recode \"0\" as missing, cast to integer year, and optionally bucket into age bands before modelling."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","stats.cardinality","stats.top_rate","stats.top_value","stats.entropy_ratio","top_values","n"],"model":"anthropic:claude-opus-4-7","narrative":"Likely a utility meter identifier, but it is effectively empty: 84.79% of rows are null and 99.47% of the non-null values are the placeholder \"-\". Only 7 distinct values exist across 10,000 rows, with the genuine-looking numeric IDs (e.g. 309485, 103553) appearing at most twice. Entropy ratio of 0.022 confirms there is almost no information here.","role":"identifier","scope":"column","target":"meter_number","treatment":"Drop; column is near-constant placeholder with overwhelming nulls."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.len_mean","stats.len_max","stats.word_mean","stats.duplicate_rate","stats.one_word_rate","stats.allcaps_rate","stats.vocab_size","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Almost certainly a street address house-number fragment, with values that are short single tokens (word_mean 1.00, len_mean 3.36, max length 7) and a vocabulary of 2,345 over 10,000 rows. Surprisingly, the most frequent values are cardinal-direction letters N/E/W/S (161/136/122/120) rather than digits \u2014 these likely belong in a separate directional prefix field. 
Nearly half the column is null (47.7%) and 55.1% of non-nulls duplicate, so it's sparse and low-cardinality.","role":"feature","scope":"column","target":"house_number","treatment":"Split into numeric house-number and directional-prefix fields, then impute or flag the 47.7% nulls before joining to address data."},{"confidence":"high","critiques":[],"evidence_keys":["null_rate","n_unique","stats.top_value","stats.top_rate","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"Looks like a time-of-first-observation field encoded as HHMMx where x is A/P (e.g., '0230A', '0930P'). It is 78.45% null and, among the 2,155 non-null rows, the sentinel '00000' dominates at 1,926 occurrences (top_rate 0.8937), leaving only ~229 rows with real timestamps spread across 206 distinct values. Entropy ratio of 0.169 confirms almost no information content once nulls and the placeholder are removed.","role":"timestamp","scope":"column","target":"time_first_observed","treatment":"Treat '00000' as null, then drop or collapse to a binary 'has_observed_time' flag \u2014 too sparse to use as a real timestamp."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.top_value","stats.top_rate","stats.cardinality","stats.entropy_ratio","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column records the parking/traffic violation type as a short text code, with 74 distinct values across 10,000 rows. It's heavily concentrated: 'PHTO SCHOOL ZN SPEED VIOLATION' alone covers 52% of non-null rows, and 15.13% of values are null. 
The label formatting is inconsistent \u2014 some entries are numbered codes like '14-No Standing' while others are free-form like 'Fire Hydrant' or 'Detached Trailer', suggesting multiple source systems or schema versions merged together.","role":"feature","scope":"column","target":"violation_description","treatment":"Normalise label formatting (strip numeric prefixes) and group rare categories before one-hot encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n","n_unique","null_rate","stats.entropy_ratio","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This appears to be a violation post code categorical field, likely a sub-classifier within a citation or inspection record. It has 53 unique codes with high entropy (ratio 0.89), and the top code \"99\" only covers 14.5% of non-null rows. Two notable surprises: 78.74% of rows are null, and the code values are heterogeneous in format\u2014numeric (\"99\", \"01\", \"311\"), single letters (\"B\", \"U\"), and tokens like \"SPCL\"\u2014suggesting multiple coding schemes coexist.","role":"feature","scope":"column","target":"violation_post_code","treatment":"Treat missingness as its own category and standardize/group the mixed-format codes before one-hot encoding."},{"confidence":"high","critiques":[],"evidence_keys":["n_unique","null_rate","stats.cardinality","stats.entropy","stats.top_rate","stats.top_value","top_values"],"model":"anthropic:claude-opus-4-7","narrative":"This column appears to be a violation legal code indicator, but it carries no information: every one of the 4,416 non-null rows contains the single value \"T\" (top_rate 1.0, cardinality 1, entropy 0.0). On top of that, 55.84% of rows are null. 
There is nothing here to discriminate records.","role":"feature","scope":"column","target":"violation_legal_code","treatment":"Drop; constant column with majority nulls offers no signal."}],"providers":["anthropic:claude-opus-4-7"],"total_usage":{"completion_tokens":11969,"prompt_tokens":52281,"total_tokens":64250}},"language_counts":{"ar":14,"ca":14,"cs":10,"da":1,"de":56,"en":3596,"es":86,"eu":5,"fr":39,"gl":1,"id":4,"it":21,"ja":614,"ko":14,"lt":1,"mk":1,"ms":1,"nl":25,"no":9,"pl":3,"pt":15,"ro":1,"ru":12,"sv":1,"te":1,"uk":8,"zh":59},"meta":{"generated_at":"2026-05-01T23:03:34+00:00","mode":"full","row_count":10000,"sampled_rows":10000,"seed":42,"source":"/home/coolhand/html/datavis/data_trove/cache/parking/parking_violations_sample_20260119.json"},"notes":[],"saturn_version":"0.2.0","schema":{"date_first_observed":"categorical","days_parking_in_effect":"categorical","feet_from_curb":"categorical","from_hours_in_effect":"categorical","house_number":"text","intersecting_street":"text","issue_date":"categorical","issuer_code":"text","issuer_command":"categorical","issuer_precinct":"categorical","issuer_squad":"categorical","issuing_agency":"categorical","law_section":"categorical","meter_number":"categorical","plate_id":"text","plate_type":"categorical","registration_state":"categorical","street_code1":"text","street_code2":"text","street_code3":"text","street_name":"text","sub_division":"categorical","summons_number":"text","time_first_observed":"categorical","to_hours_in_effect":"categorical","unregistered_vehicle":"categorical","vehicle_body_type":"categorical","vehicle_color":"categorical","vehicle_expiration_date":"text","vehicle_make":"categorical","vehicle_year":"categorical","violation_code":"categorical","violation_county":"categorical","violation_description":"categorical","violation_in_front_of_or_opposite":"categorical","violation_legal_code":"categorical","violation_location":"categorical","violation_post_code":"categorical","violation_precinct":"cate
gorical","violation_time":"text"}}
