#!/usr/bin/env python3
"""
Dow Jones 30 Board Data Fetcher and Verifier
Checks SEC Edgar for latest proxy statements and generates verification report
"""

import json
import os
import re
import time
from datetime import datetime
from typing import List, Dict, Any, Optional
import requests
from pathlib import Path

class DowBoardDataFetcher:
    """Fetches board data verification info for Dow Jones 30 companies.

    The fetcher resolves each ticker in ``company_info`` to an SEC CIK,
    looks up the first DEF 14A entry in that company's recent filings and
    assembles a Markdown verification report. Progress is printed to stdout
    while the report is built.
    """

    # Pause (seconds) after each per-company submissions request (rate limiting).
    REQUEST_DELAY_SECONDS = 0.15

    # Captures a person's name and the raw text of the "roles" array that
    # follows it. The array capture is non-greedy, so it ends at the first "]".
    _PERSON_PATTERN = re.compile(
        r'"name":\s*"([^"]+)".*?"roles":\s*\[(.*?)\]', re.DOTALL
    )
    _COMPANY_PATTERN = re.compile(r'"company":\s*"([^"]+)"')

    # Static closing sections of the report (identical for every run).
    _GUIDANCE_LINES = (
        "\n\n## Recommendations\n",
        "1. **Review Recent Proxies**: Check DEF 14A filings dated after March 2025",
        "2. **Update Board Members**: Look for 'Proposal 1' (Director Elections) in each proxy",
        "3. **Verify Executive Officers**: Section typically titled 'Executive Officers'",
        "4. **Check for Resignations**: Review '8-K' filings for interim board changes",
        "5. **Update Interlocks**: Cross-reference board members serving on multiple Dow 30 boards\n",
        "\n## How to Use This Report\n",
        "For each company with a recent proxy (filed after March 2025):",
        "1. Click the SEC Edgar link",
        "2. Open the most recent DEF 14A document",
        "3. Look for 'Election of Directors' section",
        "4. Compare listed directors with data in `corporateData.js`",
        "5. Note any additions, departures, or title changes\n",
    )

    def __init__(self, cache_dir: str = "./cache"):
        """Create the fetcher and make sure ``cache_dir`` exists.

        Args:
            cache_dir: Directory to create for cache files. Missing parent
                directories are created as well.
        """
        self.cache_dir = Path(cache_dir)
        # parents=True so a nested cache path does not fail on first use.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # SEC Edgar API base
        self.sec_base = "https://data.sec.gov"

        # User agent required by SEC Edgar
        self.headers = {
            'User-Agent': 'Luke Steuber lukesteuber.com (contact@lukesteuber.com)',
            'Accept-Encoding': 'gzip, deflate'
        }

        # Cache for CIK lookups, keyed by the ticker string as passed in
        self.cik_cache = {}
        # None until the first load attempt; afterwards the loaded mapping,
        # or an empty dict if the load failed.
        self.ticker_data = None

        # Dow 30 company info
        self.company_info = {
            'MMM': {'name': '3M', 'sector': 'Industrials'},
            'AXP': {'name': 'American Express', 'sector': 'Financials'},
            'AMGN': {'name': 'Amgen', 'sector': 'Health Care'},
            'AAPL': {'name': 'Apple', 'sector': 'Information Technology'},
            'BA': {'name': 'Boeing', 'sector': 'Industrials'},
            'CAT': {'name': 'Caterpillar', 'sector': 'Industrials'},
            'CVX': {'name': 'Chevron', 'sector': 'Energy'},
            'CSCO': {'name': 'Cisco Systems', 'sector': 'Information Technology'},
            'KO': {'name': 'Coca-Cola', 'sector': 'Consumer Staples'},
            'SHW': {'name': 'Sherwin-Williams', 'sector': 'Materials'},
            'GS': {'name': 'Goldman Sachs', 'sector': 'Financials'},
            'HD': {'name': 'Home Depot', 'sector': 'Consumer Discretionary'},
            'HON': {'name': 'Honeywell International', 'sector': 'Industrials'},
            'IBM': {'name': 'IBM', 'sector': 'Information Technology'},
            'NVDA': {'name': 'Nvidia', 'sector': 'Information Technology'},
            'JNJ': {'name': 'Johnson & Johnson', 'sector': 'Health Care'},
            'JPM': {'name': 'JPMorgan Chase', 'sector': 'Financials'},
            'MCD': {'name': "McDonald's", 'sector': 'Consumer Discretionary'},
            'MRK': {'name': 'Merck & Co.', 'sector': 'Health Care'},
            'MSFT': {'name': 'Microsoft', 'sector': 'Information Technology'},
            'NKE': {'name': 'Nike', 'sector': 'Consumer Discretionary'},
            'PG': {'name': 'Procter & Gamble', 'sector': 'Consumer Staples'},
            'TRV': {'name': 'Travelers Companies', 'sector': 'Financials'},
            'UNH': {'name': 'UnitedHealth Group', 'sector': 'Health Care'},
            'VZ': {'name': 'Verizon Communications', 'sector': 'Communication Services'},
            'V': {'name': 'Visa', 'sector': 'Financials'},
            'WMT': {'name': 'Walmart', 'sector': 'Consumer Staples'},
            'AMZN': {'name': 'Amazon', 'sector': 'Consumer Discretionary'},
            'DIS': {'name': 'Walt Disney', 'sector': 'Communication Services'},
            'CRM': {'name': 'Salesforce', 'sector': 'Information Technology'}
        }

    def load_ticker_data(self) -> Dict[str, Any]:
        """Load SEC ticker data (cached).

        Returns:
            The parsed ``company_tickers.json`` payload, or an empty dict if
            the download or JSON decoding failed. The outcome of the first
            attempt is remembered in ``self.ticker_data``, including a
            failure, so a full report run does not repeat a failing request
            once per company. Reset ``self.ticker_data`` to ``None`` to force
            a retry.
        """
        if self.ticker_data is not None:
            return self.ticker_data

        print("Loading SEC ticker data...")
        try:
            response = requests.get(
                "https://www.sec.gov/files/company_tickers.json",
                headers=self.headers,
                timeout=15
            )
            response.raise_for_status()
            ticker_data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body.
            print(f"  Error loading ticker data: {e}")
            self.ticker_data = {}
            return self.ticker_data

        self.ticker_data = ticker_data
        print(f"  Loaded {len(self.ticker_data)} companies from SEC")
        return self.ticker_data

    def get_company_cik(self, ticker: str) -> Optional[Dict[str, Any]]:
        """Get CIK and company info for a ticker.

        Args:
            ticker: Ticker symbol; matched case-insensitively.

        Returns:
            A dict with ``cik`` (zero-padded to 10 characters), ``name`` and
            ``ticker`` (as passed in), or ``None`` when the ticker data is
            unavailable or contains no matching entry. Successful lookups
            are stored in ``self.cik_cache``.
        """
        if ticker in self.cik_cache:
            return self.cik_cache[ticker]

        ticker_data = self.load_ticker_data()
        if not ticker_data:
            return None

        wanted = ticker.upper()
        for item in ticker_data.values():
            if (item.get('ticker') or '').upper() == wanted:
                result = {
                    'cik': str(item['cik_str']).zfill(10),
                    'name': item['title'],
                    'ticker': ticker
                }
                self.cik_cache[ticker] = result
                return result

        return None

    def get_latest_proxy_date(self, cik: str) -> Optional[Dict[str, str]]:
        """Get latest DEF 14A filing date for a CIK.

        Args:
            cik: Company CIK. It is zero-padded to 10 characters before use,
                so an unpadded value is accepted too.

        Returns:
            A dict with ``filing_date``, ``accession_number`` and a browse
            ``url`` for the first DEF 14A entry in the company's recent
            filings, or ``None`` if the request fails or no such entry
            exists.
        """
        cik = str(cik).zfill(10)
        url = f"{self.sec_base}/submissions/CIK{cik}.json"
        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"    Error: {e}")
            return None

        if not isinstance(data, dict):
            print("    Error: unexpected response format")
            return None

        recent = (data.get('filings') or {}).get('recent') or {}
        forms = recent.get('form') or []
        dates = recent.get('filingDate') or []
        accession_nums = recent.get('accessionNumber') or []

        # The first match is treated as the most recent filing.
        # NOTE(review): assumes the "recent" arrays are ordered newest first —
        # confirm against the EDGAR API documentation.
        for form, filing_date, accession in zip(forms, dates, accession_nums):
            if form == 'DEF 14A':
                return {
                    'filing_date': filing_date,
                    'accession_number': accession,
                    'url': f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type=DEF%2014A&dateb=&owner=exclude&count=10"
                }

        return None

    def load_current_data(self, data_path: str = 'corporateData.js') -> Dict[str, Any]:
        """Load current data from a ``corporateData.js``-style file.

        The file is scanned with regular expressions rather than parsed, so
        only the ``"name"`` / ``"roles"`` / ``"company"`` keys matter.

        Args:
            data_path: Path of the data file to scan.

        Returns:
            ``{'persons': [...], 'multi_role_count': n}`` where ``persons``
            lists every person whose roles text mentions ``"company"`` at
            least twice, each with ``name``, ``companies`` and ``count``.
            If the file cannot be read, the error is printed and an empty
            result is returned.
        """
        try:
            with open(data_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"Error loading data: {e}")
            return {'persons': [], 'multi_role_count': 0}

        persons = []
        for person_match in self._PERSON_PATTERN.finditer(content):
            roles_text = person_match.group(2)
            # NOTE(review): this counts role entries, so two roles at the
            # same company also qualify — confirm that is intended.
            role_count = roles_text.count('"company"')
            if role_count >= 2:
                persons.append({
                    'name': person_match.group(1),
                    'companies': self._COMPANY_PATTERN.findall(roles_text),
                    'count': role_count
                })

        return {
            'persons': persons,
            'multi_role_count': len(persons)
        }

    def _summary_lines(self, current_data: Dict[str, Any]) -> List[str]:
        """Build the report header and the current-data summary section."""
        lines = [
            "# Dow Jones 30 Board Data Verification Report\n",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            "**Data Source:** SEC Edgar EDGAR Database\n\n",
            "## Current Data Summary\n",
            f"- Companies tracked: {len(self.company_info)} (Dow Jones Industrial Average)",
            f"- People with board interlocks: {current_data['multi_role_count']}",
            # Fixed estimate; not derived from the loaded data.
            "- Total individuals tracked: ~54\n",
        ]

        if current_data['multi_role_count'] > 0:
            lines.append("\n### Top Connectors (Most Board Seats)\n")
            sorted_persons = sorted(
                current_data['persons'], key=lambda x: x['count'], reverse=True
            )
            for person in sorted_persons[:10]:
                lines.append(f"- **{person['name']}**: {person['count']} connections")
                lines.append(f"  - Companies: {', '.join(person['companies'])}")

        return lines

    def _company_lines(self, ticker: str) -> List[str]:
        """Build the report lines for one company, printing progress as it goes."""
        info = self.company_info[ticker]
        print(f"\n{ticker} - {info['name']}")
        lines = [f"\n### {info['name']} ({ticker})"]

        company_data = self.get_company_cik(ticker)
        if not company_data:
            lines.append("- ⚠️ CIK not found")
            print("  ⚠️ CIK not found")
            return lines

        print(f"  CIK: {company_data['cik']}")
        lines.append(f"- **CIK:** {company_data['cik']}")

        proxy_info = self.get_latest_proxy_date(company_data['cik'])
        if proxy_info:
            print(f"  Latest proxy: {proxy_info['filing_date']}")
            lines.append(f"- **Latest DEF 14A Filing:** {proxy_info['filing_date']}")
            lines.append(f"- **Review Link:** [SEC Edgar]({proxy_info['url']})")
            lines.append("- **Status:** ✅ Available for review")
        else:
            print("  No recent proxy found")
            lines.append("- **Status:** ⚠️ No DEF 14A found in recent filings")

        # Rate limiting
        time.sleep(self.REQUEST_DELAY_SECONDS)
        return lines

    def generate_verification_report(self) -> str:
        """Generate verification report.

        Loads the local board data, then looks up the latest DEF 14A for
        every ticker in ``company_info`` (in sorted order), printing
        progress along the way.

        Returns:
            The complete report as Markdown text.
        """
        print("\n" + "=" * 80)
        print("BOARD DATA VERIFICATION REPORT")
        print("=" * 80)
        print()

        current_data = self.load_current_data()

        report = self._summary_lines(current_data)
        report.append("\n\n## Latest SEC Proxy Filings\n")
        report.append("_Latest DEF 14A (proxy statement) filings for each company_\n")

        # Check each company
        for ticker in sorted(self.company_info):
            report.extend(self._company_lines(ticker))

        report.extend(self._GUIDANCE_LINES)
        return '\n'.join(report)

    def save_report(self, report: str) -> str:
        """Save verification report.

        Writes ``report`` to ``BOARD_VERIFICATION_<YYYYMMDD>.md`` in the
        current working directory and returns that filename.
        """
        filename = f"BOARD_VERIFICATION_{datetime.now().strftime('%Y%m%d')}.md"
        # Explicit UTF-8: the report contains emoji that a platform default
        # encoding may be unable to represent.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"\n✅ Report saved to {filename}")
        return filename

def main():
    """Run the tool end to end: print a banner, build the report, save it,
    and print follow-up instructions."""
    rule = "=" * 80

    intro = (
        "\n" + rule,
        "DOW JONES 30 BOARD DATA VERIFICATION TOOL",
        rule,
        "",
        "This tool checks SEC Edgar for the latest proxy statements (DEF 14A)",
        "for all Dow Jones 30 companies, providing links to verify board data.",
        "",
    )
    for line in intro:
        print(line)

    # Build the report, then persist it; save_report hands back the filename.
    fetcher = DowBoardDataFetcher()
    filename = fetcher.save_report(fetcher.generate_verification_report())

    outro = (
        "\n" + rule,
        "VERIFICATION COMPLETE",
        rule,
        f"\n📄 Review the report: {filename}",
        "\n💡 Next Steps:",
        "   1. Open the report and click SEC Edgar links for recent filings",
        "   2. Compare board members in proxies with corporateData.js",
        "   3. Update corporateData.js with any changes",
        "   4. Re-run visualization to see updated connections",
        "\n" + rule + "\n",
    )
    for line in outro:
        print(line)

# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
