=============================================================================================================================================
| # Title     : Cloudbleed Scanner - Detects Cloudflare Memory Leak Patterns                                                                |
| # Author    : indoushka                                                                                                                   |
| # Tested on : windows 11 Fr(Pro) / browser : Mozilla firefox 145.0.2 (64 bits)                                                            |
| # Vendor    : https://www.cloudflare.com/                                                                                                 |
=============================================================================================================================================

[+] References : https://packetstorm.news/files/id/212490/ 

[+] Summary : Cloudbleed Scanner is a comprehensive security tool designed to detect memory leak patterns similar to the 2017 Cloudbleed incident, 
              where Cloudflare's reverse proxies leaked uninitialized memory containing sensitive data.


[+]  POC :	python poc.py

#!/usr/bin/env python3
"""
Cloudbleed Scanner - Detects Cloudflare Memory Leak Patterns
Author: indoushka
"""

import asyncio
import aiohttp
import json
import re
import sys
import os
from datetime import datetime, timedelta
import logging
import ssl
import certifi
import hashlib
import base64
from typing import Dict, List, Set, Optional, Any, Tuple
from collections import defaultdict
from dataclasses import dataclass
import sqlite3
from pathlib import Path
from urllib.parse import urlparse

# Configure the root logger once at import time: INFO level and a
# "timestamp - level - message" line format for every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared TLS context with default verification settings, using the CA bundle
# file reported by certifi as its trust store.
ssl_context = ssl.create_default_context(cafile=certifi.where())

@dataclass
class IOCClassification:
    """Indicators of compromise (IOCs) grouped into three severity buckets.

    All three fields are required and each is a list of strings.
    """
    # IOCs in the most severe bucket.
    critical: List[str]
    # IOCs that look suspicious but are not classified as critical.
    suspicious: List[str]
    # IOCs considered low risk.
    low_risk: List[str]

@dataclass
class MITRETactic:
    """One MITRE ATT&CK tactic together with its mapped techniques."""
    # Tactic identifier (presumably an ATT&CK ID such as "TA0006" -- confirm with the producer).
    id: str
    # Human-readable tactic name.
    name: str
    # Technique identifiers/names associated with this tactic.
    techniques: List[str]
    # Confidence of the mapping (presumably a 0.0-1.0 fraction -- TODO confirm).
    confidence: float

class CompleteReportSaver:
    """Save COMPLETE reports with ALL details - NO TRUNCATION

    Stateless collection of static helpers.  ``save_complete_report`` is the
    entry point: it renders one scan-result dictionary as a UTF-8 text report.
    The remaining public helpers decode/format individual values, and the
    underscore-prefixed helpers each write one section of the report.
    """

    # Explanations printed for every missing security header that has an entry here.
    _MISSING_HEADER_DETAILS = {
        'Strict-Transport-Security': {
            'risk': 'CRITICAL',
            'description': 'Prevents SSL stripping attacks and protocol downgrade attacks',
            'impact': 'Without HSTS, attackers can force HTTPS sites to HTTP',
            'recommendation': 'Implement: max-age=31536000; includeSubDomains; preload'
        },
        'Content-Security-Policy': {
            'risk': 'CRITICAL',
            'description': 'Prevents XSS, clickjacking, and other code injection attacks',
            'impact': 'Site vulnerable to cross-site scripting attacks',
            'recommendation': 'Implement strict CSP with proper directives'
        },
        'X-Frame-Options': {
            'risk': 'HIGH',
            'description': 'Prevents clickjacking attacks by controlling framing',
            'impact': 'Site can be embedded in malicious frames',
            'recommendation': 'Set to: DENY or SAMEORIGIN'
        },
        'X-Content-Type-Options': {
            'risk': 'MEDIUM',
            'description': 'Prevents MIME type sniffing attacks',
            'impact': 'Browsers may interpret files incorrectly',
            'recommendation': 'Set to: nosniff'
        },
        'Referrer-Policy': {
            'risk': 'MEDIUM',
            'description': 'Controls how much referrer information is sent',
            'impact': 'Potential information leakage through referrer headers',
            'recommendation': 'Set to: strict-origin-when-cross-origin'
        }
    }

    # Regexes applied (in this order) to the Server header to surface version strings.
    # Each has exactly one capture group, so re.findall() yields plain strings.
    _VERSION_PATTERNS = (
        r'(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)',  # Standard version
        r'v(\d+)',  # vX format
        r'(\d{8})',  # Date format
        r'(\d{4}[a-z]?)',  # Year + optional letter
    )

    @staticmethod
    def decode_jwt(token: str) -> Dict:
        """Decode the header and payload segments of a JWT (no signature check).

        Args:
            token: Compact JWT of the form ``header.payload.signature``.

        Returns:
            ``{'header': ..., 'payload': ...}`` with the parsed JSON of each
            segment (``{}`` for an empty segment); ``{}`` when *token* does not
            consist of exactly three dot-separated parts; ``{'error': message}``
            when decoding or JSON parsing fails.
        """
        try:
            parts = token.split('.')
            if len(parts) != 3:
                return {}

            decoded = []
            for segment in parts[:2]:
                # JWT segments are base64url without padding: restore the padding
                # and use the URL-safe alphabet so '-' and '_' decode correctly.
                padded = segment + '=' * (-len(segment) % 4)
                raw = base64.urlsafe_b64decode(padded)
                decoded.append(raw.decode('utf-8', errors='ignore'))
            header, payload = decoded

            return {
                'header': json.loads(header) if header else {},
                'payload': json.loads(payload) if payload else {}
            }
        except Exception as e:
            # Deliberately best-effort: a malformed token must never abort the
            # report, so every failure is returned to the caller as data.
            return {'error': str(e)}

    @staticmethod
    def format_hex_string(hex_str: str) -> str:
        """Return *hex_str* grouped for readability.

        Strings longer than 100 characters are split into space-separated
        groups of 8 and followed by a ``Length: N characters`` line; shorter
        strings are returned unchanged.
        """
        if len(hex_str) > 100:
            grouped = ' '.join(hex_str[i:i + 8] for i in range(0, len(hex_str), 8))
            return f"{grouped}\nLength: {len(hex_str)} characters"
        return hex_str

    @staticmethod
    def format_binary_data(binary_str: str) -> str:
        """Describe a possibly non-printable string as raw, hex and printable views.

        Returns an empty string for empty input; otherwise four lines: the raw
        text, the hex of its UTF-8 encoding, a copy with every character outside
        the printable ASCII range replaced by ``.``, and the character count.
        """
        if not binary_str:
            return ""

        hex_repr = binary_str.encode('utf-8', errors='ignore').hex()
        printable = ''.join(c if 32 <= ord(c) < 127 else '.' for c in binary_str)

        return (
            f"Raw: {binary_str}\n"
            f"Hex: {hex_repr}\n"
            f"Printable: {printable}\n"
            f"Length: {len(binary_str)} characters"
        )

    @staticmethod
    def save_complete_report(result: Dict, filename: str = None) -> str:
        """Save COMPLETE report in TXT format - NO TRUNCATION

        Args:
            result: Scan result dictionary.  Keys read here: ``url``, ``status``,
                ``timestamp``, ``content_length``, ``content_type``, ``server``,
                ``final_url``, ``error``, ``fingerprint``, ``findings`` (with
                ``headers``, ``security``, ``sensitive_data``, ``cloudflare``),
                ``intelligence`` and ``content_hash``.  All are optional.
            filename: Output path.  When ``None`` a name is derived from the
                URL's host and the current time and written to the working
                directory.

        Returns:
            The path of the written report.  When ``result['error']`` is set only
            the header, basic information and the error are written.
        """
        if filename is None:
            filename = CompleteReportSaver._default_filename(result)

        # "or {}" also covers sections explicitly set to None.
        findings = result.get('findings') or {}
        headers_data = findings.get('headers') or {}
        security = findings.get('security') or {}
        sensitive_data = findings.get('sensitive_data') or {}
        cloudflare = findings.get('cloudflare') or {}
        fingerprint = result.get('fingerprint') or {}
        intelligence = result.get('intelligence') or {}

        with open(filename, 'w', encoding='utf-8', errors='replace') as f:
            # ==================== REPORT HEADER ====================
            f.write("="*120 + "\n")
            f.write("🚨 CLOUDBLEED COMPLETE THREAT INTELLIGENCE SCAN REPORT 🚨\n")
            f.write("⚠️  Cloudflare Reverse Proxies Memory Leak Detection - COMPLETE DATA DISPLAY ⚠️\n")
            f.write("="*120 + "\n\n")

            # ==================== BASIC INFORMATION ====================
            f.write("📊 📊 📊 BASIC INFORMATION 📊 📊 📊\n")
            f.write("="*120 + "\n")
            f.write(f"🌐 URL: {result.get('url', 'N/A')}\n")
            f.write(f"📋 Status Code: {result.get('status', 'N/A')}\n")
            f.write(f"🕐 Scan Time: {result.get('timestamp', 'N/A')}\n")
            f.write(f"📏 Content Size: {result.get('content_length', 0):,} bytes\n")
            f.write(f"📄 Content Type: {result.get('content_type', 'Unknown')}\n")
            f.write(f"🖥️ Server Header: {result.get('server', 'Unknown')}\n")
            f.write(f"🔗 Final URL (after redirects): {result.get('final_url', 'N/A')}\n\n")

            if result.get('error'):
                # A failed scan has no findings: report the error and stop here.
                f.write("❌ ❌ ❌ SCAN ERROR ❌ ❌ ❌\n")
                f.write(f"Error: {result['error']}\n\n")
                return filename

            if fingerprint:
                CompleteReportSaver._write_fingerprint(f, fingerprint)
            if headers_data:
                CompleteReportSaver._write_headers(f, headers_data)
            if security:
                CompleteReportSaver._write_security(f, security)
            if sensitive_data:
                CompleteReportSaver._write_sensitive_data(f, sensitive_data)
            if cloudflare:
                CompleteReportSaver._write_cloudflare(f, cloudflare, sensitive_data, security)
            if intelligence:
                CompleteReportSaver._write_intelligence(f, intelligence)
            CompleteReportSaver._write_footer(f, result, security, sensitive_data, cloudflare)

        print(f"\n💾 COMPLETE report saved to: {filename}")
        print(f"📄 File size: {os.path.getsize(filename):,} bytes")

        return filename

    @staticmethod
    def _default_filename(result: Dict) -> str:
        """Build the default report file name from the scanned host and the current time."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        netloc = urlparse(str(result.get('url') or '')).netloc
        # The netloc may carry a port, userinfo or IPv6 brackets (':', '@', '[', ']'),
        # which are not valid in file names on every platform; keep only word
        # characters and hyphens (dots become underscores as before).
        domain = re.sub(r'[^\w\-]', '_', netloc)[:50]
        return f"CLOUDBLEED_COMPLETE_REPORT_{domain}_{timestamp}.txt"

    @staticmethod
    def _write_fingerprint(f, fingerprint: Dict) -> None:
        """Write the platform fingerprinting section."""
        f.write("🖥️ 🖥️ 🖥️ ADVANCED PLATFORM FINGERPRINTING 🖥️ 🖥️ 🖥️\n")
        f.write("="*120 + "\n")

        tech_mapping = [
            ('🌐 CDN Provider', 'cdn'),
            ('🛡️ WAF Protection', 'waf'),
            ('💻 Programming Language', 'language'),
            ('🏗️ Web Framework', 'framework'),
            ('🖥️ Server Software', 'server_software'),
        ]

        for display_name, key in tech_mapping:
            if fingerprint.get(key):
                f.write(f"• {display_name}: {fingerprint[key]}\n")

        if fingerprint.get('technologies'):
            f.write("\n🛠️ ALL DETECTED TECHNOLOGIES:\n")
            for tech in fingerprint['technologies']:
                f.write(f"  ✓ {tech}\n")

        f.write(f"\n📊 FINGERPRINT RISK SCORE: {fingerprint.get('risk_score', 0):.2f}/1.0\n")
        f.write("\n" + "="*120 + "\n\n")

    @staticmethod
    def _write_headers(f, headers_data: Dict) -> None:
        """Write present/missing security headers and exposed server version details."""
        f.write("📋 📋 📋 COMPLETE HEADERS ANALYSIS 📋 📋 📋\n")
        f.write("="*120 + "\n")

        # ALL Security Headers Present
        present = headers_data.get('security_headers')
        if present:
            f.write("✅ ✅ ✅ PRESENT SECURITY HEADERS:\n")
            f.write("-"*80 + "\n")
            for header, data in present.items():
                f.write(f"\n🔹 {header}:\n")
                f.write(f"   Value: {data.get('value', '')}\n")
                f.write(f"   Risk Level: {data.get('risk', 'unknown').upper()}\n")
            f.write("\n")

        # COMPLETE LIST of Missing Security Headers
        missing = headers_data.get('missing_headers')
        if missing:
            f.write("❌ ❌ ❌ MISSING SECURITY HEADERS:\n")
            f.write("-"*80 + "\n")
            for idx, header in enumerate(missing, 1):
                f.write(f"{idx:2d}. {header}\n")

            # Detailed explanation for each missing header we have text for
            f.write("\n📝 📝 📝 DETAILED EXPLANATION OF MISSING HEADERS 📝 📝 📝\n")
            f.write("-"*80 + "\n")
            for header in missing:
                info = CompleteReportSaver._MISSING_HEADER_DETAILS.get(header)
                if info:
                    f.write(f"\n🔸 {header}:\n")
                    f.write(f"   Risk Level: {info['risk']}\n")
                    f.write(f"   Description: {info['description']}\n")
                    f.write(f"   Impact: {info['impact']}\n")
                    f.write(f"   Recommendation: {info['recommendation']}\n")
            f.write("\n")

        # Server Information with COMPLETE details
        server = (headers_data.get('server_info') or {}).get('server')
        if server:
            f.write("🖥️ 🖥️ 🖥️ SERVER INFORMATION 🖥️ 🖥️ 🖥️\n")
            f.write("-"*80 + "\n")
            f.write(f"Server Header: {server}\n")

            found_versions = []
            for pattern in CompleteReportSaver._VERSION_PATTERNS:
                found_versions.extend(re.findall(pattern, server))

            if found_versions:
                f.write("\n⚠️ ⚠️ ⚠️ EXPOSED VERSION INFORMATION ⚠️ ⚠️ ⚠️\n")
                f.write("The following version information was exposed:\n")
                for version in found_versions:
                    f.write(f"  • Version: {version}\n")

                f.write("\n🚨 SECURITY IMPLICATIONS:\n")
                f.write("• Attackers can target specific vulnerabilities for this version\n")
                f.write("• Automated scanners can identify known exploits\n")
                f.write("• Version disclosure violates security best practices\n")
            # The closing rule is only emitted together with the server block.
            f.write("\n" + "="*120 + "\n\n")

    @staticmethod
    def _write_security(f, security: Dict) -> None:
        """Write risk level, issues, memory-leak patterns and recommendations."""
        f.write("🔒 🔒 🔒 COMPREHENSIVE SECURITY ANALYSIS 🔒 🔒 🔒\n")
        f.write("="*120 + "\n")
        f.write(f"🎯 OVERALL RISK LEVEL: {security.get('risk_level', 'low').upper()}\n")
        f.write(f"📈 RISK SCORE: {security.get('risk_score', 0):.2f}/1.0\n\n")

        if security.get('issues'):
            f.write("⚠️ ⚠️ ⚠️ SECURITY ISSUES FOUND ⚠️ ⚠️ ⚠️\n")
            f.write("-"*80 + "\n")
            for idx, issue in enumerate(security.get('issues', []), 1):
                f.write(f"{idx:2d}. {issue}\n")
            f.write("\n")

        if security.get('memory_patterns'):
            CompleteReportSaver._write_memory_patterns(f, security['memory_patterns'])

        if security.get('recommendations'):
            f.write("💡 💡 💡 SECURITY RECOMMENDATIONS 💡 💡 💡\n")
            f.write("-"*80 + "\n")
            for idx, rec in enumerate(security.get('recommendations', []), 1):
                f.write(f"{idx:2d}. {rec}\n")
            f.write("\n")

    @staticmethod
    def _write_memory_patterns(f, memory_patterns) -> None:
        """Write every detected memory-leak pattern in full, then the risk assessment.

        Each entry is either a dict with ``pattern``/``length``/``type`` keys or
        the bare pattern itself.
        """
        f.write("🚨 🚨 🚨 CLOUDBLEED MEMORY LEAK PATTERNS DETECTED 🚨 🚨 🚨\n")
        f.write("="*120 + "\n")
        f.write("⚠️  WARNING: These patterns indicate potential Cloudflare memory leaks\n")
        f.write("ℹ️  Similar to the 2017 Cloudbleed incident where uninitialized memory\n")
        f.write("    was dumped by Cloudflare reverse proxies\n")
        f.write("="*120 + "\n\n")

        total = len(memory_patterns)
        f.write(f"📊 TOTAL MEMORY LEAK PATTERNS FOUND: {total}\n\n")

        for idx, pattern_info in enumerate(memory_patterns, 1):
            if isinstance(pattern_info, dict):
                pattern = pattern_info.get('pattern', '')
                pattern_type = pattern_info.get('type', 'unknown')
                reported_length = pattern_info.get('length')
            else:
                pattern = pattern_info
                pattern_type = 'unknown'
                reported_length = None

            if not isinstance(pattern, str):
                pattern = str(pattern)
            # Fall back to the real length when the producer did not record one.
            length = len(pattern) if reported_length is None else reported_length

            f.write(f"\n{'='*80}\n")
            f.write(f"PATTERN {idx}/{total}\n")
            f.write(f"{'='*80}\n")
            f.write(f"Type: {pattern_type}\n")
            f.write(f"Length: {length} characters\n")
            # errors='replace' keeps hashing from failing on unencodable characters.
            digest = hashlib.md5(pattern.encode('utf-8', errors='replace')).hexdigest()
            f.write(f"MD5 Hash: {digest}\n")
            f.write(f"\n{'─'*80}\n")
            f.write("COMPLETE PATTERN CONTENT (NO TRUNCATION):\n")
            f.write(f"{'─'*80}\n")

            # Always write the whole pattern, as the heading promises.  Excerpting
            # long patterns would contradict it and repeat content for mid-sized ones.
            f.write(pattern + "\n")

            # Hex representation for binary patterns
            if any(ord(c) < 32 or ord(c) > 126 for c in pattern[:100]):
                f.write(f"\n{'─'*80}\n")
                f.write("HEX REPRESENTATION (first 500 chars):\n")
                hex_repr = pattern[:500].encode('utf-8', errors='ignore').hex()
                f.write(CompleteReportSaver.format_hex_string(hex_repr) + "\n")

            f.write(f"{'='*80}\n")

        f.write("\n📝 📝 📝 CLOUDBLEED RISK ASSESSMENT 📝 📝 📝\n")
        f.write("="*120 + "\n")
        f.write("🔍 PATTERN ANALYSIS:\n")
        f.write("• Long hex strings (>32 chars) may indicate memory dumps\n")
        f.write("• Null byte sequences (\\x00\\x00) may indicate uninitialized memory\n")
        f.write("• Non-printable characters may indicate binary data leaks\n")
        f.write("• UUID/GUID patterns may indicate memory addressing\n")
        f.write("• Repetitive patterns may indicate memory structures\n\n")

        f.write("🚨 SECURITY IMPLICATIONS:\n")
        f.write("• Sensitive data (passwords, tokens, keys) may be exposed\n")
        f.write("• Session cookies and authentication tokens may be leaked\n")
        f.write("• Internal IP addresses and network information may be exposed\n")
        f.write("• Database credentials and API keys may be compromised\n")
        f.write("• Cloudflare sites with these patterns need IMMEDIATE investigation\n\n")

        f.write("🔧 RECOMMENDED ACTIONS:\n")
        f.write("1. Contact Cloudflare support immediately\n")
        f.write("2. Rotate ALL API keys and credentials\n")
        f.write("3. Invalidate ALL session tokens\n")
        f.write("4. Monitor for unauthorized access\n")
        f.write("5. Consider moving critical services off Cloudflare\n")
        f.write("\n" + "="*120 + "\n\n")

    @staticmethod
    def _write_sensitive_data(f, sensitive_data: Dict) -> None:
        """Write every sensitive-data item, grouped by category."""
        f.write("🚨 🚨 🚨 COMPLETE SENSITIVE DATA DETECTED 🚨 🚨 🚨\n")
        f.write("="*120 + "\n")
        f.write("⚠️  WARNING: The following sensitive data was found in the response\n")
        f.write("    This indicates potential data leakage or misconfiguration\n")
        f.write("="*120 + "\n\n")

        total_items = sum(len(items) for items in sensitive_data.values())
        f.write(f"📊 TOTAL SENSITIVE ITEMS FOUND: {total_items}\n\n")

        for category, items in sensitive_data.items():
            if not items:
                continue
            f.write(f"\n{'='*80}\n")
            f.write(f"📁 CATEGORY: {category.upper()} - {len(items)} ITEMS\n")
            f.write(f"{'='*80}\n\n")

            for idx, item in enumerate(items, 1):
                f.write(f"\n{'─'*40} ITEM {idx} {'─'*40}\n")
                CompleteReportSaver._write_sensitive_item(f, category, item)
                f.write(f"\n{'─'*80}\n")

            f.write(f"\n{'='*80}\n\n")

    @staticmethod
    def _write_sensitive_item(f, category: str, item) -> None:
        """Write one sensitive-data item plus its category-specific analysis."""
        if not isinstance(item, dict):
            # Non-dict item - display complete
            f.write("VALUE (COMPLETE):\n")
            f.write(f"{'─'*80}\n")
            f.write(f"{str(item)}\n")
            f.write(f"{'─'*80}\n")
            return

        value = item.get('value', 'N/A')
        context = item.get('context', '')
        confidence = item.get('confidence', 0)

        f.write(f"CONFIDENCE LEVEL: {confidence:.0%}\n")
        f.write(f"RISK: {'HIGH' if confidence > 0.7 else 'MEDIUM' if confidence > 0.4 else 'LOW'}\n")
        f.write("\nVALUE (COMPLETE - NO TRUNCATION):\n")
        f.write(f"{'─'*80}\n")
        f.write(f"{value}\n")
        f.write(f"{'─'*80}\n")

        # The analyses below use string methods; tolerate non-string values.
        text = value if isinstance(value, str) else str(value)

        if category == 'tokens' and text.startswith('eyJ'):
            CompleteReportSaver._write_jwt_analysis(f, text)
        elif category == 'api_keys':
            CompleteReportSaver._write_api_key_analysis(f, text)
        elif category == 'credentials':
            CompleteReportSaver._write_credential_analysis(f, text)

        # Add context if available
        if context and str(context).strip():
            f.write("\n📄 CONTEXT (surrounding code/text):\n")
            f.write(f"{'─'*80}\n")
            f.write(f"{context}\n")
            f.write(f"{'─'*80}\n")

    @staticmethod
    def _write_jwt_analysis(f, token: str) -> None:
        """Write the decoded header, payload and selected claims of a JWT."""
        from datetime import timezone  # local import: not part of the file-level imports

        f.write("\n🔐 JWT TOKEN ANALYSIS:\n")
        decoded = CompleteReportSaver.decode_jwt(token)

        if decoded.get('error'):
            f.write(f"JWT Decode Error: {decoded['error']}\n")
            return

        if decoded.get('header'):
            f.write("\nJWT HEADER:\n")
            f.write(json.dumps(decoded['header'], indent=2, ensure_ascii=False) + "\n")

        payload = decoded.get('payload')
        if not payload:
            return
        f.write("\nJWT PAYLOAD:\n")
        f.write(json.dumps(payload, indent=2, ensure_ascii=False) + "\n")

        # Extract claims for analysis
        if isinstance(payload, dict):
            if 'exp' in payload:
                try:
                    # Convert in UTC so the value matches the "(UTC)" label; drop
                    # tzinfo only to keep the plain "YYYY-MM-DD HH:MM:SS" rendering.
                    exp_time = datetime.fromtimestamp(payload['exp'], tz=timezone.utc).replace(tzinfo=None)
                    f.write(f"\n⏰ TOKEN EXPIRATION: {exp_time} (UTC)\n")
                except (TypeError, ValueError, OverflowError, OSError):
                    # Attacker-controlled claim: never let a bad value abort the report.
                    f.write(f"\n⏰ TOKEN EXPIRATION: {payload['exp']} (unparseable)\n")
            if 'iss' in payload:
                f.write(f"📝 ISSUER: {payload['iss']}\n")
            if 'sub' in payload:
                f.write(f"👤 SUBJECT: {payload['sub']}\n")

    @staticmethod
    def _write_api_key_analysis(f, value: str) -> None:
        """Write type-specific notes for a value found in the ``api_keys`` category."""
        f.write("\n🔑 API KEY ANALYSIS:\n")
        if value.startswith('AKIA'):
            f.write("TYPE: AWS Access Key ID\n")
            f.write("FORMAT: AKIA[16 uppercase alphanumeric characters]\n")
            f.write("🚨 CRITICAL RISK: This should NEVER be exposed in client-side code\n")
            f.write("IMPACT: Full AWS account compromise possible\n")
            f.write("ACTION REQUIRED: Rotate IMMEDIATELY via AWS IAM\n")
        elif value.startswith('sk_'):
            f.write("TYPE: Stripe Secret Key\n")
            if 'live' in value.lower():
                f.write("🚨 CRITICAL: This is a LIVE production Stripe key!\n")
                f.write("IMPACT: Complete payment processing compromise\n")
                f.write("ACTION REQUIRED: Rotate IMMEDIATELY in Stripe Dashboard\n")
            else:
                f.write("⚠️ WARNING: Test Stripe key exposed\n")
        elif len(value) >= 32 and re.match(r'^[a-fA-F0-9]+$', value):
            f.write("TYPE: Hexadecimal API Key\n")
            f.write(f"LENGTH: {len(value)} characters\n")
            f.write("FORMAT: Hexadecimal string\n")

    @staticmethod
    def _write_credential_analysis(f, value: str) -> None:
        """Write length and character-class notes for a leaked credential."""
        f.write("\n🔐 CREDENTIAL ANALYSIS:\n")
        f.write(f"LENGTH: {len(value)} characters\n")
        if len(value) < 8:
            f.write("⚠️ WARNING: Password is too short\n")
        if re.search(r'\d', value):
            f.write("✓ Contains numbers\n")
        if re.search(r'[A-Z]', value):
            f.write("✓ Contains uppercase letters\n")
        if re.search(r'[a-z]', value):
            f.write("✓ Contains lowercase letters\n")
        if re.search(r'[^A-Za-z0-9]', value):
            f.write("✓ Contains special characters\n")

    @staticmethod
    def _write_cloudflare(f, cloudflare: Dict, sensitive_data: Dict, security: Dict) -> None:
        """Write the Cloudflare detection result and its risk assessment."""
        f.write("🛡️ 🛡️ 🛡️ CLOUDFLARE DETECTION ANALYSIS 🛡️ 🛡️ 🛡️\n")
        f.write("="*120 + "\n")
        f.write(f"🔍 CLOUDFLARE DETECTED: {'YES' if cloudflare.get('detected') else 'NO'}\n")
        f.write(f"📊 CONFIDENCE LEVEL: {cloudflare.get('confidence', 0):.0%}\n\n")

        if not cloudflare.get('detected'):
            return

        f.write("⚠️  CLOUDFLARE DETECTION IMPLICATIONS:\n")
        f.write("• Site is behind Cloudflare's reverse proxy network\n")
        f.write("• Potential for Cloudbleed-style memory leaks exists\n")
        f.write("• Cloudflare-specific cookies and headers present\n")
        f.write("• WAF protection (if enabled) may be in place\n\n")

        if cloudflare.get('indicators'):
            f.write("📋 CLOUDFLARE INDICATORS FOUND:\n")
            f.write("-"*80 + "\n")
            for idx, indicator in enumerate(cloudflare.get('indicators', []), 1):
                f.write(f"{idx:2d}. {indicator}\n")
            f.write("\n")

        # Cloudflare-specific risk assessment
        f.write("🚨 CLOUDFLARE-SPECIFIC RISK ASSESSMENT:\n")
        f.write("-"*80 + "\n")
        if sensitive_data:
            f.write("❌ HIGH RISK: Sensitive data found on Cloudflare-protected site\n")
            f.write("   This is a potential Cloudbleed scenario\n")
        elif security.get('memory_patterns'):
            f.write("⚠️ MEDIUM RISK: Memory leak patterns detected\n")
            f.write("   Could indicate uninitialized memory exposure\n")
        else:
            f.write("✅ LOW RISK: No immediate Cloudbleed indicators\n")
        f.write("\n")

    @staticmethod
    def _write_intelligence(f, intelligence: Dict) -> None:
        """Write the IOC score/classification and the MITRE ATT&CK mapping."""
        f.write("🧠 🧠 🧠 THREAT INTELLIGENCE ANALYSIS 🧠 🧠 🧠\n")
        f.write("="*120 + "\n")
        f.write(f"📊 IOC SCORE: {intelligence.get('ioc_score', 0):.2f}/1.0\n")
        f.write(f"🎯 THREAT LEVEL: {intelligence.get('threat_level', 'low').upper()}\n\n")

        ioc_classification = intelligence.get('ioc_classification', {})
        if any(ioc_classification.values()):
            f.write("🔍 IOC CLASSIFICATION:\n")
            f.write("-"*80 + "\n")

            for level, items in ioc_classification.items():
                if items:
                    f.write(f"\n{level.upper()} IOCS ({len(items)}):\n")
                    # Only the first ten IOCs per level are listed; the heading carries the full count.
                    for idx, item in enumerate(items[:10], 1):
                        f.write(f"  {idx:2d}. {item}\n")

            f.write("\n")

        mitre_tactics = intelligence.get('mitre_tactics', [])
        if mitre_tactics:
            f.write("🎯 MITRE ATT&CK TACTIC MAPPING:\n")
            f.write("-"*80 + "\n")
            for tactic in mitre_tactics:
                f.write(f"\n• {tactic.get('id', 'N/A')} - {tactic.get('name', 'N/A')}\n")
                f.write(f"  Confidence: {tactic.get('confidence', 0):.0%}\n")
                f.write(f"  Techniques: {', '.join(tactic.get('techniques', []))}\n")
            f.write("\n")

    @staticmethod
    def _write_footer(f, result: Dict, security: Dict, sensitive_data: Dict, cloudflare: Dict) -> None:
        """Write the raw response metadata, the numbered summary and the closing banner."""
        # ==================== RAW RESPONSE DATA ====================
        f.write("📄 📄 📄 RAW RESPONSE METADATA 📄 📄 📄\n")
        f.write("="*120 + "\n")
        f.write(f"Response Size: {result.get('content_length', 0):,} bytes\n")
        f.write(f"Response Type: {result.get('content_type', 'Unknown')}\n")

        if 'content_hash' in result:
            f.write(f"Content MD5: {result['content_hash']}\n")

        f.write(f"\nScan Completed: {datetime.now().isoformat()}\n")

        # ==================== REPORT FOOTER ====================
        f.write("\n" + "="*120 + "\n")
        f.write("📋 REPORT SUMMARY\n")
        f.write("="*120 + "\n")

        summary_points = []

        risk_level = security.get('risk_level')
        if risk_level == 'high':
            summary_points.append("🚨 HIGH RISK - Immediate action required")
        elif risk_level == 'medium':
            summary_points.append("⚠️ MEDIUM RISK - Investigation recommended")
        else:
            summary_points.append("✅ LOW RISK - Regular monitoring suggested")

        if sensitive_data:
            total_sensitive = sum(len(items) for items in sensitive_data.values())
            summary_points.append(f"🔓 {total_sensitive} sensitive data items found")

        if security.get('memory_patterns'):
            summary_points.append(f"💾 {len(security['memory_patterns'])} memory leak patterns detected")

        if cloudflare.get('detected'):
            summary_points.append("🛡️ Cloudflare protection detected")

        for idx, point in enumerate(summary_points, 1):
            f.write(f"{idx}. {point}\n")

        f.write("\n" + "="*120 + "\n")
        f.write("🏁 END OF COMPLETE CLOUDBLEED SCAN REPORT\n")
        # astimezone() attaches the local zone so %Z renders a zone name instead of nothing.
        f.write(f"📅 Generated: {datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')}\n")
        f.write("="*120 + "\n")

class IntelligenceCache:
    """SQLite-backed cache of scan results, used to avoid duplicate requests.

    Rows are keyed by the MD5 hex digest of the URL (a lookup key only, not a
    security measure). Lookups ignore rows older than one day.
    """

    def __init__(self, cache_dir: str = ".cache"):
        """Create the cache directory and make sure the schema exists.

        Args:
            cache_dir: Directory that will contain ``intel_cache.db``.
        """
        self.cache_dir = Path(cache_dir)
        # parents=True so a nested cache path does not raise FileNotFoundError.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.db_path = self.cache_dir / "intel_cache.db"
        self.init_db()

    @staticmethod
    def _url_key(url: str) -> str:
        """Return the primary-key value stored for *url*."""
        return hashlib.md5(url.encode()).hexdigest()

    def init_db(self):
        """Initialize SQLite database"""
        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS scan_cache (
                    url_hash TEXT PRIMARY KEY,
                    url TEXT NOT NULL,
                    data TEXT NOT NULL,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            conn.commit()
        finally:
            # Always release the handle, even when the statement fails.
            conn.close()

    def get_cached_scan(self, url: str) -> Optional[Dict]:
        """Return the cached result for *url*, or ``None`` on a miss.

        A row counts as a hit only when its timestamp is less than one day old.
        """
        conn = sqlite3.connect(str(self.db_path))
        try:
            row = conn.execute(
                "SELECT data FROM scan_cache WHERE url_hash = ? AND timestamp > datetime('now', '-1 day')",
                (self._url_key(url),)
            ).fetchone()
        finally:
            conn.close()

        if row:
            return json.loads(row[0])
        return None

    def cache_scan(self, url: str, data: Dict):
        """Store *data* for *url*, replacing any previous row.

        Values that JSON cannot encode are stringified via ``default=str``.
        REPLACE re-inserts the row, so the default timestamp is refreshed.
        """
        payload = json.dumps(data, default=str)

        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.execute(
                "REPLACE INTO scan_cache (url_hash, url, data) VALUES (?, ?, ?)",
                (self._url_key(url), url, payload)
            )
            conn.commit()
        finally:
            conn.close()

class AntiNoiseFilter:
    """Heuristic filter that discards likely false-positive secret matches."""

    def __init__(self):
        """Build the false-positive and context pattern tables."""
        # Context regexes that mark a match as documentation/form noise.
        # Keys are singular; lookups also accept the plural category names.
        self.js_false_positives = {
            'password': [
                r'password.*placeholder',
                r'password.*example',
                r'password.*test',
                r'password.*demo',
                r'type=.*password',
                r'input.*password',
                r'confirm.*password',
                r'new.*password',
                r'old.*password',
                r'change.*password'
            ],
            'api_key': [
                r'api.*key.*example',
                r'api.*key.*test',
                r'api.*key.*demo',
                r'your.*api.*key',
                r'insert.*api.*key',
                r'paste.*api.*key',
                r'sample.*api.*key'
            ],
            'token': [
                r'token.*example',
                r'token.*test',
                r'token.*demo',
                r'your.*token',
                r'sample.*token',
                r'paste.*token'
            ]
        }

        # Context regexes used for the final confidence decision.
        self.context_patterns = {
            'high_confidence': [
                r'[\"\']\s*:\s*[\"\']',
                r'=\s*[\"\']',
                r'const\s+\w+\s*=\s*[\"\']',
                r'let\s+\w+\s*=\s*[\"\']',
                r'var\s+\w+\s*=\s*[\"\']',
                r'process\.env\.',
                r'config\[[\"\']',
                r'\.get\([\"\']',
            ],
            'low_confidence': [
                r'placeholder=',
                r'example',
                r'sample',
                r'test',
                r'demo',
                r'changeme',
                r'your_.*here'
            ]
        }

    def _false_positive_patterns(self, category: str) -> List[str]:
        """Return the false-positive regexes for *category*.

        The table is keyed by singular names ('api_key', 'token', 'password')
        while this class is called with plural categories ('api_keys',
        'tokens', 'passwords'); both spellings resolve to the same list.
        """
        patterns = self.js_false_positives.get(category)
        if patterns is None and category.endswith('s'):
            patterns = self.js_false_positives.get(category[:-1])
        return patterns or []

    def filter_sensitive_data(self, category: str, value: str, context: str = "") -> bool:
        """Decide whether a candidate match should be kept.

        Args:
            category: Finding category, e.g. 'api_keys', 'tokens', 'passwords'.
            value: The matched text to validate.
            context: Text surrounding the match.

        Returns:
            True to keep the match, False to discard it as a false positive.
        """
        value_lower = value.lower()
        context_lower = context.lower()

        # Obvious placeholder words inside the value itself.
        if any(fp in value_lower for fp in ['example', 'test', 'demo', 'placeholder', 'changeme']):
            return False

        # Category-specific noise in the surrounding context.
        for pattern in self._false_positive_patterns(category):
            if re.search(pattern, context_lower, re.IGNORECASE):
                return False

        if category == 'api_keys':
            # The regex already bounds the length to 20-50 characters, so no
            # separate length check is needed.
            if not re.match(r'^[A-Za-z0-9_\-]{20,50}$', value):
                return False

        elif category == 'tokens':
            # JWT-looking values are accepted regardless of context.
            if value.startswith('eyJ'):
                return True
            if len(value) < 32:
                return False

        elif category == 'passwords':
            if len(value) < 8:
                return False
            if any(x in context_lower for x in ['var ', 'const ', 'let ', 'function']):
                return False

        high_confidence = any(
            re.search(pattern, context_lower)
            for pattern in self.context_patterns['high_confidence']
        )
        low_confidence = any(
            re.search(pattern, context_lower)
            for pattern in self.context_patterns['low_confidence']
        )

        # Low-confidence context wins only when nothing vouches for the match.
        if low_confidence and not high_confidence:
            return False

        return True

class CompleteRegexPatterns:
    """Regex catalogue used for content analysis.

    ``patterns`` maps a category name to a list of regex source strings and
    ``compiled_patterns`` maps the same categories to compiled patterns.
    Every pattern is compiled with ``re.IGNORECASE``.
    """

    def __init__(self):
        """Define the pattern table and compile every entry."""
        self.patterns = {
            'api_keys': [
                r'(?i)(?:aws)?_?(?:access)?_?key["\']?\s*[:=]\s*["\']?(AKIA[0-9A-Z]{16,})["\']?',
                r'(?i)(?:aws)?_?(?:secret)?_?key["\']?\s*[:=]\s*["\']?([A-Za-z0-9/+]{40,})["\']?',
                r'(?i)(?:stripe)?_?(?:api)?_?key["\']?\s*[:=]\s*["\']?(sk_(?:live|test)_[0-9a-zA-Z]{24,})["\']?',
                r'(?i)(?:github)?_?(?:token)?["\']?\s*[:=]\s*["\']?(gh[ps]_[a-zA-Z0-9]{36,})["\']?',
                r'(?i)["\']?(?:api[_-]?key|apikey)["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,128})["\']?',
                r'(?i)["\']?(?:secret[_-]?key)["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,128})["\']?',
                r'(?i)["\']?(?:private[_-]?key)["\']?\s*[:=]\s*["\']?(\-{5}BEGIN[\s\S]{100,}END[\s\S]+\-{5})["\']?',
            ],

            'tokens': [
                r'(?i)["\']?(?:bearer[_-]?token|jwt[_-]?token)["\']?\s*[:=]\s*["\']?(eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,})["\']?',
                r'(?i)["\']?authorization["\']?\s*[:=]\s*["\']?Bearer\s+([a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,})["\']?',
                r'(?i)["\']?(?:access[_-]?token)["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,512})["\']?',
                r'(?i)["\']?(?:session[_-]?(?:id|token))["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,256})["\']?',
                r'(?i)["\']?(?:csrf[_-]?token)["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,128})["\']?',
                r'(?i)["\']?(?:refresh[_-]?token)["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{32,256})["\']?',
            ],

            'credentials': [
                r'(?i)["\']?(?:db[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:database[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:admin[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:root[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:mysql[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:postgres[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
                r'(?i)["\']?(?:mongodb[_-]?(?:pass|password))["\']?\s*[:=]\s*["\']?([^"\'\s]{6,100})["\']?',
            ],

            'cloudflare_indicators': [
                r'(?i)["\']?__cfduid["\']?\s*[:=]\s*["\']?([a-fA-F0-9]{43})["\']?',
                r'(?i)["\']?cf_clearance["\']?\s*[:=]\s*["\']?([a-fA-F0-9_-]{40,})["\']?',
                r'CF-Ray\s*:\s*([a-fA-F0-9]{16}-[A-Z]{3})',
                r'(?i)cf-cache-status',
                r'(?i)cf-polished',
                r'(?i)cf-bgj',
            ],

            'memory_leak_patterns': [
                r'[0-9a-fA-F]{32,}',  # Long hex strings
                r'(?s)\x00{4,}',  # Null byte sequences
                r'[^\x20-\x7E]{20,}',  # Runs outside printable ASCII
                r'[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}',  # UUIDs
                r'(?:[0-9a-fA-F]{2}[:\-\s]?){16,}',  # 16+ hex byte pairs, optionally delimited
                r'0x[0-9a-fA-F]{8,16}',  # Memory addresses
                r'[0-9a-fA-F]{16,}',  # General hex dumps
            ],

            'ioc_patterns': [
                r'\b(?:10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2[0-9]|3[0-1])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})\b',
                r'(?i)(?:union\s+select|sleep\(\d+\)|benchmark\(|exec\(|system\(|drop\s+table|insert\s+into)',
            ],

            'emails': [
                # TLD class is letters only; a '|' inside [...] would be a
                # literal pipe, not alternation.
                r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            ],

            'phone_numbers': [
                r'\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b',
            ]
        }

        # Compile once up front so scans do not pay the compile cost per match.
        self.compiled_patterns = {
            category: [re.compile(pattern, re.IGNORECASE) for pattern in pattern_list]
            for category, pattern_list in self.patterns.items()
        }

class CompleteFingerprintAnalyzer:
    """Fingerprints CDN/WAF, server software, language and framework
    from response headers and body text."""

    def __init__(self):
        """Define the substring/regex tables used by :meth:`analyze`."""
        # Substring indicators per CDN/WAF plus the scores added on each hit.
        self.cdn_waf_fingerprints = {
            'cloudflare': {
                'patterns': ['cloudflare', '__cfduid', 'cf-ray', 'cf-cache-status', 'cf-polished', 'cf-bgj'],
                'cdn': 'Cloudflare',
                'waf': 'Cloudflare WAF',
                'risk_score': 0.3,
                'cloudbleed_risk': 0.8
            },
            'akamai': {
                'patterns': ['akamai', 'x-akamai', 'akamaighost', 'x-akamai-transformed'],
                'cdn': 'Akamai',
                'waf': 'Akamai Kona',
                'risk_score': 0.2,
                'cloudbleed_risk': 0.1
            },
            'sucuri': {
                'patterns': ['sucuri', 'x-sucuri-id', 'x-sucuri-cache', 'sucuri/cloudproxy'],
                'cdn': 'Sucuri',
                'waf': 'Sucuri WAF',
                'risk_score': 0.4,
                'cloudbleed_risk': 0.3
            },
            'fastly': {
                'patterns': ['fastly', 'x-fastly', 'surrogate-key'],
                'cdn': 'Fastly',
                'waf': 'Fastly WAF',
                'risk_score': 0.2,
                'cloudbleed_risk': 0.2
            }
        }

        # 'headers' entries are "name: value" pairs or a bare header name;
        # 'patterns' are regexes searched in the body.
        self.language_fingerprints = {
            'php': {
                'headers': ['x-powered-by: php', 'server: php', 'x-php-version'],
                'patterns': [r'\.php\b', r'\?php', r'php_\w+', r'PHP Version'],
            },
            'asp.net': {
                'headers': ['x-powered-by: asp.net', 'x-aspnet-version', 'server: microsoft-iis', 'x-aspnetmvc-version'],
                'patterns': [r'\.aspx\b', r'\.ashx\b', r'__doPostBack', r'ViewState'],
            },
            'node.js': {
                'headers': ['x-powered-by: express', 'server: node', 'x-node-version'],
                'patterns': [r'node\.js', r'require\(', r'module\.exports', r'process\.env'],
            },
            'python': {
                'headers': ['x-powered-by: python', 'server: gunicorn', 'server: uwsgi', 'x-python-version'],
                'patterns': [r'def\s+\w+\(', r'import\s+\w+', r'from\s+\w+', r'__pycache__'],
            },
            'java': {
                'headers': ['x-powered-by: jsp', 'server: tomcat', 'server: jetty', 'x-java-version'],
                'patterns': [r'\.jsp\b', r'\.do\b', r'javax\.servlet', r'java\.'],
            },
        }

        # Framework 'patterns' are plain substrings (not regexes).
        self.framework_fingerprints = {
            'laravel': {
                'patterns': ['laravel', 'csrf-token', 'mix-manifest.json', 'App\\Http'],
                'headers': ['x-powered-by: laravel'],
            },
            'django': {
                'patterns': ['django', 'csrfmiddlewaretoken', 'settings.py', 'wsgi.py'],
                'headers': ['x-powered-by: django'],
            },
            'wordpress': {
                'patterns': ['wordpress', 'wp-content', 'wp-includes', 'wp-json', 'wp-admin'],
                'headers': ['x-powered-by: wordpress'],
            },
            'react': {
                'patterns': ['react', 'react-dom', '__NEXT_DATA__', 'webpack'],
                'headers': [],
            },
            'vue.js': {
                'patterns': ['vue', 'vue-router', 'vuex', 'nuxt'],
                'headers': [],
            },
        }

    def analyze(self, headers: Dict, content: str, url: str) -> Dict:
        """Build a fingerprint dict from response headers and body.

        Args:
            headers: Response headers (name -> value).
            content: Response body text.
            url: Scanned URL; accepted for interface compatibility, not read here.

        Returns:
            Dict with keys 'cdn', 'waf', 'language', 'framework',
            'server_software', 'technologies' (sorted, unique),
            'risk_score' and 'cloudbleed_risk' (each capped at 1.0),
            'header_details' and 'content_indicators'.
        """
        fingerprint = {
            'cdn': None,
            'waf': None,
            'language': None,
            'framework': None,
            'server_software': None,
            'technologies': [],
            'risk_score': 0.0,
            'cloudbleed_risk': 0.0,
            'header_details': {},
            'content_indicators': []
        }

        headers_lower = {k.lower(): v.lower() for k, v in headers.items()}
        content_lower = content.lower()

        # CDN/WAF Detection. Scores are added once per matching pattern (and
        # again for a body hit), so several indicators for one service
        # accumulate; both totals are capped at 1.0 before returning.
        for service, data in self.cdn_waf_fingerprints.items():
            for pattern in data['patterns']:
                pattern_lower = pattern.lower()

                # Check headers: the first header matching this pattern counts.
                for header_name, header_value in headers_lower.items():
                    if pattern_lower in header_name or pattern_lower in header_value:
                        fingerprint['cdn'] = data['cdn']
                        fingerprint['waf'] = data['waf']
                        fingerprint['risk_score'] += data['risk_score']
                        fingerprint['cloudbleed_risk'] += data['cloudbleed_risk']
                        fingerprint['header_details'][f'cdn_waf_{service}'] = {
                            'header': header_name,
                            'value': header_value,
                            'pattern': pattern
                        }
                        break

                # Check content
                if pattern_lower in content_lower:
                    fingerprint['cdn'] = data['cdn']
                    fingerprint['waf'] = data['waf']
                    fingerprint['risk_score'] += data['risk_score']
                    fingerprint['cloudbleed_risk'] += data['cloudbleed_risk']
                    fingerprint['content_indicators'].append(f"Content contains '{pattern}'")

        # Server Software. Match the header name exactly: a substring test
        # would also pick up headers such as Server-Timing and overwrite the
        # real Server value (and its parsed version).
        for header_name, header_value in headers.items():
            if header_name.lower() == 'server':
                fingerprint['server_software'] = header_value
                fingerprint['header_details']['server'] = {
                    'header': header_name,
                    'value': header_value
                }

                # Detailed server analysis
                server_lower = header_value.lower()
                if 'nginx' in server_lower:
                    fingerprint['technologies'].append('nginx')
                    version_match = re.search(r'nginx/(\d+\.\d+(?:\.\d+)?)', server_lower)
                    if version_match:
                        fingerprint['header_details']['server']['version'] = version_match.group(1)
                elif 'apache' in server_lower:
                    fingerprint['technologies'].append('apache')
                    version_match = re.search(r'apache/(\d+\.\d+(?:\.\d+)?)', server_lower)
                    if version_match:
                        fingerprint['header_details']['server']['version'] = version_match.group(1)
                elif 'iis' in server_lower or 'microsoft' in server_lower:
                    fingerprint['technologies'].append('iis')
                elif 'cloudflare' in server_lower:
                    fingerprint['technologies'].append('cloudflare')
                elif 'gunicorn' in server_lower:
                    fingerprint['technologies'].append('gunicorn')
                elif 'tomcat' in server_lower:
                    fingerprint['technologies'].append('tomcat')

        # Programming Language Detection. Every language is tested, so when
        # several match, the last one in table order is kept in 'language'.
        for lang, data in self.language_fingerprints.items():
            detected = False

            # Check headers. A spec without ': ' has an empty expected value,
            # which matches on the header name alone.
            for header_pattern in data['headers']:
                header_key, header_value = header_pattern.split(': ', 1) if ': ' in header_pattern else (header_pattern, '')

                for header_name, actual_value in headers_lower.items():
                    if header_key.lower() in header_name and header_value in actual_value:
                        fingerprint['language'] = lang
                        fingerprint['technologies'].append(lang)
                        detected = True
                        fingerprint['header_details'][f'language_{lang}'] = {
                            'header': header_name,
                            'value': actual_value
                        }
                        break
                if detected:
                    break

            # Check content patterns only when the headers were inconclusive.
            if not detected:
                for pattern in data['patterns']:
                    if re.search(pattern, content_lower, re.IGNORECASE):
                        fingerprint['language'] = lang
                        fingerprint['technologies'].append(lang)
                        fingerprint['content_indicators'].append(f"Language pattern: {pattern}")
                        break

        # Framework Detection
        for framework, data in self.framework_fingerprints.items():
            detected = False

            # Check headers
            for header_pattern in data['headers']:
                if ': ' in header_pattern:
                    header_key, header_value = header_pattern.split(': ', 1)
                    for header_name, actual_value in headers_lower.items():
                        if header_key.lower() in header_name and header_value in actual_value:
                            fingerprint['framework'] = framework
                            fingerprint['technologies'].append(framework)
                            detected = True
                            break
                if detected:
                    break

            # Check content patterns (plain substring match)
            if not detected:
                for pattern in data['patterns']:
                    if pattern.lower() in content_lower:
                        fingerprint['framework'] = framework
                        fingerprint['technologies'].append(framework)
                        fingerprint['content_indicators'].append(f"Framework pattern: {pattern}")
                        break

        # Remove duplicates and sort
        fingerprint['technologies'] = sorted(set(fingerprint['technologies']))

        # Cap the accumulated scores
        fingerprint['cloudbleed_risk'] = min(fingerprint['cloudbleed_risk'], 1.0)
        fingerprint['risk_score'] = min(fingerprint['risk_score'], 1.0)

        return fingerprint

class CompleteIntelligenceScorer:
    """Complete intelligence scoring with MITRE ATT&CK mapping"""

    def __init__(self):
        """Define the MITRE tactic list and the IOC weight tables."""
        # Order matters: calculate_ioc_score selects tactics by index
        # (0 = Reconnaissance, 1 = Collection, 2 = Exfiltration).
        self.mitre_tactics = [
            MITRETactic(
                id="TA0043",
                name="Reconnaissance",
                techniques=["T1595", "T1592", "T1589"],
                confidence=0.7
            ),
            MITRETactic(
                id="TA0009",
                name="Collection",
                techniques=["T1213", "T1005", "T1114"],
                confidence=0.8
            ),
            MITRETactic(
                id="TA0010",
                name="Exfiltration",
                techniques=["T1041", "T1020", "T1030"],
                confidence=0.6
            ),
        ]

        self.ioc_weights = {
            'critical': {
                'api_keys': 0.95,
                'database_credentials': 0.85,
                'memory_leak': 0.98,
                'cloudflare_leak': 0.92,
                'jwt_tokens': 0.88,
                'private_keys': 0.96
            },
            'suspicious': {
                'internal_ips': 0.65,
                'suspicious_patterns': 0.55,
                'missing_security_headers': 0.45,
                'exposed_technologies': 0.35,
                'emails': 0.25,
                'phone_numbers': 0.20
            },
            'low_risk': {
                'contact_info': 0.15,
                'general_patterns': 0.25,
                'info_disclosure': 0.20,
                'version_exposure': 0.30
            }
        }

    def calculate_ioc_score(self, findings: Dict, fingerprint: Dict) -> Tuple[float, IOCClassification, List[MITRETactic]]:
        """Score the findings and classify the indicators of compromise.

        Args:
            findings: Scan findings; the keys 'sensitive_data', 'security'
                and 'headers' are read when present.
            fingerprint: Fingerprint dict; 'risk_score', 'header_details',
                'cdn' and 'cloudbleed_risk' are read when present.

        Returns:
            ``(total_score, ioc_classification, matched_tactics)`` where
            ``total_score`` is capped at 1.0 and ``matched_tactics`` lists
            each matched tactic once.
        """
        ioc_classification = IOCClassification([], [], [])
        matched_tactics = []

        def add_tactic(tactic):
            # Collection can be triggered by two independent conditions;
            # report it only once.
            if tactic not in matched_tactics:
                matched_tactics.append(tactic)

        # Critical IOCs
        critical_score = 0.0
        critical_items = []

        # NOTE(review): only categories whose name appears in
        # ioc_weights['critical'] contribute here; the 'suspicious' and
        # 'low_risk' weight tables are not read by this method.
        if findings.get('sensitive_data'):
            for category, items in findings['sensitive_data'].items():
                if category in self.ioc_weights['critical']:
                    weight = self.ioc_weights['critical'][category]
                    item_count = len(items)
                    # Full weight is reached at five or more items.
                    critical_score += weight * min(item_count / 5, 1.0)

                    for item in items[:10]:  # First 10 items
                        if isinstance(item, dict):
                            value = item.get('value', 'N/A')
                            confidence = item.get('confidence', 0)
                            critical_items.append(f"{category} ({confidence:.0%}): {value}")
                        else:
                            critical_items.append(f"{category}: {str(item)}")

        # Add all critical items to classification
        ioc_classification.critical = critical_items

        if findings.get('security', {}).get('risk_level') == 'high':
            critical_score += 0.75
            ioc_classification.critical.append("HIGH SECURITY RISK CONFIGURATION")

        # Suspicious IOCs
        suspicious_score = 0.0
        suspicious_items = []

        if fingerprint.get('risk_score', 0) > 0.5:
            suspicious_score += 0.45
            suspicious_items.append(f"High-risk infrastructure fingerprint (Score: {fingerprint['risk_score']:.2f})")

        if findings.get('headers', {}).get('missing_headers'):
            missing_count = len(findings['headers']['missing_headers'])
            suspicious_score += min(missing_count * 0.12, 0.6)
            suspicious_items.append(f"Missing {missing_count} critical security headers")

        if fingerprint.get('header_details', {}).get('server', {}).get('version'):
            suspicious_score += 0.25
            suspicious_items.append(f"Server version exposed: {fingerprint['header_details']['server']['version']}")

        # Add all suspicious items
        ioc_classification.suspicious = suspicious_items

        # Cloudflare-specific leak risk
        cloudflare_leak_score = 0.0
        if fingerprint.get('cdn') == 'Cloudflare':
            if findings.get('sensitive_data'):
                cloudflare_leak_score += 0.85
                ioc_classification.critical.append("CLOUDFLARE WITH SENSITIVE DATA EXPOSURE - POTENTIAL CLOUDBLEED")

            memory_patterns = findings.get('security', {}).get('memory_patterns', [])
            if memory_patterns:
                cloudflare_leak_score += 0.95
                ioc_classification.critical.append(f"POTENTIAL CLOUDBLEED MEMORY LEAK PATTERNS DETECTED ({len(memory_patterns)} patterns)")

            cloudflare_leak_score += fingerprint.get('cloudbleed_risk', 0) * 0.5

        # MITRE Tactic Mapping
        if critical_score > 0.6:
            add_tactic(self.mitre_tactics[1])  # Collection
            add_tactic(self.mitre_tactics[2])  # Exfiltration

        if suspicious_score > 0.4:
            add_tactic(self.mitre_tactics[0])  # Reconnaissance

        if cloudflare_leak_score > 0.5:
            add_tactic(self.mitre_tactics[1])  # Collection

        # Weighted total, capped at 1.0
        total_score = min(
            critical_score * 0.55 +
            suspicious_score * 0.30 +
            cloudflare_leak_score * 0.45,
            1.0
        )

        return total_score, ioc_classification, matched_tactics

class CompleteCloudbleedScanner:
    """Complete Cloudbleed Scanner - Shows ALL data with NO truncation"""
    
    def __init__(self, enable_cache: bool = True, enable_intelligence: bool = True):
        """Wire up the scanner's collaborators and request defaults.

        Args:
            enable_cache: When True an IntelligenceCache is created and used
                by scan_url; otherwise ``self.cache`` is None.
            enable_intelligence: When True a CompleteIntelligenceScorer is
                created; otherwise ``self.intelligence_scorer`` is None.
        """
        self.enable_cache = enable_cache
        self.enable_intelligence = enable_intelligence

        self.cache = IntelligenceCache() if enable_cache else None
        self.filter = AntiNoiseFilter()
        self.regex = CompleteRegexPatterns()
        self.fingerprint_analyzer = CompleteFingerprintAnalyzer()
        self.intelligence_scorer = CompleteIntelligenceScorer() if enable_intelligence else None
        self.report_saver = CompleteReportSaver()

        self.session_timeout = aiohttp.ClientTimeout(total=30)

        # Browser-like request headers sent with every scan.
        self.scan_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            # 'br' is deliberately not advertised: aiohttp can only decode
            # Brotli when an optional Brotli package is installed, and a
            # br-encoded body would otherwise fail to decode.
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache',
            'DNT': '1'
        }
    
    async def scan_url(self, url: str) -> Dict:
        """Fetch *url* and run every analysis stage on the response.

        A cached result is returned without a request when caching is enabled
        and a fresh entry exists.

        Args:
            url: Absolute URL to scan.

        Returns:
            Result dict. On success ``result['success']`` is True and the
            analysis output is under 'findings', 'fingerprint' and
            'intelligence'. On failure ``success`` stays False and
            ``result['error']`` holds a message; no exception propagates.
        """

        if self.enable_cache:
            cached = self.cache.get_cached_scan(url)
            if cached:
                logger.info(f"Using cached results for {url}")
                return cached

        print(f"\n🔍 🔍 🔍 Scanning: {url}")
        print(f"⏰ Start time: {datetime.now().strftime('%H:%M:%S')}")

        result = {
            'url': url,
            'timestamp': datetime.now().isoformat(),
            'success': False,
            'error': None,
            'findings': {},
            'intelligence': {},
            'fingerprint': {},
            'content_hash': None
        }

        try:
            connector = aiohttp.TCPConnector(ssl=ssl_context)

            async with aiohttp.ClientSession(
                connector=connector,
                timeout=self.session_timeout,
                headers=self.scan_headers
            ) as session:

                # NOTE(review): ssl=False skips certificate validation for
                # this request even though the connector was given a verified
                # context — confirm this is intentional.
                async with session.get(url, allow_redirects=True, ssl=False) as response:
                    # Decode leniently: a body containing bytes that are
                    # invalid in the declared charset must still be analysed
                    # rather than abort the scan with a decode error.
                    content = await response.text(errors='replace')

                    # Calculate content hash
                    result['content_hash'] = hashlib.md5(content.encode()).hexdigest()

                    # Basic information
                    result['status'] = response.status
                    # Length of the decoded text (characters).
                    result['content_length'] = len(content)
                    result['content_type'] = response.headers.get('Content-Type', '')
                    result['server'] = response.headers.get('Server', 'Unknown')
                    result['final_url'] = str(response.url)

                    # Store ALL headers
                    all_headers = dict(response.headers)
                    result['all_headers'] = all_headers

                    # Advanced Fingerprinting - COMPLETE
                    fingerprint = self.fingerprint_analyzer.analyze(all_headers, content, url)
                    result['fingerprint'] = fingerprint

                    # Enhanced Content Analysis - COMPLETE
                    sensitive_findings = self.analyze_content_complete(content)
                    if sensitive_findings:
                        result['findings']['sensitive_data'] = sensitive_findings

                    # Header Analysis - COMPLETE
                    header_analysis = self.analyze_headers_complete(all_headers)
                    if header_analysis:
                        result['findings']['headers'] = header_analysis

                    # Cloudflare Detection - COMPLETE
                    cf_detected = await self.detect_cloudflare_complete(response, content)
                    if cf_detected:
                        result['findings']['cloudflare'] = cf_detected

                    # Security Analysis - COMPLETE
                    security_analysis = await self.security_analysis_complete(response, content, fingerprint)
                    if security_analysis:
                        result['findings']['security'] = security_analysis

                    # Intelligence Enrichment - COMPLETE
                    if self.enable_intelligence:
                        intelligence_data = await self.enrich_intelligence_complete(url, response, content, fingerprint, result['findings'])
                        result['intelligence'] = intelligence_data

                    result['success'] = True

                    # Only successful scans are cached.
                    if self.enable_cache:
                        self.cache.cache_scan(url, result)

                    print(f"✅ Scan completed: {url}")
                    print(f"📊 Content size: {result['content_length']:,} bytes")

                    return result

        except asyncio.TimeoutError:
            result['error'] = "Request timeout (30 seconds)"
            return result
        except aiohttp.ClientError as e:
            result['error'] = f"Client error: {str(e)}"
            return result
        except Exception as e:
            # Top-level boundary: record the failure and log the traceback.
            result['error'] = f"Unexpected error: {str(e)}"
            logger.exception(f"Error scanning {url}")
            return result
    
    def analyze_content_complete(self, content: str) -> Dict:
        """Run every compiled pattern over *content* and collect accepted matches.

        For each category in ``self.regex.compiled_patterns`` every non-empty
        regex match is passed, together with up to 500 characters of context
        on either side, to ``self.filter.filter_sensitive_data``.  Matches the
        filter accepts and whose stripped text is longer than 3 characters
        are scored with ``self.calculate_confidence_complete`` and recorded.

        Args:
            content: Text to scan.

        Returns:
            A dict mapping category name to a list of match records, sorted by
            ``(confidence, length)`` in descending order.  Categories without
            any accepted match are omitted.  Each record holds ``value``,
            ``context``, ``confidence``, ``position``, ``length`` and
            ``hex_representation`` (the first 200 hex digits of the UTF-8
            encoded value; every other field is stored untruncated).
        """
        context_radius = 500  # characters of context kept on each side of a match
        results = {}

        for category, regex_list in self.regex.compiled_patterns.items():
            hits = []

            for regex in regex_list:
                for found in regex.finditer(content):
                    raw_text = found.group(0)
                    if not raw_text:
                        # Zero-width matches carry no data worth reporting.
                        continue

                    window_start = max(0, found.start() - context_radius)
                    window_end = min(len(content), found.end() + context_radius)
                    surrounding = content[window_start:window_end]

                    # Anti-noise filter sees the raw (unstripped) match text.
                    if not self.filter.filter_sensitive_data(category, raw_text, surrounding):
                        continue

                    trimmed = raw_text.strip()
                    if len(trimmed) <= 3:
                        continue

                    score = self.calculate_confidence_complete(category, trimmed, surrounding)
                    hits.append({
                        'value': trimmed,
                        'context': surrounding,
                        'confidence': score,
                        'position': found.start(),
                        'length': len(trimmed),
                        'hex_representation': trimmed.encode('utf-8', errors='ignore').hex()[:200],
                    })

            if hits:
                # Highest confidence first; longer values break ties.
                results[category] = sorted(
                    hits,
                    key=lambda hit: (hit['confidence'], hit['length']),
                    reverse=True,
                )

        return results
    
    def calculate_confidence_complete(self, category: str, value: str, context: str) -> float:
        """Score how likely *value* is a genuine secret of the given *category*.

        The score starts at 0.5 and is adjusted in three stages:

        1. Category-specific rules on the value itself (fixed scores for
           recognised API-key/token shapes, additive bonuses for credential
           complexity).
        2. Additive boosts for sensitive keywords found anywhere in the
           lower-cased *context* (substring match).
        3. A 0.7 multiplier for each placeholder-style keyword found in the
           lower-cased *context* (substring match).

        Args:
            category: Finding category; ``'api_keys'``, ``'tokens'`` and
                ``'credentials'`` have dedicated rules, anything else keeps
                the base score.
            value: The matched text.
            context: Text surrounding the match.

        Returns:
            The score clamped to the range ``[0.0, 1.0]``.
        """
        score = 0.5  # base confidence

        if category == 'api_keys':
            # First matching rule wins and replaces the base score.
            api_key_rules = (
                (lambda v: re.match(r'^AKIA[0-9A-Z]{16}$', v), 0.98),                # AWS access key
                (lambda v: re.match(r'^sk_(live|test)_[0-9a-zA-Z]{24}$', v), 0.95),  # Stripe secret key
                (lambda v: re.match(r'^gh[ps]_[a-zA-Z0-9]{36,}$', v), 0.93),         # GitHub token
                (lambda v: len(v) >= 32 and re.match(r'^[a-fA-F0-9]+$', v), 0.85),   # long hex blob
                (lambda v: '-----BEGIN' in v and '-----END' in v, 0.96),             # PEM private key
            )
            for is_match, fixed_score in api_key_rules:
                if is_match(value):
                    score = fixed_score
                    break

        elif category == 'tokens':
            if value.startswith('eyJ'):
                score = 0.94  # JWT-style prefix
                if value.count('.') == 2:
                    # Exactly three dot-separated segments.
                    score += 0.03
            elif len(value) >= 64:
                score = 0.75

        elif category == 'credentials':
            # Each satisfied complexity check adds its bonus.
            strength_checks = (
                (len(value) >= 12, 0.15),
                (bool(re.search(r'[A-Z]', value)) and bool(re.search(r'[a-z]', value)), 0.10),
                (re.search(r'\d', value) is not None, 0.05),
                (re.search(r'[^A-Za-z0-9]', value) is not None, 0.05),
            )
            for passed, bonus in strength_checks:
                if passed:
                    score += bonus

        haystack = context.lower()

        # Keywords that raise confidence when present in the context.
        keyword_boosts = (
            ('secret', 0.15),
            ('key', 0.12),
            ('token', 0.12),
            ('password', 0.15),
            ('credential', 0.10),
            ('private', 0.10),
            ('auth', 0.08),
            ('api', 0.07),
        )
        for keyword, bonus in keyword_boosts:
            if keyword in haystack:
                score += bonus

        # Keywords that lower confidence; each one found scales the score by 0.7.
        for keyword in ('example', 'sample', 'test', 'demo', 'placeholder'):
            if keyword in haystack:
                score *= 0.7

        return max(0.0, min(score, 1.0))
    
    def analyze_headers_complete(self, headers: Dict) -> Dict:
        """Audit HTTP response headers for security posture.

        Header names are compared case-insensitively (HTTP field names are
        case-insensitive), so ``strict-transport-security`` is treated the
        same as ``Strict-Transport-Security``.  Directive checks inside HSTS,
        CSP and X-Frame-Options values are case-insensitive as well.

        Args:
            headers: Mapping of header name to value.  Any object exposing
                ``items()`` works; repeated ``Set-Cookie`` entries yielded by
                ``items()`` are all analysed.

        Returns:
            A dict with the keys:

            * ``security_headers`` - present security headers (keyed by their
              canonical name) with value, risk, description and
              recommendation.
            * ``missing_headers`` - canonical names of required headers that
              are absent.
            * ``server_info`` - name/value of the last header whose name
              contains ``server`` plus any version-like strings found in it.
            * ``vulnerabilities`` - human-readable findings.
            * ``all_headers`` - every header rendered as ``"name: value"``.
            * ``cookie_analysis`` - one record per cookie found in
              ``Set-Cookie``.
        """
        analysis = {
            'security_headers': {},
            'missing_headers': [],
            'server_info': {},
            'vulnerabilities': [],
            'all_headers': [],
            'cookie_analysis': []
        }

        header_items = list(headers.items())

        # Store ALL headers
        analysis['all_headers'] = [f"{k}: {v}" for k, v in header_items]

        # Case-insensitive view of the headers; the first occurrence of a
        # repeated name wins.
        normalized = {}
        for name, value in header_items:
            normalized.setdefault(str(name).lower(), value)

        # Security Headers Configuration
        security_headers_config = {
            'Strict-Transport-Security': {
                'required': True,
                'risk': 'critical',
                'description': 'Prevents SSL stripping and protocol downgrade attacks',
                'recommended_value': 'max-age=31536000; includeSubDomains; preload'
            },
            'Content-Security-Policy': {
                'required': True,
                'risk': 'critical',
                'description': 'Prevents XSS, clickjacking, and code injection attacks',
                'recommended_value': "default-src 'self'; script-src 'self'"
            },
            'X-Frame-Options': {
                'required': True,
                'risk': 'high',
                'description': 'Prevents clickjacking attacks',
                'recommended_value': 'DENY or SAMEORIGIN'
            },
            'X-Content-Type-Options': {
                'required': True,
                'risk': 'medium',
                'description': 'Prevents MIME type sniffing',
                'recommended_value': 'nosniff'
            },
            'Referrer-Policy': {
                'required': False,
                'risk': 'medium',
                'description': 'Controls referrer information leakage',
                'recommended_value': 'strict-origin-when-cross-origin'
            },
            'Permissions-Policy': {
                'required': False,
                'risk': 'medium',
                'description': 'Controls browser features and APIs',
                'recommended_value': 'See latest best practices'
            },
            'X-XSS-Protection': {
                'required': False,
                'risk': 'low',
                'description': 'Legacy XSS protection (deprecated)',
                'recommended_value': '0 (disable as CSP is better)'
            }
        }

        # Analyze each security header
        for header, config in security_headers_config.items():
            value = normalized.get(header.lower())

            if value is not None:
                analysis['security_headers'][header] = {
                    'value': value,
                    'risk': config['risk'],
                    'description': config['description'],
                    'recommended': config['recommended_value']
                }

                # Directive names/keywords are matched case-insensitively.
                directives = str(value).lower()

                # Check for common misconfigurations
                if header == 'Strict-Transport-Security':
                    if 'max-age' not in directives:
                        analysis['vulnerabilities'].append("HSTS missing max-age directive")
                    if 'includesubdomains' not in directives:
                        analysis['vulnerabilities'].append("HSTS missing includeSubDomains directive")

                elif header == 'Content-Security-Policy':
                    if "'unsafe-inline'" in directives:
                        analysis['vulnerabilities'].append("CSP contains unsafe-inline directive")
                    if "'unsafe-eval'" in directives:
                        analysis['vulnerabilities'].append("CSP contains unsafe-eval directive")

                elif header == 'X-Frame-Options':
                    if str(value).strip().upper() not in ('DENY', 'SAMEORIGIN'):
                        analysis['vulnerabilities'].append(
                            f"X-Frame-Options has non-standard value: {value}"
                        )

            elif config['required']:
                analysis['missing_headers'].append(header)
                analysis['vulnerabilities'].append(
                    f"Missing {header}: {config['description']}"
                )

        # Server information: every pattern has exactly one capture group, so
        # re.findall() yields plain strings.
        version_patterns = [
            r'(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)',  # Standard version
            r'v(\d+(?:\.\d+)?)',  # vX or vX.Y format
            r'(\d{8})',  # Date format (YYYYMMDD)
            r'(\d{4}[a-z]?)',  # Year + optional letter
            r'(\d{1,2}/\d{1,2}/\d{4})',  # Date format
        ]

        for header_name, header_value in header_items:
            if 'server' not in str(header_name).lower():
                continue

            analysis['server_info']['header'] = header_name
            analysis['server_info']['value'] = header_value

            found_versions = []
            for pattern in version_patterns:
                found_versions.extend(re.findall(pattern, str(header_value)))

            if found_versions:
                analysis['server_info']['versions'] = found_versions
                for version in found_versions:
                    analysis['vulnerabilities'].append(
                        f"Server version exposed: {version}"
                    )

        # Cookie analysis.  A folded Set-Cookie value separates cookies with
        # commas, but the Expires attribute ("Wed, 21 Oct 2015 ...") contains
        # a comma too.  Only split at a comma that is followed by a new
        # "name=" pair so dates stay intact.
        cookie_boundary = re.compile(r',\s*(?=[^;,=\s]+=)')
        set_cookie_values = [
            str(value) for name, value in header_items
            if str(name).lower() == 'set-cookie' and value
        ]

        for raw_value in set_cookie_values:
            for cookie in cookie_boundary.split(raw_value):
                cookie = cookie.strip()
                if not cookie:
                    continue

                # Everything after the first ';' is an attribute; attribute
                # names are case-insensitive.  Parsing them separately keeps
                # the cookie's own name/value from being mistaken for a flag.
                attributes = {}
                for part in cookie.split(';')[1:]:
                    attr_name, _, attr_value = part.strip().partition('=')
                    attributes.setdefault(attr_name.strip().lower(), attr_value.strip())

                cookie_analysis = {
                    'raw': cookie[:200],
                    'secure': 'secure' in attributes,
                    'httponly': 'httponly' in attributes,
                    'samesite': 'samesite' in attributes,
                    'path': attributes.get('path') or None,
                    'domain': attributes.get('domain') or None
                }

                analysis['cookie_analysis'].append(cookie_analysis)

                # Check for insecure cookies
                if not cookie_analysis['secure']:
                    analysis['vulnerabilities'].append("Cookie missing Secure flag")
                if not cookie_analysis['httponly']:
                    analysis['vulnerabilities'].append("Cookie missing HttpOnly flag")

        return analysis
    
    async def detect_cloudflare_complete(self, response, content: str) -> Dict:
        """Look for Cloudflare fingerprints in response headers, cookies and body.

        Args:
            response: Object exposing a ``headers`` mapping (only
                ``headers.items()`` is used).
            content: Response body text.

        Returns:
            A dict with ``detected`` (bool), ``indicators`` (list of indicator
            records), ``confidence`` (0.25 per indicator, capped at 1.0) and
            ``indicator_count``.  Header indicators carry ``type``,
            ``pattern`` and ``value``; cookie indicators carry the same keys
            with the cookie text cut to 500 characters; at most one
            ``content`` indicator is emitted, holding up to 10 context
            snippets (up to 5 per pattern).
        """
        indicators = []
        # Iterate items() rather than dict(...) so repeated headers (notably
        # several Set-Cookie lines) are all inspected.
        header_items = list(response.headers.items())
        content = content or ''

        # Cloudflare-specific patterns
        cloudflare_patterns = [
            'cloudflare',
            '__cfduid',
            'cf-ray',
            'cf-cache-status',
            'cf-polished',
            'cf-bgj',
            'cf-request-id',
            'cf-worker',
            'cf-connecting-ip'
        ]

        # Check ALL headers (name and value, case-insensitively)
        for header_name, header_value in header_items:
            header_line = f"{header_name}: {header_value}"
            header_lower = header_line.lower()

            for pattern in cloudflare_patterns:
                if pattern in header_lower:
                    indicators.append({
                        'type': 'header',
                        'pattern': pattern,
                        'value': header_line
                    })

        # Check cookies.  Header names are case-insensitive, so collect every
        # Set-Cookie value regardless of how the server spelled the name.
        cookies = ', '.join(
            str(value) for name, value in header_items
            if str(name).lower() == 'set-cookie' and value
        )
        if cookies:
            for pattern in ['__cfduid', 'cf_clearance']:
                if pattern in cookies:
                    indicators.append({
                        'type': 'cookie',
                        'pattern': pattern,
                        'value': cookies[:500] + ('...' if len(cookies) > 500 else '')
                    })

        # Check content for Cloudflare-specific patterns.  Matching is done
        # case-insensitively on the ORIGINAL text: lower-casing can change a
        # string's length, which would shift positions relative to `content`.
        content_indicators = []

        for pattern in cloudflare_patterns:
            occurrences = re.finditer(re.escape(pattern), content, re.IGNORECASE)
            for occurrence_index, found in enumerate(occurrences):
                if occurrence_index >= 5:  # First 5 occurrences per pattern
                    break
                pos = found.start()
                start = max(0, pos - 50)
                end = min(len(content), pos + 50)
                context = content[start:end]
                content_indicators.append(f"'{pattern}' at position {pos}: ...{context}...")

        if content_indicators:
            indicators.append({
                'type': 'content',
                'patterns': content_indicators[:10]  # First 10 content indicators
            })

        # Calculate confidence
        confidence = min(len(indicators) * 0.25, 1.0)

        return {
            'detected': len(indicators) > 0,
            'indicators': indicators,
            'confidence': confidence,
            'indicator_count': len(indicators)
        }
    
    async def security_analysis_complete(self, response, content: str, fingerprint: Dict) -> Dict:
        """Build a risk assessment from transport, headers and memory-leak patterns.

        The score accumulates contributions from: plain-HTTP transport,
        missing critical security headers, version strings in the ``Server``
        header, every match of ``self.regex.compiled_patterns
        ['memory_leak_patterns']`` in *content*, and Cloudflare-specific
        conditions when ``fingerprint['cdn'] == 'Cloudflare'``.  Header names
        are compared case-insensitively.

        Args:
            response: Object exposing ``url`` and a ``headers`` mapping.
            content: Response body text to search for memory-leak patterns.
            fingerprint: Platform fingerprint; only the ``cdn`` key is read.

        Returns:
            A dict with ``risk_level``, ``risk_score`` (capped at 1.0, the
            scale the report prints it against), ``issues``,
            ``recommendations``, ``memory_patterns`` (all matches, longest
            first), ``mitre_tactics`` and ``pattern_statistics``.
        """
        analysis = {
            'risk_level': 'low',
            'risk_score': 0.0,
            'issues': [],
            'recommendations': [],
            'memory_patterns': [],
            'mitre_tactics': [],
            'pattern_statistics': {}
        }

        is_plain_http = str(response.url).startswith('http:')

        # HTTPS Check
        if is_plain_http:
            analysis['issues'].append("❌ Site not using HTTPS - data transmitted in plain text")
            analysis['risk_score'] += 0.35

        # HTTP field names are case-insensitive: index them lower-cased so a
        # server sending "strict-transport-security" is not reported as
        # missing the header.
        header_lookup = {str(name).lower(): value for name, value in response.headers.items()}

        # Missing Security Headers
        critical_headers = ['Strict-Transport-Security', 'Content-Security-Policy', 'X-Frame-Options']
        missing_critical = [
            header for header in critical_headers
            if header.lower() not in header_lookup
        ]

        if missing_critical:
            analysis['issues'].append(f"❌ Missing critical security headers: {', '.join(missing_critical)}")
            analysis['risk_score'] += len(missing_critical) * 0.15

        # Server Information Exposure
        server_header = header_lookup.get('server', '')
        if server_header:
            # Find ALL version patterns
            version_patterns = [
                r'\d+\.\d+(?:\.\d+)?(?:\.\d+)?',
                r'v\d+(?:\.\d+)?',
                r'\d{8}',
                r'\d{4}[a-z]?'
            ]

            exposed_versions = []
            for pattern in version_patterns:
                exposed_versions.extend(re.findall(pattern, str(server_header)))

            if exposed_versions:
                analysis['issues'].append(f"⚠️ Server version exposed: {server_header}")
                analysis['risk_score'] += min(len(exposed_versions) * 0.08, 0.25)

        # Memory Leak Patterns
        memory_patterns = self.regex.compiled_patterns['memory_leak_patterns']
        all_memory_matches = []

        pattern_statistics = {
            'hex_strings': 0,
            'null_sequences': 0,
            'non_printable': 0,
            'uuids': 0,
            'memory_addresses': 0,
            'total_patterns': 0
        }

        # (anchored regex, reported type, statistics key); first hit wins.
        # The UUID classifier ignores case, like the hex and address ones,
        # so lower-case UUIDs are not labelled 'unknown'.
        classifiers = (
            (re.compile(r'[0-9a-fA-F]{32,}'), 'hex_string', 'hex_strings'),
            (re.compile(r'\x00{4,}'), 'null_sequence', 'null_sequences'),
            (re.compile(r'[^\x20-\x7E]{20,}'), 'non_printable', 'non_printable'),
            (
                re.compile(
                    r'[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}',
                    re.IGNORECASE,
                ),
                'uuid',
                'uuids',
            ),
            (re.compile(r'0x[0-9a-fA-F]{8,16}'), 'memory_address', 'memory_addresses'),
        )

        for pattern in memory_patterns:
            for match in pattern.finditer(content):
                match_text = match.group(0)
                match_start = match.start()
                match_end = match.end()

                # Determine pattern type
                pattern_type = 'unknown'
                for classifier, type_name, stat_key in classifiers:
                    if classifier.match(match_text):
                        pattern_type = type_name
                        pattern_statistics[stat_key] += 1
                        break

                # Get context (200 characters either side)
                context_start = max(0, match_start - 200)
                context_end = min(len(content), match_end + 200)
                context = content[context_start:context_end]

                # Store COMPLETE pattern
                all_memory_matches.append({
                    'pattern': match_text,
                    'type': pattern_type,
                    'length': len(match_text),
                    'position': match_start,
                    'context': context,
                    'hex_representation': match_text.encode('utf-8', errors='ignore').hex(),
                    # Longer matches weigh more, capped per match.
                    'risk_score': min(len(match_text) / 1000, 0.8)
                })

                pattern_statistics['total_patterns'] += 1

        # Update analysis with statistics
        analysis['pattern_statistics'] = pattern_statistics

        # Sort patterns by length (longer = more suspicious)
        all_memory_matches.sort(key=lambda x: x['length'], reverse=True)

        # Add ALL patterns to analysis
        for match in all_memory_matches:
            analysis['memory_patterns'].append(match)
            analysis['risk_score'] += match['risk_score']

        is_cloudflare = fingerprint.get('cdn') == 'Cloudflare'

        # Cloudflare-specific risks
        if is_cloudflare:
            analysis['issues'].append("🛡️ Cloudflare detected - potential Cloudbleed scenario")
            analysis['risk_score'] += 0.2

            if analysis.get('memory_patterns'):
                pattern_count = len(analysis['memory_patterns'])
                analysis['issues'].append(f"🚨 {pattern_count} potential Cloudbleed memory leak patterns detected")
                analysis['risk_score'] += min(pattern_count * 0.1, 0.5)
                analysis['mitre_tactics'].append("TA0009 - Collection (Cloudbleed)")

            if is_plain_http:
                analysis['issues'].append("⚠️ Cloudflare without HTTPS - potential downgrade attacks")
                analysis['risk_score'] += 0.25

        # The report prints this score as "x/1.0"; keep it on that scale.
        # Every threshold below is < 1.0, so capping never changes the
        # resulting level or recommendations.
        analysis['risk_score'] = min(analysis['risk_score'], 1.0)

        # Determine risk level based on the score
        if analysis['risk_score'] >= 0.75:
            analysis['risk_level'] = 'critical'
        elif analysis['risk_score'] >= 0.5:
            analysis['risk_level'] = 'high'
        elif analysis['risk_score'] >= 0.3:
            analysis['risk_level'] = 'medium'
        else:
            analysis['risk_level'] = 'low'

        # Generate recommendations
        if analysis['risk_score'] > 0.6:
            analysis['recommendations'].append("🔴 IMMEDIATE ACTION REQUIRED: Investigate potential Cloudbleed memory leaks")
            analysis['recommendations'].append("🔴 Contact Cloudflare support and security team immediately")

        if analysis.get('memory_patterns'):
            analysis['recommendations'].append("🔍 Investigate ALL memory leak patterns found in the report")
            analysis['recommendations'].append("🔄 Rotate ALL API keys, tokens, and credentials immediately")

        if is_cloudflare:
            analysis['recommendations'].append("🛡️ Review Cloudflare configuration for potential memory leak issues")
            analysis['recommendations'].append("📊 Enable Cloudflare logging and monitoring for suspicious activity")

        if missing_critical:
            analysis['recommendations'].append("🔧 Implement missing security headers immediately")
            analysis['recommendations'].append("📖 Follow OWASP security header guidelines")

        return analysis
    
    async def enrich_intelligence_complete(self, url: str, response, content: str, fingerprint: Dict, findings: Dict) -> Dict:
        """Assemble threat-intelligence metadata for one scanned URL.

        When ``self.intelligence_scorer`` is set, its
        ``calculate_ioc_score(findings, fingerprint)`` result supplies the IOC
        score, classification buckets, MITRE tactics and the derived threat
        level.  URL structure and simple content statistics are always added.

        Args:
            url: The scanned URL.
            response: Unused here; kept for interface compatibility.
            content: Response body text.
            fingerprint: Platform fingerprint passed through to the scorer.
            findings: Scan findings passed through to the scorer.

        Returns:
            A dict with ``ioc_score``, ``ioc_classification``,
            ``mitre_tactics``, ``threat_level``, ``enrichment_data`` (with
            ``domain_analysis`` and ``content_stats``) and ``timestamp``.
        """
        import ipaddress  # stdlib; only needed to recognise IP-literal hosts

        intelligence = {
            'ioc_score': 0.0,
            'ioc_classification': {},
            'mitre_tactics': [],
            'threat_level': 'low',
            'enrichment_data': {},
            'timestamp': datetime.now().isoformat()
        }

        if self.intelligence_scorer:
            score, classification, tactics = self.intelligence_scorer.calculate_ioc_score(
                findings, fingerprint
            )

            intelligence['ioc_score'] = score
            intelligence['ioc_classification'] = {
                'critical': classification.critical,
                'suspicious': classification.suspicious,
                'low_risk': classification.low_risk
            }
            intelligence['mitre_tactics'] = [
                {
                    'id': tactic.id,
                    'name': tactic.name,
                    'confidence': tactic.confidence,
                    'techniques': tactic.techniques
                }
                for tactic in tactics
            ]

            # Determine threat level from the IOC score
            if score >= 0.8:
                intelligence['threat_level'] = 'critical'
            elif score >= 0.6:
                intelligence['threat_level'] = 'high'
            elif score >= 0.4:
                intelligence['threat_level'] = 'medium'
            elif score >= 0.2:
                intelligence['threat_level'] = 'low'
            else:
                intelligence['threat_level'] = 'informational'

        parsed_url = urlparse(url)
        domain = parsed_url.netloc

        # netloc may carry "user:pass@" and ":port"; derive the TLD and
        # subdomain count from the bare hostname so "example.com:8443" does
        # not yield a TLD of "com:8443".
        host = (parsed_url.hostname or '').rstrip('.')
        try:
            ipaddress.ip_address(host)
            host_is_ip = True
        except ValueError:
            host_is_ip = False

        # IP literals and single-label hosts have no TLD / subdomains.
        labels = host.split('.') if host and not host_is_ip else []
        has_tld = len(labels) >= 2

        intelligence['enrichment_data']['domain_analysis'] = {
            'domain': domain,
            'tld': labels[-1] if has_tld else '',
            'subdomain_count': len(labels) - 2 if has_tld else 0,
            'url_structure': {
                'scheme': parsed_url.scheme,
                'netloc': parsed_url.netloc,
                'path': parsed_url.path,
                'params': parsed_url.params,
                'query': parsed_url.query,
                'fragment': parsed_url.fragment
            }
        }

        # Content statistics.  NOTE: len() of a str counts characters, so
        # 'size_bytes' currently equals 'character_count'.
        total_chars = len(content)
        outside_printable_ascii = sum(1 for c in content if ord(c) < 32 or ord(c) > 126)

        intelligence['enrichment_data']['content_stats'] = {
            'size_bytes': total_chars,
            'line_count': content.count('\n'),
            'word_count': len(content.split()),
            'character_count': total_chars,
            # Share of characters outside printable ASCII (0x20-0x7E).
            'binary_percentage': outside_printable_ascii / total_chars * 100 if content else 0
        }

        return intelligence
    
    def display_result_complete(self, result: Dict):
        """Display COMPLETE results with NO truncation"""
        print("\n" + "="*120)
        print(f"🚨 🚨 🚨 CLOUDBLEED COMPLETE SCAN REPORT 🚨 🚨 🚨")
        print(f"🌐 URL: {result['url']}")
        print("="*120)
        
        if result['error']:
            print(f"❌ ❌ ❌ SCAN ERROR ❌ ❌ ❌")
            print(f"Error: {result['error']}")
            print("="*120)
            return
        
        # Basic Info - COMPLETE
        print(f"\n📊 📊 📊 BASIC INFORMATION 📊 📊 📊")
        print(f"   ✅ Status Code: {result.get('status', 'N/A')}")
        print(f"   📏 Content Size: {result.get('content_length', 0):,} bytes")
        print(f"   📄 Content Type: {result.get('content_type', 'Unknown')}")
        print(f"   🔐 Content Hash (MD5): {result.get('content_hash', 'N/A')}")
        print(f"   🖥️ Server: {result.get('server', 'Unknown')}")
        print(f"   🔗 Final URL: {result.get('final_url', 'N/A')}")
        print(f"   🕐 Scan Time: {result.get('timestamp', 'Unknown')}")
        
        # Fingerprinting - COMPLETE
        fingerprint = result.get('fingerprint', {})
        if fingerprint:
            print(f"\n🖥️ 🖥️ 🖥️ COMPLETE PLATFORM FINGERPRINTING 🖥️ 🖥️ 🖥️")
            
            tech_info = [
                ('🌐 CDN Provider', 'cdn'),
                ('🛡️ WAF Protection', 'waf'),
                ('💻 Programming Language', 'language'),
                ('🏗️ Web Framework', 'framework'),
                ('🖥️ Server Software', 'server_software'),
            ]
            
            for display_name, key in tech_info:
                if fingerprint.get(key):
                    print(f"   • {display_name}: {fingerprint[key]}")
            
            if fingerprint.get('technologies'):
                print(f"\n   🛠️ ALL DETECTED TECHNOLOGIES:")
                for tech in fingerprint['technologies']:
                    print(f"     ✓ {tech}")
            
            if fingerprint.get('content_indicators'):
                print(f"\n   🔍 CONTENT INDICATORS:")
                for indicator in fingerprint['content_indicators'][:10]:
                    print(f"     • {indicator}")
            
            print(f"\n   📊 FINGERPRINT RISK SCORE: {fingerprint.get('risk_score', 0):.2f}/1.0")
            if fingerprint.get('cloudbleed_risk', 0) > 0:
                print(f"   🚨 CLOUDBLEED RISK SCORE: {fingerprint.get('cloudbleed_risk', 0):.2f}/1.0")
        
        # Headers Analysis - COMPLETE
        headers_data = result.get('findings', {}).get('headers', {})
        if headers_data:
            print(f"\n📋 📋 📋 COMPLETE HEADERS ANALYSIS 📋 📋 📋")
            
            if headers_data.get('missing_headers'):
                print(f"\n   ❌ MISSING CRITICAL SECURITY HEADERS:")
                for idx, header in enumerate(headers_data['missing_headers'], 1):
                    print(f"     {idx:2d}. {header}")
            
            if headers_data.get('vulnerabilities'):
                print(f"\n   ⚠️ HEADER VULNERABILITIES:")
                for idx, vuln in enumerate(headers_data['vulnerabilities'][:10], 1):
                    print(f"     {idx:2d}. {vuln}")
        
        # Security Analysis - COMPLETE
        security = result.get('findings', {}).get('security', {})
        if security:
            print(f"\n🔒 🔒 🔒 COMPLETE SECURITY ANALYSIS 🔒 🔒 🔒")
            print(f"   🎯 OVERALL RISK LEVEL: {security.get('risk_level', 'low').upper()}")
            print(f"   📈 RISK SCORE: {security.get('risk_score', 0):.2f}/1.0")
            
            if security.get('issues'):
                print(f"\n   ⚠️ ⚠️ ⚠️ SECURITY ISSUES FOUND:")
                for idx, issue in enumerate(security.get('issues', []), 1):
                    print(f"     {idx:2d}. {issue}")
            
            # Memory Leak Patterns - COMPLETE display
            if security.get('memory_patterns'):
                memory_patterns = security['memory_patterns']
                print(f"\n   🚨 🚨 🚨 MEMORY LEAK PATTERNS DETECTED 🚨 🚨 🚨")
                print(f"   📊 TOTAL PATTERNS: {len(memory_patterns)}")
                
                if security.get('pattern_statistics'):
                    stats = security['pattern_statistics']
                    print(f"\n   📈 PATTERN STATISTICS:")
                    print(f"     • Hex Strings: {stats.get('hex_strings', 0)}")
                    print(f"     • Null Sequences: {stats.get('null_sequences', 0)}")
                    print(f"     • Non-Printable: {stats.get('non_printable', 0)}")
                    print(f"     • UUIDs: {stats.get('uuids', 0)}")
                    print(f"     • Memory Addresses: {stats.get('memory_addresses', 0)}")
                    print(f"     • Total Patterns: {stats.get('total_patterns', 0)}")
                
                # Show first 5 patterns completely
                print(f"\n   🔍 FIRST 5 PATTERNS (COMPLETE):")
                for idx, pattern_info in enumerate(memory_patterns[:5], 1):
                    if isinstance(pattern_info, dict):
                        pattern = pattern_info.get('pattern', '')
                        length = pattern_info.get('length', 0)
                        pattern_type = pattern_info.get('type', 'unknown')
                        
                        print(f"\n     {idx}. TYPE: {pattern_type}, LENGTH: {length} chars")
                        print(f"        {'─'*60}")
                        
                        # Display COMPLETE pattern
                        if length > 500:
                            print(f"        FIRST 500 CHARACTERS:")
                            print(f"        {pattern[:500]}...")
                            print(f"        ... [continued in full report] ...")
                        else:
                            print(f"        {pattern}")
                        
                        print(f"        {'─'*60}")
                    else:
                        print(f"\n     {idx}. {str(pattern_info)}")
                
                if len(memory_patterns) > 5:
                    print(f"\n     ... and {len(memory_patterns) - 5} more patterns")
                    print(f"     📄 See complete report for ALL patterns")
            
            if security.get('recommendations'):
                print(f"\n   💡 💡 💡 SECURITY RECOMMENDATIONS:")
                for idx, rec in enumerate(security.get('recommendations', []), 1):
                    print(f"     {idx:2d}. {rec}")
        
        # Sensitive Data - COMPLETE
        sensitive_data = result.get('findings', {}).get('sensitive_data', {})
        if sensitive_data:
            print(f"\n🚨 🚨 🚨 SENSITIVE DATA DETECTED 🚨 🚨 🚨")
            
            total_items = sum(len(items) for items in sensitive_data.values())
            print(f"   📊 TOTAL SENSITIVE ITEMS FOUND: {total_items}")
            
            for category, items in sensitive_data.items():
                if items:
                    print(f"\n   📁 {category.upper()}: {len(items)} items")
                    
                    # Show first 3 items completely
                    for idx, item in enumerate(items[:3], 1):
                        if isinstance(item, dict):
                            value = item.get('value', 'N/A')
                            confidence = item.get('confidence', 0)
                            length = item.get('length', len(value))
                            
                            print(f"\n     {idx}. CONFIDENCE: {confidence:.0%}, LENGTH: {length} chars")
                            print(f"        {'─'*60}")
                            
                            # Display COMPLETE value
                            if length > 300:
                                print(f"        FIRST 300 CHARACTERS:")
                                print(f"        {value[:300]}...")
                                print(f"        ... [full value in report] ...")
                            else:
                                print(f"        {value}")
                            
                            print(f"        {'─'*60}")
                        
                        else:
                            print(f"\n     {idx}. {str(item)[:200]}..." if len(str(item)) > 200 else f"     {idx}. {str(item)}")
                    
                    if len(items) > 3:
                        print(f"\n     ... and {len(items) - 3} more {category}")
        
        # Cloudflare Detection - COMPLETE
        cloudflare = result.get('findings', {}).get('cloudflare', {})
        if cloudflare:
            print(f"\n🛡️ 🛡️ 🛡️ CLOUDFLARE DETECTION 🛡️ 🛡️ 🛡️")
            print(f"   🔍 DETECTED: {'YES' if cloudflare.get('detected') else 'NO'}")
            print(f"   📊 CONFIDENCE: {cloudflare.get('confidence', 0):.0%}")
            
            if cloudflare.get('detected') and cloudflare.get('indicators'):
                print(f"\n   📋 INDICATORS FOUND: {cloudflare.get('indicator_count', 0)}")
                indicators = cloudflare.get('indicators', [])
                for idx, indicator in enumerate(indicators[:5], 1):
                    if isinstance(indicator, dict):
                        print(f"     {idx}. {indicator.get('type', 'unknown')}: {indicator.get('pattern', 'unknown')}")
                    else:
                        print(f"     {idx}. {indicator}")
        
        # Intelligence Data - COMPLETE
        intelligence = result.get('intelligence', {})
        if intelligence:
            print(f"\n🧠 🧠 🧠 THREAT INTELLIGENCE 🧠 🧠 🧠")
            print(f"   📊 IOC SCORE: {intelligence.get('ioc_score', 0):.2f}/1.0")
            print(f"   🎯 THREAT LEVEL: {intelligence.get('threat_level', 'low').upper()}")
            
            ioc_classification = intelligence.get('ioc_classification', {})
            for level, items in ioc_classification.items():
                if items:
                    print(f"\n   📁 {level.upper()} IOCS ({len(items)}):")
                    for idx, item in enumerate(items[:5], 1):
                        print(f"     {idx}. {item[:100]}..." if len(item) > 100 else f"     {idx}. {item}")
        
        print("\n" + "="*120)
        
        # Save COMPLETE report
        try:
            saved_file = self.report_saver.save_complete_report(result)
            print(f"\n💾 💾 💾 COMPLETE CLOUDBLEED REPORT SAVED TO: {saved_file}")
            print(f"📄 File contains ALL data with NO truncation")
            
            # Show file statistics
            if os.path.exists(saved_file):
                file_size = os.path.getsize(saved_file)
                print(f"📏 Report size: {file_size:,} bytes ({file_size/1024:.1f} KB)")
                
                with open(saved_file, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
                    print(f"📝 Total lines: {len(lines):,}")
        except Exception as e:
            print(f"\n⚠️ Could not save complete report: {e}")
    
    async def scan_multiple_complete(self, urls):
        """Scan every URL sequentially, show each result, then build the master report.

        Each URL is scanned with ``self.scan_url`` and immediately rendered with
        ``self.display_result_complete``. Between consecutive scans the coroutine
        sleeps (2 seconds after every fifth URL, otherwise 1 second). Once all
        URLs are processed the collected results are handed to
        ``self.generate_complete_report``.

        Args:
            urls: Sized, iterable collection of URL strings.

        Returns:
            List of scan results, in the same order as ``urls``.
        """
        print(f"\n🚀 🚀 🚀 Starting COMPLETE scan of {len(urls)} URLs...")
        print(f"⏰ Start time: {datetime.now().strftime('%H:%M:%S')}")

        collected = []
        position = 0
        for target in urls:
            position += 1
            print(f"\n{'='*80}")
            print(f"[{position}/{len(urls)}] 🔍 Scanning: {target}")
            print(f"{'='*80}")

            outcome = await self.scan_url(target)
            collected.append(outcome)
            self.display_result_complete(outcome)

            # No pause is needed after the final URL.
            if position >= len(urls):
                continue

            # Longer pause after every fifth request.
            pause = 1 if position % 5 else 2
            print(f"\n⏳ Waiting {pause} second before next scan...")
            await asyncio.sleep(pause)

        # Aggregate everything into the master report before returning.
        self.generate_complete_report(collected)
        return collected
    
    def generate_complete_report(self, results, filename="cloudbleed_complete_master_report.json"):
        """Aggregate scan results into a master report, save it as JSON and print a summary.

        Args:
            results: List of per-URL result dicts. Each one is read with
                ``.get`` for ``success``, ``findings`` and ``fingerprint``;
                missing (or ``None``) sections are treated as empty. Only
                results with a truthy ``success`` contribute to the statistics.
            filename: Path of the JSON file to write.

        Returns:
            The report dict: scan metadata, the raw ``results`` and a
            ``statistics`` sub-dict holding the aggregate counters.

        Raises:
            OSError: If ``filename`` cannot be opened for writing.
        """
        print(f"\n📊 📊 📊 GENERATING COMPLETE MASTER REPORT 📊 📊 📊")

        total_scans = len(results)
        successful_scans = sum(1 for r in results if r.get('success', False))

        report = {
            'scan_date': datetime.now().isoformat(),
            'scan_version': '4.0-COMPLETE',
            'total_scans': total_scans,
            'successful_scans': successful_scans,
            'failed_scans': total_scans - successful_scans,
            'results': results
        }

        # COMPLETE Statistics
        stats = {
            'cloudflare_sites': 0,
            'sensitive_data_sites': 0,
            'memory_leak_sites': 0,
            'critical_risk_sites': 0,
            'high_risk_sites': 0,
            'medium_risk_sites': 0,
            'low_risk_sites': 0,
            'total_memory_patterns': 0,
            'total_sensitive_items': 0,
            'sites_with_cloudbleed_risk': 0
        }

        for result in results:
            if not result.get('success'):
                continue

            # "or {}" also covers sections that are present but set to None.
            findings = result.get('findings') or {}

            cloudflare = findings.get('cloudflare') or {}
            if cloudflare.get('detected'):
                stats['cloudflare_sites'] += 1

            sensitive_data = findings.get('sensitive_data') or {}
            if sensitive_data:
                stats['total_sensitive_items'] += sum(
                    len(items) for items in sensitive_data.values()
                )
                stats['sensitive_data_sites'] += 1

            security = findings.get('security') or {}
            memory_patterns = security.get('memory_patterns')
            if memory_patterns:
                stats['total_memory_patterns'] += len(memory_patterns)
                stats['memory_leak_sites'] += 1

            # Risk level classification: anything unrecognised counts as low.
            risk_level = security.get('risk_level', 'low')
            if risk_level in ('critical', 'high', 'medium'):
                stats[f'{risk_level}_risk_sites'] += 1
            else:
                stats['low_risk_sites'] += 1

            # Cloudbleed-specific risk: a Cloudflare-fronted site that also
            # shows sensitive data or memory patterns.
            fingerprint = result.get('fingerprint') or {}
            if fingerprint.get('cdn') == 'Cloudflare' and (sensitive_data or memory_patterns):
                stats['sites_with_cloudbleed_risk'] += 1

        report['statistics'] = stats

        # Save COMPLETE report
        with open(filename, 'w', encoding='utf-8', errors='replace') as f:
            json.dump(report, f, indent=2, ensure_ascii=False, default=str)

        print(f"\n💾 💾 💾 COMPLETE MASTER REPORT SAVED TO: {filename}")

        # Display COMPLETE statistics
        print(f"\n📊 📊 📊 CLOUDBLEED SCAN STATISTICS 📊 📊 📊")
        print(f"{'='*80}")
        # The category counters overlap (one site can be Cloudflare, leak data
        # and carry a risk level), so the total must come from the scan count.
        print(f"Total URLs Scanned: {total_scans}")
        print(f"Cloudflare Sites: {stats['cloudflare_sites']}")
        print(f"Sites with Sensitive Data: {stats['sensitive_data_sites']} ({stats['total_sensitive_items']} items)")
        print(f"Sites with Memory Leak Patterns: {stats['memory_leak_sites']} ({stats['total_memory_patterns']} patterns)")
        print(f"Sites with Cloudbleed Risk: {stats['sites_with_cloudbleed_risk']}")
        print(f"\nRisk Distribution:")
        print(f"  • Critical Risk: {stats['critical_risk_sites']}")
        print(f"  • High Risk: {stats['high_risk_sites']}")
        print(f"  • Medium Risk: {stats['medium_risk_sites']}")
        print(f"  • Low Risk: {stats['low_risk_sites']}")
        print(f"{'='*80}")

        return report

def _ensure_url_scheme(url):
    """Return ``url`` with ``https://`` prepended unless it already starts with http(s)://."""
    if url.startswith(('http://', 'https://')):
        return url
    return 'https://' + url


def _user_confirms(prompt):
    """Ask a yes/no question; 'yes', 'y' or a bare Enter count as confirmation."""
    return input(prompt).strip().lower() in ('yes', 'y', '')


async def main_complete():
    """Run the interactive menu loop of the COMPLETE scanner.

    Creates one ``CompleteCloudbleedScanner`` and repeatedly prompts for a
    menu choice: scan a single URL, scan URLs listed in a file, scan a fixed
    set of test URLs, delete the ``.cache`` directory, show the cache size, or
    exit. The loop ends on choice 6, on Ctrl+C, or when stdin is closed.
    Any other exception is printed with its traceback and the menu is shown
    again.
    """
    print("""
    ╔══════════════════════════════════════════════════════════════════╗
    ║     CLOUDBLEED SCANNER v4.0 - COMPLETE EDITION                  ║
    ║     Cloudflare Memory Leak Detection - SHOWS ALL DATA           ║
    ║     NO TRUNCATION - COMPLETE INFORMATION DISPLAY                ║
    ╚══════════════════════════════════════════════════════════════════╝
    """)

    print("⚠️ ⚠️ ⚠️  WARNING: Use only for authorized security testing!")
    print("   Unauthorized scanning is illegal in most countries.\n")
    print("🔍 This version shows ALL data with NO truncation")
    print("📄 Complete reports are saved for full analysis\n")

    scanner = CompleteCloudbleedScanner(
        enable_cache=True,
        enable_intelligence=True
    )

    while True:
        try:
            print("\n" + "="*70)
            print("📋 📋 📋 COMPLETE SCANNER OPTIONS 📋 📋 📋")
            print("="*70)
            print("  1. 🔍 Scan single URL (COMPLETE analysis)")
            print("  2. 📁 Scan multiple URLs from file")
            print("  3. 🧪 Test scan with predefined URLs")
            print("  4. 🗑️  Clear cache")
            print("  5. 📊 Show statistics")
            print("  6. 🚪 Exit")
            print("="*70)

            choice = input("\nEnter choice (1-6): ").strip()

            if choice == '1':
                url = input("\n🌐 Enter URL to scan (COMPLETE analysis): ").strip()
                if not url:
                    print("❌ URL cannot be empty!")
                    continue

                normalized = _ensure_url_scheme(url)
                if normalized != url:
                    url = normalized
                    print(f"ℹ️  Added https:// automatically: {url}")

                print(f"\n🔍 Starting COMPLETE scan of: {url}")
                result = await scanner.scan_url(url)
                scanner.display_result_complete(result)

            elif choice == '2':
                filename = input("\n📁 Enter filename with URLs (one per line): ").strip()

                # Only the file read is guarded here, so a failure during the
                # scan itself is not misreported as a file-reading error.
                try:
                    with open(filename, 'r', encoding='utf-8') as f:
                        raw_urls = [line.strip() for line in f if line.strip()]
                except FileNotFoundError:
                    print(f"❌ File {filename} not found!")
                    continue
                except (OSError, UnicodeDecodeError) as e:
                    print(f"❌ Error reading file: {e}")
                    continue

                if not raw_urls:
                    print("❌ File is empty or contains no URLs!")
                    continue

                # Apply the same default scheme that option 1 applies.
                urls = [_ensure_url_scheme(entry) for entry in raw_urls]
                fixed = sum(1 for before, after in zip(raw_urls, urls) if before != after)

                print(f"📊 Found {len(urls)} URLs in file")
                if fixed:
                    print(f"ℹ️  Added https:// automatically to {fixed} URL(s)")
                print(f"📝 Sample URLs:")
                for url in urls[:3]:
                    print(f"  • {url}")
                if len(urls) > 3:
                    print(f"  ... and {len(urls) - 3} more")

                if _user_confirms("\n⚠️ ⚠️ ⚠️  Start COMPLETE scanning of ALL URLs? (yes/no): "):
                    print(f"\n🚀 Starting COMPLETE scan of {len(urls)} URLs...")
                    await scanner.scan_multiple_complete(urls)
                else:
                    print("❌ Scan cancelled")

            elif choice == '3':
                test_urls = [
                    'https://httpbin.org/headers',
                    'https://httpbin.org/html',
                    'https://example.com',
                    'https://httpbin.org/status/200',
                    'https://httpbin.org/json'
                ]

                print(f"\n🧪 Testing with {len(test_urls)} predefined URLs...")
                print("ℹ️  These are public test URLs for demonstration")

                if _user_confirms("\nStart test scan? (yes/no): "):
                    for url in test_urls:
                        result = await scanner.scan_url(url)
                        scanner.display_result_complete(result)
                        await asyncio.sleep(1)
                else:
                    print("❌ Test cancelled")

            elif choice == '4':
                if os.path.exists(".cache"):
                    import shutil
                    shutil.rmtree(".cache")
                    print("✅ Cache cleared successfully")
                else:
                    print("ℹ️  No cache directory found")

            elif choice == '5':
                print("\n📊 📊 📊 SCANNER STATISTICS 📊 📊 📊")
                print("="*60)
                if os.path.exists(".cache"):
                    cache_size = sum(f.stat().st_size for f in Path(".cache").rglob('*') if f.is_file())
                    print(f"Cache size: {cache_size:,} bytes ({cache_size/1024/1024:.2f} MB)")
                else:
                    print("Cache: Not initialized")
                print("="*60)

            elif choice == '6':
                print("\n👋 👋 👋 Goodbye! 👋 👋 👋")
                break

            else:
                print(f"❌ Invalid choice: {choice}")

        except KeyboardInterrupt:
            print("\n\n⚠️  Scan interrupted by user")
            break
        except EOFError:
            # input() raises EOFError once stdin is closed or exhausted. If the
            # generic handler below caught it, the menu would re-prompt forever.
            print("\n\n⚠️  Input stream closed, exiting")
            break
        except Exception as e:
            print(f"\n❌ Unexpected error: {e}")
            import traceback
            traceback.print_exc()

if __name__ == "__main__":
    # On Windows, switch asyncio to the selector-based event loop policy
    # before the loop is created.
    if sys.platform == 'win32':
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)

    # Stays None on a normal return, so the interpreter exits on its own.
    exit_status = None
    try:
        asyncio.run(main_complete())
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to leave the tool.
        print("\n\n👋 Exiting...")
        exit_status = 0
    except Exception as e:
        # Last-resort handler: report the failure with its traceback.
        print(f"\n💥 Critical error: {e}")
        import traceback
        traceback.print_exc()
        exit_status = 1

    if exit_status is not None:
        sys.exit(exit_status)
Greetings to :=====================================================================================
jericho * Larry W. Cashdollar * LiquidWorm * Hussin-X * D4NB4R * Malvuln (John Page aka hyp3rlinx)|
===================================================================================================