Version: 1.4.5
Module: core/osint/phone_intel.py
Last Updated: 2025-10-30
The Phone Intelligence system provides comprehensive phone number analysis with support for 11 countries and automatic region detection. It handles various input formats and normalizes numbers for consistent storage and duplicate prevention.
┌─────────────────────────────────────────────────────────┐
│                       User Query                        │
│                  phone:04167/21 60 111                  │
└────────────────────┬────────────────────────────────────┘
                     │
                     ▼
┌─────────────────────────────────────────────────────────┐
│             Query Parser (query_parser.py)              │
│ • Extracts phone from query using flexible regex        │
│ • Supports: phone:xxx, phone:"xxx", phonenumber:xxx     │
│ • Handles spaces, slashes, dashes in number             │
└────────────────────┬────────────────────────────────────┘
                     │
                     ▼
┌─────────────────────────────────────────────────────────┐
│           Phone Intelligence (phone_intel.py)           │
│ 1. Normalize input (remove non-digits except +)         │
│ 2. Auto-detect region if not provided                   │
│ 3. Parse with phonenumbers library                      │
│ 4. Validate & extract metadata                          │
│ 5. Generate format variations                           │
└────────────────────┬────────────────────────────────────┘
                     │
                     ▼
┌─────────────────────────────────────────────────────────┐
│                    Agent (agent.py)                     │
│ • Format results for display                            │
│ • Generate AI-powered alternative queries               │
│ • Execute web searches with variations                  │
└────────────────────┬────────────────────────────────────┘
                     │
                     ▼
┌─────────────────────────────────────────────────────────┐
│             Memory Store (memory_store.py)              │
│ • Normalize to E.164 format (+4941672160111)            │
│ • Check for duplicates using normalized value           │
│ • Store with original format preserved                  │
└─────────────────────────────────────────────────────────┘
File: core/osint/query_parser.py:86
'phone': r'(?:phone|phonenumber):(?:"([^"]+)"|([^\s]+(?:[\s/\-][^\s]+)*))'
Supports:
- phone:12345678 - Without quotes
- phone:"123 456 78" - With quotes
- phone:04167/21 60 111 - With slashes
- phone:555-123-4567 - With dashes
- phonenumber:xxx - Alternative keyword

Extraction Logic:
# Extract phone operator
phone_match = re.search(self.OPERATORS['phone'], remaining)
if phone_match:
# Handle both quoted and unquoted phone
phone_string = phone_match.group(1) if phone_match.group(1) else phone_match.group(2)
# Split by common separators for multiple phones
phones = [p.strip() for p in re.split(r'[,;]', phone_string) if p.strip()]
if phones:
parsed.phone = phones[0] # Primary phone
parsed.phones = phones # All phones
File: core/osint/phone_intel.py:220-271
When no region is provided, the system attempts to detect it automatically:
def _detect_region(self, normalized: str) -> Optional[str]:
    """Auto-detect country from national format number.

    Two input shapes are recognised:

    * a single leading ``0`` (but not ``00``): treated as European national
      format and probed against a fixed, ordered list of regions;
    * exactly ten digits with no leading ``0``: probed against ``US`` only.

    Args:
        normalized: Phone number string after input normalization.

    Returns:
        The first candidate region for which ``phonenumbers`` parses the
        number and reports it valid, or ``None`` when nothing matches.
    """
    if normalized.startswith('0') and not normalized.startswith('00'):
        # Numbers starting with 0 (European national format).
        # Order matters: the first region that validates wins.
        candidates = (
            'DE',  # Germany
            'GB',  # UK
            'PL',  # Poland
            'FR',  # France
            'IT',  # Italy
            'ES',  # Spain
            'AT',  # Austria
            'CH',  # Switzerland
            'NL',  # Netherlands
            'BE',  # Belgium
        )
    elif normalized.isdigit() and len(normalized) == 10:
        # Numbers without leading 0 (US/CA 10-digit format)
        candidates = ('US',)
    else:
        return None

    for region in candidates:
        try:
            parsed = phonenumbers.parse(normalized, region)
            if phonenumbers.is_valid_number(parsed):
                return region
        except Exception:
            # Detection stays best-effort (a failed probe just moves on to
            # the next region), but unlike a bare ``except:`` this no longer
            # swallows KeyboardInterrupt / SystemExit.
            continue
    return None
The system tries regions in priority order based on usage patterns:
| Input | Auto-Detected Region | Reasoning |
|---|---|---|
| 030 12345678 | DE | German area code (Berlin) |
| 020 7946 0958 | GB | UK area code (London) |
| 5551234567 | US | 10-digit without leading 0 |
| 022 123 4567 | PL | Polish area code (Warsaw) |
| 04167/21 60 111 | DE | German area code (4167) |
File: core/memory_store.py:269-350
All phone numbers are normalized to E.164 format for storage:
# E.164 layout: +[country code][subscriber number]
# Example:      +4941672160111
def _normalize_phone(self, phone: str) -> str:
    """Normalize phone to E.164 format.

    Args:
        phone: Phone number in any textual format.

    Returns:
        The E.164 string (for example ``+4941672160111``) when the number can
        be parsed. Otherwise, or when the ``phonenumbers`` package is not
        installed, the input reduced to its digits only.
    """
    try:
        import phonenumbers
    except ImportError:
        # Fallback: remove all non-digits
        return re.sub(r'\D', '', phone)

    # Remove all non-digit characters except +
    cleaned = re.sub(r'[^\d+]', '', phone)

    try:
        # With no default region this only succeeds for numbers that carry
        # their own country code (leading "+").
        parsed = phonenumbers.parse(cleaned, None)
    except phonenumbers.NumberParseException:
        # Try common regions; the first one that yields a valid number wins.
        for region in ('DE', 'US', 'GB'):
            try:
                parsed = phonenumbers.parse(cleaned, region)
            except phonenumbers.NumberParseException:
                continue
            if phonenumbers.is_valid_number(parsed):
                break
        else:
            # No region produced a valid number. Fallback: just digits
            return re.sub(r'\D', '', phone)

    # Format to E164
    return phonenumbers.format_number(
        parsed,
        phonenumbers.PhoneNumberFormat.E164,
    )
def remember_phone(self, phone: str, metadata: Optional[Dict] = None) -> bool:
    """Store a phone number unless an equivalent one is already remembered.

    Args:
        phone: Phone number in any format.
        metadata: Optional metadata to keep with the entry. A shallow copy is
            stored so later changes to the caller's dict do not alter the
            remembered entry.

    Returns:
        True if the phone was added, False if it is a duplicate.
    """
    # Normalize phone
    normalized_phone = self._normalize_phone(phone)

    # Check if already exists (compare normalized values). This runs before
    # the entry is built so duplicates cost no timestamp or dict allocation.
    for stored in self.data['phones']:
        if self._normalize_phone(stored['value']) == normalized_phone:
            logger.info("Phone already in memory")
            return False  # Duplicate detected

    # Store new phone
    self.data['phones'].append({
        'value': normalized_phone,  # Normalized format for comparison
        'original': phone.strip(),  # Original format preserved
        'added_at': datetime.now().isoformat(),
        'metadata': dict(metadata) if metadata else {},
    })
    return True
# All these variations are recognized as the SAME number:
remember_phone("04167/21 60 111") # → +4941672160111
remember_phone("041672160111") # → +4941672160111 (duplicate!)
remember_phone("+4941672160111") # → +4941672160111 (duplicate!)
remember_phone("+49 4167 2160111") # → +4941672160111 (duplicate!)
# Result: Only 1 phone stored
File: core/agent.py:1592-1594, 1901-1925
# 1. Parse query
parsed = parser.parse(query) # phone:04167/21 60 111
# 2. Check if phone intelligence enabled
if parsed.phone and phone_intel:
phone_parts = self._process_phone_intelligence(
parsed.phone,
phone_intel
)
# 3. Analyze phone
def _process_phone_intelligence(self, phone: str, phone_intel) -> list:
    """Analyze a phone number and build the response lines for display.

    Args:
        phone: Phone number string taken from the parsed query.
        phone_intel: Object exposing ``analyze_phone(phone)``; its result is
            read as a dict with at least ``'input'`` and ``'valid'``.

    Returns:
        List of response strings: a header, the input and validity lines,
        and, for valid numbers, formatted analysis, AI-suggested alternative
        queries and online search output.
    """
    # NOTE(review): the decorative glyphs below were mis-encoded in this
    # excerpt (every one rendered as the same character, so the valid and
    # invalid markers were indistinguishable). They are reconstructed here;
    # confirm the exact characters against core/agent.py.
    # NOTE(review): indentation was lost in this excerpt; the online search
    # is assumed to run only for valid numbers -- confirm against the source.

    # Analyze with PhoneIntelligence
    phone_result = phone_intel.analyze_phone(phone)
    is_valid = phone_result['valid']

    # Format results
    response_parts = [
        "\n═══ Phone Intelligence ═══\n",
        f"**Phone:** {phone_result['input']}",
        f"**Valid:** {'✓' if is_valid else '✗'}",
    ]
    if not is_valid:
        return response_parts

    # Add analysis results
    response_parts.extend(self._format_phone_results(phone_result))

    # Generate AI suggestions
    ai_queries = self._generate_phone_ai_suggestions(phone_result)
    if ai_queries:
        response_parts.append("\n═══ AI Analysis ═══\n")
        response_parts.append("**Alternative Queries:**")
        response_parts.extend(f" • {query}" for query in ai_queries)

    # Search online
    response_parts.extend(self._search_phone_online(phone_result))
    return response_parts
File: core/agent.py:1927-1965
AI suggestions are generated based on phone analysis metadata:
def _generate_phone_ai_suggestions(self, phone_result: dict) -> list:
    """Generate context-aware alternative queries.

    Args:
        phone_result: Phone analysis dict. The keys ``country``, ``type``,
            ``carrier``, ``formatted``, ``variations`` and ``input`` are read;
            every one of them is optional.

    Returns:
        Up to three de-duplicated query strings, in generation order.
    """
    country = phone_result.get('country', '')
    phone_type = phone_result.get('type', 'unknown')
    carrier = phone_result.get('carrier', '')
    formatted = phone_result.get('formatted', '')
    # Read with .get like the other fields so a result without 'input'
    # does not raise KeyError while variations are being filtered.
    original_input = phone_result.get('input')

    # Map phone types to readable text
    type_map = {
        'fixed_line': 'landline',
        'mobile': 'mobile',
        'fixed_line_or_mobile': 'phone',
        'toll_free': 'toll-free number',
        'voip': 'VoIP number',
    }
    type_text = type_map.get(phone_type, 'phone number')

    queries = []
    if country and formatted:
        # Query 1: Country + Type + Number
        queries.append(f"{country} {type_text} {formatted}")
    if carrier and formatted:
        # Query 2: Carrier + Number
        queries.append(f"{carrier} {formatted}")

    # Query 3: Alternative format -- the first of the leading two variations
    # that differs from both the raw input and the formatted number.
    for var in (phone_result.get('variations') or [])[:2]:
        if var != original_input and var != formatted:
            queries.append(f'"{var}" contact')
            break

    # Deduplicate (order-preserving) and limit to 3
    return list(dict.fromkeys(queries))[:3]
Input: phone:04167/21 60 111
Analysis:
Generated Queries:
- Apensen landline +49 4167 2160111
- "041672160111" contact

File: core/osint/phone_intel.py:318-411
def generate_variations(self, phone: str) -> List[str]:
    """Generate format variations for search.

    Args:
        phone: Phone number as entered by the user.

    Returns:
        Unique variations in a stable order: the original input first, then
        the normalized form, then country-specific formats. The order is
        deterministic because callers may slice the leading entries.
    """
    normalized = self._normalize_phone(phone)
    variations = [phone, normalized]

    # Country-specific formats
    if normalized.startswith('+49'):
        # German formats (the spaced forms split after three digits)
        rest = normalized[3:]
        variations.append('0' + rest)
        variations.append(f"+49 {rest[:3]} {rest[3:]}")
        variations.append(f"0{rest[:3]} {rest[3:]}")
    elif normalized.startswith('+44'):
        # UK formats
        variations.append('0' + normalized[3:])
        variations.append(
            f"+44 {normalized[3:5]} {normalized[5:9]} {normalized[9:]}"
        )
    elif normalized.startswith('+1'):
        # US/Canada formats
        area = normalized[2:5]
        exchange = normalized[5:8]
        number = normalized[8:]
        variations.append(f"({area}) {exchange}-{number}")
        variations.append(f"{area}-{exchange}-{number}")
        variations.append(f"{area}.{exchange}.{number}")
    # ... (more countries)

    # Remove duplicates while keeping first-seen order; list(set(...)) would
    # return the variations in an arbitrary, run-dependent order.
    return list(dict.fromkeys(variations))
| Country | Formats Generated |
|---|---|
| Germany (DE) | +49 151 12345678, 0151 12345678, 015112345678 |
| UK (GB) | +44 20 7946 0958, 020 7946 0958 |
| USA/CA | +1 555 123 4567, (555) 123-4567, 555-123-4567, 555.123.4567 |
| Poland (PL) | +48 22 123 4567, 22 123 4567 |
| France (FR) | +33 1 42 86 82 00, 01 42 86 82 00 |
class PhoneIntelligence:
    """Phone number OSINT capabilities."""

    def __init__(self):
        """Initialize with phonenumbers library if available."""

    def analyze_phone(self, phone: str, region: Optional[str] = None) -> Dict:
        """
        Comprehensive phone number analysis.

        Args:
            phone: Phone number in any format
            region: Optional country code (e.g., 'DE', 'US'); defaults to
                None when the caller does not supply one

        Returns:
            {
                'input': str,             # Original input
                'valid': bool,            # Is valid phone number
                'formatted': str,         # International format
                'country': str,           # Country/location name
                'region': str,            # Region code (e.g., 'DE')
                'carrier': str,           # Carrier name (if available)
                'type': str,              # mobile/fixed_line/voip/etc.
                'variations': List[str],  # Format variations
                'confidence': float       # 0.0 - 1.0
            }
        """
class MemoryStore:
    def _normalize_phone(self, phone: str) -> str:
        """Normalize phone to E.164 format.

        Falls back to a digits-only string when the number cannot be parsed
        or the phonenumbers package is not installed.
        """
    def remember_phone(self, phone: str,
                       metadata: Optional[Dict] = None,
                       user_id: str = "anonymous") -> bool:
        """
        Store phone number (with duplicate detection).

        Duplicates are detected by comparing normalized values, so the same
        number written in different formats is stored only once.

        Args:
            phone: Phone number in any format
            metadata: Optional metadata (country, type, etc.)
            user_id: User identifier (defaults to "anonymous")

        Returns:
            True if added, False if duplicate
        """
File: tests/unit/test_cloud_llm_client.py
OpenAI tests are automatically skipped if the package is not installed:
# Probe for the optional dependency once at import time; the flag drives the
# skip marker on the test class below.
try:
    import openai
except ImportError:
    HAS_OPENAI = False
else:
    HAS_OPENAI = True


@pytest.mark.skipif(not HAS_OPENAI, reason="openai package not installed")
class TestOpenAIClient:
    """OpenAI client tests; only run if openai is installed."""

    pass
# Test various formats
phone:04167/21 60 111 # German with slash
phone:"555 123 4567" # US with quotes
phone:+44 20 7946 0958 # UK international
phonenumber:022 123 4567 # Poland alternative keyword
# Test memory storage
phone:04167/21 60 111
<merke dir die phone number
status # Should show Phones: 1
# Test duplicate detection
phone:041672160111
<merke phone
status # Should still show Phones: 1 (duplicate!)
# Input: phone:123
# Output: Valid: ✗ False
# Reason: Too short, doesn't match any pattern
# Input: phone:99999999
# Output: Uses basic validation (7-15 digits)
# Type: unknown
# Country: None
# Falls back to basic validation
# Pattern: ^\+?\d{7,15}$
# No carrier/type detection
# Simple country detection based on prefix
Phone analysis results are cached in the session:
Web searches for phone variations respect rate limits:
config.yaml
Normalization prevents memory bloat:
Last Updated: 2025-10-30 Maintainer: Development Team Version: 1.4.5