Files
flowsint/flowsint-transforms/tests/transforms/email/to_leaks.py
2025-09-04 18:53:04 +02:00

176 lines
5.3 KiB
Python

import pytest
from unittest.mock import patch, MagicMock
from flowsint_transforms.emails.to_leaks import EmailToBreachesTransform
from flowsint_types.email import Email
from flowsint_types.breach import Breach
# Shared transform instance reused by the tests in this module.
# NOTE(review): the two positional arguments look like a sketch id and a
# scan id -- confirm against the EmailToBreachesTransform constructor.
transform = EmailToBreachesTransform("sketch_123", "scan_123")
def test_transform_name():
    """``name()`` must report ``"to_leaks"``."""
    reported_name = EmailToBreachesTransform.name()
    assert reported_name == "to_leaks"
def test_transform_category():
    """``category()`` must report ``"Email"``."""
    reported_category = EmailToBreachesTransform.category()
    assert reported_category == "Email"
def test_transform_key():
    """``key()`` must report ``"email"``."""
    reported_key = EmailToBreachesTransform.key()
    assert reported_key == "email"
def test_preprocess_string_emails():
    """Plain string addresses passed to ``preprocess`` come back as ``Email`` objects."""
    raw_addresses = ["test@example.com", "user@domain.org"]

    processed = transform.preprocess(raw_addresses)

    # One Email per input string, in the same order.
    assert processed == [Email(email=address) for address in raw_addresses]
def test_preprocess_dict_emails():
    """Dicts carrying an ``"email"`` key come back from ``preprocess`` as ``Email`` objects."""
    payloads = [
        {"email": address}
        for address in ("test@example.com", "user@domain.org")
    ]

    processed = transform.preprocess(payloads)

    wanted = [Email(email=payload["email"]) for payload in payloads]
    assert processed == wanted
def test_preprocess_email_objects():
    """``Email`` objects passed to ``preprocess`` must compare equal to the output."""
    email_objects = [
        Email(email=address)
        for address in ("test@example.com", "user@domain.org")
    ]

    processed = transform.preprocess(email_objects)

    assert processed == email_objects
def test_preprocess_mixed_formats():
    """A mix of strings, dicts and ``Email`` objects is accepted; dicts without ``"email"`` are dropped."""
    mixed_inputs = [
        "test@example.com",
        {"email": "user@domain.org"},
        Email(email="admin@company.com"),
        {"invalid_key": "should_be_ignored@test.com"},
    ]

    processed = transform.preprocess(mixed_inputs)
    seen_addresses = [item.email for item in processed]

    # Every supported input shape must survive preprocessing.
    for kept in ("test@example.com", "user@domain.org", "admin@company.com"):
        assert kept in seen_addresses
    # The dict lacking an "email" key must not contribute an address.
    assert "should_be_ignored@test.com" not in seen_addresses
# The API key is pinned so the test does not depend on the environment
# (the no-key test in this module shows scan() raises when the key is unset).
@patch("flowsint_transforms.emails.to_leaks.HIBP_API_KEY", "test-api-key")
# Patch requests.get in the module the transform is actually imported from.
@patch("flowsint_transforms.emails.to_leaks.requests.get")
def test_scan_successful_response(mock_get):
    """A 200 response listing two breaches must produce two ``Breach`` results.

    The HTTP call is mocked, and breach names are expected to come back
    lower-cased both on the object and inside its ``breach`` payload.
    """
    # Mock successful API response
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = [
        {"Name": "TestBreach", "Title": "Test Breach", "Domain": "test.com"},
        {"Name": "AnotherBreach", "Title": "Another Breach", "Domain": "another.com"},
    ]
    mock_get.return_value = mock_response

    emails = [Email(email="test@example.com")]
    result = transform.scan(emails)

    assert len(result) == 2
    assert isinstance(result[0], Breach)
    assert isinstance(result[1], Breach)
    assert result[0].name == "testbreach"
    assert result[1].name == "anotherbreach"
    assert result[0].breach["name"] == "testbreach"
    assert result[1].breach["name"] == "anotherbreach"
# The API key is pinned so the test does not depend on the environment
# (the no-key test in this module shows scan() raises when the key is unset).
@patch("flowsint_transforms.emails.to_leaks.HIBP_API_KEY", "test-api-key")
# Patch requests.get in the module the transform is actually imported from.
@patch("flowsint_transforms.emails.to_leaks.requests.get")
def test_scan_no_breaches_found(mock_get):
    """A 404 from the mocked HTTP call must yield an empty result list."""
    # Mock 404 response (no breaches found)
    mock_response = MagicMock()
    mock_response.status_code = 404
    mock_get.return_value = mock_response

    emails = [Email(email="test@example.com")]
    result = transform.scan(emails)

    assert len(result) == 0
# The API key is pinned so the test does not depend on the environment
# (the no-key test in this module shows scan() raises when the key is unset).
@patch("flowsint_transforms.emails.to_leaks.HIBP_API_KEY", "test-api-key")
# Patch requests.get in the module the transform is actually imported from.
@patch("flowsint_transforms.emails.to_leaks.requests.get")
def test_scan_api_error(mock_get):
    """An exception raised by the mocked HTTP call must yield an empty result list."""
    # Mock API error
    mock_get.side_effect = Exception("API Error")

    emails = [Email(email="test@example.com")]
    result = transform.scan(emails)

    assert len(result) == 0
# The API key is pinned so the test does not depend on the environment
# (the no-key test in this module shows scan() raises when the key is unset).
@patch("flowsint_transforms.emails.to_leaks.HIBP_API_KEY", "test-api-key")
# Patch requests.get in the module the transform is actually imported from.
@patch("flowsint_transforms.emails.to_leaks.requests.get")
def test_scan_missing_name_field(mock_get):
    """A breach entry without ``"Name"`` must still be returned, named ``"unknown"``."""
    # Mock API response with missing "Name" field
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = [
        {"Title": "Test Breach", "Domain": "test.com"},  # Missing "Name" field
        {"Name": "ValidBreach", "Title": "Valid Breach", "Domain": "valid.com"},
    ]
    mock_get.return_value = mock_response

    emails = [Email(email="test@example.com")]
    result = transform.scan(emails)

    assert len(result) == 2
    assert result[0].name == "unknown"  # Should default to "unknown"
    assert result[1].name == "validbreach"  # Should use the provided name
    assert result[0].breach["title"] == "Test Breach"
    assert result[1].breach["name"] == "validbreach"
# Patch the key on the module the transform is actually imported from;
# patching any other module path would leave the tested code untouched.
@patch("flowsint_transforms.emails.to_leaks.HIBP_API_KEY", None)
def test_scan_no_api_key():
    """Test that transform raises ValueError when HIBP_API_KEY is not set."""
    emails = [Email(email="test@example.com")]

    with pytest.raises(ValueError, match="HIBP_API_KEY not set"):
        transform.scan(emails)
def test_postprocess():
    """``postprocess`` must return the breaches unchanged and issue five graph queries.

    A private transform instance is used so that replacing ``neo4j_conn``
    with a mock does not leak into the module-level ``transform`` shared by
    the other tests.
    """
    # Test postprocess method with mocked neo4j connection
    local_transform = EmailToBreachesTransform("sketch_123", "scan_123")
    local_transform.neo4j_conn = MagicMock()

    # Create breach objects with the new structure
    breach1 = Breach(
        name="testbreach",
        title="Test Breach",
        domain="test.com",
        pwncount=1000,
        breach={"name": "testbreach", "title": "Test Breach"},
    )
    breach2 = Breach(
        name="anotherbreach",
        title="Another Breach",
        domain="another.com",
        pwncount=2000,
        breach={"name": "anotherbreach", "title": "Another Breach"},
    )
    breaches = [breach1, breach2]
    original_input = [Email(email="test@example.com")]

    result = local_transform.postprocess(breaches, original_input)

    assert result == breaches
    # Verify that neo4j queries were called:
    # - 2 breach node creation queries
    # - 1 email node creation query
    # - 2 relationship creation queries
    # Total: 5 queries
    assert local_transform.neo4j_conn.query.call_count == 5