mirror of
https://github.com/reconurge/flowsint.git
synced 2026-05-03 01:54:01 -05:00
refactor: remove occurrences of scanner
This commit is contained in:
0
flowsint-transforms/tests/transforms/__init__.py
Normal file
0
flowsint-transforms/tests/transforms/__init__.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Tests for the CryptoWalletAddressToNFTs transform.

Fixes: the test functions were misleadingly named ``*_to_transactions_*``
although this module exercises the wallet_to_nfts transform, and the mock
comment referred to ``_get_transactions`` while the test mocks ``_get_nfts``.
"""

from flowsint_transforms.crypto.wallet_to_nfts import CryptoWalletAddressToNFTs
from flowsint_types.wallet import CryptoWallet, CryptoNFT
from pydantic import HttpUrl

# Shared, stateless transform instance used by every test in this module.
transform = CryptoWalletAddressToNFTs("sketch_123", "scan_123")


def test_wallet_to_nfts_name():
    assert transform.name() == "wallet_to_nfts"


def test_wallet_to_nfts_category():
    assert transform.category() == "crypto"


def test_wallet_to_nfts_key():
    assert transform.key() == "address"


def test_preprocess_with_string():
    """A bare address string is promoted to a CryptoWallet."""
    input_data = ["0x742d35Cc6634C0532925a3b844Bc454e4438f44e"]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_preprocess_with_dict():
    """A dict with an ``address`` key is promoted to a CryptoWallet."""
    input_data = [{"address": "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"}]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_preprocess_with_wallet_object():
    """An already-typed CryptoWallet passes through preprocessing."""
    wallet = CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")
    input_data = [wallet]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_scan_mocked_nfts(monkeypatch):
    """scan() returns the NFTs produced by the (mocked) _get_nfts helper."""

    # Mock the _get_nfts method so no network/API call is made.
    def mock_get_nfts(address):
        return [
            CryptoNFT(
                wallet=CryptoWallet(
                    address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
                ),
                contract_address="0x123",
                token_id="1",
                collection_name="Test Collection",
                metadata_url="https://example.com/metadata.json",
                image_url="https://example.com/image.png",
                name="Test NFT",
            )
        ]

    monkeypatch.setattr(transform, "_get_nfts", mock_get_nfts)

    input_data = [CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")]
    result = transform.scan(input_data)

    # One result list per input wallet, one NFT in it.
    assert len(result) == 1
    assert len(result[0]) == 1
    assert result[0][0].contract_address == "0x123"
    assert result[0][0].collection_name == "Test Collection"
    # URL fields are pydantic HttpUrl instances, so compare against HttpUrl.
    assert result[0][0].metadata_url == HttpUrl("https://example.com/metadata.json")
    assert result[0][0].image_url == HttpUrl("https://example.com/image.png")
    assert result[0][0].name == "Test NFT"
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Tests for the CryptoWalletAddressToTransactions transform."""

import pytest
from flowsint_transforms.crypto.wallet_to_transactions import (
    CryptoWalletAddressToTransactions,
)
from flowsint_types.wallet import CryptoWallet, CryptoWalletTransaction

# One shared instance; the Etherscan key is a dummy value for testing.
transform = CryptoWalletAddressToTransactions(
    "sketch_123",
    "scan_123",
    params={"ETHERSCAN_API_KEY": "ta-clef-api"},
)

# Well-formed address reused across the preprocessing tests.
_ADDRESS = "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_wallet_address_to_transactions_name():
    assert transform.name() == "wallet_to_transactions"


def test_wallet_address_to_transactions_category():
    # NOTE(review): "CryptoCryptoWallet" looks like a doubled prefix — confirm
    # this is really the intended category string of the transform.
    assert transform.category() == "CryptoCryptoWallet"


def test_wallet_address_to_transactions_key():
    assert transform.key() == "address"


def test_preprocess_with_string():
    """Bare address strings become CryptoWallet objects."""
    result = transform.preprocess([_ADDRESS])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


def test_preprocess_with_dict():
    """Dicts carrying an ``address`` key become CryptoWallet objects."""
    result = transform.preprocess([{"address": _ADDRESS}])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


def test_preprocess_with_wallet_object():
    """Typed CryptoWallet inputs pass through untouched."""
    result = transform.preprocess([CryptoWallet(address=_ADDRESS)])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


@pytest.mark.asyncio
async def test_scan_mocked_transactions(monkeypatch):
    """scan() surfaces the transactions from the (mocked) fetch helper."""

    # _get_transactions takes both the address and the API key.
    async def mock_get_transactions(address, api_key):
        transaction = CryptoWalletTransaction(
            hash="0x123",
            source=CryptoWallet(
                address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
            ),
            target=CryptoWallet(address="0x456"),
            value=1.0,  # 1 ETH
            timestamp="1234567890",
            block_number="12345",
            block_hash="0xabc",
            nonce="1",
            transaction_index="0",
            gas="21000",
            gas_price="20000000000",
            gas_used="21000",
            cumulative_gas_used="21000",
            input="0x",
            contract_address=None,
        )
        return [transaction]

    monkeypatch.setattr(transform, "_get_transactions", mock_get_transactions)

    result = await transform.scan(
        [CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")]
    )

    assert len(result) == 1
    assert len(result[0]) == 1
    first = result[0][0]
    assert first.hash == "0x123"
    assert first.source.address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
    assert first.target.address == "0x456"
    assert first.value == 1.0
    assert first.timestamp == "1234567890"


def test_transform_requires_api_key():
    """Test that the transform validates required ETHERSCAN_API_KEY parameter at construction"""
    with pytest.raises(
        ValueError, match="Transform wallet_to_transactions received invalid params"
    ):
        CryptoWalletAddressToTransactions("sketch_123", "scan_123", params={})


def test_transform_with_invalid_api_key_type():
    """Test that the transform validates parameter types"""
    with pytest.raises(
        ValueError, match="Transform wallet_to_transactions received invalid params"
    ):
        CryptoWalletAddressToTransactions(
            "sketch_123", "scan_123", params={"ETHERSCAN_API_KEY": 123}
        )
|
||||
153
flowsint-transforms/tests/transforms/domain/resolve.py
Normal file
153
flowsint-transforms/tests/transforms/domain/resolve.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""Tests for ResolveTransform (domain -> IP resolution)."""

from flowsint_transforms.domains.resolve import ResolveTransform
from flowsint_types.domain import Domain
from flowsint_types.ip import Ip
from typing import List
import pytest

transform = ResolveTransform("sketch_123", "scan_123")


def test_preprocess_valid_domains():
    """Valid Domain objects survive preprocessing in order."""
    inputs = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    processed = transform.preprocess(inputs)
    assert [d.domain for d in processed] == [d.domain for d in inputs]


def test_unprocessed_valid_domains():
    """Plain strings are promoted to Domain objects."""
    raw = [
        "example.com",
        "example2.com",
    ]
    processed = transform.preprocess(raw)
    assert [d for d in processed] == [Domain(domain=name) for name in raw]


def test_preprocess_invalid_domains():
    """Syntactically invalid domains are filtered out."""
    processed = transform.preprocess(
        [
            Domain(domain="example.com"),
            Domain(domain="invalid_domain"),
            Domain(domain="example.org"),
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names


def test_preprocess_multiple_formats():
    """Mixed dict / object / string inputs are normalized; bad ones dropped."""
    processed = transform.preprocess(
        [
            {"domain": "example.com"},
            {"invalid_key": "example.io"},
            Domain(domain="example.org"),
            "example.org",
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names
    assert "example.io" not in names


@pytest.mark.asyncio
async def test_scan_returns_ip(monkeypatch):
    """execute() resolves each domain to the (mocked) IP."""

    # Stub DNS resolution so the test never touches the network.
    def fake_gethostbyname(domain):
        return "12.23.34.45"

    monkeypatch.setattr("socket.gethostbyname", fake_gethostbyname)

    output = await transform.execute([Domain(domain="example.com")])
    print(output)
    assert isinstance(output, list)
    assert output[0].address == "12.23.34.45"


def test_schemas():
    """Input/output schemas expose the expected type names and properties."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    # Check structure and key properties rather than an exact dict match.
    assert input_schema["type"] == "Domain"
    assert isinstance(input_schema["properties"], list)
    assert "domain" in [prop["name"] for prop in input_schema["properties"]]

    assert output_schema["type"] == "Ip"
    assert isinstance(output_schema["properties"], list)
    assert "address" in [prop["name"] for prop in output_schema["properties"]]


class TestResolveInputOutputTypes:
    """Test the InputType/OutputType functionality for ResolveTransform"""

    def test_input_output_types_are_defined(self):
        """Test that InputType and OutputType are properly defined"""
        assert hasattr(ResolveTransform, "InputType")
        assert hasattr(ResolveTransform, "OutputType")
        assert ResolveTransform.InputType == List[Domain]
        assert ResolveTransform.OutputType == List[Ip]

    def test_schemas_use_generate_methods(self):
        """Test that schema methods use the new generate methods"""
        # These should work without error.
        generated_in = ResolveTransform.generate_input_schema()
        generated_out = ResolveTransform.generate_output_schema()

        assert isinstance(generated_in, dict)
        assert isinstance(generated_out, dict)
        assert generated_in["type"] == "Domain"
        assert generated_out["type"] == "Ip"

    def test_schema_methods_return_same_as_generate_methods(self):
        """Test that input_schema() and output_schema() return the same as generate methods"""
        assert ResolveTransform.input_schema() == ResolveTransform.generate_input_schema()
        assert ResolveTransform.output_schema() == ResolveTransform.generate_output_schema()

    def test_input_schema_properties(self):
        """Test input schema has expected properties"""
        schema = ResolveTransform.input_schema()
        names = [p["name"] for p in schema["properties"]]
        # Domain should expose at least its "domain" property.
        assert "domain" in names

    def test_output_schema_properties(self):
        """Test output schema has expected properties"""
        schema = ResolveTransform.output_schema()
        names = [p["name"] for p in schema["properties"]]
        # Ip should expose at least its "address" property.
        assert "address" in names

    def test_type_accessibility_from_instance(self):
        """Test that types are accessible from transform instance"""
        instance = ResolveTransform("test", "test")

        assert instance.InputType == List[Domain]
        assert instance.OutputType == List[Ip]

        # Schemas must also be generatable from an instance.
        assert instance.generate_input_schema()["type"] == "Domain"
        assert instance.generate_output_schema()["type"] == "Ip"
|
||||
95
flowsint-transforms/tests/transforms/domain/subdomains.py
Normal file
95
flowsint-transforms/tests/transforms/domain/subdomains.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Tests for SubdomainTransform.

Fixes: the import line read ``from flowsint_types.domain import Domain, Domain``
(duplicate name); French comments translated to English.
"""

from flowsint_transforms.domains.subdomains import SubdomainTransform
from flowsint_types.domain import Domain

transform = SubdomainTransform("sketch_123", "scan_123")


def test_preprocess_valid_domains():
    """Valid Domain objects survive preprocessing in order."""
    domains = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    expected_domains = [d.domain for d in domains]

    assert result_domains == expected_domains


def test_unprocessed_valid_domains():
    """Plain strings are promoted to Domain objects."""
    domains = [
        "example.com",
        "example2.com",
    ]
    result = transform.preprocess(domains)
    result_domains = [d for d in result]
    expected_domains = [Domain(domain=d) for d in domains]
    assert result_domains == expected_domains


def test_preprocess_invalid_domains():
    """Syntactically invalid domains are filtered out."""
    domains = [
        Domain(domain="example.com"),
        Domain(domain="invalid_domain"),
        Domain(domain="example.org"),
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    assert "example.com" in result_domains
    assert "example.org" in result_domains
    assert "invalid_domain" not in result_domains


def test_preprocess_multiple_formats():
    """Mixed dict / object / string inputs are normalized; bad ones dropped."""
    domains = [
        {"domain": "example.com"},
        {"invalid_key": "example.io"},
        Domain(domain="example.org"),
        "example.org",
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    assert "example.com" in result_domains
    assert "example.org" in result_domains
    assert "invalid_domain" not in result_domains
    assert "example.io" not in result_domains


def test_scan_extracts_subdomains(monkeypatch):
    """execute() parses crt.sh-style records into Domain objects."""
    mock_response = [
        {"name_value": "mail.example.com\nwww.example.com"},
        {"name_value": "api.example.com"},
        {"name_value": "invalid_domain"},  # should be ignored
    ]

    class MockRequestsResponse:
        """Minimal stand-in for requests.Response."""

        def __init__(self, json_data):
            self._json_data = json_data
            self.status_code = 200

        def json(self):
            return self._json_data

        @property
        def ok(self):
            return True

    def mock_get(url, timeout):
        # The queried URL must contain the input domain.
        assert "example.com" in url
        return MockRequestsResponse(mock_response)

    # Patch the network request used by the transform module.
    monkeypatch.setattr("requests.get", mock_get)

    input_data = [Domain(domain="example.com")]
    domains = transform.execute(input_data)
    assert isinstance(domains, list)
    for sub in domains:
        print(sub)
        assert isinstance(sub, Domain)
    # TODO(review): `expected` is unused because the assertion below is
    # commented out — re-enable it once the subdomain shape is settled.
    expected = sorted(["mail.example.com", "www.example.com", "api.example.com"])
    print(domains)
    # assert domains[0].subdomains == expected
|
||||
393
flowsint-transforms/tests/transforms/domain/to_history.py
Normal file
393
flowsint-transforms/tests/transforms/domain/to_history.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""Test doubles and fixtures for DomainToHistoryTransform tests."""

import pytest
import json
import os
from unittest.mock import Mock
from flowsint_transforms.domain.to_history import DomainToHistoryTransform
from flowsint_types.domain import Domain


class MockNeo4jConn:
    """Records node/relationship creation calls instead of hitting Neo4j."""

    def __init__(self):
        self.nodes_created = []
        self.relationships_created = []

    def create_node(self, label, key, value, **kwargs):
        self.nodes_created.append(
            {"label": label, "key": key, "value": value, **kwargs}
        )

    def create_relationship(
        self,
        from_label,
        from_key,
        from_value,
        to_label,
        to_key,
        to_value,
        relationship_type,
    ):
        self.relationships_created.append(
            {
                "from": f"{from_label}:{from_value}",
                "to": f"{to_label}:{to_value}",
                "type": relationship_type,
            }
        )

    def query(self, query, params):
        """Mock query method to avoid errors."""
        pass


class MockTransform(DomainToHistoryTransform):
    """Transform wired to the in-memory Neo4j stub, bypassing real __init__."""

    def __init__(self):
        self.sketch_id = "test_sketch_123"
        self.neo4j_conn = MockNeo4jConn()
        self._extracted_data = []
        self._extracted_individuals = []

    def log_graph_message(self, message):
        """Mock log_graph_message method."""
        pass


@pytest.fixture
def transform():
    """Create a transform instance for testing."""
    return MockTransform()


@pytest.fixture
def test_data():
    """Load test data from data.json."""
    here = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(here, "..", "..", "test_data", "data.json")
    with open(data_file, "r") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def test_preprocess_valid_domains(transform):
    """Test preprocessing with valid domains."""
    inputs = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    processed = transform.preprocess(inputs)
    assert [d.domain for d in processed] == [d.domain for d in inputs]


def test_preprocess_string_domains(transform):
    """Test preprocessing with string domains."""
    processed = transform.preprocess(["example.com", "example2.com"])

    assert len(processed) == 2
    assert all(isinstance(d, Domain) for d in processed)
    assert processed[0].domain == "example.com"
    assert processed[1].domain == "example2.com"


def test_preprocess_dict_domains(transform):
    """Test preprocessing with dict domains."""
    processed = transform.preprocess(
        [{"domain": "example.com"}, {"domain": "example2.com"}]
    )

    assert len(processed) == 2
    assert all(isinstance(d, Domain) for d in processed)
    assert processed[0].domain == "example.com"
    assert processed[1].domain == "example2.com"


def test_preprocess_invalid_domains(transform):
    """Test preprocessing with invalid domains."""
    processed = transform.preprocess(
        [
            Domain(domain="example.com"),
            Domain(domain="invalid_domain"),
            Domain(domain="example.org"),
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names


def test_is_redacted(transform):
    """Test the __is_redacted method (accessed via its name-mangled form)."""
    redacted = transform._DomainToHistoryTransform__is_redacted

    # Values containing the privacy marker are treated as redacted.
    assert redacted("REDACTED FOR PRIVACY")
    assert redacted("redacted for privacy")
    assert redacted("Some text with PRIVACY in it")

    # Ordinary contact values are not.
    assert not redacted("JOHN DOE")
    assert not redacted("john@doe.com")
    assert not redacted("123 JOHN STREET")
    assert not redacted("DOE CITY")


def test_has_non_redacted_info(transform):
    """Test the __has_non_redacted_info method."""
    has_info = transform._DomainToHistoryTransform__has_non_redacted_info

    # A contact with usable fields is kept.
    assert has_info(
        {
            "full_name": "JOHN DOE",
            "email_address": "john@doe.com, martinemah@yahoo.com",
            "phone_number": "+123456789",
            "mailing_address": "123 JOHN STREET",
            "city_name": "DOE CITY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )

    # A fully redacted contact is rejected.
    assert not has_info(
        {
            "full_name": "REDACTED FOR PRIVACY",
            "email_address": "redacted for privacy",
            "phone_number": "REDACTED FOR PRIVACY",
            "mailing_address": "REDACTED FOR PRIVACY",
            "city_name": "REDACTED FOR PRIVACY",
            "zip_code": "REDACTED FOR PRIVACY",
            "country_name": "REDACTED FOR PRIVACY",
        }
    )

    # So is an empty contact.
    assert not has_info({})


def test_extract_individual_from_contact(transform):
    """Test the __extract_individual_from_contact method."""
    contact = {
        "full_name": "JOHN DOE",
        "email_address": "john@doe.com, martinemah@yahoo.com",
        "phone_number": "+123456789",
        "mailing_address": "123 JOHN STREET",
        "city_name": "DOE CITY",
        "zip_code": "12345",
        "country_name": "United States",
    }

    individual = transform._DomainToHistoryTransform__extract_individual_from_contact(
        contact, "REGISTRANT"
    )

    assert individual is not None
    # NOTE(review): these expected names ("MARC"/"DESCOLLONGES") do not match
    # the contact's full_name "JOHN DOE" — looks like a partially anonymized
    # fixture; confirm which values the extractor should really produce.
    assert individual.first_name == "MARC"
    assert individual.last_name == "DESCOLLONGES"
    assert individual.full_name == "JOHN DOE"
    assert len(individual.email_addresses) == 2
    assert "john@doe.com" in individual.email_addresses
    assert "martinemah@yahoo.com" in individual.email_addresses
    assert individual.phone_numbers == ["+123456789"]


def test_extract_individual_redacted_name(transform):
    """Test that individuals with redacted names are skipped."""
    contact = {
        "full_name": "REDACTED FOR PRIVACY",
        "email_address": "test@example.com",
        "phone_number": "+1234567890",
    }

    individual = transform._DomainToHistoryTransform__extract_individual_from_contact(
        contact, "REGISTRANT"
    )
    assert individual is None


def test_is_valid_email(transform):
    """Test the __is_valid_email method."""
    is_valid = transform._DomainToHistoryTransform__is_valid_email

    # Accepted formats.
    assert is_valid("test@example.com")
    assert is_valid("user.name@domain.org")
    assert is_valid("user+tag@example.co.uk")

    # Rejected formats.
    assert not is_valid("invalid-email")
    assert not is_valid("@example.com")
    assert not is_valid("test@")
    assert not is_valid("")


def test_extract_physical_address(transform):
    """Test the __extract_physical_address method."""
    extract = transform._DomainToHistoryTransform__extract_physical_address

    # A fully specified address is extracted field by field.
    address = extract(
        {
            "mailing_address": "123 JOHN STREET",
            "city_name": "DOE CITY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )

    assert address is not None
    assert address.address == "123 JOHN STREET"
    assert address.city == "DOE CITY"
    assert address.zip == "12345"
    assert address.country == "United States"

    # Any redacted component voids the whole address.
    address = extract(
        {
            "mailing_address": "123 JOHN STREET",
            "city_name": "REDACTED FOR PRIVACY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )
    assert address is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_scan_with_test_data(transform, test_data, monkeypatch):
    """Test the scan method with test data."""

    # Stub the Whoxy lookup so the known fixture is returned for epios.com.
    def mock_get_infos(domain):
        return test_data if domain == "epios.com" else {}

    monkeypatch.setattr(
        transform, "_DomainToHistoryTransform__get_infos_from_whoxy", mock_get_infos
    )

    input_domains = [Domain(domain="epios.com")]
    results = await transform.scan(input_domains)

    # One result per WHOIS record in the fixture.
    assert len(results) == 16  # 16 WHOIS records in the test data
    assert all(r.domain == "epios.com" for r in results)

    # Raw record data is captured alongside the results.
    assert len(transform._extracted_data) == 16

    # At least one individual must have been extracted.
    assert (
        len(transform._extracted_individuals) > 0
    ), "Should have extracted some individuals"

    # Verify JOHN DOE shows up among the extracted individuals.
    target_found = False
    matched_infos = []

    for individual_info in transform._extracted_individuals:
        individual = individual_info["individual"]
        if "JOHN DOE" in individual.full_name:
            target_found = True
            matched_infos.append(individual_info)
            print(
                f"Found MARC: {individual.full_name} ({individual_info['contact_type']})"
            )
            print(f"  Emails: {individual.email_addresses}")
            print(f"  Phones: {individual.phone_numbers}")

    assert target_found, "JOHN DOE should be found in the extracted individuals"
    assert (
        len(matched_infos) > 0
    ), f"Expected to find JOHN DOE, but found {len(matched_infos)} instances"

    # Debug summary of everything that was extracted.
    print(f"\n=== Summary of extracted individuals ===")
    for individual_info in transform._extracted_individuals:
        individual = individual_info["individual"]
        print(
            f"- {individual.full_name} ({individual_info['contact_type']}) for {individual_info['domain_name']}"
        )
        if individual.email_addresses:
            print(f"  Emails: {individual.email_addresses}")
        if individual.phone_numbers:
            print(f"  Phones: {individual.phone_numbers}")
|
||||
|
||||
|
||||
def test_postprocess_creates_nodes_and_relationships(transform, test_data, monkeypatch):
    """Test that postprocess creates the expected nodes and relationships.

    Improvement: the original drove the coroutine through a hand-rolled
    ``new_event_loop`` / ``run_until_complete`` / ``close`` dance; ``asyncio.run``
    does the same thing (fresh loop, run to completion, close) in one call.
    """

    # Stub the Whoxy lookup so the known fixture is returned for epios.com.
    def mock_get_infos(domain):
        if domain == "epios.com":
            return test_data
        return {}

    monkeypatch.setattr(
        transform, "_DomainToHistoryTransform__get_infos_from_whoxy", mock_get_infos
    )

    import asyncio

    # First run scan to populate _extracted_data and _extracted_individuals.
    input_domains = [Domain(domain="epios.com")]
    results = asyncio.run(transform.scan(input_domains))

    # Debug: check what individuals were extracted.
    print(
        f"\n=== DEBUG: _extracted_individuals has {len(transform._extracted_individuals)} individuals ==="
    )
    for i, individual_info in enumerate(transform._extracted_individuals):
        individual = individual_info["individual"]
        print(
            f"Individual {i+1}: {individual.full_name} ({individual_info['contact_type']}) for {individual_info['domain_name']}"
        )
        if individual.email_addresses:
            print(f"  Emails: {individual.email_addresses}")
        if individual.phone_numbers:
            print(f"  Phones: {individual.phone_numbers}")

    # Now run postprocess against the stubbed Neo4j connection.
    print(f"\n=== Running postprocess ===")
    transform.postprocess(results, input_domains)

    print(f"=== Postprocess completed ===")
    print(f"Nodes created: {len(transform.neo4j_conn.nodes_created)}")
    print(f"Relationships created: {len(transform.neo4j_conn.relationships_created)}")

    # Both nodes and relationships must have been recorded.
    assert len(transform.neo4j_conn.nodes_created) > 0
    assert len(transform.neo4j_conn.relationships_created) > 0

    # At least one domain node.
    domain_nodes = [
        n for n in transform.neo4j_conn.nodes_created if n["label"] == "domain"
    ]
    assert len(domain_nodes) > 0

    # At least one individual node (should include JOHN DOE).
    individual_nodes = [
        n for n in transform.neo4j_conn.nodes_created if n["label"] == "individual"
    ]
    assert len(individual_nodes) > 0

    # JOHN DOE must appear among the individual nodes.
    marc_nodes = [n for n in individual_nodes if "JOHN DOE" in n["value"]]
    assert (
        len(marc_nodes) > 0
    ), "JOHN DOE should be in the individual nodes"
|
||||
|
||||
|
||||
def test_schemas(transform):
    """Test that the transform has the expected schemas."""
    schemas = (transform.input_schema(), transform.output_schema())
    # Both schema methods must return something (shape checked elsewhere).
    for schema in schemas:
        assert schema is not None
|
||||
103
flowsint-transforms/tests/transforms/domain/whois.py
Normal file
103
flowsint-transforms/tests/transforms/domain/whois.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from flowsint_transforms.domains.whois import WhoisTransform
|
||||
from flowsint_types.domain import Domain
|
||||
|
||||
transform = WhoisTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_domains():
|
||||
domains = [
|
||||
Domain(domain="example.com"),
|
||||
Domain(domain="example2.com"),
|
||||
]
|
||||
result = transform.preprocess(domains)
|
||||
|
||||
result_domains = [d.domain for d in result]
|
||||
expected_domains = [d.domain for d in domains]
|
||||
|
||||
assert result_domains == expected_domains
|
||||
|
||||
|
||||
def test_unprocessed_valid_domains():
|
||||
domains = [
|
||||
"example.com",
|
||||
"example2.com",
|
||||
]
|
||||
result = transform.preprocess(domains)
|
||||
result_domains = [d for d in result]
|
||||
expected_domains = [Domain(domain=d) for d in domains]
|
||||
assert result_domains == expected_domains
|
||||
|
||||
|
||||
def test_preprocess_invalid_domains():
    """Entries whose domain name does not validate are dropped by preprocess."""
    candidates = [
        Domain(domain="example.com"),
        Domain(domain="invalid_domain"),
        Domain(domain="example.org"),
    ]
    kept = [d.domain for d in transform.preprocess(candidates)]
    assert "example.com" in kept
    assert "example.org" in kept
    assert "invalid_domain" not in kept
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, Domain objects and strings; bad dict keys are dropped."""
    mixed = [
        {"domain": "example.com"},
        {"invalid_key": "example.io"},  # wrong key -> must be filtered out
        Domain(domain="example.org"),
        "example.org",
    ]
    kept = [d.domain for d in transform.preprocess(mixed)]
    assert "example.com" in kept
    assert "example.org" in kept
    assert "invalid_domain" not in kept
    assert "example.io" not in kept
def test_scan_returns_whois_objects(monkeypatch):
    """execute() enriches each Domain with WHOIS data (lookup is mocked)."""
    whois_record = {
        "registrar": "MockRegistrar",
        "org": "MockOrg",
        "city": "MockCity",
        "country": "MockCountry",
        "emails": ["admin@example.com"],
        "creation_date": "2020-01-01",
        "expiration_date": "2030-01-01",
    }
    # Patch `whois.whois` so no real network lookup happens.
    monkeypatch.setattr("whois.whois", lambda domain: whois_record)

    output = transform.execute([Domain(domain="example.com")])

    assert isinstance(output, list)
    assert isinstance(output[0], Domain)
    assert output[0].whois.org == "MockOrg"
    assert output[0].whois.email.email == "admin@example.com"
def test_schemas():
    """Both input and output schemas are the full Domain schema."""
    expected_schema = {
        "type": "Domain",
        "properties": [
            {"name": "domain", "type": "string"},
            {"name": "subdomains", "type": "array | null"},
            {"name": "ips", "type": "array | null"},
            {"name": "whois", "type": "Whois | null"},
        ],
    }
    assert transform.input_schema() == expected_schema
    assert transform.output_schema() == expected_schema
336
flowsint-transforms/tests/transforms/email/to_gravatar.py
Normal file
336
flowsint-transforms/tests/transforms/email/to_gravatar.py
Normal file
@@ -0,0 +1,336 @@
|
||||
import hashlib
|
||||
from unittest.mock import Mock, patch
|
||||
from flowsint_transforms.emails.to_gravatar import EmailToGravatarTransform
|
||||
from flowsint_types.email import Email
|
||||
from flowsint_types.gravatar import Gravatar
|
||||
|
||||
transform = EmailToGravatarTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
class TestEmailToGravatarTransform:
    """Test suite for EmailToGravatarTransform.

    Methods use the module-level ``transform`` instance; HTTP calls are
    mocked via ``unittest.mock.patch("requests.get")`` throughout.
    """

    def test_name(self):
        """Test the transform name"""
        assert EmailToGravatarTransform.name() == "to_gravatar"

    def test_category(self):
        """Test the transform category"""
        assert EmailToGravatarTransform.category() == "Email"

    def test_key(self):
        """Test the transform key"""
        assert EmailToGravatarTransform.key() == "email"

    def test_input_schema(self):
        """Test the input schema generation"""
        schema = EmailToGravatarTransform.input_schema()
        assert schema["type"] == "Email"
        assert "properties" in schema
        # Check that email property is present
        email_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "email"), None
        )
        assert email_prop is not None
        assert email_prop["type"] == "string"

    def test_output_schema(self):
        """Test the output schema generation"""
        schema = EmailToGravatarTransform.output_schema()
        assert schema["type"] == "Gravatar"
        assert "properties" in schema
        # Check that required properties are present
        src_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "src"), None
        )
        hash_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "hash"), None
        )
        assert src_prop is not None
        assert hash_prop is not None

    def test_preprocess_string_emails(self):
        """Test preprocessing with string emails"""
        emails = [
            "test@example.com",
            "user@gmail.com",
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_dict_emails(self):
        """Test preprocessing with dictionary emails"""
        emails = [
            {"email": "test@example.com"},
            {"email": "user@gmail.com"},
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_email_objects(self):
        """Test preprocessing with Email objects"""
        emails = [
            Email(email="test@example.com"),
            Email(email="user@gmail.com"),
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_mixed_formats(self):
        """Test preprocessing with mixed input formats"""
        emails = [
            "test@example.com",
            {"email": "user@gmail.com"},
            Email(email="admin@company.com"),
        ]
        result = transform.preprocess(emails)
        assert len(result) == 3
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"
        assert result[2].email == "admin@company.com"

    def test_preprocess_invalid_inputs(self):
        """Test preprocessing with invalid inputs"""
        emails = [
            "not-an-email",
            {"invalid_key": "test@example.com"},
            {"email": "invalid-email"},
            None,
            123,
        ]
        result = transform.preprocess(emails)
        # The preprocess method doesn't validate email format, it just creates Email objects
        # for valid string inputs and dicts with email key
        assert len(result) == 2  # "not-an-email" and "invalid-email" are processed
        assert result[0].email == "not-an-email"
        assert result[1].email == "invalid-email"

    def test_preprocess_empty_list(self):
        """Test preprocessing with empty list"""
        result = transform.preprocess([])
        assert result == []

    @patch("requests.get")
    def test_scan_successful_gravatar(self, mock_get):
        """Test successful gravatar retrieval"""
        # Mock successful response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 1
        assert isinstance(result[0], Gravatar)
        # Gravatar hashes are the MD5 of the e-mail address
        assert result[0].hash == hashlib.md5("test@example.com".encode()).hexdigest()
        assert "gravatar.com/avatar/" in str(result[0].src)

    @patch("requests.get")
    def test_scan_failed_request(self, mock_get):
        """Test handling of failed HTTP requests"""
        # Mock failed response
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 0

    @patch("requests.get")
    def test_scan_request_exception(self, mock_get):
        """Test handling of request exceptions"""
        # Mock exception
        mock_get.side_effect = Exception("Network error")

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 0

    @patch("requests.get")
    def test_scan_multiple_emails(self, mock_get):
        """Test scanning multiple emails"""
        # Mock successful responses
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        emails = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
            Email(email="test3@example.com"),
        ]
        result = transform.scan(emails)

        assert len(result) == 3
        assert all(isinstance(gravatar, Gravatar) for gravatar in result)
        assert mock_get.call_count == 3

    @patch("requests.get")
    def test_scan_mixed_success_failure(self, mock_get):
        """Test scanning with mixed success and failure"""

        # Mock mixed responses - check the actual URL being called
        def side_effect(url, *args, **kwargs):
            mock_response = Mock()
            # Check if the URL contains the hash for test1@example.com
            test1_hash = hashlib.md5("test1@example.com".encode()).hexdigest()
            if test1_hash in url:
                mock_response.status_code = 200
            else:
                mock_response.status_code = 404
            return mock_response

        mock_get.side_effect = side_effect

        emails = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
        ]
        result = transform.scan(emails)

        # Should get 1 result for the first email (success) and 0 for the second (failure)
        assert len(result) == 1
        assert result[0].hash == hashlib.md5("test1@example.com".encode()).hexdigest()

    def test_postprocess_with_neo4j_connection(self):
        """Test postprocessing with Neo4j connection"""
        # Mock Neo4j connection
        mock_neo4j = Mock()
        transform_with_neo4j = EmailToGravatarTransform(
            "sketch_123", "scan_123", neo4j_conn=mock_neo4j
        )

        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
            Gravatar(src="https://www.gravatar.com/avatar/hash2", hash="hash2"),
        ]
        original_input = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
        ]

        result = transform_with_neo4j.postprocess(gravatars, original_input)

        # Verify Neo4j queries were executed (one per gravatar)
        assert mock_neo4j.query.call_count == 2

        # Check that results are returned unchanged
        assert result == gravatars

    def test_postprocess_without_neo4j_connection(self):
        """Test postprocessing without Neo4j connection"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]
        original_input = [Email(email="test@example.com")]

        result = transform.postprocess(gravatars, original_input)

        # Should return results unchanged
        assert result == gravatars

    def test_postprocess_missing_original_input(self):
        """Test postprocessing with missing original input"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]
        original_input = []  # Empty list

        result = transform.postprocess(gravatars, original_input)

        # Should handle gracefully and return results
        assert result == gravatars

    def test_postprocess_none_original_input(self):
        """Test postprocessing with None original input"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]

        # The postprocess method doesn't handle None input properly
        # Let's test with an empty list instead
        result = transform.postprocess(gravatars, [])

        # Should handle gracefully and return results
        assert result == gravatars

    def test_execute_full_workflow(self):
        """Test the complete execute workflow"""
        with patch("requests.get") as mock_get:
            # Mock successful response
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = ["test@example.com"]
            result = transform.execute(emails)

            assert len(result) == 1
            assert isinstance(result[0], Gravatar)
            assert (
                result[0].hash == hashlib.md5("test@example.com".encode()).hexdigest()
            )

    def test_execute_with_invalid_input(self):
        """Test execute with invalid input"""
        emails = ["not-an-email", "also-invalid"]

        with patch("requests.get") as mock_get:
            # Mock successful response for any request
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            result = transform.execute(emails)

            # The transform processes any string as an email, so it will create Email objects
            # and attempt to get gravatars for them
            assert len(result) == 2
            assert all(isinstance(gravatar, Gravatar) for gravatar in result)

    def test_gravatar_hash_calculation(self):
        """Test that gravatar hash is calculated correctly"""
        email = "test@example.com"
        expected_hash = hashlib.md5(email.encode()).hexdigest()

        with patch("requests.get") as mock_get:
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = [Email(email=email)]
            result = transform.scan(emails)

            assert len(result) == 1
            assert result[0].hash == expected_hash

    def test_gravatar_url_format(self):
        """Test that gravatar URL is formatted correctly"""
        email = "test@example.com"
        expected_hash = hashlib.md5(email.encode()).hexdigest()
        expected_url = f"https://www.gravatar.com/avatar/{expected_hash}"

        with patch("requests.get") as mock_get:
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = [Email(email=email)]
            result = transform.scan(emails)

            assert len(result) == 1
            assert str(result[0].src) == expected_url
175
flowsint-transforms/tests/transforms/email/to_leaks.py
Normal file
175
flowsint-transforms/tests/transforms/email/to_leaks.py
Normal file
@@ -0,0 +1,175 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from flowsint_transforms.emails.to_leaks import EmailToBreachesTransform
|
||||
from flowsint_types.email import Email
|
||||
from flowsint_types.breach import Breach
|
||||
|
||||
transform = EmailToBreachesTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_transform_name():
    """The transform registers under the name 'to_leaks'."""
    assert EmailToBreachesTransform.name() == "to_leaks"
def test_transform_category():
    """The transform belongs to the 'Email' category."""
    assert EmailToBreachesTransform.category() == "Email"
def test_transform_key():
    """The transform keys its input on the 'email' field."""
    assert EmailToBreachesTransform.key() == "email"
def test_preprocess_string_emails():
    """Raw address strings are wrapped into Email objects."""
    addresses = ["test@example.com", "user@domain.org"]
    assert transform.preprocess(addresses) == [Email(email=a) for a in addresses]
def test_preprocess_dict_emails():
    """Dicts carrying an 'email' key are converted to Email objects."""
    payloads = [{"email": "test@example.com"}, {"email": "user@domain.org"}]
    assert transform.preprocess(payloads) == [Email(email=p["email"]) for p in payloads]
def test_preprocess_email_objects():
    """Email objects are passed through untouched."""
    emails = [Email(email="test@example.com"), Email(email="user@domain.org")]
    assert transform.preprocess(emails) == emails
def test_preprocess_mixed_formats():
    """Strings, dicts and Email objects mix freely; unknown dict keys are dropped."""
    mixed = [
        "test@example.com",
        {"email": "user@domain.org"},
        Email(email="admin@company.com"),
        {"invalid_key": "should_be_ignored@test.com"},  # no 'email' key -> filtered
    ]
    kept = [e.email for e in transform.preprocess(mixed)]
    assert "test@example.com" in kept
    assert "user@domain.org" in kept
    assert "admin@company.com" in kept
    assert "should_be_ignored@test.com" not in kept
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_successful_response(mock_get):
|
||||
# Mock successful API response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = [
|
||||
{"Name": "TestBreach", "Title": "Test Breach", "Domain": "test.com"},
|
||||
{"Name": "AnotherBreach", "Title": "Another Breach", "Domain": "another.com"},
|
||||
]
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 2
|
||||
assert isinstance(result[0], Breach)
|
||||
assert isinstance(result[1], Breach)
|
||||
assert result[0].name == "testbreach"
|
||||
assert result[1].name == "anotherbreach"
|
||||
assert result[0].breach["name"] == "testbreach"
|
||||
assert result[1].breach["name"] == "anotherbreach"
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_no_breaches_found(mock_get):
|
||||
# Mock 404 response (no breaches found)
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 404
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 0
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_api_error(mock_get):
|
||||
# Mock API error
|
||||
mock_get.side_effect = Exception("API Error")
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 0
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_missing_name_field(mock_get):
|
||||
# Mock API response with missing "Name" field
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = [
|
||||
{"Title": "Test Breach", "Domain": "test.com"}, # Missing "Name" field
|
||||
{"Name": "ValidBreach", "Title": "Valid Breach", "Domain": "valid.com"},
|
||||
]
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0].name == "unknown" # Should default to "unknown"
|
||||
assert result[1].name == "validbreach" # Should use the provided name
|
||||
assert result[0].breach["title"] == "Test Breach"
|
||||
assert result[1].breach["name"] == "validbreach"
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.HIBP_API_KEY", None)
|
||||
def test_scan_no_api_key():
|
||||
"""Test that transform raises ValueError when HIBP_API_KEY is not set."""
|
||||
emails = [Email(email="test@example.com")]
|
||||
|
||||
with pytest.raises(ValueError, match="HIBP_API_KEY not set"):
|
||||
transform.scan(emails)
|
||||
|
||||
|
||||
def test_postprocess():
    """postprocess returns the breaches unchanged and issues 5 Neo4j queries
    (2 breach nodes + 1 email node + 2 relationships).

    FIX: the test previously attached a MagicMock to the shared module-level
    ``transform`` (``transform.neo4j_conn = MagicMock()``), leaking mock state
    into every other test in this module. A dedicated instance is used instead.
    """
    local_transform = EmailToBreachesTransform("sketch_123", "scan_123")
    local_transform.neo4j_conn = MagicMock()

    # Breach objects using the new structure (raw record kept in `breach`).
    breaches = [
        Breach(
            name="testbreach",
            title="Test Breach",
            domain="test.com",
            pwncount=1000,
            breach={"name": "testbreach", "title": "Test Breach"},
        ),
        Breach(
            name="anotherbreach",
            title="Another Breach",
            domain="another.com",
            pwncount=2000,
            breach={"name": "anotherbreach", "title": "Another Breach"},
        ),
    ]
    original_input = [Email(email="test@example.com")]

    result = local_transform.postprocess(breaches, original_input)

    assert result == breaches
    # 2 breach node queries + 1 email node query + 2 relationship queries.
    assert local_transform.neo4j_conn.query.call_count == 5
0
flowsint-transforms/tests/transforms/ip/__init__.py
Normal file
0
flowsint-transforms/tests/transforms/ip/__init__.py
Normal file
77
flowsint-transforms/tests/transforms/ip/asn_to_cidrs.py
Normal file
77
flowsint-transforms/tests/transforms/ip/asn_to_cidrs.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from flowsint_transforms.ips.asn_to_cidrs import AsnToCidrsTransform
|
||||
from flowsint_types.asn import ASN
|
||||
|
||||
transform = AsnToCidrsTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_asns():
    """ASN objects survive preprocess with their numbers intact."""
    asns = [ASN(number=15169), ASN(number=13335)]
    processed = transform.preprocess(asns)
    assert [a.number for a in processed] == [a.number for a in asns]
def test_unprocessed_valid_asns():
    """Numeric strings are parsed into ASN objects."""
    raw = ["15169", "13335"]
    assert list(transform.preprocess(raw)) == [ASN(number=int(v)) for v in raw]
def test_preprocess_invalid_asns():
    """Out-of-range AS numbers are filtered out by preprocess."""
    candidates = [
        ASN(number=15169),
        ASN(number=999999999999),  # beyond the valid AS number range
        ASN(number=13335),
    ]
    kept = [a.number for a in transform.preprocess(candidates)]
    assert 15169 in kept
    assert 13335 in kept
    assert 999999999999 not in kept
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, ASN objects and strings; bad dict keys are dropped.

    FIX: the old assertion ``"invalid_key" not in result_numbers`` compared a
    string against a list of ints and therefore could never fail. The valid
    input contains 13335 exactly once, so asserting its count verifies the
    invalid-key dict (which also carried 13335) was really filtered out.
    """
    asns = [
        {"number": 15169},
        {"invalid_key": 13335},  # wrong key -> must be filtered out
        ASN(number=13335),
        "15169",
    ]
    result = transform.preprocess(asns)

    result_numbers = [asn.number for asn in result]
    assert 15169 in result_numbers
    assert 13335 in result_numbers
    # 13335 appears once in the valid inputs; a second occurrence would mean
    # the invalid-key dict slipped through preprocessing.
    assert result_numbers.count(13335) == 1
def test_schemas():
    """Input schema exposes an integer 'number'; output schema exposes 'network'."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    assert "properties" in input_schema
    input_types = {p["name"]: p["type"] for p in input_schema["properties"]}
    # 'number' must exist and be typed as integer.
    assert input_types.get("number") == "integer"

    assert "properties" in output_schema
    output_names = [p["name"] for p in output_schema["properties"]]
    assert "network" in output_names
122
flowsint-transforms/tests/transforms/ip/cidr_to_ips.py
Normal file
122
flowsint-transforms/tests/transforms/ip/cidr_to_ips.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from flowsint_transforms.ips.cidr_to_ips import CidrToIpsTransform
|
||||
from flowsint_types.cidr import CIDR
|
||||
from flowsint_types.ip import Ip
|
||||
from tests.logger import TestLogger
|
||||
|
||||
logger = TestLogger()
|
||||
transform = CidrToIpsTransform("sketch_123", "scan_123", logger)
|
||||
|
||||
|
||||
def test_preprocess_valid_cidrs():
    """CIDR objects keep their networks through preprocess."""
    cidrs = [CIDR(network="8.8.8.0/24"), CIDR(network="1.1.1.0/24")]
    processed = transform.preprocess(cidrs)
    assert [c.network for c in processed] == [c.network for c in cidrs]
def test_preprocess_unprocessed_valid_cidrs():
    """Raw CIDR strings are promoted to CIDR objects."""
    raw = ["8.8.8.0/24", "1.1.1.0/24"]
    assert list(transform.preprocess(raw)) == [CIDR(network=r) for r in raw]
def test_preprocess_invalid_cidrs():
    """Strings that do not parse as CIDR networks are dropped."""
    candidates = [CIDR(network="8.8.8.0/24"), "invalid-cidr", "not-a-cidr"]
    kept = [str(c.network) for c in transform.preprocess(candidates)]
    assert "8.8.8.0/24" in kept
    assert "invalid-cidr" not in kept
    assert "not-a-cidr" not in kept
def test_preprocess_multiple_formats():
    """Dicts, CIDR objects and strings are accepted; bad keys and values are dropped."""
    candidates = [
        {"network": "8.8.8.0/24"},
        {"invalid_key": "1.1.1.0/24"},  # wrong key -> filtered out
        CIDR(network="9.9.9.0/24"),
        "InvalidCIDR",  # unparseable -> filtered out
    ]
    kept = [str(c.network) for c in transform.preprocess(candidates)]
    assert "8.8.8.0/24" in kept
    assert "9.9.9.0/24" in kept
    assert "1.1.1.0/24" not in kept
    assert "InvalidCIDR" not in kept
def test_scan_extracts_ips(monkeypatch):
    """scan shells out to dnsx in PTR mode and parses one Ip per output line."""
    expected_addresses = ["8.35.200.12", "8.35.200.112", "8.35.200.16", "8.35.200.170"]
    dnsx_stdout = "\n".join(expected_addresses)

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, shell, capture_output, text, timeout):
        # The transform must invoke dnsx with the -ptr flag.
        assert "dnsx" in cmd
        assert "-ptr" in cmd
        return FakeCompletedProcess(dnsx_stdout)

    # Intercept the subprocess call made by the transform.
    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.35.200.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 4
    for ip in ips:
        assert isinstance(ip, Ip)
        assert ip.address in expected_addresses
def test_scan_handles_empty_output(monkeypatch):
    """Empty dnsx output results in an empty Ip list."""

    class FakeCompletedProcess:
        def __init__(self):
            self.stdout = ""
            self.returncode = 0

    def fake_run(cmd, shell, capture_output, text, timeout):
        return FakeCompletedProcess()

    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.8.8.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 0
def test_scan_handles_subprocess_exception(monkeypatch):
    """A subprocess failure is swallowed and yields an empty result list."""

    def fake_run(cmd, shell, capture_output, text, timeout):
        raise Exception("Subprocess failed")

    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.8.8.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 0
267
flowsint-transforms/tests/transforms/ip/ip_to_asn.py
Normal file
267
flowsint-transforms/tests/transforms/ip/ip_to_asn.py
Normal file
@@ -0,0 +1,267 @@
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
from flowsint_transforms.ips.ip_to_asn import IpToAsnTransform
|
||||
from flowsint_types.ip import Ip
|
||||
from flowsint_types.asn import ASN
|
||||
from flowsint_types.cidr import CIDR
|
||||
from tests.logger import TestLogger
|
||||
|
||||
logger = TestLogger()
|
||||
# The transform will get a mock logger from conftest.py automatically
|
||||
transform = IpToAsnTransform("sketch_123", "scan_123", logger)
|
||||
|
||||
|
||||
def test_preprocess_valid_ips():
    """Ip objects keep their addresses through preprocess."""
    ips = [Ip(address="8.8.8.8"), Ip(address="1.1.1.1")]
    processed = transform.preprocess(ips)
    assert [i.address for i in processed] == [i.address for i in ips]
def test_unprocessed_valid_ips():
    """Raw address strings are promoted to Ip objects."""
    raw = ["8.8.8.8", "1.1.1.1"]
    assert list(transform.preprocess(raw)) == [Ip(address=a) for a in raw]
def test_preprocess_invalid_ips():
    """Ip entries whose address does not parse are dropped."""
    candidates = [
        Ip(address="8.8.8.8"),
        Ip(address="invalid_ip"),
        Ip(address="192.168.1.1"),
    ]
    kept = [i.address for i in transform.preprocess(candidates)]
    assert "8.8.8.8" in kept
    assert "192.168.1.1" in kept
    assert "invalid_ip" not in kept
def test_preprocess_multiple_formats():
    """Dicts, Ip objects and strings are accepted; unknown dict keys are dropped."""
    mixed = [
        {"address": "8.8.8.8"},
        {"invalid_key": "1.1.1.1"},  # wrong key -> filtered out
        Ip(address="192.168.1.1"),
        "10.0.0.1",
    ]
    kept = [i.address for i in transform.preprocess(mixed)]
    assert "8.8.8.8" in kept
    assert "192.168.1.1" in kept
    assert "10.0.0.1" in kept
    assert "1.1.1.1" not in kept
def test_scan_extracts_asn_info(monkeypatch):
    """scan feeds each address to asnmap and parses number, name, country and CIDRs."""
    asnmap_record = {
        "input": "8.8.8.8",
        "as_number": "AS15169",
        "as_name": "GOOGLE",
        "as_country": "US",
        "as_range": ["8.8.8.0/24", "8.8.4.0/24"],
    }

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # The transform must invoke asnmap and pipe the address on stdin.
        assert "asnmap" in cmd
        assert input == "8.8.8.8"
        return FakeCompletedProcess(json.dumps(asnmap_record))

    monkeypatch.setattr("subprocess.run", fake_run)

    asns = transform.scan([Ip(address="8.8.8.8")])

    assert isinstance(asns, list)
    assert len(asns) == 1

    asn = asns[0]
    assert isinstance(asn, ASN)
    assert asn.number == 15169  # "AS15169" stripped to 15169
    assert asn.name == "GOOGLE"
    assert asn.country == "US"
    assert len(asn.cidrs) == 2
    assert [str(c.network) for c in asn.cidrs] == ["8.8.8.0/24", "8.8.4.0/24"]
def test_scan_handles_no_asn_found(monkeypatch):
    """Empty asnmap output produces a placeholder 'Unknown' ASN."""

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # Empty stdout simulates "no ASN found" for the address.
        return FakeCompletedProcess("")

    monkeypatch.setattr("subprocess.run", fake_run)

    asns = transform.scan([Ip(address="192.168.1.1")])

    assert isinstance(asns, list)
    assert len(asns) == 1

    placeholder = asns[0]
    assert isinstance(placeholder, ASN)
    assert placeholder.number == 0
    assert placeholder.name == "Unknown"
    assert placeholder.country == "Unknown"
    assert len(placeholder.cidrs) == 0
def test_scan_handles_subprocess_exception(monkeypatch):
    """A crashing subprocess call must be absorbed and mapped to an Unknown ASN."""

    def exploding_run(cmd, input, capture_output, text, timeout):
        raise Exception("Subprocess failed")

    monkeypatch.setattr("subprocess.run", exploding_run)

    results = transform.scan([Ip(address="8.8.8.8")])

    assert isinstance(results, list)
    assert len(results) == 1

    fallback = results[0]
    assert isinstance(fallback, ASN)
    assert fallback.number == 0
    assert fallback.name == "Unknown"
    assert fallback.country == "Unknown"
|
||||
|
||||
|
||||
def test_scan_multiple_ips(monkeypatch):
    """Each input IP should resolve to its own ASN via separate asnmap calls."""
    canned_outputs = {
        "8.8.8.8": {
            "input": "8.8.8.8",
            "as_number": "AS15169",
            "as_name": "GOOGLE",
            "as_country": "US",
            "as_range": ["8.8.8.0/24"],
        },
        "1.1.1.1": {
            "input": "1.1.1.1",
            "as_number": "AS13335",
            "as_name": "CLOUDFLARE",
            "as_country": "US",
            "as_range": ["1.1.1.0/24"],
        },
    }

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # Look up the canned payload by the IP piped to stdin; unknown
        # addresses fall through to an empty response.
        payload = canned_outputs.get(input)
        if payload is None:
            return FakeCompletedProcess("")
        return FakeCompletedProcess(json.dumps(payload))

    monkeypatch.setattr("subprocess.run", fake_run)

    results = transform.scan([Ip(address="8.8.8.8"), Ip(address="1.1.1.1")])

    assert len(results) == 2

    # Google ASN comes from the first input address.
    assert results[0].number == 15169
    assert results[0].name == "GOOGLE"

    # Cloudflare ASN comes from the second.
    assert results[1].number == 13335
    assert results[1].name == "CLOUDFLARE"
|
||||
|
||||
|
||||
def test_schemas():
    """Input schema exposes an IP address; output schema exposes ASN fields."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    # The input side must declare a string "address" property.
    assert "properties" in input_schema
    address_props = [
        prop for prop in input_schema["properties"] if prop["name"] == "address"
    ]
    assert address_props
    assert address_props[0]["type"] == "string"

    # The output side must expose the core ASN attributes.
    assert "properties" in output_schema
    prop_names = [prop["name"] for prop in output_schema["properties"]]
    for expected in ("number", "name", "country", "cidrs"):
        assert expected in prop_names
|
||||
|
||||
|
||||
def test_postprocess_creates_neo4j_relationships(monkeypatch):
    """postprocess should link each IP to its resolved ASN in Neo4j."""
    fake_conn = Mock()
    transform.neo4j_conn = fake_conn

    ips = [Ip(address="8.8.8.8")]
    found = [
        ASN(
            number=15169,
            name="GOOGLE",
            country="US",
            cidrs=[CIDR(network="8.8.8.0/24")],
        )
    ]

    returned = transform.postprocess(found, ips)

    # Exactly one Cypher write is expected for a single resolved ASN.
    fake_conn.query.assert_called_once()

    # The second positional argument of the query call holds the parameters.
    params = fake_conn.query.call_args[0][1]
    assert params["ip_address"] == "8.8.8.8"
    assert params["asn_number"] == 15169
    assert params["asn_name"] == "GOOGLE"
    assert params["asn_country"] == "US"
    assert params["sketch_id"] == "sketch_123"

    # postprocess passes the scan results straight through.
    assert returned == found
|
||||
|
||||
|
||||
def test_postprocess_skips_unknown_asns(monkeypatch):
    """Placeholder "Unknown" ASNs must not produce any Neo4j writes."""
    fake_conn = Mock()
    transform.neo4j_conn = fake_conn

    ips = [Ip(address="192.168.1.1")]
    found = [ASN(number=0, name="Unknown", country="Unknown", cidrs=[])]

    returned = transform.postprocess(found, ips)

    # No graph write may happen for an unresolved ASN.
    fake_conn.query.assert_not_called()

    # Results are still returned unchanged.
    assert returned == found
|
||||
103
flowsint-transforms/tests/transforms/ip/ip_to_infos.py
Normal file
103
flowsint-transforms/tests/transforms/ip/ip_to_infos.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from flowsint_transforms.ips.geolocation import IpToInfosTransform
|
||||
from flowsint_types.ip import Ip, Ip
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = IpToInfosTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_ips():
    """Ip objects pass through preprocess with their addresses intact."""
    source = [Ip(address="8.8.8.8"), Ip(address="1.1.1.1")]
    processed = transform.preprocess(source)
    assert [item.address for item in processed] == [item.address for item in source]
|
||||
|
||||
|
||||
def test_preprocess_string_ips():
    """Plain string addresses are wrapped into Ip objects by preprocess."""
    ips = [
        "8.8.8.8",
        "1.1.1.1",
    ]
    result = transform.preprocess(ips)
    # Compare the extracted addresses directly against the raw input strings.
    # (Previously this list was computed but never used, and the assertion
    # recomputed it against a pointless copy of `ips`.)
    result_ips = [d.address for d in result]
    assert result_ips == ips
|
||||
|
||||
|
||||
def test_preprocess_invalid_ips():
    """Syntactically invalid addresses are filtered out by preprocess."""
    source = [
        Ip(address="8.8.8.8"),
        Ip(address="invalid_ip"),
        Ip(address="1.1.1.1"),
    ]
    kept = [item.address for item in transform.preprocess(source)]
    assert "8.8.8.8" in kept
    assert "1.1.1.1" in kept
    assert "invalid_ip" not in kept
|
||||
|
||||
|
||||
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, Ip objects and raw strings, dropping bad shapes."""
    source = [
        {"address": "8.8.8.8"},
        {"invalid_key": "1.2.3.4"},
        Ip(address="1.1.1.1"),
        "1.1.1.1",
    ]
    kept = [item.address for item in transform.preprocess(source)]
    assert "8.8.8.8" in kept
    assert "1.1.1.1" in kept
    # A dict without an "address" key cannot be coerced into an Ip.
    assert "1.2.3.4" not in kept
|
||||
|
||||
|
||||
def test_scan_returns_ip(monkeypatch):
    """execute() should enrich each Ip with the mocked geolocation fields."""

    def fake_location(address):
        # Static payload regardless of the queried address.
        return {
            "latitude": 37.386,
            "longitude": -122.0838,
            "country": "US",
            "city": "Mountain View",
            "isp": "Google LLC",
        }

    monkeypatch.setattr(transform, "get_location_data", fake_location)

    enriched = transform.execute([Ip(address="8.8.8.8")])

    assert isinstance(enriched, list)
    first = enriched[0]
    assert isinstance(first, Ip)
    assert first.address == "8.8.8.8"
    assert first.city == "Mountain View"
    assert first.country == "US"
    assert first.isp == "Google LLC"
|
||||
|
||||
|
||||
def test_schemas():
    """Input and output schemas are identical: an Ip with optional geo fields."""
    expected = {
        "type": "Ip",
        "properties": [
            {"name": "address", "type": "string"},
            {"name": "latitude", "type": "number | null"},
            {"name": "longitude", "type": "number | null"},
            {"name": "country", "type": "string | null"},
            {"name": "city", "type": "string | null"},
            {"name": "isp", "type": "string | null"},
        ],
    }
    # The transform enriches Ip in place, so both schemas share one shape.
    assert transform.input_schema() == expected
    assert transform.output_schema() == expected
|
||||
@@ -0,0 +1,29 @@
|
||||
from flowsint_transforms.organizations.to_infos import OrgToInfosTransform
|
||||
from flowsint_types.organization import Organization
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = OrgToInfosTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_names():
    """Organization objects, dicts and raw strings all normalize to Organizations."""
    mixed_inputs = [Organization(name="OpenAI"), {"name": "Inria"}, "OVH"]
    normalized = transform.preprocess(mixed_inputs)
    assert [org.name for org in normalized] == ["OpenAI", "Inria", "OVH"]
|
||||
|
||||
|
||||
# def test_preprocess_invalid_entries():
|
||||
# data = [
|
||||
# {"wrong_key": "value"},
|
||||
# 123,
|
||||
# None,
|
||||
# "",
|
||||
# {"name": ""},
|
||||
# ]
|
||||
# result = transform.preprocess(data)
|
||||
# assert result == []
|
||||
|
||||
|
||||
def test_execute():
    # Smoke test: only checks that execute() runs end-to-end without raising
    # on a raw string input; the result itself is not inspected.
    transform.execute(["Karim Terrache"])
    assert True
|
||||
57
flowsint-transforms/tests/transforms/social/maigret.py
Normal file
57
flowsint-transforms/tests/transforms/social/maigret.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from pathlib import Path
|
||||
from flowsint_transforms.socials.maigret import MaigretTransform
|
||||
from flowsint_types.social import SocialProfile
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = MaigretTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_unprocessed_valid_usernames():
    """Raw username strings are wrapped into SocialProfile objects."""
    raw = [
        "toto123",
        "DorianXd78",
    ]
    processed = transform.preprocess(raw)
    expected = [SocialProfile(username=name) for name in raw]
    assert list(processed) == expected
|
||||
|
||||
|
||||
def test_preprocess_invalid_usernames():
    """Usernames containing spaces are rejected by preprocess."""
    profiles = [
        SocialProfile(username="toto123"),
        SocialProfile(username="DorianXd78_Official"),
        SocialProfile(username="This is not a username"),
    ]
    kept = [p.username for p in transform.preprocess(profiles)]
    assert "toto123" in kept
    assert "DorianXd78_Official" in kept
    assert "This is not a username" not in kept
|
||||
|
||||
|
||||
def test_preprocess_multiple_formats():
    """preprocess handles dicts, SocialProfile objects and plain strings."""
    mixed = [
        {"username": "toto123"},
        {"invalid_key": "ValId_UseRnAme"},
        SocialProfile(username="DorianXd78_Official"),
        "MySimpleUsername",
    ]
    kept = [p.username for p in transform.preprocess(mixed)]
    assert "toto123" in kept
    assert "DorianXd78_Official" in kept
    assert "MySimpleUsername" in kept
    # A dict without a "username" key cannot be converted.
    assert "ValId_UseRnAme" not in kept
|
||||
|
||||
|
||||
def test_parsing_invalid_output_file():
    """A missing output file yields an empty result list rather than an error."""
    missing = Path("/this/path/does/not/exist")
    assert transform.parse_maigret_output("toto123", missing) == []
|
||||
|
||||
|
||||
def test_parsing():
    """Parse a pre-seeded Maigret output file and expect two profiles.

    NOTE(review): relies on an external fixture at /tmp/maigret_test.json —
    consider generating it inside the test (tmp_path) so the suite is
    self-contained; verify with the suite owner.
    """
    results = transform.parse_maigret_output("toto123", Path("/tmp/maigret_test.json"))
    # Leftover debug print removed; the assertion documents the expectation.
    assert len(results) == 2
|
||||
275
flowsint-transforms/tests/transforms/website/to_links.py
Normal file
275
flowsint-transforms/tests/transforms/website/to_links.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
from flowsint_transforms.websites.to_links import WebsiteToLinks
|
||||
from flowsint_types.website import Website
|
||||
|
||||
|
||||
class MockCrawlResults:
    """Container mimicking the crawler's result object (internal/external URLs)."""

    def __init__(self, internal=None, external=None):
        # Falsy arguments (None or empty) default to fresh empty lists so
        # instances never share mutable state.
        self.internal = internal if internal else []
        self.external = external if external else []
|
||||
|
||||
|
||||
class MockCrawler:
    """Stand-in for the real Crawler: replays a fixed set of discovered URLs."""

    def __init__(self, url, recursive=True, verbose=False, _on_result_callback=None):
        self.url = url
        self.callback = _on_result_callback

    def fetch(self):
        # Network access is a no-op in the mock.
        pass

    def extract_urls(self):
        """Replay the canned internal/external hits through the result callback."""
        if not self.callback:
            return
        for link, external in (
            ("https://example.com/page1", False),
            ("https://example.com/page2", False),
            ("https://external.com/page", True),
            ("https://another-external.org/resource", True),
        ):
            self.callback(link, is_external=external)

    def get_results(self):
        """Return the same canned URL sets wrapped in a MockCrawlResults."""
        internal_links = ["https://example.com/page1", "https://example.com/page2"]
        external_links = [
            "https://external.com/page",
            "https://another-external.org/resource",
        ]
        return MockCrawlResults(internal=internal_links, external=external_links)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_website_to_links_real_time_neo4j_creation():
    """Test that Neo4j nodes are created in real-time during the callback."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    # Mock neo4j connection and methods
    transform.neo4j_conn = Mock()
    transform.create_node = Mock()
    transform.create_relationship = Mock()
    transform.log_graph_message = Mock()

    # Test input
    websites = [Website(url="https://example.com")]

    # Patch Crawler where the transform imports it. The previous
    # "src.transforms.websites.to_links" target is stale after the move to
    # the flowsint_transforms package and would raise ModuleNotFoundError.
    with patch("flowsint_transforms.websites.to_links.Crawler", MockCrawler):
        await transform.scan(websites)

    # Verify main website and domain nodes were created upfront
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com",
        caption="https://example.com",
        type="website",
    )
    transform.create_node.assert_any_call(
        "domain", "name", "example.com", caption="example.com", type="domain"
    )

    # Verify main website to domain relationship
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "example.com",
        "BELONGS_TO_DOMAIN",
    )

    # Verify internal website nodes were created in callback
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com/page1",
        caption="https://example.com/page1",
        type="website",
    )
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com/page2",
        caption="https://example.com/page2",
        type="website",
    )

    # Verify internal website relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://example.com/page1",
        "LINKS_TO",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://example.com/page2",
        "LINKS_TO",
    )

    # Verify external website nodes were created in callback
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://external.com/page",
        caption="https://external.com/page",
        type="website",
    )
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://another-external.org/resource",
        caption="https://another-external.org/resource",
        type="website",
    )

    # Verify external domain nodes were created in callback
    transform.create_node.assert_any_call(
        "domain", "name", "external.com", caption="external.com", type="domain"
    )
    transform.create_node.assert_any_call(
        "domain",
        "name",
        "another-external.org",
        caption="another-external.org",
        type="domain",
    )

    # Verify main website to external website relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://external.com/page",
        "LINKS_TO",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://another-external.org/resource",
        "LINKS_TO",
    )

    # Verify external website to domain relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://external.com/page",
        "domain",
        "name",
        "external.com",
        "BELONGS_TO_DOMAIN",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://another-external.org/resource",
        "domain",
        "name",
        "another-external.org",
        "BELONGS_TO_DOMAIN",
    )

    # Verify main website to external domain relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "external.com",
        "LINKS_TO_DOMAIN",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "another-external.org",
        "LINKS_TO_DOMAIN",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_website_to_links_error_handling_with_neo4j():
    """Test that main nodes are still created even when crawling fails."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    # Mock neo4j connection and methods
    transform.neo4j_conn = Mock()
    transform.create_node = Mock()
    transform.create_relationship = Mock()
    transform.log_graph_message = Mock()

    # Mock crawler that raises an exception
    def mock_crawler_error(*args, **kwargs):
        raise Exception("Test error")

    websites = [Website(url="https://example.com")]

    # Patch Crawler where the transform imports it. The previous
    # "src.transforms.websites.to_links" target is stale after the move to
    # the flowsint_transforms package and would raise ModuleNotFoundError.
    with patch("flowsint_transforms.websites.to_links.Crawler", mock_crawler_error):
        results = await transform.scan(websites)

    # Verify main website and domain nodes were still created despite error
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com",
        caption="https://example.com",
        type="website",
    )
    transform.create_node.assert_any_call(
        "domain", "name", "example.com", caption="example.com", type="domain"
    )

    # Verify main website to domain relationship was created
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "example.com",
        "BELONGS_TO_DOMAIN",
    )

    # Verify result structure: crawl failure yields empty URL/domain lists
    assert len(results) == 1
    result = results[0]
    assert result["website"] == "https://example.com"
    assert result["main_domain"] == "example.com"
    assert result["internal_urls"] == []
    assert result["external_urls"] == []
    assert result["external_domains"] == []
||||
|
||||
|
||||
def test_postprocess_simplified():
    """Test that postprocess now just returns results as-is."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    seeds = [Website(url="https://example.com")]
    crawled = [
        {
            "website": "https://example.com",
            "main_domain": "example.com",
            "internal_urls": ["https://example.com/page1"],
            "external_urls": ["https://external.com/page"],
            "external_domains": ["external.com"],
        }
    ]

    # Neo4j writes happen during scan(), so postprocess is a pass-through.
    assert transform.postprocess(crawled, seeds) == crawled
|
||||
Reference in New Issue
Block a user