mirror of
https://github.com/reconurge/flowsint.git
synced 2026-05-03 01:54:01 -05:00
refactor: remove occurrences of scanner
This commit is contained in:
0
flowsint-transforms/tests/transforms/__init__.py
Normal file
0
flowsint-transforms/tests/transforms/__init__.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Tests for the CryptoWalletAddressToNFTs transform.

Fixes: the test functions were misleadingly named ``*_to_transactions_*``
although this module exercises the wallet_to_nfts transform, and the mock
comment referred to ``_get_transactions`` while the test mocks ``_get_nfts``.
"""

from flowsint_transforms.crypto.wallet_to_nfts import CryptoWalletAddressToNFTs
from flowsint_types.wallet import CryptoWallet, CryptoNFT
from pydantic import HttpUrl

# Shared, stateless transform instance used by every test in this module.
transform = CryptoWalletAddressToNFTs("sketch_123", "scan_123")


def test_wallet_to_nfts_name():
    assert transform.name() == "wallet_to_nfts"


def test_wallet_to_nfts_category():
    assert transform.category() == "crypto"


def test_wallet_to_nfts_key():
    assert transform.key() == "address"


def test_preprocess_with_string():
    """A bare address string is promoted to a CryptoWallet."""
    input_data = ["0x742d35Cc6634C0532925a3b844Bc454e4438f44e"]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_preprocess_with_dict():
    """A dict with an ``address`` key is promoted to a CryptoWallet."""
    input_data = [{"address": "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"}]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_preprocess_with_wallet_object():
    """An already-typed CryptoWallet passes through preprocessing."""
    wallet = CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")
    input_data = [wallet]
    result = transform.preprocess(input_data)
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_scan_mocked_nfts(monkeypatch):
    """scan() returns the NFTs produced by the (mocked) _get_nfts helper."""

    # Mock the _get_nfts method so no network/API call is made.
    def mock_get_nfts(address):
        return [
            CryptoNFT(
                wallet=CryptoWallet(
                    address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
                ),
                contract_address="0x123",
                token_id="1",
                collection_name="Test Collection",
                metadata_url="https://example.com/metadata.json",
                image_url="https://example.com/image.png",
                name="Test NFT",
            )
        ]

    monkeypatch.setattr(transform, "_get_nfts", mock_get_nfts)

    input_data = [CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")]
    result = transform.scan(input_data)

    # One result list per input wallet, one NFT in it.
    assert len(result) == 1
    assert len(result[0]) == 1
    assert result[0][0].contract_address == "0x123"
    assert result[0][0].collection_name == "Test Collection"
    # URL fields are pydantic HttpUrl instances, so compare against HttpUrl.
    assert result[0][0].metadata_url == HttpUrl("https://example.com/metadata.json")
    assert result[0][0].image_url == HttpUrl("https://example.com/image.png")
    assert result[0][0].name == "Test NFT"
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Tests for the CryptoWalletAddressToTransactions transform."""

import pytest
from flowsint_transforms.crypto.wallet_to_transactions import (
    CryptoWalletAddressToTransactions,
)
from flowsint_types.wallet import CryptoWallet, CryptoWalletTransaction

# One shared instance; the Etherscan key is a dummy value for testing.
transform = CryptoWalletAddressToTransactions(
    "sketch_123",
    "scan_123",
    params={"ETHERSCAN_API_KEY": "ta-clef-api"},
)

# Well-formed address reused across the preprocessing tests.
_ADDRESS = "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"


def test_wallet_address_to_transactions_name():
    assert transform.name() == "wallet_to_transactions"


def test_wallet_address_to_transactions_category():
    # NOTE(review): "CryptoCryptoWallet" looks like a doubled prefix — confirm
    # this is really the intended category string of the transform.
    assert transform.category() == "CryptoCryptoWallet"


def test_wallet_address_to_transactions_key():
    assert transform.key() == "address"


def test_preprocess_with_string():
    """Bare address strings become CryptoWallet objects."""
    result = transform.preprocess([_ADDRESS])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


def test_preprocess_with_dict():
    """Dicts carrying an ``address`` key become CryptoWallet objects."""
    result = transform.preprocess([{"address": _ADDRESS}])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


def test_preprocess_with_wallet_object():
    """Typed CryptoWallet inputs pass through untouched."""
    result = transform.preprocess([CryptoWallet(address=_ADDRESS)])
    assert len(result) == 1
    assert isinstance(result[0], CryptoWallet)
    assert result[0].address == _ADDRESS


@pytest.mark.asyncio
async def test_scan_mocked_transactions(monkeypatch):
    """scan() surfaces the transactions from the (mocked) fetch helper."""

    # _get_transactions takes both the address and the API key.
    async def mock_get_transactions(address, api_key):
        transaction = CryptoWalletTransaction(
            hash="0x123",
            source=CryptoWallet(
                address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
            ),
            target=CryptoWallet(address="0x456"),
            value=1.0,  # 1 ETH
            timestamp="1234567890",
            block_number="12345",
            block_hash="0xabc",
            nonce="1",
            transaction_index="0",
            gas="21000",
            gas_price="20000000000",
            gas_used="21000",
            cumulative_gas_used="21000",
            input="0x",
            contract_address=None,
        )
        return [transaction]

    monkeypatch.setattr(transform, "_get_transactions", mock_get_transactions)

    result = await transform.scan(
        [CryptoWallet(address="0x742d35Cc6634C0532925a3b844Bc454e4438f44e")]
    )

    assert len(result) == 1
    assert len(result[0]) == 1
    first = result[0][0]
    assert first.hash == "0x123"
    assert first.source.address == "0x742d35Cc6634C0532925a3b844Bc454e4438f44e"
    assert first.target.address == "0x456"
    assert first.value == 1.0
    assert first.timestamp == "1234567890"


def test_transform_requires_api_key():
    """Test that the transform validates required ETHERSCAN_API_KEY parameter at construction"""
    with pytest.raises(
        ValueError, match="Transform wallet_to_transactions received invalid params"
    ):
        CryptoWalletAddressToTransactions("sketch_123", "scan_123", params={})


def test_transform_with_invalid_api_key_type():
    """Test that the transform validates parameter types"""
    with pytest.raises(
        ValueError, match="Transform wallet_to_transactions received invalid params"
    ):
        CryptoWalletAddressToTransactions(
            "sketch_123", "scan_123", params={"ETHERSCAN_API_KEY": 123}
        )
|
||||
153
flowsint-transforms/tests/transforms/domain/resolve.py
Normal file
153
flowsint-transforms/tests/transforms/domain/resolve.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""Tests for ResolveTransform (domain -> IP resolution)."""

from flowsint_transforms.domains.resolve import ResolveTransform
from flowsint_types.domain import Domain
from flowsint_types.ip import Ip
from typing import List
import pytest

transform = ResolveTransform("sketch_123", "scan_123")


def test_preprocess_valid_domains():
    """Valid Domain objects survive preprocessing in order."""
    inputs = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    processed = transform.preprocess(inputs)
    assert [d.domain for d in processed] == [d.domain for d in inputs]


def test_unprocessed_valid_domains():
    """Plain strings are promoted to Domain objects."""
    raw = [
        "example.com",
        "example2.com",
    ]
    processed = transform.preprocess(raw)
    assert [d for d in processed] == [Domain(domain=name) for name in raw]


def test_preprocess_invalid_domains():
    """Syntactically invalid domains are filtered out."""
    processed = transform.preprocess(
        [
            Domain(domain="example.com"),
            Domain(domain="invalid_domain"),
            Domain(domain="example.org"),
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names


def test_preprocess_multiple_formats():
    """Mixed dict / object / string inputs are normalized; bad ones dropped."""
    processed = transform.preprocess(
        [
            {"domain": "example.com"},
            {"invalid_key": "example.io"},
            Domain(domain="example.org"),
            "example.org",
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names
    assert "example.io" not in names


@pytest.mark.asyncio
async def test_scan_returns_ip(monkeypatch):
    """execute() resolves each domain to the (mocked) IP."""

    # Stub DNS resolution so the test never touches the network.
    def fake_gethostbyname(domain):
        return "12.23.34.45"

    monkeypatch.setattr("socket.gethostbyname", fake_gethostbyname)

    output = await transform.execute([Domain(domain="example.com")])
    print(output)
    assert isinstance(output, list)
    assert output[0].address == "12.23.34.45"


def test_schemas():
    """Input/output schemas expose the expected type names and properties."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    # Check structure and key properties rather than an exact dict match.
    assert input_schema["type"] == "Domain"
    assert isinstance(input_schema["properties"], list)
    assert "domain" in [prop["name"] for prop in input_schema["properties"]]

    assert output_schema["type"] == "Ip"
    assert isinstance(output_schema["properties"], list)
    assert "address" in [prop["name"] for prop in output_schema["properties"]]


class TestResolveInputOutputTypes:
    """Test the InputType/OutputType functionality for ResolveTransform"""

    def test_input_output_types_are_defined(self):
        """Test that InputType and OutputType are properly defined"""
        assert hasattr(ResolveTransform, "InputType")
        assert hasattr(ResolveTransform, "OutputType")
        assert ResolveTransform.InputType == List[Domain]
        assert ResolveTransform.OutputType == List[Ip]

    def test_schemas_use_generate_methods(self):
        """Test that schema methods use the new generate methods"""
        # These should work without error.
        generated_in = ResolveTransform.generate_input_schema()
        generated_out = ResolveTransform.generate_output_schema()

        assert isinstance(generated_in, dict)
        assert isinstance(generated_out, dict)
        assert generated_in["type"] == "Domain"
        assert generated_out["type"] == "Ip"

    def test_schema_methods_return_same_as_generate_methods(self):
        """Test that input_schema() and output_schema() return the same as generate methods"""
        assert ResolveTransform.input_schema() == ResolveTransform.generate_input_schema()
        assert ResolveTransform.output_schema() == ResolveTransform.generate_output_schema()

    def test_input_schema_properties(self):
        """Test input schema has expected properties"""
        schema = ResolveTransform.input_schema()
        names = [p["name"] for p in schema["properties"]]
        # Domain should expose at least its "domain" property.
        assert "domain" in names

    def test_output_schema_properties(self):
        """Test output schema has expected properties"""
        schema = ResolveTransform.output_schema()
        names = [p["name"] for p in schema["properties"]]
        # Ip should expose at least its "address" property.
        assert "address" in names

    def test_type_accessibility_from_instance(self):
        """Test that types are accessible from transform instance"""
        instance = ResolveTransform("test", "test")

        assert instance.InputType == List[Domain]
        assert instance.OutputType == List[Ip]

        # Schemas must also be generatable from an instance.
        assert instance.generate_input_schema()["type"] == "Domain"
        assert instance.generate_output_schema()["type"] == "Ip"
|
||||
95
flowsint-transforms/tests/transforms/domain/subdomains.py
Normal file
95
flowsint-transforms/tests/transforms/domain/subdomains.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Tests for SubdomainTransform.

Fixes: the import line read ``from flowsint_types.domain import Domain, Domain``
(duplicate name); French comments translated to English.
"""

from flowsint_transforms.domains.subdomains import SubdomainTransform
from flowsint_types.domain import Domain

transform = SubdomainTransform("sketch_123", "scan_123")


def test_preprocess_valid_domains():
    """Valid Domain objects survive preprocessing in order."""
    domains = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    expected_domains = [d.domain for d in domains]

    assert result_domains == expected_domains


def test_unprocessed_valid_domains():
    """Plain strings are promoted to Domain objects."""
    domains = [
        "example.com",
        "example2.com",
    ]
    result = transform.preprocess(domains)
    result_domains = [d for d in result]
    expected_domains = [Domain(domain=d) for d in domains]
    assert result_domains == expected_domains


def test_preprocess_invalid_domains():
    """Syntactically invalid domains are filtered out."""
    domains = [
        Domain(domain="example.com"),
        Domain(domain="invalid_domain"),
        Domain(domain="example.org"),
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    assert "example.com" in result_domains
    assert "example.org" in result_domains
    assert "invalid_domain" not in result_domains


def test_preprocess_multiple_formats():
    """Mixed dict / object / string inputs are normalized; bad ones dropped."""
    domains = [
        {"domain": "example.com"},
        {"invalid_key": "example.io"},
        Domain(domain="example.org"),
        "example.org",
    ]
    result = transform.preprocess(domains)

    result_domains = [d.domain for d in result]
    assert "example.com" in result_domains
    assert "example.org" in result_domains
    assert "invalid_domain" not in result_domains
    assert "example.io" not in result_domains


def test_scan_extracts_subdomains(monkeypatch):
    """execute() parses crt.sh-style records into Domain objects."""
    mock_response = [
        {"name_value": "mail.example.com\nwww.example.com"},
        {"name_value": "api.example.com"},
        {"name_value": "invalid_domain"},  # should be ignored
    ]

    class MockRequestsResponse:
        """Minimal stand-in for requests.Response."""

        def __init__(self, json_data):
            self._json_data = json_data
            self.status_code = 200

        def json(self):
            return self._json_data

        @property
        def ok(self):
            return True

    def mock_get(url, timeout):
        # The queried URL must contain the input domain.
        assert "example.com" in url
        return MockRequestsResponse(mock_response)

    # Patch the network request used by the transform module.
    monkeypatch.setattr("requests.get", mock_get)

    input_data = [Domain(domain="example.com")]
    domains = transform.execute(input_data)
    assert isinstance(domains, list)
    for sub in domains:
        print(sub)
        assert isinstance(sub, Domain)
    # TODO(review): `expected` is unused because the assertion below is
    # commented out — re-enable it once the subdomain shape is settled.
    expected = sorted(["mail.example.com", "www.example.com", "api.example.com"])
    print(domains)
    # assert domains[0].subdomains == expected
|
||||
393
flowsint-transforms/tests/transforms/domain/to_history.py
Normal file
393
flowsint-transforms/tests/transforms/domain/to_history.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""Test doubles and fixtures for DomainToHistoryTransform tests."""

import pytest
import json
import os
from unittest.mock import Mock
from flowsint_transforms.domain.to_history import DomainToHistoryTransform
from flowsint_types.domain import Domain


class MockNeo4jConn:
    """Records node/relationship creation calls instead of hitting Neo4j."""

    def __init__(self):
        self.nodes_created = []
        self.relationships_created = []

    def create_node(self, label, key, value, **kwargs):
        self.nodes_created.append(
            {"label": label, "key": key, "value": value, **kwargs}
        )

    def create_relationship(
        self,
        from_label,
        from_key,
        from_value,
        to_label,
        to_key,
        to_value,
        relationship_type,
    ):
        self.relationships_created.append(
            {
                "from": f"{from_label}:{from_value}",
                "to": f"{to_label}:{to_value}",
                "type": relationship_type,
            }
        )

    def query(self, query, params):
        """Mock query method to avoid errors."""
        pass


class MockTransform(DomainToHistoryTransform):
    """Transform wired to the in-memory Neo4j stub, bypassing real __init__."""

    def __init__(self):
        self.sketch_id = "test_sketch_123"
        self.neo4j_conn = MockNeo4jConn()
        self._extracted_data = []
        self._extracted_individuals = []

    def log_graph_message(self, message):
        """Mock log_graph_message method."""
        pass


@pytest.fixture
def transform():
    """Create a transform instance for testing."""
    return MockTransform()


@pytest.fixture
def test_data():
    """Load test data from data.json."""
    here = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(here, "..", "..", "test_data", "data.json")
    with open(data_file, "r") as f:
        return json.load(f)
|
||||
|
||||
|
||||
def test_preprocess_valid_domains(transform):
    """Test preprocessing with valid domains."""
    inputs = [
        Domain(domain="example.com"),
        Domain(domain="example2.com"),
    ]
    processed = transform.preprocess(inputs)
    assert [d.domain for d in processed] == [d.domain for d in inputs]


def test_preprocess_string_domains(transform):
    """Test preprocessing with string domains."""
    processed = transform.preprocess(["example.com", "example2.com"])

    assert len(processed) == 2
    assert all(isinstance(d, Domain) for d in processed)
    assert processed[0].domain == "example.com"
    assert processed[1].domain == "example2.com"


def test_preprocess_dict_domains(transform):
    """Test preprocessing with dict domains."""
    processed = transform.preprocess(
        [{"domain": "example.com"}, {"domain": "example2.com"}]
    )

    assert len(processed) == 2
    assert all(isinstance(d, Domain) for d in processed)
    assert processed[0].domain == "example.com"
    assert processed[1].domain == "example2.com"


def test_preprocess_invalid_domains(transform):
    """Test preprocessing with invalid domains."""
    processed = transform.preprocess(
        [
            Domain(domain="example.com"),
            Domain(domain="invalid_domain"),
            Domain(domain="example.org"),
        ]
    )
    names = [d.domain for d in processed]
    assert "example.com" in names
    assert "example.org" in names
    assert "invalid_domain" not in names


def test_is_redacted(transform):
    """Test the __is_redacted method (accessed via its name-mangled form)."""
    redacted = transform._DomainToHistoryTransform__is_redacted

    # Values containing the privacy marker are treated as redacted.
    assert redacted("REDACTED FOR PRIVACY")
    assert redacted("redacted for privacy")
    assert redacted("Some text with PRIVACY in it")

    # Ordinary contact values are not.
    assert not redacted("JOHN DOE")
    assert not redacted("john@doe.com")
    assert not redacted("123 JOHN STREET")
    assert not redacted("DOE CITY")


def test_has_non_redacted_info(transform):
    """Test the __has_non_redacted_info method."""
    has_info = transform._DomainToHistoryTransform__has_non_redacted_info

    # A contact with usable fields is kept.
    assert has_info(
        {
            "full_name": "JOHN DOE",
            "email_address": "john@doe.com, martinemah@yahoo.com",
            "phone_number": "+123456789",
            "mailing_address": "123 JOHN STREET",
            "city_name": "DOE CITY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )

    # A fully redacted contact is rejected.
    assert not has_info(
        {
            "full_name": "REDACTED FOR PRIVACY",
            "email_address": "redacted for privacy",
            "phone_number": "REDACTED FOR PRIVACY",
            "mailing_address": "REDACTED FOR PRIVACY",
            "city_name": "REDACTED FOR PRIVACY",
            "zip_code": "REDACTED FOR PRIVACY",
            "country_name": "REDACTED FOR PRIVACY",
        }
    )

    # So is an empty contact.
    assert not has_info({})


def test_extract_individual_from_contact(transform):
    """Test the __extract_individual_from_contact method."""
    contact = {
        "full_name": "JOHN DOE",
        "email_address": "john@doe.com, martinemah@yahoo.com",
        "phone_number": "+123456789",
        "mailing_address": "123 JOHN STREET",
        "city_name": "DOE CITY",
        "zip_code": "12345",
        "country_name": "United States",
    }

    individual = transform._DomainToHistoryTransform__extract_individual_from_contact(
        contact, "REGISTRANT"
    )

    assert individual is not None
    # NOTE(review): these expected names ("MARC"/"DESCOLLONGES") do not match
    # the contact's full_name "JOHN DOE" — looks like a partially anonymized
    # fixture; confirm which values the extractor should really produce.
    assert individual.first_name == "MARC"
    assert individual.last_name == "DESCOLLONGES"
    assert individual.full_name == "JOHN DOE"
    assert len(individual.email_addresses) == 2
    assert "john@doe.com" in individual.email_addresses
    assert "martinemah@yahoo.com" in individual.email_addresses
    assert individual.phone_numbers == ["+123456789"]


def test_extract_individual_redacted_name(transform):
    """Test that individuals with redacted names are skipped."""
    contact = {
        "full_name": "REDACTED FOR PRIVACY",
        "email_address": "test@example.com",
        "phone_number": "+1234567890",
    }

    individual = transform._DomainToHistoryTransform__extract_individual_from_contact(
        contact, "REGISTRANT"
    )
    assert individual is None


def test_is_valid_email(transform):
    """Test the __is_valid_email method."""
    is_valid = transform._DomainToHistoryTransform__is_valid_email

    # Accepted formats.
    assert is_valid("test@example.com")
    assert is_valid("user.name@domain.org")
    assert is_valid("user+tag@example.co.uk")

    # Rejected formats.
    assert not is_valid("invalid-email")
    assert not is_valid("@example.com")
    assert not is_valid("test@")
    assert not is_valid("")


def test_extract_physical_address(transform):
    """Test the __extract_physical_address method."""
    extract = transform._DomainToHistoryTransform__extract_physical_address

    # A fully specified address is extracted field by field.
    address = extract(
        {
            "mailing_address": "123 JOHN STREET",
            "city_name": "DOE CITY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )

    assert address is not None
    assert address.address == "123 JOHN STREET"
    assert address.city == "DOE CITY"
    assert address.zip == "12345"
    assert address.country == "United States"

    # Any redacted component voids the whole address.
    address = extract(
        {
            "mailing_address": "123 JOHN STREET",
            "city_name": "REDACTED FOR PRIVACY",
            "zip_code": "12345",
            "country_name": "United States",
        }
    )
    assert address is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_scan_with_test_data(transform, test_data, monkeypatch):
    """Test the scan method with test data."""

    # Stub the Whoxy lookup so the known fixture is returned for epios.com.
    def mock_get_infos(domain):
        return test_data if domain == "epios.com" else {}

    monkeypatch.setattr(
        transform, "_DomainToHistoryTransform__get_infos_from_whoxy", mock_get_infos
    )

    input_domains = [Domain(domain="epios.com")]
    results = await transform.scan(input_domains)

    # One result per WHOIS record in the fixture.
    assert len(results) == 16  # 16 WHOIS records in the test data
    assert all(r.domain == "epios.com" for r in results)

    # Raw record data is captured alongside the results.
    assert len(transform._extracted_data) == 16

    # At least one individual must have been extracted.
    assert (
        len(transform._extracted_individuals) > 0
    ), "Should have extracted some individuals"

    # Verify JOHN DOE shows up among the extracted individuals.
    target_found = False
    matched_infos = []

    for individual_info in transform._extracted_individuals:
        individual = individual_info["individual"]
        if "JOHN DOE" in individual.full_name:
            target_found = True
            matched_infos.append(individual_info)
            print(
                f"Found MARC: {individual.full_name} ({individual_info['contact_type']})"
            )
            print(f"  Emails: {individual.email_addresses}")
            print(f"  Phones: {individual.phone_numbers}")

    assert target_found, "JOHN DOE should be found in the extracted individuals"
    assert (
        len(matched_infos) > 0
    ), f"Expected to find JOHN DOE, but found {len(matched_infos)} instances"

    # Debug summary of everything that was extracted.
    print(f"\n=== Summary of extracted individuals ===")
    for individual_info in transform._extracted_individuals:
        individual = individual_info["individual"]
        print(
            f"- {individual.full_name} ({individual_info['contact_type']}) for {individual_info['domain_name']}"
        )
        if individual.email_addresses:
            print(f"  Emails: {individual.email_addresses}")
        if individual.phone_numbers:
            print(f"  Phones: {individual.phone_numbers}")
|
||||
|
||||
|
||||
def test_postprocess_creates_nodes_and_relationships(transform, test_data, monkeypatch):
    """Test that postprocess creates the expected nodes and relationships.

    Improvement: the original drove the coroutine through a hand-rolled
    ``new_event_loop`` / ``run_until_complete`` / ``close`` dance; ``asyncio.run``
    does the same thing (fresh loop, run to completion, close) in one call.
    """

    # Stub the Whoxy lookup so the known fixture is returned for epios.com.
    def mock_get_infos(domain):
        if domain == "epios.com":
            return test_data
        return {}

    monkeypatch.setattr(
        transform, "_DomainToHistoryTransform__get_infos_from_whoxy", mock_get_infos
    )

    import asyncio

    # First run scan to populate _extracted_data and _extracted_individuals.
    input_domains = [Domain(domain="epios.com")]
    results = asyncio.run(transform.scan(input_domains))

    # Debug: check what individuals were extracted.
    print(
        f"\n=== DEBUG: _extracted_individuals has {len(transform._extracted_individuals)} individuals ==="
    )
    for i, individual_info in enumerate(transform._extracted_individuals):
        individual = individual_info["individual"]
        print(
            f"Individual {i+1}: {individual.full_name} ({individual_info['contact_type']}) for {individual_info['domain_name']}"
        )
        if individual.email_addresses:
            print(f"  Emails: {individual.email_addresses}")
        if individual.phone_numbers:
            print(f"  Phones: {individual.phone_numbers}")

    # Now run postprocess against the stubbed Neo4j connection.
    print(f"\n=== Running postprocess ===")
    transform.postprocess(results, input_domains)

    print(f"=== Postprocess completed ===")
    print(f"Nodes created: {len(transform.neo4j_conn.nodes_created)}")
    print(f"Relationships created: {len(transform.neo4j_conn.relationships_created)}")

    # Both nodes and relationships must have been recorded.
    assert len(transform.neo4j_conn.nodes_created) > 0
    assert len(transform.neo4j_conn.relationships_created) > 0

    # At least one domain node.
    domain_nodes = [
        n for n in transform.neo4j_conn.nodes_created if n["label"] == "domain"
    ]
    assert len(domain_nodes) > 0

    # At least one individual node (should include JOHN DOE).
    individual_nodes = [
        n for n in transform.neo4j_conn.nodes_created if n["label"] == "individual"
    ]
    assert len(individual_nodes) > 0

    # JOHN DOE must appear among the individual nodes.
    marc_nodes = [n for n in individual_nodes if "JOHN DOE" in n["value"]]
    assert (
        len(marc_nodes) > 0
    ), "JOHN DOE should be in the individual nodes"
|
||||
|
||||
|
||||
def test_schemas(transform):
    """Test that the transform has the expected schemas."""
    schemas = (transform.input_schema(), transform.output_schema())
    # Both schema methods must return something (shape checked elsewhere).
    for schema in schemas:
        assert schema is not None
|
||||
103
flowsint-transforms/tests/transforms/domain/whois.py
Normal file
103
flowsint-transforms/tests/transforms/domain/whois.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from flowsint_transforms.domains.whois import WhoisTransform
|
||||
from flowsint_types.domain import Domain
|
||||
|
||||
transform = WhoisTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_domains():
|
||||
domains = [
|
||||
Domain(domain="example.com"),
|
||||
Domain(domain="example2.com"),
|
||||
]
|
||||
result = transform.preprocess(domains)
|
||||
|
||||
result_domains = [d.domain for d in result]
|
||||
expected_domains = [d.domain for d in domains]
|
||||
|
||||
assert result_domains == expected_domains
|
||||
|
||||
|
||||
def test_unprocessed_valid_domains():
|
||||
domains = [
|
||||
"example.com",
|
||||
"example2.com",
|
||||
]
|
||||
result = transform.preprocess(domains)
|
||||
result_domains = [d for d in result]
|
||||
expected_domains = [Domain(domain=d) for d in domains]
|
||||
assert result_domains == expected_domains
|
||||
|
||||
|
||||
def test_preprocess_invalid_domains():
    """Entries whose domain name does not validate are dropped by preprocess."""
    candidates = [
        Domain(domain="example.com"),
        Domain(domain="invalid_domain"),
        Domain(domain="example.org"),
    ]
    kept = [d.domain for d in transform.preprocess(candidates)]
    assert "example.com" in kept
    assert "example.org" in kept
    assert "invalid_domain" not in kept
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, Domain objects and strings; bad dict keys are dropped."""
    mixed = [
        {"domain": "example.com"},
        {"invalid_key": "example.io"},  # wrong key -> must be filtered out
        Domain(domain="example.org"),
        "example.org",
    ]
    kept = [d.domain for d in transform.preprocess(mixed)]
    assert "example.com" in kept
    assert "example.org" in kept
    assert "invalid_domain" not in kept
    assert "example.io" not in kept
def test_scan_returns_whois_objects(monkeypatch):
    """execute() enriches each Domain with WHOIS data (lookup is mocked)."""
    whois_record = {
        "registrar": "MockRegistrar",
        "org": "MockOrg",
        "city": "MockCity",
        "country": "MockCountry",
        "emails": ["admin@example.com"],
        "creation_date": "2020-01-01",
        "expiration_date": "2030-01-01",
    }
    # Patch `whois.whois` so no real network lookup happens.
    monkeypatch.setattr("whois.whois", lambda domain: whois_record)

    output = transform.execute([Domain(domain="example.com")])

    assert isinstance(output, list)
    assert isinstance(output[0], Domain)
    assert output[0].whois.org == "MockOrg"
    assert output[0].whois.email.email == "admin@example.com"
def test_schemas():
    """Both input and output schemas are the full Domain schema."""
    expected_schema = {
        "type": "Domain",
        "properties": [
            {"name": "domain", "type": "string"},
            {"name": "subdomains", "type": "array | null"},
            {"name": "ips", "type": "array | null"},
            {"name": "whois", "type": "Whois | null"},
        ],
    }
    assert transform.input_schema() == expected_schema
    assert transform.output_schema() == expected_schema
336
flowsint-transforms/tests/transforms/email/to_gravatar.py
Normal file
336
flowsint-transforms/tests/transforms/email/to_gravatar.py
Normal file
@@ -0,0 +1,336 @@
|
||||
import hashlib
|
||||
from unittest.mock import Mock, patch
|
||||
from flowsint_transforms.emails.to_gravatar import EmailToGravatarTransform
|
||||
from flowsint_types.email import Email
|
||||
from flowsint_types.gravatar import Gravatar
|
||||
|
||||
transform = EmailToGravatarTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
class TestEmailToGravatarTransform:
    """Test suite for EmailToGravatarTransform.

    Methods use the module-level ``transform`` instance; HTTP calls are
    mocked via ``unittest.mock.patch("requests.get")`` throughout.
    """

    def test_name(self):
        """Test the transform name"""
        assert EmailToGravatarTransform.name() == "to_gravatar"

    def test_category(self):
        """Test the transform category"""
        assert EmailToGravatarTransform.category() == "Email"

    def test_key(self):
        """Test the transform key"""
        assert EmailToGravatarTransform.key() == "email"

    def test_input_schema(self):
        """Test the input schema generation"""
        schema = EmailToGravatarTransform.input_schema()
        assert schema["type"] == "Email"
        assert "properties" in schema
        # Check that email property is present
        email_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "email"), None
        )
        assert email_prop is not None
        assert email_prop["type"] == "string"

    def test_output_schema(self):
        """Test the output schema generation"""
        schema = EmailToGravatarTransform.output_schema()
        assert schema["type"] == "Gravatar"
        assert "properties" in schema
        # Check that required properties are present
        src_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "src"), None
        )
        hash_prop = next(
            (prop for prop in schema["properties"] if prop["name"] == "hash"), None
        )
        assert src_prop is not None
        assert hash_prop is not None

    def test_preprocess_string_emails(self):
        """Test preprocessing with string emails"""
        emails = [
            "test@example.com",
            "user@gmail.com",
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_dict_emails(self):
        """Test preprocessing with dictionary emails"""
        emails = [
            {"email": "test@example.com"},
            {"email": "user@gmail.com"},
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_email_objects(self):
        """Test preprocessing with Email objects"""
        emails = [
            Email(email="test@example.com"),
            Email(email="user@gmail.com"),
        ]
        result = transform.preprocess(emails)
        assert len(result) == 2
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"

    def test_preprocess_mixed_formats(self):
        """Test preprocessing with mixed input formats"""
        emails = [
            "test@example.com",
            {"email": "user@gmail.com"},
            Email(email="admin@company.com"),
        ]
        result = transform.preprocess(emails)
        assert len(result) == 3
        assert all(isinstance(email, Email) for email in result)
        assert result[0].email == "test@example.com"
        assert result[1].email == "user@gmail.com"
        assert result[2].email == "admin@company.com"

    def test_preprocess_invalid_inputs(self):
        """Test preprocessing with invalid inputs"""
        emails = [
            "not-an-email",
            {"invalid_key": "test@example.com"},
            {"email": "invalid-email"},
            None,
            123,
        ]
        result = transform.preprocess(emails)
        # The preprocess method doesn't validate email format, it just creates Email objects
        # for valid string inputs and dicts with email key
        assert len(result) == 2  # "not-an-email" and "invalid-email" are processed
        assert result[0].email == "not-an-email"
        assert result[1].email == "invalid-email"

    def test_preprocess_empty_list(self):
        """Test preprocessing with empty list"""
        result = transform.preprocess([])
        assert result == []

    @patch("requests.get")
    def test_scan_successful_gravatar(self, mock_get):
        """Test successful gravatar retrieval"""
        # Mock successful response
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 1
        assert isinstance(result[0], Gravatar)
        # Gravatar hashes are the MD5 of the e-mail address
        assert result[0].hash == hashlib.md5("test@example.com".encode()).hexdigest()
        assert "gravatar.com/avatar/" in str(result[0].src)

    @patch("requests.get")
    def test_scan_failed_request(self, mock_get):
        """Test handling of failed HTTP requests"""
        # Mock failed response
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 0

    @patch("requests.get")
    def test_scan_request_exception(self, mock_get):
        """Test handling of request exceptions"""
        # Mock exception
        mock_get.side_effect = Exception("Network error")

        emails = [Email(email="test@example.com")]
        result = transform.scan(emails)

        assert len(result) == 0

    @patch("requests.get")
    def test_scan_multiple_emails(self, mock_get):
        """Test scanning multiple emails"""
        # Mock successful responses
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        emails = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
            Email(email="test3@example.com"),
        ]
        result = transform.scan(emails)

        assert len(result) == 3
        assert all(isinstance(gravatar, Gravatar) for gravatar in result)
        assert mock_get.call_count == 3

    @patch("requests.get")
    def test_scan_mixed_success_failure(self, mock_get):
        """Test scanning with mixed success and failure"""

        # Mock mixed responses - check the actual URL being called
        def side_effect(url, *args, **kwargs):
            mock_response = Mock()
            # Check if the URL contains the hash for test1@example.com
            test1_hash = hashlib.md5("test1@example.com".encode()).hexdigest()
            if test1_hash in url:
                mock_response.status_code = 200
            else:
                mock_response.status_code = 404
            return mock_response

        mock_get.side_effect = side_effect

        emails = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
        ]
        result = transform.scan(emails)

        # Should get 1 result for the first email (success) and 0 for the second (failure)
        assert len(result) == 1
        assert result[0].hash == hashlib.md5("test1@example.com".encode()).hexdigest()

    def test_postprocess_with_neo4j_connection(self):
        """Test postprocessing with Neo4j connection"""
        # Mock Neo4j connection
        mock_neo4j = Mock()
        transform_with_neo4j = EmailToGravatarTransform(
            "sketch_123", "scan_123", neo4j_conn=mock_neo4j
        )

        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
            Gravatar(src="https://www.gravatar.com/avatar/hash2", hash="hash2"),
        ]
        original_input = [
            Email(email="test1@example.com"),
            Email(email="test2@example.com"),
        ]

        result = transform_with_neo4j.postprocess(gravatars, original_input)

        # Verify Neo4j queries were executed (one per gravatar)
        assert mock_neo4j.query.call_count == 2

        # Check that results are returned unchanged
        assert result == gravatars

    def test_postprocess_without_neo4j_connection(self):
        """Test postprocessing without Neo4j connection"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]
        original_input = [Email(email="test@example.com")]

        result = transform.postprocess(gravatars, original_input)

        # Should return results unchanged
        assert result == gravatars

    def test_postprocess_missing_original_input(self):
        """Test postprocessing with missing original input"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]
        original_input = []  # Empty list

        result = transform.postprocess(gravatars, original_input)

        # Should handle gracefully and return results
        assert result == gravatars

    def test_postprocess_none_original_input(self):
        """Test postprocessing with None original input"""
        gravatars = [
            Gravatar(src="https://www.gravatar.com/avatar/hash1", hash="hash1"),
        ]

        # The postprocess method doesn't handle None input properly
        # Let's test with an empty list instead
        result = transform.postprocess(gravatars, [])

        # Should handle gracefully and return results
        assert result == gravatars

    def test_execute_full_workflow(self):
        """Test the complete execute workflow"""
        with patch("requests.get") as mock_get:
            # Mock successful response
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = ["test@example.com"]
            result = transform.execute(emails)

            assert len(result) == 1
            assert isinstance(result[0], Gravatar)
            assert (
                result[0].hash == hashlib.md5("test@example.com".encode()).hexdigest()
            )

    def test_execute_with_invalid_input(self):
        """Test execute with invalid input"""
        emails = ["not-an-email", "also-invalid"]

        with patch("requests.get") as mock_get:
            # Mock successful response for any request
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            result = transform.execute(emails)

            # The transform processes any string as an email, so it will create Email objects
            # and attempt to get gravatars for them
            assert len(result) == 2
            assert all(isinstance(gravatar, Gravatar) for gravatar in result)

    def test_gravatar_hash_calculation(self):
        """Test that gravatar hash is calculated correctly"""
        email = "test@example.com"
        expected_hash = hashlib.md5(email.encode()).hexdigest()

        with patch("requests.get") as mock_get:
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = [Email(email=email)]
            result = transform.scan(emails)

            assert len(result) == 1
            assert result[0].hash == expected_hash

    def test_gravatar_url_format(self):
        """Test that gravatar URL is formatted correctly"""
        email = "test@example.com"
        expected_hash = hashlib.md5(email.encode()).hexdigest()
        expected_url = f"https://www.gravatar.com/avatar/{expected_hash}"

        with patch("requests.get") as mock_get:
            mock_response = Mock()
            mock_response.status_code = 200
            mock_get.return_value = mock_response

            emails = [Email(email=email)]
            result = transform.scan(emails)

            assert len(result) == 1
            assert str(result[0].src) == expected_url
175
flowsint-transforms/tests/transforms/email/to_leaks.py
Normal file
175
flowsint-transforms/tests/transforms/email/to_leaks.py
Normal file
@@ -0,0 +1,175 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from flowsint_transforms.emails.to_leaks import EmailToBreachesTransform
|
||||
from flowsint_types.email import Email
|
||||
from flowsint_types.breach import Breach
|
||||
|
||||
transform = EmailToBreachesTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_transform_name():
    """The transform registers under the name 'to_leaks'."""
    assert EmailToBreachesTransform.name() == "to_leaks"
def test_transform_category():
    """The transform belongs to the 'Email' category."""
    assert EmailToBreachesTransform.category() == "Email"
def test_transform_key():
    """The transform keys its input on the 'email' field."""
    assert EmailToBreachesTransform.key() == "email"
def test_preprocess_string_emails():
    """Raw address strings are wrapped into Email objects."""
    addresses = ["test@example.com", "user@domain.org"]
    assert transform.preprocess(addresses) == [Email(email=a) for a in addresses]
def test_preprocess_dict_emails():
    """Dicts carrying an 'email' key are converted to Email objects."""
    payloads = [{"email": "test@example.com"}, {"email": "user@domain.org"}]
    assert transform.preprocess(payloads) == [Email(email=p["email"]) for p in payloads]
def test_preprocess_email_objects():
    """Email objects are passed through untouched."""
    emails = [Email(email="test@example.com"), Email(email="user@domain.org")]
    assert transform.preprocess(emails) == emails
def test_preprocess_mixed_formats():
    """Strings, dicts and Email objects mix freely; unknown dict keys are dropped."""
    mixed = [
        "test@example.com",
        {"email": "user@domain.org"},
        Email(email="admin@company.com"),
        {"invalid_key": "should_be_ignored@test.com"},  # no 'email' key -> filtered
    ]
    kept = [e.email for e in transform.preprocess(mixed)]
    assert "test@example.com" in kept
    assert "user@domain.org" in kept
    assert "admin@company.com" in kept
    assert "should_be_ignored@test.com" not in kept
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_successful_response(mock_get):
|
||||
# Mock successful API response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = [
|
||||
{"Name": "TestBreach", "Title": "Test Breach", "Domain": "test.com"},
|
||||
{"Name": "AnotherBreach", "Title": "Another Breach", "Domain": "another.com"},
|
||||
]
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 2
|
||||
assert isinstance(result[0], Breach)
|
||||
assert isinstance(result[1], Breach)
|
||||
assert result[0].name == "testbreach"
|
||||
assert result[1].name == "anotherbreach"
|
||||
assert result[0].breach["name"] == "testbreach"
|
||||
assert result[1].breach["name"] == "anotherbreach"
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_no_breaches_found(mock_get):
|
||||
# Mock 404 response (no breaches found)
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 404
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 0
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_api_error(mock_get):
|
||||
# Mock API error
|
||||
mock_get.side_effect = Exception("API Error")
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 0
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.requests.get")
|
||||
def test_scan_missing_name_field(mock_get):
|
||||
# Mock API response with missing "Name" field
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = [
|
||||
{"Title": "Test Breach", "Domain": "test.com"}, # Missing "Name" field
|
||||
{"Name": "ValidBreach", "Title": "Valid Breach", "Domain": "valid.com"},
|
||||
]
|
||||
mock_get.return_value = mock_response
|
||||
|
||||
emails = [Email(email="test@example.com")]
|
||||
result = transform.scan(emails)
|
||||
|
||||
assert len(result) == 2
|
||||
assert result[0].name == "unknown" # Should default to "unknown"
|
||||
assert result[1].name == "validbreach" # Should use the provided name
|
||||
assert result[0].breach["title"] == "Test Breach"
|
||||
assert result[1].breach["name"] == "validbreach"
|
||||
|
||||
|
||||
@patch("src.transforms.emails.to_leaks.HIBP_API_KEY", None)
|
||||
def test_scan_no_api_key():
|
||||
"""Test that transform raises ValueError when HIBP_API_KEY is not set."""
|
||||
emails = [Email(email="test@example.com")]
|
||||
|
||||
with pytest.raises(ValueError, match="HIBP_API_KEY not set"):
|
||||
transform.scan(emails)
|
||||
|
||||
|
||||
def test_postprocess():
    """postprocess returns the breaches unchanged and issues 5 Neo4j queries
    (2 breach nodes + 1 email node + 2 relationships).

    FIX: the test previously attached a MagicMock to the shared module-level
    ``transform`` (``transform.neo4j_conn = MagicMock()``), leaking mock state
    into every other test in this module. A dedicated instance is used instead.
    """
    local_transform = EmailToBreachesTransform("sketch_123", "scan_123")
    local_transform.neo4j_conn = MagicMock()

    # Breach objects using the new structure (raw record kept in `breach`).
    breaches = [
        Breach(
            name="testbreach",
            title="Test Breach",
            domain="test.com",
            pwncount=1000,
            breach={"name": "testbreach", "title": "Test Breach"},
        ),
        Breach(
            name="anotherbreach",
            title="Another Breach",
            domain="another.com",
            pwncount=2000,
            breach={"name": "anotherbreach", "title": "Another Breach"},
        ),
    ]
    original_input = [Email(email="test@example.com")]

    result = local_transform.postprocess(breaches, original_input)

    assert result == breaches
    # 2 breach node queries + 1 email node query + 2 relationship queries.
    assert local_transform.neo4j_conn.query.call_count == 5
0
flowsint-transforms/tests/transforms/ip/__init__.py
Normal file
0
flowsint-transforms/tests/transforms/ip/__init__.py
Normal file
77
flowsint-transforms/tests/transforms/ip/asn_to_cidrs.py
Normal file
77
flowsint-transforms/tests/transforms/ip/asn_to_cidrs.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from flowsint_transforms.ips.asn_to_cidrs import AsnToCidrsTransform
|
||||
from flowsint_types.asn import ASN
|
||||
|
||||
transform = AsnToCidrsTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_asns():
    """ASN objects survive preprocess with their numbers intact."""
    asns = [ASN(number=15169), ASN(number=13335)]
    processed = transform.preprocess(asns)
    assert [a.number for a in processed] == [a.number for a in asns]
def test_unprocessed_valid_asns():
    """Numeric strings are parsed into ASN objects."""
    raw = ["15169", "13335"]
    assert list(transform.preprocess(raw)) == [ASN(number=int(v)) for v in raw]
def test_preprocess_invalid_asns():
    """Out-of-range AS numbers are filtered out by preprocess."""
    candidates = [
        ASN(number=15169),
        ASN(number=999999999999),  # beyond the valid AS number range
        ASN(number=13335),
    ]
    kept = [a.number for a in transform.preprocess(candidates)]
    assert 15169 in kept
    assert 13335 in kept
    assert 999999999999 not in kept
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, ASN objects and strings; bad dict keys are dropped.

    FIX: the old assertion ``"invalid_key" not in result_numbers`` compared a
    string against a list of ints and therefore could never fail. The valid
    input contains 13335 exactly once, so asserting its count verifies the
    invalid-key dict (which also carried 13335) was really filtered out.
    """
    asns = [
        {"number": 15169},
        {"invalid_key": 13335},  # wrong key -> must be filtered out
        ASN(number=13335),
        "15169",
    ]
    result = transform.preprocess(asns)

    result_numbers = [asn.number for asn in result]
    assert 15169 in result_numbers
    assert 13335 in result_numbers
    # 13335 appears once in the valid inputs; a second occurrence would mean
    # the invalid-key dict slipped through preprocessing.
    assert result_numbers.count(13335) == 1
def test_schemas():
    """Input schema exposes an integer 'number'; output schema exposes 'network'."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    assert "properties" in input_schema
    input_types = {p["name"]: p["type"] for p in input_schema["properties"]}
    # 'number' must exist and be typed as integer.
    assert input_types.get("number") == "integer"

    assert "properties" in output_schema
    output_names = [p["name"] for p in output_schema["properties"]]
    assert "network" in output_names
122
flowsint-transforms/tests/transforms/ip/cidr_to_ips.py
Normal file
122
flowsint-transforms/tests/transforms/ip/cidr_to_ips.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from flowsint_transforms.ips.cidr_to_ips import CidrToIpsTransform
|
||||
from flowsint_types.cidr import CIDR
|
||||
from flowsint_types.ip import Ip
|
||||
from tests.logger import TestLogger
|
||||
|
||||
logger = TestLogger()
|
||||
transform = CidrToIpsTransform("sketch_123", "scan_123", logger)
|
||||
|
||||
|
||||
def test_preprocess_valid_cidrs():
    """CIDR objects keep their networks through preprocess."""
    cidrs = [CIDR(network="8.8.8.0/24"), CIDR(network="1.1.1.0/24")]
    processed = transform.preprocess(cidrs)
    assert [c.network for c in processed] == [c.network for c in cidrs]
def test_preprocess_unprocessed_valid_cidrs():
    """Raw CIDR strings are promoted to CIDR objects."""
    raw = ["8.8.8.0/24", "1.1.1.0/24"]
    assert list(transform.preprocess(raw)) == [CIDR(network=r) for r in raw]
def test_preprocess_invalid_cidrs():
    """Strings that do not parse as CIDR networks are dropped."""
    candidates = [CIDR(network="8.8.8.0/24"), "invalid-cidr", "not-a-cidr"]
    kept = [str(c.network) for c in transform.preprocess(candidates)]
    assert "8.8.8.0/24" in kept
    assert "invalid-cidr" not in kept
    assert "not-a-cidr" not in kept
def test_preprocess_multiple_formats():
    """Dicts, CIDR objects and strings are accepted; bad keys and values are dropped."""
    candidates = [
        {"network": "8.8.8.0/24"},
        {"invalid_key": "1.1.1.0/24"},  # wrong key -> filtered out
        CIDR(network="9.9.9.0/24"),
        "InvalidCIDR",  # unparseable -> filtered out
    ]
    kept = [str(c.network) for c in transform.preprocess(candidates)]
    assert "8.8.8.0/24" in kept
    assert "9.9.9.0/24" in kept
    assert "1.1.1.0/24" not in kept
    assert "InvalidCIDR" not in kept
def test_scan_extracts_ips(monkeypatch):
    """scan shells out to dnsx in PTR mode and parses one Ip per output line."""
    expected_addresses = ["8.35.200.12", "8.35.200.112", "8.35.200.16", "8.35.200.170"]
    dnsx_stdout = "\n".join(expected_addresses)

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, shell, capture_output, text, timeout):
        # The transform must invoke dnsx with the -ptr flag.
        assert "dnsx" in cmd
        assert "-ptr" in cmd
        return FakeCompletedProcess(dnsx_stdout)

    # Intercept the subprocess call made by the transform.
    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.35.200.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 4
    for ip in ips:
        assert isinstance(ip, Ip)
        assert ip.address in expected_addresses
def test_scan_handles_empty_output(monkeypatch):
    """Empty dnsx output results in an empty Ip list."""

    class FakeCompletedProcess:
        def __init__(self):
            self.stdout = ""
            self.returncode = 0

    def fake_run(cmd, shell, capture_output, text, timeout):
        return FakeCompletedProcess()

    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.8.8.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 0
def test_scan_handles_subprocess_exception(monkeypatch):
    """A subprocess failure is swallowed and yields an empty result list."""

    def fake_run(cmd, shell, capture_output, text, timeout):
        raise Exception("Subprocess failed")

    monkeypatch.setattr("subprocess.run", fake_run)

    ips = transform.scan([CIDR(network="8.8.8.0/24")])

    assert isinstance(ips, list)
    assert len(ips) == 0
267
flowsint-transforms/tests/transforms/ip/ip_to_asn.py
Normal file
267
flowsint-transforms/tests/transforms/ip/ip_to_asn.py
Normal file
@@ -0,0 +1,267 @@
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
from flowsint_transforms.ips.ip_to_asn import IpToAsnTransform
|
||||
from flowsint_types.ip import Ip
|
||||
from flowsint_types.asn import ASN
|
||||
from flowsint_types.cidr import CIDR
|
||||
from tests.logger import TestLogger
|
||||
|
||||
logger = TestLogger()
|
||||
# The transform will get a mock logger from conftest.py automatically
|
||||
transform = IpToAsnTransform("sketch_123", "scan_123", logger)
|
||||
|
||||
|
||||
def test_preprocess_valid_ips():
    """Ip objects keep their addresses through preprocess."""
    ips = [Ip(address="8.8.8.8"), Ip(address="1.1.1.1")]
    processed = transform.preprocess(ips)
    assert [i.address for i in processed] == [i.address for i in ips]
def test_unprocessed_valid_ips():
    """Raw address strings are promoted to Ip objects."""
    raw = ["8.8.8.8", "1.1.1.1"]
    assert list(transform.preprocess(raw)) == [Ip(address=a) for a in raw]
def test_preprocess_invalid_ips():
    """Ip entries whose address does not parse are dropped."""
    candidates = [
        Ip(address="8.8.8.8"),
        Ip(address="invalid_ip"),
        Ip(address="192.168.1.1"),
    ]
    kept = [i.address for i in transform.preprocess(candidates)]
    assert "8.8.8.8" in kept
    assert "192.168.1.1" in kept
    assert "invalid_ip" not in kept
def test_preprocess_multiple_formats():
    """Dicts, Ip objects and strings are accepted; unknown dict keys are dropped."""
    mixed = [
        {"address": "8.8.8.8"},
        {"invalid_key": "1.1.1.1"},  # wrong key -> filtered out
        Ip(address="192.168.1.1"),
        "10.0.0.1",
    ]
    kept = [i.address for i in transform.preprocess(mixed)]
    assert "8.8.8.8" in kept
    assert "192.168.1.1" in kept
    assert "10.0.0.1" in kept
    assert "1.1.1.1" not in kept
def test_scan_extracts_asn_info(monkeypatch):
    """scan feeds each address to asnmap and parses number, name, country and CIDRs."""
    asnmap_record = {
        "input": "8.8.8.8",
        "as_number": "AS15169",
        "as_name": "GOOGLE",
        "as_country": "US",
        "as_range": ["8.8.8.0/24", "8.8.4.0/24"],
    }

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # The transform must invoke asnmap and pipe the address on stdin.
        assert "asnmap" in cmd
        assert input == "8.8.8.8"
        return FakeCompletedProcess(json.dumps(asnmap_record))

    monkeypatch.setattr("subprocess.run", fake_run)

    asns = transform.scan([Ip(address="8.8.8.8")])

    assert isinstance(asns, list)
    assert len(asns) == 1

    asn = asns[0]
    assert isinstance(asn, ASN)
    assert asn.number == 15169  # "AS15169" stripped to 15169
    assert asn.name == "GOOGLE"
    assert asn.country == "US"
    assert len(asn.cidrs) == 2
    assert [str(c.network) for c in asn.cidrs] == ["8.8.8.0/24", "8.8.4.0/24"]
def test_scan_handles_no_asn_found(monkeypatch):
    """Empty asnmap output produces a placeholder 'Unknown' ASN."""

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # Empty stdout simulates "no ASN found" for the address.
        return FakeCompletedProcess("")

    monkeypatch.setattr("subprocess.run", fake_run)

    asns = transform.scan([Ip(address="192.168.1.1")])

    assert isinstance(asns, list)
    assert len(asns) == 1

    placeholder = asns[0]
    assert isinstance(placeholder, ASN)
    assert placeholder.number == 0
    assert placeholder.name == "Unknown"
    assert placeholder.country == "Unknown"
    assert len(placeholder.cidrs) == 0
def test_scan_handles_subprocess_exception(monkeypatch):
    """A crashing subprocess call must be absorbed and mapped to an Unknown ASN."""

    def exploding_run(cmd, input, capture_output, text, timeout):
        raise Exception("Subprocess failed")

    monkeypatch.setattr("subprocess.run", exploding_run)

    results = transform.scan([Ip(address="8.8.8.8")])

    assert isinstance(results, list)
    assert len(results) == 1

    fallback = results[0]
    assert isinstance(fallback, ASN)
    assert fallback.number == 0
    assert fallback.name == "Unknown"
    assert fallback.country == "Unknown"
|
||||
|
||||
|
||||
def test_scan_multiple_ips(monkeypatch):
    """Each input IP should resolve to its own ASN via separate asnmap calls."""
    canned_outputs = {
        "8.8.8.8": {
            "input": "8.8.8.8",
            "as_number": "AS15169",
            "as_name": "GOOGLE",
            "as_country": "US",
            "as_range": ["8.8.8.0/24"],
        },
        "1.1.1.1": {
            "input": "1.1.1.1",
            "as_number": "AS13335",
            "as_name": "CLOUDFLARE",
            "as_country": "US",
            "as_range": ["1.1.1.0/24"],
        },
    }

    class FakeCompletedProcess:
        def __init__(self, stdout):
            self.stdout = stdout
            self.returncode = 0

    def fake_run(cmd, input, capture_output, text, timeout):
        # Look up the canned payload by the IP piped to stdin; unknown
        # addresses fall through to an empty response.
        payload = canned_outputs.get(input)
        if payload is None:
            return FakeCompletedProcess("")
        return FakeCompletedProcess(json.dumps(payload))

    monkeypatch.setattr("subprocess.run", fake_run)

    results = transform.scan([Ip(address="8.8.8.8"), Ip(address="1.1.1.1")])

    assert len(results) == 2

    # Google ASN comes from the first input address.
    assert results[0].number == 15169
    assert results[0].name == "GOOGLE"

    # Cloudflare ASN comes from the second.
    assert results[1].number == 13335
    assert results[1].name == "CLOUDFLARE"
|
||||
|
||||
|
||||
def test_schemas():
    """Input schema exposes an IP address; output schema exposes ASN fields."""
    input_schema = transform.input_schema()
    output_schema = transform.output_schema()

    # The input side must declare a string "address" property.
    assert "properties" in input_schema
    address_props = [
        prop for prop in input_schema["properties"] if prop["name"] == "address"
    ]
    assert address_props
    assert address_props[0]["type"] == "string"

    # The output side must expose the core ASN attributes.
    assert "properties" in output_schema
    prop_names = [prop["name"] for prop in output_schema["properties"]]
    for expected in ("number", "name", "country", "cidrs"):
        assert expected in prop_names
|
||||
|
||||
|
||||
def test_postprocess_creates_neo4j_relationships(monkeypatch):
    """postprocess should link each IP to its resolved ASN in Neo4j."""
    fake_conn = Mock()
    transform.neo4j_conn = fake_conn

    ips = [Ip(address="8.8.8.8")]
    found = [
        ASN(
            number=15169,
            name="GOOGLE",
            country="US",
            cidrs=[CIDR(network="8.8.8.0/24")],
        )
    ]

    returned = transform.postprocess(found, ips)

    # Exactly one Cypher write is expected for a single resolved ASN.
    fake_conn.query.assert_called_once()

    # The second positional argument of the query call holds the parameters.
    params = fake_conn.query.call_args[0][1]
    assert params["ip_address"] == "8.8.8.8"
    assert params["asn_number"] == 15169
    assert params["asn_name"] == "GOOGLE"
    assert params["asn_country"] == "US"
    assert params["sketch_id"] == "sketch_123"

    # postprocess passes the scan results straight through.
    assert returned == found
|
||||
|
||||
|
||||
def test_postprocess_skips_unknown_asns(monkeypatch):
    """Placeholder "Unknown" ASNs must not produce any Neo4j writes."""
    fake_conn = Mock()
    transform.neo4j_conn = fake_conn

    ips = [Ip(address="192.168.1.1")]
    found = [ASN(number=0, name="Unknown", country="Unknown", cidrs=[])]

    returned = transform.postprocess(found, ips)

    # No graph write may happen for an unresolved ASN.
    fake_conn.query.assert_not_called()

    # Results are still returned unchanged.
    assert returned == found
|
||||
103
flowsint-transforms/tests/transforms/ip/ip_to_infos.py
Normal file
103
flowsint-transforms/tests/transforms/ip/ip_to_infos.py
Normal file
@@ -0,0 +1,103 @@
|
||||
from flowsint_transforms.ips.geolocation import IpToInfosTransform
|
||||
from flowsint_types.ip import Ip, Ip
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = IpToInfosTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_ips():
    """Ip objects pass through preprocess with their addresses intact."""
    source = [Ip(address="8.8.8.8"), Ip(address="1.1.1.1")]
    processed = transform.preprocess(source)
    assert [item.address for item in processed] == [item.address for item in source]
|
||||
|
||||
|
||||
def test_preprocess_string_ips():
    """Plain string addresses are wrapped into Ip objects by preprocess."""
    ips = [
        "8.8.8.8",
        "1.1.1.1",
    ]
    result = transform.preprocess(ips)
    # Compare the extracted addresses directly against the raw input strings.
    # (Previously this list was computed but never used, and the assertion
    # recomputed it against a pointless copy of `ips`.)
    result_ips = [d.address for d in result]
    assert result_ips == ips
|
||||
|
||||
|
||||
def test_preprocess_invalid_ips():
    """Syntactically invalid addresses are filtered out by preprocess."""
    source = [
        Ip(address="8.8.8.8"),
        Ip(address="invalid_ip"),
        Ip(address="1.1.1.1"),
    ]
    kept = [item.address for item in transform.preprocess(source)]
    assert "8.8.8.8" in kept
    assert "1.1.1.1" in kept
    assert "invalid_ip" not in kept
|
||||
|
||||
|
||||
def test_preprocess_multiple_formats():
    """preprocess accepts dicts, Ip objects and raw strings, dropping bad shapes."""
    source = [
        {"address": "8.8.8.8"},
        {"invalid_key": "1.2.3.4"},
        Ip(address="1.1.1.1"),
        "1.1.1.1",
    ]
    kept = [item.address for item in transform.preprocess(source)]
    assert "8.8.8.8" in kept
    assert "1.1.1.1" in kept
    # A dict without an "address" key cannot be coerced into an Ip.
    assert "1.2.3.4" not in kept
|
||||
|
||||
|
||||
def test_scan_returns_ip(monkeypatch):
    """execute() should enrich each Ip with the mocked geolocation fields."""

    def fake_location(address):
        # Static payload regardless of the queried address.
        return {
            "latitude": 37.386,
            "longitude": -122.0838,
            "country": "US",
            "city": "Mountain View",
            "isp": "Google LLC",
        }

    monkeypatch.setattr(transform, "get_location_data", fake_location)

    enriched = transform.execute([Ip(address="8.8.8.8")])

    assert isinstance(enriched, list)
    first = enriched[0]
    assert isinstance(first, Ip)
    assert first.address == "8.8.8.8"
    assert first.city == "Mountain View"
    assert first.country == "US"
    assert first.isp == "Google LLC"
|
||||
|
||||
|
||||
def test_schemas():
    """Input and output schemas are identical: an Ip with optional geo fields."""
    expected = {
        "type": "Ip",
        "properties": [
            {"name": "address", "type": "string"},
            {"name": "latitude", "type": "number | null"},
            {"name": "longitude", "type": "number | null"},
            {"name": "country", "type": "string | null"},
            {"name": "city", "type": "string | null"},
            {"name": "isp", "type": "string | null"},
        ],
    }
    # The transform enriches Ip in place, so both schemas share one shape.
    assert transform.input_schema() == expected
    assert transform.output_schema() == expected
|
||||
@@ -0,0 +1,29 @@
|
||||
from flowsint_transforms.organizations.to_infos import OrgToInfosTransform
|
||||
from flowsint_types.organization import Organization
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = OrgToInfosTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_preprocess_valid_names():
    """Organization objects, dicts and raw strings all normalize to Organizations."""
    mixed_inputs = [Organization(name="OpenAI"), {"name": "Inria"}, "OVH"]
    normalized = transform.preprocess(mixed_inputs)
    assert [org.name for org in normalized] == ["OpenAI", "Inria", "OVH"]
|
||||
|
||||
|
||||
# def test_preprocess_invalid_entries():
|
||||
# data = [
|
||||
# {"wrong_key": "value"},
|
||||
# 123,
|
||||
# None,
|
||||
# "",
|
||||
# {"name": ""},
|
||||
# ]
|
||||
# result = transform.preprocess(data)
|
||||
# assert result == []
|
||||
|
||||
|
||||
def test_execute():
    # Smoke test: only checks that execute() runs end-to-end without raising
    # on a raw string input; the result itself is not inspected.
    transform.execute(["Karim Terrache"])
    assert True
|
||||
57
flowsint-transforms/tests/transforms/social/maigret.py
Normal file
57
flowsint-transforms/tests/transforms/social/maigret.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from pathlib import Path
|
||||
from flowsint_transforms.socials.maigret import MaigretTransform
|
||||
from flowsint_types.social import SocialProfile
|
||||
|
||||
# Module-level transform under test, bound to fixed sketch/scan identifiers.
transform = MaigretTransform("sketch_123", "scan_123")
|
||||
|
||||
|
||||
def test_unprocessed_valid_usernames():
    """Raw username strings are wrapped into SocialProfile objects."""
    raw = [
        "toto123",
        "DorianXd78",
    ]
    processed = transform.preprocess(raw)
    expected = [SocialProfile(username=name) for name in raw]
    assert list(processed) == expected
|
||||
|
||||
|
||||
def test_preprocess_invalid_usernames():
    """Usernames containing spaces are rejected by preprocess."""
    profiles = [
        SocialProfile(username="toto123"),
        SocialProfile(username="DorianXd78_Official"),
        SocialProfile(username="This is not a username"),
    ]
    kept = [p.username for p in transform.preprocess(profiles)]
    assert "toto123" in kept
    assert "DorianXd78_Official" in kept
    assert "This is not a username" not in kept
|
||||
|
||||
|
||||
def test_preprocess_multiple_formats():
    """preprocess handles dicts, SocialProfile objects and plain strings."""
    mixed = [
        {"username": "toto123"},
        {"invalid_key": "ValId_UseRnAme"},
        SocialProfile(username="DorianXd78_Official"),
        "MySimpleUsername",
    ]
    kept = [p.username for p in transform.preprocess(mixed)]
    assert "toto123" in kept
    assert "DorianXd78_Official" in kept
    assert "MySimpleUsername" in kept
    # A dict without a "username" key cannot be converted.
    assert "ValId_UseRnAme" not in kept
|
||||
|
||||
|
||||
def test_parsing_invalid_output_file():
    """A missing output file yields an empty result list rather than an error."""
    missing = Path("/this/path/does/not/exist")
    assert transform.parse_maigret_output("toto123", missing) == []
|
||||
|
||||
|
||||
def test_parsing():
    """Parse a pre-seeded Maigret output file and expect two profiles.

    NOTE(review): relies on an external fixture at /tmp/maigret_test.json —
    consider generating it inside the test (tmp_path) so the suite is
    self-contained; verify with the suite owner.
    """
    results = transform.parse_maigret_output("toto123", Path("/tmp/maigret_test.json"))
    # Leftover debug print removed; the assertion documents the expectation.
    assert len(results) == 2
|
||||
275
flowsint-transforms/tests/transforms/website/to_links.py
Normal file
275
flowsint-transforms/tests/transforms/website/to_links.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
from flowsint_transforms.websites.to_links import WebsiteToLinks
|
||||
from flowsint_types.website import Website
|
||||
|
||||
|
||||
class MockCrawlResults:
    """Container mimicking the crawler's result object (internal/external URLs)."""

    def __init__(self, internal=None, external=None):
        # Falsy arguments (None or empty) default to fresh empty lists so
        # instances never share mutable state.
        self.internal = internal if internal else []
        self.external = external if external else []
|
||||
|
||||
|
||||
class MockCrawler:
    """Stand-in for the real Crawler: replays a fixed set of discovered URLs."""

    def __init__(self, url, recursive=True, verbose=False, _on_result_callback=None):
        self.url = url
        self.callback = _on_result_callback

    def fetch(self):
        # Network access is a no-op in the mock.
        pass

    def extract_urls(self):
        """Replay the canned internal/external hits through the result callback."""
        if not self.callback:
            return
        for link, external in (
            ("https://example.com/page1", False),
            ("https://example.com/page2", False),
            ("https://external.com/page", True),
            ("https://another-external.org/resource", True),
        ):
            self.callback(link, is_external=external)

    def get_results(self):
        """Return the same canned URL sets wrapped in a MockCrawlResults."""
        internal_links = ["https://example.com/page1", "https://example.com/page2"]
        external_links = [
            "https://external.com/page",
            "https://another-external.org/resource",
        ]
        return MockCrawlResults(internal=internal_links, external=external_links)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_website_to_links_real_time_neo4j_creation():
    """Test that Neo4j nodes are created in real-time during the callback."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    # Mock neo4j connection and methods
    transform.neo4j_conn = Mock()
    transform.create_node = Mock()
    transform.create_relationship = Mock()
    transform.log_graph_message = Mock()

    # Test input
    websites = [Website(url="https://example.com")]

    # Patch Crawler where the transform imports it. The previous
    # "src.transforms.websites.to_links" target is stale after the move to
    # the flowsint_transforms package and would raise ModuleNotFoundError.
    with patch("flowsint_transforms.websites.to_links.Crawler", MockCrawler):
        await transform.scan(websites)

    # Verify main website and domain nodes were created upfront
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com",
        caption="https://example.com",
        type="website",
    )
    transform.create_node.assert_any_call(
        "domain", "name", "example.com", caption="example.com", type="domain"
    )

    # Verify main website to domain relationship
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "example.com",
        "BELONGS_TO_DOMAIN",
    )

    # Verify internal website nodes were created in callback
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com/page1",
        caption="https://example.com/page1",
        type="website",
    )
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com/page2",
        caption="https://example.com/page2",
        type="website",
    )

    # Verify internal website relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://example.com/page1",
        "LINKS_TO",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://example.com/page2",
        "LINKS_TO",
    )

    # Verify external website nodes were created in callback
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://external.com/page",
        caption="https://external.com/page",
        type="website",
    )
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://another-external.org/resource",
        caption="https://another-external.org/resource",
        type="website",
    )

    # Verify external domain nodes were created in callback
    transform.create_node.assert_any_call(
        "domain", "name", "external.com", caption="external.com", type="domain"
    )
    transform.create_node.assert_any_call(
        "domain",
        "name",
        "another-external.org",
        caption="another-external.org",
        type="domain",
    )

    # Verify main website to external website relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://external.com/page",
        "LINKS_TO",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "website",
        "url",
        "https://another-external.org/resource",
        "LINKS_TO",
    )

    # Verify external website to domain relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://external.com/page",
        "domain",
        "name",
        "external.com",
        "BELONGS_TO_DOMAIN",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://another-external.org/resource",
        "domain",
        "name",
        "another-external.org",
        "BELONGS_TO_DOMAIN",
    )

    # Verify main website to external domain relationships
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "external.com",
        "LINKS_TO_DOMAIN",
    )
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "another-external.org",
        "LINKS_TO_DOMAIN",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_website_to_links_error_handling_with_neo4j():
    """Test that main nodes are still created even when crawling fails."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    # Mock neo4j connection and methods
    transform.neo4j_conn = Mock()
    transform.create_node = Mock()
    transform.create_relationship = Mock()
    transform.log_graph_message = Mock()

    # Mock crawler that raises an exception
    def mock_crawler_error(*args, **kwargs):
        raise Exception("Test error")

    websites = [Website(url="https://example.com")]

    # Patch Crawler where the transform imports it. The previous
    # "src.transforms.websites.to_links" target is stale after the move to
    # the flowsint_transforms package and would raise ModuleNotFoundError.
    with patch("flowsint_transforms.websites.to_links.Crawler", mock_crawler_error):
        results = await transform.scan(websites)

    # Verify main website and domain nodes were still created despite error
    transform.create_node.assert_any_call(
        "website",
        "url",
        "https://example.com",
        caption="https://example.com",
        type="website",
    )
    transform.create_node.assert_any_call(
        "domain", "name", "example.com", caption="example.com", type="domain"
    )

    # Verify main website to domain relationship was created
    transform.create_relationship.assert_any_call(
        "website",
        "url",
        "https://example.com",
        "domain",
        "name",
        "example.com",
        "BELONGS_TO_DOMAIN",
    )

    # Verify result structure: crawl failure yields empty URL/domain lists
    assert len(results) == 1
    result = results[0]
    assert result["website"] == "https://example.com"
    assert result["main_domain"] == "example.com"
    assert result["internal_urls"] == []
    assert result["external_urls"] == []
    assert result["external_domains"] == []
||||
|
||||
|
||||
def test_postprocess_simplified():
    """Test that postprocess now just returns results as-is."""
    transform = WebsiteToLinks(sketch_id="test", scan_id="test")

    seeds = [Website(url="https://example.com")]
    crawled = [
        {
            "website": "https://example.com",
            "main_domain": "example.com",
            "internal_urls": ["https://example.com/page1"],
            "external_urls": ["https://external.com/page"],
            "external_domains": ["external.com"],
        }
    ]

    # Neo4j writes happen during scan(), so postprocess is a pass-through.
    assert transform.postprocess(crawled, seeds) == crawled
|
||||
Reference in New Issue
Block a user