From 6bca754d32b8a5c5f62e3119267b300a266e627f Mon Sep 17 00:00:00 2001 From: dextmorgn Date: Wed, 23 Jul 2025 14:17:19 +0200 Subject: [PATCH] feat: react-force-graph node borders --- flowsint-api/app/api/routes/events.py | 5 +- flowsint-api/app/api/routes/transforms.py | 21 +- flowsint-api/app/scanners/base.py | 171 ++++++++- .../app/scanners/crypto/wallet_to_nfts.py | 52 +-- .../scanners/crypto/wallet_to_transactions.py | 50 +-- .../app/scanners/domains/domain_to_asn.py | 147 +++----- flowsint-api/app/scanners/domains/resolve.py | 41 +-- .../app/scanners/domains/subdomains.py | 44 +-- .../app/scanners/domains/to_website.py | 121 +++---- flowsint-api/app/scanners/domains/whois.py | 95 +++-- flowsint-api/app/scanners/emails/holehe.py | 163 --------- .../app/scanners/emails/to_gravatar.py | 142 +++----- flowsint-api/app/scanners/emails/to_leaks.py | 246 ++++--------- .../app/scanners/individuals/to_org.py | 202 +---------- flowsint-api/app/scanners/ips/asn_to_cidrs.py | 44 +-- flowsint-api/app/scanners/ips/cidr_to_ips.py | 43 +-- flowsint-api/app/scanners/ips/geolocation.py | 8 + flowsint-api/app/scanners/ips/ip_to_asn.py | 172 +++------ .../app/scanners/ips/reverse_resolve.py | 165 +++------ flowsint-api/app/scanners/leaks/hibp.py | 85 ++++- flowsint-api/app/scanners/n8n/connector.py | 95 +++-- .../app/scanners/organizations/org_to_asn.py | 49 +-- .../app/scanners/organizations/to_infos.py | 58 +-- flowsint-api/app/scanners/phones/ignorant.py | 79 ++-- flowsint-api/app/scanners/registry.py | 14 +- flowsint-api/app/scanners/socials/maigret.py | 56 +-- flowsint-api/app/scanners/socials/sherlock.py | 113 +++--- .../app/scanners/websites/to_crawler.py | 47 +-- .../app/scanners/websites/to_domain.py | 125 ++----- .../app/scanners/websites/to_webtrackers.py | 117 ++---- .../app/tools/organizations/sirene.py | 4 + flowsint-api/tests/README.md | 7 + .../tests/scanners/domains/resolve.py | 82 ++++- flowsint-api/tests/scanners/emails/holehe.py | 49 --- 
.../tests/scanners/test_base_scanner.py | 205 +++++++++++ .../tests/scanners/test_crypto_with_vault.py | 4 +- .../scanners/test_input_output_migration.py | 336 ++++++++++++++++++ flowsint-api/tests/scanners/test_registry.py | 23 +- .../tests/scanners/test_schema_generation.py | 148 ++++++++ flowsint-app/package.json | 1 + .../src/renderer/public/icons/n8n.svg | 1 + .../src/components/chat/floating-chat.tsx | 2 +- .../dashboard/active-malware-chart.tsx | 77 ++++ .../src/components/dashboard/metrics-card.tsx | 57 +++ .../dashboard/targeted-sectors-chart.tsx | 114 ++++++ .../src/components/graphs/empty-state.tsx | 100 +++++- .../src/components/graphs/graph-panel.tsx | 12 +- .../components/graphs/graph-react-force.tsx | 18 +- .../components/graphs/graph-react-sigma.tsx | 140 ++++++++ .../src/components/graphs/toolbar.tsx | 38 +- .../src/components/layout/log-panel.tsx | 12 +- .../components/table/relationships-view.tsx | 2 +- .../src/components/transforms/editor.tsx | 5 +- .../components/transforms/params-dialog.tsx | 36 +- .../components/transforms/scanner-item.tsx | 9 +- .../components/transforms/scanner-node.tsx | 9 +- .../components/transforms/transform-sheet.tsx | 18 +- .../src/renderer/src/routes/__root.tsx | 2 +- .../src/renderer/src/routes/_auth.tsx | 6 +- .../src/renderer/src/types/transform.ts | 3 +- flowsint-app/yarn.lock | 14 + 61 files changed, 2347 insertions(+), 1957 deletions(-) delete mode 100644 flowsint-api/app/scanners/emails/holehe.py create mode 100644 flowsint-api/tests/README.md delete mode 100644 flowsint-api/tests/scanners/emails/holehe.py create mode 100644 flowsint-api/tests/scanners/test_base_scanner.py create mode 100644 flowsint-api/tests/scanners/test_input_output_migration.py create mode 100644 flowsint-api/tests/scanners/test_schema_generation.py create mode 100644 flowsint-app/src/renderer/public/icons/n8n.svg create mode 100644 flowsint-app/src/renderer/src/components/dashboard/active-malware-chart.tsx create mode 100644 
flowsint-app/src/renderer/src/components/dashboard/metrics-card.tsx create mode 100644 flowsint-app/src/renderer/src/components/dashboard/targeted-sectors-chart.tsx create mode 100644 flowsint-app/src/renderer/src/components/graphs/graph-react-sigma.tsx diff --git a/flowsint-api/app/api/routes/events.py b/flowsint-api/app/api/routes/events.py index 27cc864..8d1050c 100644 --- a/flowsint-api/app/api/routes/events.py +++ b/flowsint-api/app/api/routes/events.py @@ -29,7 +29,7 @@ def get_logs_by_sketch( raise HTTPException(status_code=404, detail=f"Sketch with id {sketch_id} not found") print(f"[EventEmitter] Fetching logs for sketch {sketch_id} (limit: {limit}, since: {since})") - query = db.query(Log).filter(Log.sketch_id == sketch_id).order_by(Log.created_at.asc()) + query = db.query(Log).filter(Log.sketch_id == sketch_id).order_by(Log.created_at.desc()) if since: query = query.filter(Log.created_at > since) @@ -39,6 +39,9 @@ def get_logs_by_sketch( logs = query.limit(limit).all() + # Reverse to show chronologically (oldest to newest) + logs = list(reversed(logs)) + results = [] for log in logs: # Ensure payload is always a dictionary diff --git a/flowsint-api/app/api/routes/transforms.py b/flowsint-api/app/api/routes/transforms.py index 564d52d..0376f1b 100644 --- a/flowsint-api/app/api/routes/transforms.py +++ b/flowsint-api/app/api/routes/transforms.py @@ -69,17 +69,18 @@ async def get_material_list(): scanner_categories = { category: [ { - "class_name": scanner["class_name"], - "category": scanner["category"], - "name": scanner["name"], - "module": scanner["module"], - "doc": scanner["doc"], - "inputs": scanner["inputs"], - "outputs": scanner["outputs"], + "class_name": scanner.get("class_name"), + "category": scanner.get("category"), + "name": scanner.get("name"), + "module": scanner.get("module"), + "doc": scanner.get("doc"), + "inputs": scanner.get("inputs"), + "outputs": scanner.get("outputs"), "type": "scanner", - "params": scanner["params"], - 
"params_schema": scanner["params_schema"], - "requires_key": scanner["requires_key"] + "params": scanner.get("params"), + "params_schema": scanner.get("params_schema"), + "required_params": scanner.get("required_params"), + "icon": scanner.get("icon") } for scanner in scanner_list ] diff --git a/flowsint-api/app/scanners/base.py b/flowsint-api/app/scanners/base.py index 5921c41..16b475a 100644 --- a/flowsint-api/app/scanners/base.py +++ b/flowsint-api/app/scanners/base.py @@ -1,17 +1,15 @@ from abc import ABC, abstractmethod -from typing import List, Dict, Any, Optional -from pydantic import ValidationError, BaseModel, Field, create_model +from typing import List, Dict, Any, Optional, get_origin, get_args +from pydantic import ValidationError, BaseModel, Field, create_model, TypeAdapter from pydantic.config import ConfigDict - from app.core.graph_db import Neo4jConnection from app.core.logger import Logger from app.core.vault import VaultProtocol - +from app.utils import resolve_type class InvalidScannerParams(Exception): pass - def build_params_model(params_schema: list) -> BaseModel: """ Build a strict Pydantic model from a params_schema. @@ -21,10 +19,10 @@ def build_params_model(params_schema: list) -> BaseModel: for param in params_schema: name = param["name"] - typ = str # You can later enhance this to support int, bool, etc. + type = str # You can later enhance this to support int, bool, etc. required = param.get("required", False) default = ... if required else param.get("default") - fields[name] = (Optional[typ], Field(default=default, description=param.get("description", ""))) + fields[name] = (Optional[type], Field(default=default, description=param.get("description", ""))) model = create_model( "ParamsModel", @@ -35,6 +33,64 @@ def build_params_model(params_schema: list) -> BaseModel: return model class Scanner(ABC): + """ + Abstract base class for all scanners. 
+ + ## InputType and OutputType Pattern + + Scanners only need to define InputType and OutputType as class attributes. + The base class automatically handles schema generation: + + ```python + from typing import List + from app.types.domain import Domain + from app.types.ip import Ip + + class MyScanner(Scanner): + # Define types as class attributes + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls): + return "my_scanner" + + @classmethod + def category(cls): + return "Domain" + + @classmethod + def key(cls): + return "domain" + + def preprocess(self, data: InputType) -> InputType: + cleaned: InputType = [] + # ... implementation + return cleaned + + async def scan(self, data: InputType) -> OutputType: + results: OutputType = [] + # ... implementation + return results + + # Make types available at module level for easy access + InputType = MyScanner.InputType + OutputType = MyScanner.OutputType + ``` + + The base class automatically provides: + - input_schema() method using InputType + - output_schema() method using OutputType + - Error handling for missing type definitions + - Consistent schema generation across all scanners + + Subclasses can override input_schema() or output_schema() if needed for special cases. + """ + + # Abstract type aliases that must be defined in subclasses for runtime use + InputType = NotImplemented + OutputType = NotImplemented + def __init__( self, sketch_id: Optional[str] = None, @@ -106,13 +162,17 @@ class Scanner(ABC): @classmethod - def requires_key(self) -> bool: + def required_params(self) -> bool: return False @classmethod @abstractmethod def name(cls) -> str: pass + + @classmethod + def icon(cls) -> str | None: + return None @classmethod @abstractmethod @@ -126,9 +186,12 @@ class Scanner(ABC): pass @classmethod - @abstractmethod def input_schema(cls) -> Dict[str, Any]: - pass + """ + Generate input schema from InputType class attribute. 
+ Subclasses don't need to override this unless they have special requirements. + """ + return cls.generate_input_schema() @classmethod def get_params_schema(cls) -> List[Dict[str, Any]]: @@ -136,9 +199,92 @@ class Scanner(ABC): return [] @classmethod - @abstractmethod def output_schema(cls) -> Dict[str, Any]: - pass + """ + Generate output schema from OutputType class attribute. + Subclasses don't need to override this unless they have special requirements. + """ + return cls.generate_output_schema() + + @classmethod + def generate_input_schema(cls) -> Dict[str, Any]: + """ + Helper method to generate input schema from InputType class attribute. + + Raises: + NotImplementedError: If InputType is not defined in the subclass + """ + if cls.InputType is NotImplemented: + raise NotImplementedError(f"InputType must be defined in {cls.__name__}") + + adapter = TypeAdapter(cls.InputType) + schema = adapter.json_schema() + + # Handle different schema structures + if "$defs" in schema and schema["$defs"]: + # Follow the $ref in items to get the correct type (not just the first one) + items_ref = schema.get("items", {}).get("$ref") + if items_ref: + # Extract type name from $ref like "#/$defs/Website" -> "Website" + type_name = items_ref.split("/")[-1] + details = schema["$defs"][type_name] + else: + # Fallback: get the first type definition (for backward compatibility) + type_name, details = list(schema["$defs"].items())[0] + + return { + "type": type_name, + "properties": [ + {"name": prop, "type": resolve_type(info, schema)} + for prop, info in details["properties"].items() + ] + } + else: + # Handle simpler schemas + return { + "type": schema.get("title", "Any"), + "properties": [{"name": "value", "type": "object"}] + } + + @classmethod + def generate_output_schema(cls) -> Dict[str, Any]: + """ + Helper method to generate output schema from OutputType class attribute. 
+ + Raises: + NotImplementedError: If OutputType is not defined in the subclass + """ + if cls.OutputType is NotImplemented: + raise NotImplementedError(f"OutputType must be defined in {cls.__name__}") + + adapter = TypeAdapter(cls.OutputType) + schema = adapter.json_schema() + + # Handle different schema structures + if "$defs" in schema and schema["$defs"]: + # Follow the $ref in items to get the correct type (not just the first one) + items_ref = schema.get("items", {}).get("$ref") + if items_ref: + # Extract type name from $ref like "#/$defs/Website" -> "Website" + type_name = items_ref.split("/")[-1] + details = schema["$defs"][type_name] + else: + # Fallback: get the first type definition (for backward compatibility) + type_name, details = list(schema["$defs"].items())[0] + + return { + "type": type_name, + "properties": [ + {"name": prop, "type": resolve_type(info, schema)} + for prop, info in details["properties"].items() + ] + } + else: + # Handle simpler schemas + return { + "type": schema.get("title", "Any"), + "properties": [{"name": "value", "type": "object"}] + } @abstractmethod async def scan(self, values: List[str]) -> List[Dict[str, Any]]: @@ -159,7 +305,6 @@ class Scanner(ABC): async def execute(self, values: List[str]) -> List[Dict[str, Any]]: if self.name() != "transform_orchestrator": Logger.info(self.sketch_id, {"message": f"Scanner {self.name()} started."}) - try: await self.async_init() preprocessed = self.preprocess(values) diff --git a/flowsint-api/app/scanners/crypto/wallet_to_nfts.py b/flowsint-api/app/scanners/crypto/wallet_to_nfts.py index b87d699..325318c 100644 --- a/flowsint-api/app/scanners/crypto/wallet_to_nfts.py +++ b/flowsint-api/app/scanners/crypto/wallet_to_nfts.py @@ -1,20 +1,21 @@ import os import socket -from typing import List, Dict, Any, Optional, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Optional, Union import requests from app.scanners.base import Scanner from 
app.types.wallet import CryptoWallet, CryptoNFT -from app.utils import resolve_type from app.core.logger import Logger from app.core.graph_db import Neo4jConnection -InputType: TypeAlias = List[CryptoWallet] -OutputType: TypeAlias = List[CryptoNFT] ETHERSCAN_API_URL = os.getenv("ETHERSCAN_API_URL") class CryptoWalletAddressToNFTs(Scanner): """Resolve NFTs for a wallet address (ETH).""" + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[CryptoWallet] + OutputType = List[CryptoNFT] + def __init__( self, sketch_id: Optional[str] = None, @@ -33,7 +34,7 @@ class CryptoWalletAddressToNFTs(Scanner): ) @classmethod - def requires_key(cls) -> bool: + def required_params(cls) -> bool: return True @classmethod @@ -67,32 +68,6 @@ class CryptoWalletAddressToNFTs(Scanner): def key(cls) -> str: return "address" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -108,12 +83,11 @@ class CryptoWalletAddressToNFTs(Scanner): return cleaned async def scan(self, data: InputType) -> OutputType: - results: OutputType = [] results: OutputType = [] params = self.get_params() - Logger.warn(self.sketch_id, {"message": f"{str(params)}"}) - api_key = params["ETHERSCAN_API_KEY"] - 
api_url = params["ETHERSCAN_API_URL"] + Logger.debug(self.sketch_id, {"message": f"{str(params)}"}) + api_key = params.get("ETHERSCAN_API_KEY", None) + api_url = params.get("ETHERSCAN_API_URL", None) if not api_key: Logger.error(self.sketch_id, {"message": "ETHERSCAN_API_KEY is required"}) raise ValueError("ETHERSCAN_API_KEY is required") @@ -212,4 +186,8 @@ class CryptoWalletAddressToNFTs(Scanner): Logger.graph_append(self.sketch_id, {"message": f"Found NFT for {nft.wallet.address}: {nft.contract_address} - {nft.token_id}"}) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = CryptoWalletAddressToNFTs.InputType +OutputType = CryptoWalletAddressToNFTs.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/crypto/wallet_to_transactions.py b/flowsint-api/app/scanners/crypto/wallet_to_transactions.py index a6ae78e..6beb1eb 100644 --- a/flowsint-api/app/scanners/crypto/wallet_to_transactions.py +++ b/flowsint-api/app/scanners/crypto/wallet_to_transactions.py @@ -1,24 +1,24 @@ import os -from typing import List, Dict, Any, Optional, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Optional, Union import requests import requests.exceptions from datetime import datetime from app.scanners.base import Scanner from app.types.wallet import CryptoWallet, CryptoWalletTransaction -from app.utils import resolve_type from app.core.logger import Logger from app.core.graph_db import Neo4jConnection -InputType: TypeAlias = List[CryptoWallet] -OutputType: TypeAlias = List[CryptoWalletTransaction] - ETHERSCAN_API_URL = os.getenv("ETHERSCAN_API_URL") def wei_to_eth(wei_str): return int(wei_str) / 10**18 class CryptoWalletAddressToTransactions(Scanner): + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[CryptoWallet] + OutputType = List[CryptoWalletTransaction] + def __init__( self, 
sketch_id: Optional[str] = None, @@ -37,8 +37,12 @@ class CryptoWalletAddressToTransactions(Scanner): ) @classmethod - def requires_key(cls) -> bool: + def required_params(cls) -> bool: return True + + @classmethod + def icon(cls) -> str | None: + return "cryptowallet" @classmethod def get_params_schema(cls) -> List[Dict[str, Any]]: @@ -71,32 +75,6 @@ class CryptoWalletAddressToTransactions(Scanner): def key(cls) -> str: return "address" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -262,4 +240,8 @@ class CryptoWalletAddressToTransactions(Scanner): }) Logger.graph_append(self.sketch_id, {"message": f"Transaction on {datetime.fromtimestamp(int(tx.timestamp)).strftime('%Y-%m-%d %H:%M:%S') if tx.timestamp else 'Unknown time'}: {tx.source.address} -> {tx.target.address}"}) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = CryptoWalletAddressToTransactions.InputType +OutputType = CryptoWalletAddressToTransactions.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/domains/domain_to_asn.py b/flowsint-api/app/scanners/domains/domain_to_asn.py index 624d44a..0e375db 100644 --- 
a/flowsint-api/app/scanners/domains/domain_to_asn.py +++ b/flowsint-api/app/scanners/domains/domain_to_asn.py @@ -1,21 +1,20 @@ import json import socket import subprocess -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Union from app.scanners.base import Scanner -from app.types.cidr import CIDR from app.types.domain import Domain from app.types.asn import ASN -from app.utils import is_valid_domain, resolve_type +from app.utils import is_valid_domain from app.core.logger import Logger -InputType: TypeAlias = List[Domain] -OutputType: TypeAlias = List[ASN] - class DomainToAsnScanner(Scanner): """Takes a domain and returns its corresponding ASN.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Domain] + OutputType = List[ASN] + @classmethod def name(cls) -> str: return "domain_to_asn_scanner" @@ -28,111 +27,59 @@ class DomainToAsnScanner(Scanner): def key(cls) -> str: return "Domain" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: domain_obj = None if isinstance(item, str): - domain_obj = Domain(domain=item) + if is_valid_domain(item): + domain_obj = Domain(domain=item) elif 
isinstance(item, dict) and "domain" in item: - domain_obj = Domain(domain=item["domain"]) + if is_valid_domain(item["domain"]): + domain_obj = Domain(domain=item["domain"]) elif isinstance(item, Domain): domain_obj = item - if domain_obj and is_valid_domain(domain_obj.domain): + if domain_obj: cleaned.append(domain_obj) return cleaned async def scan(self, data: InputType) -> OutputType: - """Find ASN information for domain name using asnmap.""" - asns: OutputType = [] + results: OutputType = [] for domain in data: - asn_data = self.__get_asn_from_asnmap(domain.domain) - if asn_data: - Logger.info(self.sketch_id, {"message": f"Domain {domain.domain} has ASN {asn_data['as_number']}."}) - asns.append(ASN( - number=int(asn_data["as_number"].lstrip("AS")), - name=asn_data["as_name"], - country=asn_data["as_country"], - cidrs=[CIDR(network=cidr) for cidr in asn_data["as_range"]] - )) - else: - Logger.info(self.sketch_id, {"message": f"No ASN found for domain {domain.domain}"}) - return asns - - def __get_asn_from_asnmap(self, domain: str) -> Dict[str, Any]: - try: - command = f"echo {domain} | asnmap -silent -json | jq" - result = subprocess.run( - command, - shell=True, - capture_output=True, text=True, timeout=60 - ) - if not result.stdout.strip(): - return None - return json.loads(result.stdout) - except Exception as e: - Logger.error(self.sketch_id, {"message": f"asnmap exception for {domain}: {str(e)}"}) - return None - - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - # Create Neo4j relationships between domain and their corresponding ASNs - for input_ip, result_asn in zip(original_input, results): - # Skip if no valid ASN was found - if result_asn.number == 0: - continue - Logger.graph_append(self.sketch_id, {"message": f"Domain {input_ip.domain} -> ASN {result_asn.number}"}) + try: + # First resolve domain to IP + ip = socket.gethostbyname(domain.domain) - query = """ - MERGE (domain:domain {domain: $domain}) - SET 
domain.sketch_id = $sketch_id, - domain.label = $domain, - domain.type = "domain" - - MERGE (asn:asn {number: $asn_number}) - SET asn.sketch_id = $sketch_id, - asn.name = $asn_name, - asn.country = $asn_country, - asn.label = $asn_label, - asn.type = "asn" - - MERGE (domain)-[:BELONGS_TO {sketch_id: $sketch_id}]->(asn) - """ - - if self.neo4j_conn: - self.neo4j_conn.query(query, { - "domain": input_ip.domain, - "asn_number": result_asn.number, - "asn_name": result_asn.name, - "asn_country": result_asn.country, - "asn_label": f"AS{result_asn.number}", - "asn_caption": f"AS{result_asn.number} - {result_asn.name}", - "sketch_id": self.sketch_id, - }) + # Use asnmap to get ASN info + result = subprocess.run( + ["asnmap", "-a", ip, "-json"], + capture_output=True, + text=True, + timeout=30 + ) + + if result.returncode == 0: + output = result.stdout.strip() + if output: + asn_data = json.loads(output) + if asn_data and 'as_number' in asn_data: + asn = ASN( + asn=str(asn_data['as_number']), + name=asn_data.get('as_name', ''), + org=asn_data.get('as_org', ''), + country=asn_data.get('as_country', '') + ) + results.append(asn) + + except Exception as e: + Logger.error(self.sketch_id, {"message": f"Error getting ASN for domain {domain.domain}: {e}"}) + continue + + return results - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = DomainToAsnScanner.InputType +OutputType = DomainToAsnScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/domains/resolve.py b/flowsint-api/app/scanners/domains/resolve.py index 6d05a8a..a5e4ec1 100644 --- a/flowsint-api/app/scanners/domains/resolve.py +++ b/flowsint-api/app/scanners/domains/resolve.py @@ -1,5 +1,5 @@ import socket -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union from pydantic import 
TypeAdapter from app.scanners.base import Scanner from app.types.domain import Domain @@ -9,11 +9,12 @@ import uuid from app.types.transform import Node, Edge from app.core.logger import Logger -InputType: TypeAlias = List[Domain] -OutputType: TypeAlias = List[Ip] - class ResolveScanner(Scanner): """Resolve domain names to IP addresses.""" + + # Define the input and output types as class attributes + InputType = List[Domain] + OutputType = List[Ip] @classmethod def name(cls) -> str: @@ -27,32 +28,6 @@ class ResolveScanner(Scanner): def key(cls) -> str: return "domain" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -129,4 +104,8 @@ class ResolveScanner(Scanner): Logger.graph_append(self.sketch_id, payload) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = ResolveScanner.InputType +OutputType = ResolveScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/domains/subdomains.py b/flowsint-api/app/scanners/domains/subdomains.py index dff556c..a64ad77 100644 --- a/flowsint-api/app/scanners/domains/subdomains.py +++ b/flowsint-api/app/scanners/domains/subdomains.py @@ -1,18 +1,18 @@ import shutil 
import requests -import subprocess -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Union from app.scanners.base import Scanner -from app.types.domain import Domain, Domain -from app.utils import is_valid_domain, resolve_type -from pydantic import TypeAdapter +from app.types.domain import Domain +from app.utils import is_valid_domain from app.core.logger import Logger from app.tools.network.subfinder import SubfinderTool -InputType: TypeAlias = List[Domain] -OutputType: TypeAlias = List[Domain] class SubdomainScanner(Scanner): """Scanner to find subdomains associated with a domain.""" + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Domain | str] + OutputType = List[Domain] @classmethod def name(cls) -> str: @@ -26,32 +26,6 @@ class SubdomainScanner(Scanner): def key(cls) -> str: return "domain" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] @@ -146,3 +120,7 @@ class SubdomainScanner(Scanner): Logger.graph_append(self.sketch_id, {"message":f"{domain_obj['domain']} -> {len(domain_obj['subdomains'])} subdomain(s) found."}) return output + + +InputType = SubdomainScanner.InputType +OutputType = SubdomainScanner.OutputType \ No newline at 
end of file diff --git a/flowsint-api/app/scanners/domains/to_website.py b/flowsint-api/app/scanners/domains/to_website.py index 1347815..a23acea 100644 --- a/flowsint-api/app/scanners/domains/to_website.py +++ b/flowsint-api/app/scanners/domains/to_website.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union import requests from app.utils import is_valid_domain, resolve_type from app.scanners.base import Scanner @@ -7,13 +7,13 @@ from app.types.website import Website from pydantic import TypeAdapter from app.core.logger import Logger -InputType: TypeAlias = List[Domain] -OutputType: TypeAlias = List[Website] - - class DomainToWebsiteScanner(Scanner): """From domain to website.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Domain] + OutputType = List[Website] + @classmethod def name(cls) -> str: return "to_website" @@ -26,95 +26,54 @@ class DomainToWebsiteScanner(Scanner): def key(cls) -> str: return "domain" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the Website type in $defs - website_def = schema["$defs"].get("Website") - if not website_def: - raise ValueError("Website type not found in schema") - return { - "type": "Website", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: domain_obj = None if 
isinstance(item, str): - domain_obj = Domain(domain=item) + if is_valid_domain(item): + domain_obj = Domain(domain=item) elif isinstance(item, dict) and "domain" in item: - domain_obj = Domain(domain=item["domain"]) + if is_valid_domain(item["domain"]): + domain_obj = Domain(domain=item["domain"]) elif isinstance(item, Domain): domain_obj = item - if domain_obj and is_valid_domain(domain_obj.domain): + if domain_obj: cleaned.append(domain_obj) return cleaned - - def __is_site_active(self, url, timeout=5): - try: - session = requests.Session() - response = session.get(url, timeout=timeout, allow_redirects=False) - - redirects = [] - current_url = url - - # Follow redirects manually to capture the chain - while response.status_code in [301, 302, 303, 307, 308]: - redirects.append(current_url) - if 'Location' in response.headers: - next_url = response.headers['Location'] - # Handle relative URLs - if not next_url.startswith(('http://', 'https://')): - from urllib.parse import urljoin - next_url = urljoin(current_url, response.headers['Location']) - - current_url = next_url - response = session.get(current_url, timeout=timeout, allow_redirects=False) - else: - break - - # Get the final response with redirects allowed - final_response = requests.get(url, timeout=timeout, allow_redirects=True) - return final_response.status_code == 200, final_response.url, redirects - except requests.RequestException: - return False, url, [] async def scan(self, data: InputType) -> OutputType: - """To website""" results: OutputType = [] - for d in data: + for domain in data: try: - initial_url = f"https://{d.domain}" - is_active, final_url, redirects = self.__is_site_active(initial_url) + # Try HTTPS first + try: + https_url = f"https://{domain.domain}" + response = requests.head(https_url, timeout=10, allow_redirects=True) + if response.status_code < 400: + results.append(Website(url=https_url)) + continue + except requests.RequestException: + pass - # Use the last redirect URL as the 
main URL, or the final URL if no redirects - main_url = redirects[-1] if redirects else final_url - - website = Website(url=main_url, redirects=redirects, domain=d, active=is_active) - results.append(website) + # Try HTTP if HTTPS fails + try: + http_url = f"http://{domain.domain}" + response = requests.head(http_url, timeout=10, allow_redirects=True) + if response.status_code < 400: + results.append(Website(url=http_url)) + continue + except requests.RequestException: + pass + + # If both fail, still add HTTPS URL as default + results.append(Website(url=f"https://{domain.domain}")) + except Exception as e: - print(e) - continue - + Logger.error(self.sketch_id, {"message": f"Error converting domain {domain.domain} to website: {e}"}) + # Add HTTPS URL as fallback + results.append(Website(url=f"https://{domain.domain}")) + return results def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: @@ -158,4 +117,8 @@ class DomainToWebsiteScanner(Scanner): } Logger.graph_append(self.sketch_id, payload) - return results \ No newline at end of file + + +# Make types available at module level for easy access +InputType = DomainToWebsiteScanner.InputType +OutputType = DomainToWebsiteScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/domains/whois.py b/flowsint-api/app/scanners/domains/whois.py index 9d7071c..007fc3c 100644 --- a/flowsint-api/app/scanners/domains/whois.py +++ b/flowsint-api/app/scanners/domains/whois.py @@ -1,5 +1,5 @@ import json -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union import whois from app.utils import is_valid_domain, resolve_type from app.scanners.base import Scanner @@ -9,13 +9,13 @@ from app.types.email import Email from pydantic import TypeAdapter from app.core.logger import Logger -InputType: TypeAlias = List[Domain] -OutputType: TypeAlias = List[Whois] - - class WhoisScanner(Scanner): """Scan for WHOIS information of a domain.""" + # 
Define types as class attributes - base class handles schema generation automatically + InputType = List[Domain] + OutputType = List[Whois] + @classmethod def name(cls) -> str: return "to_whois" @@ -28,70 +28,51 @@ class WhoisScanner(Scanner): def key(cls) -> str: return "domain" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the Website type in $defs - whois_def = schema["$defs"].get("Whois") - if not whois_def: - raise ValueError("Whois type not found in schema") - return { - "type": "Whois", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in whois_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: domain_obj = None if isinstance(item, str): - domain_obj = Domain(domain=item) + if is_valid_domain(item): + domain_obj = Domain(domain=item) elif isinstance(item, dict) and "domain" in item: - domain_obj = Domain(domain=item["domain"]) + if is_valid_domain(item["domain"]): + domain_obj = Domain(domain=item["domain"]) elif isinstance(item, Domain): domain_obj = item - if domain_obj and is_valid_domain(domain_obj.domain): + if domain_obj: cleaned.append(domain_obj) return cleaned async def scan(self, data: InputType) -> OutputType: - """Extract WHOIS data for each domain.""" results: OutputType = [] - for d in data: + for domain in data: try: - w = whois.whois(d.domain) - w_data = json.loads(json.dumps(w, default=str)) - whois_obj = Whois( - registrar=w_data.get("registrar"), - 
org=w_data.get("org"), - city=w_data.get("city"), - country=w_data.get("country"), - email=Email(email=w_data["emails"][0]) if isinstance(w_data.get("emails"), list) else None, - creation_date=str(w_data.get("creation_date")) if w_data.get("creation_date") else None, - expiration_date=str(w_data.get("expiration_date")) if w_data.get("expiration_date") else None, - ) - results.append({"whois": whois_obj, "domain": d.domain}) - + whois_info = whois.whois(domain.domain) + if whois_info: + # Extract emails from whois data + emails = [] + if whois_info.emails: + if isinstance(whois_info.emails, list): + emails = [Email(email=email) for email in whois_info.emails if email] + else: + emails = [Email(email=whois_info.emails)] + + whois_obj = Whois( + domain=domain.domain, + registrar=str(whois_info.registrar) if whois_info.registrar else None, + creation_date=whois_info.creation_date, + expiration_date=whois_info.expiration_date, + name_servers=whois_info.name_servers if whois_info.name_servers else [], + emails=emails, + raw_text=str(whois_info) + ) + results.append(whois_obj) + except Exception as e: - print(e) + Logger.error(self.sketch_id, {"message": f"Error getting WHOIS for domain {domain.domain}: {e}"}) continue - + return results def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: @@ -100,7 +81,6 @@ class WhoisScanner(Scanner): continue whois_obj = domain["whois"] Logger.graph_append(self.sketch_id, {"message": f"WHOIS for {domain['domain']} -> registrar: {whois_obj.registrar} org: {whois_obj.org} city: {whois_obj.city} country: {whois_obj.country} creation_date: {whois_obj.creation_date} expiration_date: {whois_obj.expiration_date}"}) - props = { "domain": domain["domain"], "registrar": whois_obj.registrar, @@ -184,3 +164,10 @@ class WhoisScanner(Scanner): }) return results + + + + +# Make types available at module level for easy access +InputType = WhoisScanner.InputType +OutputType = WhoisScanner.OutputType diff --git 
a/flowsint-api/app/scanners/emails/holehe.py b/flowsint-api/app/scanners/emails/holehe.py deleted file mode 100644 index 10c2c3e..0000000 --- a/flowsint-api/app/scanners/emails/holehe.py +++ /dev/null @@ -1,163 +0,0 @@ -from typing import Dict, Any, List, TypeAlias, Union -from app.scanners.base import Scanner -from app.types.email import Email -from app.types.social import SocialProfile -from pydantic import TypeAdapter -from app.utils import is_valid_email, resolve_type -import asyncio -from app.core.logger import Logger -InputType: TypeAlias = List[Email] -OutputType: TypeAlias = List[SocialProfile] - -class HoleheScanner(Scanner): - @classmethod - def name(self) -> str: - return "holehe_scanner" - - @classmethod - def category(self) -> str: - return "Email" - - @classmethod - def key(self) -> str: - return "email" - - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info)} - for prop, info in details["properties"].items() - ] - } - - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: - cleaned: InputType = [] - for item in data: - obj = None - if isinstance(item, str): - obj = Email(email=item) - elif isinstance(item, dict) and "email" in item: - obj = Email(email=item["email"]) - elif isinstance(item, Email): - obj = item - - if obj and obj.email and is_valid_email(obj.email): - cleaned.append(obj) - return cleaned - - async def _perform_holehe_research(self, email: str) -> 
Dict[str, Any]: - """ - Recherche asynchrone sur le réseau social et autres plateformes. - """ - from holehe.modules.social_media import instagram, twitter, snapchat, bitmoji, crevado, discord, strava, imgur, myspace, fanpop, taringa, tellonym, tumblr, odnoklassniki, wattpad, xing, vsco - from holehe.modules.shopping import amazon, ebay, deliveroo, garmin, vivino - from holehe.modules.mails import google, yahoo, protonmail, mail_ru - from holehe.modules.osint import rocketreach - import httpx - - # Initialise le client httpx pour les requêtes HTTP asynchrones - async with httpx.AsyncClient() as client: - results = [] - - modules = [ - amazon.amazon, google.google, yahoo.yahoo, protonmail.protonmail, - instagram.instagram, twitter.twitter, snapchat.snapchat, - rocketreach.rocketreach - ] - - for module in modules: - module_result = [] - try: - await module(email, client, module_result) - if module_result and module_result[0].get("exists") is not None: - results.append(module_result[0]) - except Exception as e: - results.append({"error": f"Error in {module.__name__}: {str(e)}"}) - - return {"email": email, "results": results} - - async def scan(self, emails: List[str]) -> List[Dict[str, Any]]: - """ - Effectue la recherche Holehe pour chaque email de la liste. 
- """ - results = [] - for email in emails: - found = [] - try: - result = await self._perform_holehe_research(email) - for result in result["results"]: - if("error" not in result and "exists" in result): - found.append( - SocialProfile( - username=email.email, - profile_url=f"https://{result['domain']}", - platform=result["name"])) - except Exception as e: - print(e) - continue - results.extend(found) - - - return results - - def execute(self, values: List[str]) -> List[Dict[str, Any]]: - preprocessed = self.preprocess(values) - results = asyncio.run(self.scan(preprocessed)) - try: - return self.postprocess(results, preprocessed) - except TypeError as e: - if "positional argument" in str(e) or "unexpected" in str(e): - return self.postprocess(results) - raise - - - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - if not self.neo4j_conn: - return results - - for profile in results: - Logger.graph_append(self.sketch_id, {"message":f"{profile.username} -> account found on {profile.platform}"}) - self.neo4j_conn.query(""" - MERGE (p:social_profile {profile_url: $profile_url}) - SET p.platform = $platform, - p.username = $username, - p.label = $label, - p.caption = $caption, - p.type = $type, - p.sketch_id = $sketch_id - - MERGE (i:email {email: $email}) - SET i.sketch_id = $sketch_id - MERGE (i)-[:HAS_SOCIAL_ACCOUNT {sketch_id: $sketch_id}]->(p) - """, { - "profile_url": profile.profile_url, - "username": profile.username, - "platform": profile.platform, - "label": f"{profile.platform}:{profile.username}", - "caption": f"{profile.platform}:{profile.username}", - "color": "#1DA1F2", - "email": profile.username, - "type": "social_profile", - "sketch_id": self.sketch_id - }) - - - return results diff --git a/flowsint-api/app/scanners/emails/to_gravatar.py b/flowsint-api/app/scanners/emails/to_gravatar.py index d3cb63b..e043a71 100644 --- a/flowsint-api/app/scanners/emails/to_gravatar.py +++ 
b/flowsint-api/app/scanners/emails/to_gravatar.py @@ -1,23 +1,16 @@ import hashlib -from typing import List, Dict, Any, TypeAlias, Union -from urllib.parse import urlparse - +from typing import List, Dict, Any, Union import requests -from app.utils import resolve_type from app.scanners.base import Scanner -from app.types.website import Website -from app.types.domain import Domain -from pydantic import TypeAdapter from app.core.logger import Logger from app.types.email import Email from app.types.gravatar import Gravatar -InputType: TypeAlias = List[Email] -OutputType: TypeAlias = List[Gravatar] - - class EmailToGravatarScanner(Scanner): - """From email to gravatar.""" + """From md5 hash of email to gravatar.""" + + InputType = List[Email] + OutputType = List[Gravatar] @classmethod def name(cls) -> str: @@ -31,48 +24,14 @@ class EmailToGravatarScanner(Scanner): def key(cls) -> str: return "email" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Email type in $defs - website_def = schema["$defs"].get("Email") - if not website_def: - raise ValueError("Email type not found in schema") - return { - "type": "Email", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the Gravatar type in $defs - domain_def = schema["$defs"].get("Gravatar") - if not domain_def: - raise ValueError("Gravatar type not found in schema") - return { - "type": "Gravatar", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in domain_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: email_obj = None if isinstance(item, str): - # If it's a string, 
treat it as a str email_obj = Email(email=item) elif isinstance(item, dict) and "email" in item: - email_obj = Email(**item) + email_obj = Email(email=item["email"]) elif isinstance(item, Email): email_obj = item if email_obj: @@ -80,60 +39,51 @@ class EmailToGravatarScanner(Scanner): return cleaned async def scan(self, data: InputType) -> OutputType: - """Fetch gravatar from email.""" results: OutputType = [] + for email in data: try: - # MD5 hash of the email - hash = hashlib.md5(email.email.encode()).hexdigest() - url = f"https://www.gravatar.com/avatar/{hash}" - response = requests.get(url) + # Generate MD5 hash of email + email_hash = hashlib.md5(email.email.lower().encode()).hexdigest() + + # Query Gravatar API + gravatar_url = f"https://www.gravatar.com/avatar/{email_hash}?d=404" + response = requests.head(gravatar_url, timeout=10) + if response.status_code == 200: - results.append(Gravatar(src=url, hash=hash)) - else: - continue + # Gravatar found, get profile info + profile_url = f"https://www.gravatar.com/{email_hash}.json" + profile_response = requests.get(profile_url, timeout=10) + + gravatar_data = { + "email": email.email, + "hash": email_hash, + "avatar_url": gravatar_url, + "profile_url": profile_url + } + + if profile_response.status_code == 200: + profile_data = profile_response.json() + if "entry" in profile_data and profile_data["entry"]: + entry = profile_data["entry"][0] + gravatar_data.update({ + "display_name": entry.get("displayName"), + "about_me": entry.get("aboutMe"), + "current_location": entry.get("currentLocation") + }) + + gravatar = Gravatar(**gravatar_data) + results.append(gravatar) + except Exception as e: - Logger.info(self.sketch_id, {"message": f"No gravatar found for email {email.email}: {e}"}) + Logger.error(self.sketch_id, {"message": f"Error checking Gravatar for email {email.email}: {e}"}) continue - + return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - for i, gravatar 
in enumerate(results): - email = original_input[i] if i < len(original_input) else None - - query = """ - MERGE (g:gravatar {hash: $hash}) - SET g.sketch_id = $sketch_id, - g.label = $src, - g.type = "gravatar", - g.src = $src - """ - if email: - query += """ - MERGE (e:email {email: $email}) - SET e.sketch_id = $sketch_id, - e.label = $email, - e.type = "email" - MERGE (e)-[:HAS_GRAVATAR {sketch_id: $sketch_id}]->(g) - """ - - if self.neo4j_conn: - params = { - "hash": gravatar.hash, - "src": str(gravatar.src), - "sketch_id": self.sketch_id, - } - if email: - params.update({ - "email": email.email, - }) - self.neo4j_conn.query(query, params) - - email_address = email.email if email else "unknown" - payload: Dict = { - "message": f"{email_address} -> {gravatar.hash}" - } - Logger.graph_append(self.sketch_id, payload) - - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = EmailToGravatarScanner.InputType +OutputType = EmailToGravatarScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/emails/to_leaks.py b/flowsint-api/app/scanners/emails/to_leaks.py index c959bf3..24ff06a 100644 --- a/flowsint-api/app/scanners/emails/to_leaks.py +++ b/flowsint-api/app/scanners/emails/to_leaks.py @@ -1,14 +1,8 @@ -import hashlib import os -from typing import List, Dict, Any, TypeAlias, Union -from urllib.parse import urlparse - +from typing import Any, Dict, List, Union import requests -from app.utils import resolve_type +from urllib.parse import urljoin from app.scanners.base import Scanner -from app.types.website import Website -from app.types.domain import Domain -from pydantic import TypeAdapter from app.core.logger import Logger from app.types.email import Email from app.types.breach import Breach @@ -19,12 +13,11 @@ load_dotenv() HIBP_API_KEY = os.getenv("HIBP_API_KEY") 
-InputType: TypeAlias = List[Email] -OutputType: TypeAlias = List[Breach] - - class EmailToBreachesScanner(Scanner): - """From email to breaches.""" + """From email to breaches using Have I Been Pwned API.""" + + InputType = List[Email] + OutputType = List[Breach] @classmethod def name(cls) -> str: @@ -37,49 +30,38 @@ class EmailToBreachesScanner(Scanner): @classmethod def key(cls) -> str: return "email" - + @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Email type in $defs - website_def = schema["$defs"].get("Email") - if not website_def: - raise ValueError("Email type not found in schema") - return { - "type": "Email", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - - + def required_params(cls) -> bool: + return True + @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the Breach type in $defs - breach_def = schema["$defs"].get("Breach") - if not breach_def: - raise ValueError("Breach type not found in schema") - return { - "type": "Breach", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in breach_def["properties"].items() - ] - } + def get_params_schema(cls) -> List[Dict[str, Any]]: + """Declare required parameters for this scanner""" + return [ + { + "name": "HIBP_API_KEY", + "type": "vaultSecret", + "description": "The HIBP API key to use for breaches lookup.", + "required": True + }, + { + "name": "HIBP_API_URL", + "type": "url", + "description": "The HIBP API URL to use for breaches lookup.", + "required": False, + "default": "https://haveibeenpwned.com/api/v3/breachedaccount/" + } + ] def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: email_obj = None if isinstance(item, str): - # If it's 
a string, treat it as a str email_obj = Email(email=item) elif isinstance(item, dict) and "email" in item: - email_obj = Email(**item) + email_obj = Email(email=item["email"]) elif isinstance(item, Email): email_obj = item if email_obj: @@ -87,139 +69,65 @@ class EmailToBreachesScanner(Scanner): return cleaned async def scan(self, data: InputType) -> OutputType: - """Fetch breaches from email using HaveIBeenPwned API.""" results: OutputType = [] - if not HIBP_API_KEY: - raise ValueError("HIBP_API_KEY not set for this account. Usr the Vault to set your haveibeenpwned key. ") + api_key = self.resolve_params()["HIBP_API_KEY"] + api_url = self.resolve_params()["HIBP_API_URL"] + if not api_key: + Logger.error(self.sketch_id, {"message": "A valid HIBP_API_KEY is required to scan for breaches."}) + if not api_url: + Logger.error(self.sketch_id, {"message": "Could not find HIBP_API_URL in params."}) + headers = { + "hibp-api-key": api_key, + "User-Agent": "FlowsInt-Scanner" + } + Logger.info(self.sketch_id, {"message": f"HIBP API key: {api_key}"}) + Logger.info(self.sketch_id, {"message": f"HIBP API URL: {api_url}"}) for email in data: try: - url = f"https://haveibeenpwned.com/api/v3/breachedaccount/{email.email}?truncateResponse=false" - headers = {"hibp-api-key": HIBP_API_KEY} if HIBP_API_KEY else {} - response = requests.get(url, headers=headers) + # Query Have I Been Pwned API + full_url = urljoin(api_url, email.email) + response = requests.get(full_url, headers=headers, timeout=10) + Logger.info(self.sketch_id, {"message": f"HIBP API response: {response.json()}"}) if response.status_code == 200: breaches_data = response.json() - # Create a Breach object for each breach in the response - for breach_item in breaches_data: - # Lowercase all keys for the model - breach_item_lc = {k.lower(): v for k, v in breach_item.items()} - name_value = breach_item.get("Name") - name = name_value.lower() if name_value else "unknown" - # Lowercase the value of the 'name' key in the 
breach dict as well - if "name" in breach_item_lc and breach_item_lc["name"]: - breach_item_lc["name"] = breach_item_lc["name"].lower() + for breach_data in breaches_data: breach = Breach( - name=name, - **{k: breach_item_lc.get(k) for k in Breach.model_fields.keys() if k not in ("breach", "name")}, - breach=breach_item_lc + name=breach_data.get("Name", ""), + title=breach_data.get("Title", ""), + domain=breach_data.get("Domain", ""), + breach_date=breach_data.get("BreachDate", ""), + added_date=breach_data.get("AddedDate", ""), + modified_date=breach_data.get("ModifiedDate", ""), + pwn_count=breach_data.get("PwnCount", 0), + description=breach_data.get("Description", ""), + data_classes=breach_data.get("DataClasses", []), + is_verified=breach_data.get("IsVerified", False), + is_fabricated=breach_data.get("IsFabricated", False), + is_sensitive=breach_data.get("IsSensitive", False), + is_retired=breach_data.get("IsRetired", False), + is_spam_list=breach_data.get("IsSpamList", False), + logo_path=breach_data.get("LogoPath", "") ) results.append(breach) - else: + + elif response.status_code == 404: + # No breaches found for this email + Logger.info(self.sketch_id, {"message": f"No breaches found for email {email.email}"}) continue + + else: + Logger.error(self.sketch_id, {"message": f"HIBP API error for {email.email}: {response.status_code}"}) + continue + except Exception as e: - Logger.info(self.sketch_id, {"message": f"No breach found for email {email.email}: {e}"}) + Logger.error(self.sketch_id, {"message": f"Error checking breaches for email {email.email}: {e}"}) continue - + return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - # Create a mapping of email to breaches - email_to_breaches = {} - for i, breach in enumerate(results): - # Find the corresponding email (assuming one email can have multiple breaches) - # We need to track which email this breach belongs to - # For now, we'll use the first email if we have 
multiple breaches - email_index = min(i, len(original_input) - 1) if original_input else None - email = original_input[email_index] if email_index is not None else None - - if email: - if email.email not in email_to_breaches: - email_to_breaches[email.email] = [] - email_to_breaches[email.email].append(breach) - - # Create breach node with all properties - query = """ - MERGE (b:breach {name: $name}) - SET b.sketch_id = $sketch_id, - b.label = $name, - b.type = "breach", - b.title = $title, - b.domain = $domain, - b.breachdate = $breachdate, - b.addeddate = $addeddate, - b.modifieddate = $modifieddate, - b.pwncount = $pwncount, - b.description = $description, - b.src = $logopath, - b.dataclasses = $dataclasses, - b.isverified = $isverified, - b.isfabricated = $isfabricated, - b.issensitive = $issensitive, - b.isretired = $isretired, - b.isspamlist = $isspamlist, - b.ismalware = $ismalware, - b.isstealerlog = $isstealerlog, - b.issubscriptionfree = $issubscriptionfree - """ - - if self.neo4j_conn: - params = { - "name": breach.name, - "sketch_id": self.sketch_id, - "title": breach.title, - "domain": breach.domain, - "breachdate": breach.breachdate, - "addeddate": breach.addeddate, - "modifieddate": breach.modifieddate, - "pwncount": breach.pwncount, - "description": breach.description, - "logopath": breach.logopath, - "dataclasses": breach.dataclasses, - "isverified": breach.isverified, - "isfabricated": breach.isfabricated, - "issensitive": breach.issensitive, - "isretired": breach.isretired, - "isspamlist": breach.isspamlist, - "ismalware": breach.ismalware, - "isstealerlog": breach.isstealerlog, - "issubscriptionfree": breach.issubscriptionfree, - } - self.neo4j_conn.query(query, params) - - # Create email nodes and relationships - for email_email, breaches in email_to_breaches.items(): - email_query = """ - MERGE (e:email {email: $email}) - SET e.sketch_id = $sketch_id, - e.label = $email, - e.type = "email" - """ - - if self.neo4j_conn: - email_params = { - 
"email": email_email, - "sketch_id": self.sketch_id, - } - self.neo4j_conn.query(email_query, email_params) - - # Create relationships for each breach - for breach in breaches: - rel_query = """ - MATCH (e:email {email: $email}) - MATCH (b:breach {name: $breach_name}) - MERGE (e)-[:HAS_BREACH {sketch_id: $sketch_id}]->(b) - """ - - if self.neo4j_conn: - rel_params = { - "email": email_email, - "breach_name": breach.name, - "sketch_id": self.sketch_id, - } - self.neo4j_conn.query(rel_query, rel_params) - - payload: Dict = { - "message": f"{email_email} -> {breach.name}" - } - Logger.graph_append(self.sketch_id, payload) - - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = EmailToBreachesScanner.InputType +OutputType = EmailToBreachesScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/individuals/to_org.py b/flowsint-api/app/scanners/individuals/to_org.py index a8e8e05..12938af 100644 --- a/flowsint-api/app/scanners/individuals/to_org.py +++ b/flowsint-api/app/scanners/individuals/to_org.py @@ -1,20 +1,18 @@ import requests -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Union from app.scanners.base import Scanner from app.types.organization import Organization from app.types.individual import Individual -from app.utils import resolve_type from app.core.logger import Logger from app.tools.organizations.sirene import SireneTool - -InputType: TypeAlias = List[Individual] -OutputType: TypeAlias = List[Organization] - class IndividualToOrgScanner(Scanner): """Find organization from a person with data from SIRENE (France only).""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Individual] + OutputType = List[Organization] + 
@classmethod def name(cls) -> str: return "to_org" @@ -27,45 +25,6 @@ class IndividualToOrgScanner(Scanner): def key(cls) -> str: return "fullname" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Organization type in $defs - organization_def = schema["$defs"].get("Individual") - if not organization_def: - raise ValueError("Individual type not found in schema") - return { - "type": "Individual", - "properties": [ - {"name": "fullname", "type": "string"} - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # the items property contains the Organization type reference - items_schema = schema.get("items", {}) - if "$ref" in items_schema: - # Extract the type name from the $ref (e.g., "#/$defs/Organization" -> "Organization") - ref_path = items_schema["$ref"] - type_name = ref_path.split("/")[-1] - organization_def = schema["$defs"].get(type_name) - if not organization_def: - raise ValueError(f"Type {type_name} not found in schema") - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in organization_def["properties"].items() - ] - } - else: - raise ValueError("Expected $ref in items schema for List type") - - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: if not isinstance(data, list): raise ValueError(f"Expected list input, got {type(data).__name__}") @@ -88,17 +47,17 @@ class IndividualToOrgScanner(Scanner): async def scan(self, data: InputType) -> OutputType: results: OutputType = [] - for indiv in data: + for individual in data: try: sirene = SireneTool() - raw_orgs = sirene.launch(f'{indiv.first_name}+{indiv.last_name}', limit=25) + raw_orgs = sirene.launch(individual.full_name, limit=25) if len(raw_orgs)> 0: for org_dict in raw_orgs: enriched_org = self.enrich_org(org_dict) if 
enriched_org is not None: results.append(enriched_org) except Exception as e: - continue + Logger.error(self.sketch_id, {"message": f"Error finding organization for {individual.full_name}: {e}"}) return results def enrich_org(self, company: Dict) -> Organization: @@ -121,7 +80,6 @@ class IndividualToOrgScanner(Scanner): # Extract dirigeants and convert to Individual objects dirigeants = [] for dirigeant_data in company.get("dirigeants", []): - from app.types.individual import Individual dirigeant = Individual( first_name=dirigeant_data.get("prenoms", ""), last_name=dirigeant_data.get("nom", ""), @@ -272,26 +230,10 @@ class IndividualToOrgScanner(Scanner): self.neo4j_conn.query(""" MERGE (o:Organization {name: $name, country: $country}) SET o.siren = $siren, + o.siege_siret = $siret, o.nom_complet = $nom_complet, o.nom_raison_sociale = $nom_raison_sociale, o.sigle = $sigle, - o.nombre_etablissements = $nombre_etablissements, - o.nombre_etablissements_ouverts = $nombre_etablissements_ouverts, - o.activite_principale = $activite_principale, - o.section_activite_principale = $section_activite_principale, - o.categorie_entreprise = $categorie_entreprise, - o.annee_categorie_entreprise = $annee_categorie_entreprise, - o.caractere_employeur = $caractere_employeur, - o.tranche_effectif_salarie = $tranche_effectif_salarie, - o.annee_tranche_effectif_salarie = $annee_tranche_effectif_salarie, - o.date_creation = $date_creation, - o.date_fermeture = $date_fermeture, - o.date_mise_a_jour = $date_mise_a_jour, - o.date_mise_a_jour_insee = $date_mise_a_jour_insee, - o.date_mise_a_jour_rne = $date_mise_a_jour_rne, - o.nature_juridique = $nature_juridique, - o.etat_administratif = $etat_administratif, - o.statut_diffusion = $statut_diffusion, o.sketch_id = $sketch_id, o.label = $name, o.caption = $name, @@ -300,133 +242,19 @@ class IndividualToOrgScanner(Scanner): "name": org.name, "country": "FR", "siren": org.siren, + "siret": org.siege_siret, "nom_complet": org.nom_complet, 
"nom_raison_sociale": org.nom_raison_sociale, "sigle": org.sigle, - "nombre_etablissements": org.nombre_etablissements, - "nombre_etablissements_ouverts": org.nombre_etablissements_ouverts, - "activite_principale": org.activite_principale, - "section_activite_principale": org.section_activite_principale, - "categorie_entreprise": org.categorie_entreprise, - "annee_categorie_entreprise": org.annee_categorie_entreprise, - "caractere_employeur": org.caractere_employeur, - "tranche_effectif_salarie": org.tranche_effectif_salarie, - "annee_tranche_effectif_salarie": org.annee_tranche_effectif_salarie, - "date_creation": org.date_creation, - "date_fermeture": org.date_fermeture, - "date_mise_a_jour": org.date_mise_a_jour, - "date_mise_a_jour_insee": org.date_mise_a_jour_insee, - "date_mise_a_jour_rne": org.date_mise_a_jour_rne, - "nature_juridique": org.nature_juridique, - "etat_administratif": org.etat_administratif, - "statut_diffusion": org.statut_diffusion, "sketch_id": self.sketch_id, }) - # Add SIREN as identifier if available - if org.siren: - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: SIREN {org.siren} -> {org.name}"}) - - # Add SIRET as identifier if available - if org.siege_siret: - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: SIRET {org.siege_siret} -> {org.name}"}) - - # Add dirigeants (leaders) as Individual nodes with relationships - if org.dirigeants: - for dirigeant in org.dirigeants: - self.neo4j_conn.query(""" - MERGE (i:Individual {full_name: $full_name}) - SET i.first_name = $first_name, - i.last_name = $last_name, - i.birth_date = $birth_date, - i.gender = $gender, - i.sketch_id = $sketch_id, - i.label = $full_name, - i.caption = $full_name, - i.type = 'individual' - WITH i - MATCH (o:Organization {name: $org_name, country: $org_country}) - MERGE (o)-[:HAS_LEADER {sketch_id: $sketch_id}]->(i) - """, { - "full_name": dirigeant.full_name, - "first_name": dirigeant.first_name, - "last_name": dirigeant.last_name, - 
"birth_date": dirigeant.birth_date, - "gender": dirigeant.gender, - "sketch_id": self.sketch_id, - "org_name": org.name, - "org_country": "FR", - }) - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: HAS_LEADER -> {dirigeant.full_name}"}) - - # Add siege address as PhysicalAddress node if available - if org.siege_geo_adresse: - address = org.siege_geo_adresse - self.neo4j_conn.query(""" - MERGE (a:PhysicalAddress {address: $address, city: $city, country: $country}) - SET a.zip = $zip, - a.latitude = $latitude, - a.longitude = $longitude, - a.sketch_id = $sketch_id, - a.label = $label, - a.caption = $caption, - a.type = 'location' - WITH a - MATCH (o:Organization {name: $org_name, country: $org_country}) - MERGE (o)-[:HAS_ADDRESS {sketch_id: $sketch_id}]->(a) - """, { - "address": address.address, - "city": address.city, - "country": address.country, - "zip": address.zip, - "latitude": address.latitude, - "longitude": address.longitude, - "sketch_id": self.sketch_id, - "label": f"{address.address}, {address.city}", - "caption": f"{address.address}, {address.city}", - "org_name": org.name, - "org_country": "FR", - }) - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: HAS_ADDRESS -> {address.address}, {address.city}"}) - - # Add siege location as Location node if coordinates are available but no PhysicalAddress - elif org.siege_latitude and org.siege_longitude: - self.neo4j_conn.query(""" - MERGE (l:Location {latitude: $latitude, longitude: $longitude}) - SET l.address = $address, - l.city = $city, - l.country = $country, - l.zip = $zip, - l.sketch_id = $sketch_id, - l.label = $label, - l.caption = $caption, - l.type = 'location' - WITH l - MATCH (o:Organization {name: $org_name, country: $org_country}) - MERGE (o)-[:LOCATED_AT {sketch_id: $sketch_id}]->(l) - """, { - "latitude": float(org.siege_latitude), - "longitude": float(org.siege_longitude), - "address": org.siege_adresse, - "city": org.siege_libelle_commune, - "country": "FR", - 
"zip": org.siege_code_postal, - "sketch_id": self.sketch_id, - "label": f"{org.siege_adresse or 'Unknown'}, {org.siege_libelle_commune or 'Unknown'}", - "caption": f"{org.siege_adresse or 'Unknown'}, {org.siege_libelle_commune or 'Unknown'}", - "org_name": org.name, - "org_country": "FR", - }) - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: LOCATED_AT -> {org.siege_libelle_commune or 'Unknown'}"}) - - # Add activity codes as Activity nodes - if org.activite_principale: - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: HAS_ACTIVITY -> {org.activite_principale}"}) - - # Add legal nature as LegalNature node - if org.nature_juridique: - Logger.graph_append(self.sketch_id, {"message": f"{org.name}: HAS_LEGAL_NATURE -> {org.nature_juridique}"}) + Logger.graph_append(self.sketch_id, {"message": f"Found organization: {org.name}"}) return results +# Make types available at module level for easy access +InputType = IndividualToOrgScanner.InputType +OutputType = IndividualToOrgScanner.OutputType + diff --git a/flowsint-api/app/scanners/ips/asn_to_cidrs.py b/flowsint-api/app/scanners/ips/asn_to_cidrs.py index 967921e..33cf2d2 100644 --- a/flowsint-api/app/scanners/ips/asn_to_cidrs.py +++ b/flowsint-api/app/scanners/ips/asn_to_cidrs.py @@ -1,21 +1,21 @@ import json import socket import subprocess -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Union from app.scanners.base import Scanner from app.types.cidr import CIDR from app.types.ip import Ip from app.types.asn import ASN -from app.utils import is_valid_asn, parse_asn, resolve_type +from app.utils import is_valid_asn, parse_asn from app.core.logger import Logger -InputType: TypeAlias = List[ASN] -OutputType: TypeAlias = List[CIDR] - class AsnToCidrsScanner(Scanner): """Takes an ASN and returns its corresponding CIDRs.""" + # Define types as class attributes - base class handles schema generation automatically + 
InputType = List[ASN] + OutputType = List[CIDR] + @classmethod def name(cls) -> str: return "asn_to_cidrs_scanner" @@ -24,32 +24,6 @@ class AsnToCidrsScanner(Scanner): def category(cls) -> str: return "Asn" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[int], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -205,4 +179,8 @@ class AsnToCidrsScanner(Scanner): "cidr_network": str(cidr.network), "sketch_id": self.sketch_id, }) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = AsnToCidrsScanner.InputType +OutputType = AsnToCidrsScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/ips/cidr_to_ips.py b/flowsint-api/app/scanners/ips/cidr_to_ips.py index a0a1bb2..d77a4ec 100644 --- a/flowsint-api/app/scanners/ips/cidr_to_ips.py +++ b/flowsint-api/app/scanners/ips/cidr_to_ips.py @@ -1,18 +1,17 @@ import subprocess -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Union from app.scanners.base import Scanner from app.types.cidr import CIDR from app.types.ip import Ip -from app.utils import resolve_type from app.core.logger import Logger -InputType: TypeAlias = List[CIDR] 
-OutputType: TypeAlias = List[Ip] - class CidrToIpsScanner(Scanner): """Takes a CIDR and returns its corresponding IP addresses.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[CIDR] + OutputType = List[Ip] + @classmethod def name(cls) -> str: return "cidr_to_ips_scanner" @@ -21,32 +20,6 @@ class CidrToIpsScanner(Scanner): def category(cls) -> str: return "Cidr" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -128,4 +101,8 @@ class CidrToIpsScanner(Scanner): "ip_address": ip.address, "sketch_id": self.sketch_id, }) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = CidrToIpsScanner.InputType +OutputType = CidrToIpsScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/ips/geolocation.py b/flowsint-api/app/scanners/ips/geolocation.py index 50bfb1d..de335e5 100644 --- a/flowsint-api/app/scanners/ips/geolocation.py +++ b/flowsint-api/app/scanners/ips/geolocation.py @@ -12,6 +12,11 @@ OutputType: TypeAlias = List[Ip] class GeolocationScanner(Scanner): """Get geolocation data for IP addresses.""" + + # Define types as class attributes - 
base class handles schema generation automatically + InputType = List[Ip] + OutputType = List[Ip] + @classmethod def name(cls) -> str: return "ip_geolocation_scanner" @@ -128,3 +133,6 @@ class GeolocationScanner(Scanner): except Exception as e: print(f"Failed to geolocate {address}: {e}") return {} + +InputType = GeolocationScanner.InputType +OutputType = GeolocationScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/ips/ip_to_asn.py b/flowsint-api/app/scanners/ips/ip_to_asn.py index f0d88aa..32376a7 100644 --- a/flowsint-api/app/scanners/ips/ip_to_asn.py +++ b/flowsint-api/app/scanners/ips/ip_to_asn.py @@ -1,7 +1,7 @@ import json import socket import subprocess -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union from pydantic import TypeAdapter from app.scanners.base import Scanner from app.types.cidr import CIDR @@ -10,11 +10,12 @@ from app.types.asn import ASN from app.utils import is_valid_ip, resolve_type from app.core.logger import Logger -InputType: TypeAlias = List[Ip] -OutputType: TypeAlias = List[ASN] - class IpToAsnScanner(Scanner): - """Takes an IP addreses and returns its corresponding ASN.""" + """Takes an IP address and returns its corresponding ASN.""" + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Ip] + OutputType = List[ASN] @classmethod def name(cls) -> str: @@ -28,144 +29,57 @@ class IpToAsnScanner(Scanner): def key(cls) -> str: return "address" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = 
adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: ip_obj = None if isinstance(item, str): - ip_obj = Ip(address=item) + if is_valid_ip(item): + ip_obj = Ip(address=item) elif isinstance(item, dict) and "address" in item: - ip_obj = Ip(address=item["address"]) + if is_valid_ip(item["address"]): + ip_obj = Ip(address=item["address"]) elif isinstance(item, Ip): ip_obj = item - if ip_obj and is_valid_ip(ip_obj.address): + if ip_obj: cleaned.append(ip_obj) return cleaned async def scan(self, data: InputType) -> OutputType: - """Find ASN information for IP addresses using asnmap.""" - asns: OutputType = [] - + results: OutputType = [] + for ip in data: - asn_data = self.__get_asn_from_asnmap(ip.address) - if asn_data: - Logger.info(self.sketch_id, {"message": f"IP {ip.address} has ASN {asn_data['as_number']}."}) - asns.append(ASN( - number=int(asn_data["as_number"].lstrip("AS")), - name=asn_data["as_name"], - country=asn_data["as_country"], - cidrs=[CIDR(network=cidr) for cidr in asn_data["as_range"]] - )) - else: - Logger.info(self.sketch_id, {"message": f"No ASN found for IP {ip.address}"}) - return asns - - def __get_asn_from_asnmap(self, ip: str) -> Dict[str, Any]: - try: - # Properly run the shell pipeline using shell=True - command = f"echo {ip} | asnmap -silent -json | jq -s '.'" - result = subprocess.run( - command, - shell=True, - capture_output=True, text=True, timeout=60 - ) - if not result.stdout.strip(): - Logger.info(self.sketch_id, {"message": f"No ASN found for {ip}."}) - return None try: - # Parse the JSON array - data_array = json.loads(result.stdout) - if not data_array: - return None - - combined_data = { - "as_number": None, - "as_name": 
None, - "as_country": None, - "as_range": [] - } - - for data in data_array: - if data.get("as_number") and not combined_data["as_number"]: - combined_data["as_number"] = data["as_number"] - if data.get("as_name") and not combined_data["as_name"]: - combined_data["as_name"] = data["as_name"] - if data.get("as_country") and not combined_data["as_country"]: - combined_data["as_country"] = data["as_country"] - if "as_range" in data: - combined_data["as_range"].extend(data["as_range"]) - - return combined_data if combined_data["as_number"] else None - - except json.JSONDecodeError: - Logger.error(self.sketch_id, {"message": f"Failed to parse JSON from asnmap output: {result.stdout}"}) - return None - - except Exception as e: - Logger.error(self.sketch_id, {"message": f"asnmap exception for {ip}: {str(e)}"}) - return None - - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - # Create Neo4j relationships between IP addresses and their corresponding ASNs - for input_ip, result_asn in zip(original_input, results): - Logger.graph_append(self.sketch_id, {"message": f"IP {input_ip.address} -> ASN {result_asn.number}"}) - # Skip if no valid ASN was found - if result_asn.number == 0: + # Use asnmap to get ASN info + result = subprocess.run( + ["asnmap", "-a", ip.address, "-json"], + capture_output=True, + text=True, + timeout=30 + ) + + if result.returncode == 0: + output = result.stdout.strip() + if output: + asn_data = json.loads(output) + if asn_data and 'as_number' in asn_data: + asn = ASN( + asn=str(asn_data['as_number']), + name=asn_data.get('as_name', ''), + org=asn_data.get('as_org', ''), + country=asn_data.get('as_country', '') + ) + results.append(asn) + + except Exception as e: + Logger.error(self.sketch_id, {"message": f"Error getting ASN for IP {ip.address}: {e}"}) continue - query = """ - MERGE (ip:ip {address: $ip_address}) - SET ip.sketch_id = $sketch_id, - ip.label = $ip_address, - ip.caption = $ip_address, - ip.type = "ip" 
- - MERGE (asn:asn {number: $asn_number}) - SET asn.sketch_id = $sketch_id, - asn.name = $asn_name, - asn.country = $asn_country, - asn.label = $asn_label, - asn.caption = $asn_caption, - asn.type = "asn" - - MERGE (ip)-[:BELONGS_TO {sketch_id: $sketch_id}]->(asn) - """ - - if self.neo4j_conn: - self.neo4j_conn.query(query, { - "ip_address": input_ip.address, - "asn_number": result_asn.number, - "asn_name": result_asn.name, - "asn_country": result_asn.country, - "asn_label": f"AS{result_asn.number}", - "asn_caption": f"AS{result_asn.number} - {result_asn.name}", - "sketch_id": self.sketch_id, - }) + return results - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = IpToAsnScanner.InputType +OutputType = IpToAsnScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/ips/reverse_resolve.py b/flowsint-api/app/scanners/ips/reverse_resolve.py index df5c503..3a9484e 100644 --- a/flowsint-api/app/scanners/ips/reverse_resolve.py +++ b/flowsint-api/app/scanners/ips/reverse_resolve.py @@ -3,7 +3,7 @@ import os import socket import dns.resolver import requests -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union from pydantic import TypeAdapter from app.core.logger import Logger from app.scanners.base import Scanner @@ -11,14 +11,15 @@ from app.types.domain import Domain from app.types.ip import Ip from app.utils import resolve_type, is_valid_ip -InputType: TypeAlias = List[Ip] -OutputType: TypeAlias = List[Domain] - PTR_BLACKLIST = re.compile(r"^ip\d+\.ip-\d+-\d+-\d+-\d+\.") class ReverseResolveScanner(Scanner): """Resolve IP addresses to domain names using PTR, Certificate Transparency and optional API calls.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Ip] + 
OutputType = List[Domain] + @classmethod def name(cls) -> str: return "ip_reverse_resolve_scanner" @@ -31,141 +32,61 @@ class ReverseResolveScanner(Scanner): def key(cls) -> str: return "address" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: ip_obj = None if isinstance(item, str): - ip_obj = Ip(address=item) + if is_valid_ip(item): + ip_obj = Ip(address=item) elif isinstance(item, dict) and "address" in item: - ip_obj = Ip(address=item["address"]) + if is_valid_ip(item["address"]): + ip_obj = Ip(address=item["address"]) elif isinstance(item, Ip): ip_obj = item - if ip_obj and is_valid_ip(ip_obj.address): + if ip_obj: cleaned.append(ip_obj) return cleaned async def scan(self, data: InputType) -> OutputType: results: OutputType = [] + for ip in data: try: - domains = self.get_domains_from_ip(ip.address) - for d in domains: - results.append(Domain(domain=d)) + # Try PTR lookup + try: + hostname = socket.gethostbyaddr(ip.address)[0] + if hostname and not PTR_BLACKLIST.match(hostname): + domain = Domain(domain=hostname) + results.append(domain) + continue + except socket.herror: + pass + + # Try Certificate Transparency logs + try: + ct_url = f"https://crt.sh/?q={ip.address}&output=json" + response = 
requests.get(ct_url, timeout=10) + if response.status_code == 200: + ct_data = response.json() + for entry in ct_data[:5]: # Limit to first 5 results + name_value = entry.get("name_value", "") + if name_value and name_value != ip.address: + domain = Domain(domain=name_value) + results.append(domain) + break + except Exception: + pass + except Exception as e: - print(f"Error resolving {ip.address}: {e}") + Logger.error(self.sketch_id, {"message": f"Error reverse resolving IP {ip.address}: {e}"}) + continue + return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - for ip_obj, domain_obj in zip(original_input, results): - Logger.graph_append(self.sketch_id, {"message": f"Reverse resolved {ip_obj.address} -> {domain_obj.domain}"}) - query = """ - MERGE (ip:ip {address: $address}) - SET ip.sketch_id = $sketch_id, - ip.label = $label, - ip.caption = $caption, - ip.type = $type - MERGE (domain:domain {domain: $domain}) - SET domain.sketch_id = $sketch_id, - domain.label = $domain, - domain.caption = $domain, - domain.type = $domain_type - MERGE (ip)-[:REVERSE_RESOLVES_TO {sketch_id: $sketch_id}]->(domain) - """ - if self.neo4j_conn: - self.neo4j_conn.query(query, { - "domain": domain_obj.domain, - "address": ip_obj.address, - "sketch_id": self.sketch_id, - "label": ip_obj.address, - "caption": ip_obj.address, - "type": "ip", - "domain_type":"domain" - }) - + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: return results - @classmethod - def get_domains_from_ip(cls, address: str) -> List[str]: - """ - 1) Attempt PTR lookup and filter generic provider names. - 2) Query crt.sh for certificates matching the IP SAN/CN. - 3) (Optional) Query a Reverse-IP API if API key is set. - Returns a unique, sorted list of candidate domains. 
- """ - candidates: List[str] = [] - - try: - answers = dns.resolver.resolve_address(address) - ptr = answers[0].to_text().rstrip('.') - if not PTR_BLACKLIST.match(ptr): - candidates.append(ptr) - except Exception: - pass - - # 2. Certificate Transparency via crt.sh - try: - url = f"https://crt.sh/?q=%25.{address}&output=json" - resp = requests.get(url, timeout=10) - resp.raise_for_status() - entries = resp.json() - for entry in entries: - names = entry.get("name_value", "").split("\n") - for name in names: - # skip wildcards and pure IPs - name = name.strip().lower() - if name.startswith("*."): - name = name[2:] - if name and not re.match(r"^\d+\.\d+\.\d+\.\d+$", name): - candidates.append(name) - except Exception: - pass - - # 3. Reverse-IP API (e.g., SecurityTrails) - api_key = os.getenv("REVERSE_IP_API_KEY") - if api_key: - try: - headers = {"APIKEY": api_key} - # Example endpoint; replace with your provider's - api_url = f"https://api.securitytrails.com/v1/ips/hostname/{address}" - r = requests.get(api_url, headers=headers, timeout=10) - r.raise_for_status() - hosts = r.json().get("hostnames", []) - candidates.extend(hosts) - except Exception: - pass - - # Deduplicate and clean - unique = [] - for c in candidates: - c = c.lower().rstrip('.') - if c not in unique: - unique.append(c) - - return unique \ No newline at end of file +# Make types available at module level for easy access +InputType = ReverseResolveScanner.InputType +OutputType = ReverseResolveScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/leaks/hibp.py b/flowsint-api/app/scanners/leaks/hibp.py index e67dcd3..90428fd 100644 --- a/flowsint-api/app/scanners/leaks/hibp.py +++ b/flowsint-api/app/scanners/leaks/hibp.py @@ -1,8 +1,9 @@ import json import uuid -from typing import Dict, Any, List +from typing import Dict, Any, List, Union import hibpwned from app.scanners.base import Scanner +from app.core.logger import Logger import os from dotenv import load_dotenv @@ 
-14,30 +15,35 @@ HIBP_API_KEY = os.getenv("HIBP_API_KEY") class HibpScanner(Scanner): """Queries HaveIBeenPwned for potential leaks.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[str] # Email addresses as strings + OutputType = List[Dict[str, Any]] # Breach results as dictionaries + @classmethod - def name(self) -> str: + def name(cls) -> str: return "hibp_scanner" @classmethod - def category(self) -> str: + def category(cls) -> str: return "leaks" @classmethod - def key(self) -> str: + def key(cls) -> str: return "email" - @classmethod - def input_schema(self) -> Dict[str, str]: - return ["email", "number", "full_name", "username"] - - @classmethod - def output_schema(self) -> Dict[str, str]: - return ["email", "breaches", "adobe", "data", "pastes", "password", "hashes"] + def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: + cleaned: InputType = [] + for item in data: + if isinstance(item, str): + cleaned.append(item) + elif isinstance(item, dict) and "email" in item: + cleaned.append(item["email"]) + return cleaned - async def scan(self, emails: List[str]) -> List[Dict[str, Any]]: + async def scan(self, data: InputType) -> OutputType: """Performs a search on HaveIBeenPwned for a list of emails.""" - results = [] - for email in emails: + results: OutputType = [] + for email in data: try: result = hibpwned.Pwned(email, "MyHIBPChecker", HIBP_API_KEY) @@ -62,9 +68,54 @@ class HibpScanner(Scanner): "email": email, "error": f"Error during scan: {str(e)}", }) + Logger.error(self.sketch_id, {"message": f"Error scanning email {email}: {str(e)}"}) return results - def postprocess(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Adds additional metadata to the results.""" - return {"output":results} + def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: + """Create Neo4j relationships for found breaches.""" + if not 
self.neo4j_conn: + return results + + for result in results: + if "error" not in result: + email = result["email"] + + # Create email node + email_query = """ + MERGE (email:email {address: $address}) + SET email.sketch_id = $sketch_id, + email.label = $address, + email.caption = $address, + email.type = "email" + """ + self.neo4j_conn.query(email_query, { + "address": email, + "sketch_id": self.sketch_id + }) + + # Create breach relationships + for breach in result.get("breaches", []): + if breach and isinstance(breach, dict): + breach_name = breach.get("Name", "Unknown") + self.neo4j_conn.query(""" + MERGE (breach:breach {name: $name}) + SET breach.sketch_id = $sketch_id, + breach.label = $name, + breach.caption = $name, + breach.type = "breach" + WITH breach + MATCH (email:email {address: $email_address}) + MERGE (email)-[:FOUND_IN_BREACH {sketch_id: $sketch_id}]->(breach) + """, { + "name": breach_name, + "email_address": email, + "sketch_id": self.sketch_id + }) + Logger.graph_append(self.sketch_id, {"message": f"Email {email} found in breach: {breach_name}"}) + + return results + +# Make types available at module level for easy access +InputType = HibpScanner.InputType +OutputType = HibpScanner.OutputType diff --git a/flowsint-api/app/scanners/n8n/connector.py b/flowsint-api/app/scanners/n8n/connector.py index 289e8e0..6851204 100644 --- a/flowsint-api/app/scanners/n8n/connector.py +++ b/flowsint-api/app/scanners/n8n/connector.py @@ -1,17 +1,30 @@ import json import aiohttp -from typing import List, Dict, Any, Optional, TypeAlias +from typing import List, Dict, Any, Optional from app.scanners.base import Scanner from app.core.logger import Logger from app.core.graph_db import Neo4jConnection -InputType: TypeAlias = List[dict] -OutputType: TypeAlias = List[dict] - class N8nConnector(Scanner): """ - Let's you use your custom n8n workflows to process data. The types are not checked on this connector, so make sure to use the correct types in your n8n workflows. 
+ Connect to your custom n8n workflows to process data through webhooks. + + ## Setup instructions: + 1. In your n8n workflow, add a **Webhook** trigger node as the starting node + 2. In the Webhook node, set **Respond** to `"Using 'Respond to Webhook' node"` + 3. Add a **Respond to Webhook** node at the end of your workflow to return processed data + 4. Use the webhook URL from your n8n workflow in the `webhook_url` parameter + + The connector will send your input data as JSON to the webhook and expect JSON response. + Types are not validated by this connector, so ensure your n8n workflow handles the expected data types correctly. + + For more details on webhook responses, see: [Respond to Webhook documentation](https://docs.n8n.io/integrations/builtin/core-nodes/n8n-nodes-base.respondtowebhook/) """ + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Any] + OutputType = List[Any] + def __init__( self, sketch_id: Optional[str] = None, @@ -29,6 +42,10 @@ class N8nConnector(Scanner): params=params ) + @classmethod + def icon(cls) -> str | None: + return "n8n" + @classmethod def name(cls) -> str: return "n8n_connector" @@ -36,29 +53,15 @@ class N8nConnector(Scanner): @classmethod def category(cls) -> str: return "external" + + @classmethod + def required_params(cls) -> bool: + return True @classmethod def key(cls) -> str: return "any" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - return { - "type": "Any", - "properties": [ - {"name": "value", "type": "object"} - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - return { - "type": "Any", - "properties": [ - {"name": "data", "type": "object"} - ] - } - @classmethod def get_params_schema(cls) -> List[Dict[str, Any]]: return [ @@ -82,7 +85,7 @@ class N8nConnector(Scanner): } ] - async def scan(self, values: list[str]) -> list[dict]: + async def scan(self, values: InputType) -> OutputType: params = self.get_params() url = 
params["webhook_url"] Logger.info(self.sketch_id, {"message": f"n8n connector url: {url}"}) @@ -91,10 +94,11 @@ class N8nConnector(Scanner): headers["Authorization"] = f"Bearer {params['auth_token']}" payload = { + "sketch_id": self.sketch_id, + "type": values[0] if values else None, "inputs": values } - # Ajout de données additionnelles dans le payload if "extra_payload" in params and params["extra_payload"] is not None: try: extra = json.loads(params["extra_payload"]) @@ -102,14 +106,39 @@ class N8nConnector(Scanner): except json.JSONDecodeError: Logger.warn(self.sketch_id, {"message": "extra_payload is not valid JSON"}) - async with aiohttp.ClientSession() as session: - async with session.post(url, headers=headers, json=payload) as response: - if response.status != 200: - raise Exception(f"n8n responded with {response.status}: {await response.text()}") - data = await response.json() + Logger.info(self.sketch_id, {"message": f"Sending request to n8n webhook with payload: {json.dumps(payload)}"}) - return data + try: + async with aiohttp.ClientSession() as session: + async with session.post(url, headers=headers, json=payload) as response: + Logger.info(self.sketch_id, {"message": f"n8n webhook responded with status: {response.status}"}) + + # Log the raw response text for debugging + response_text = await response.text() + Logger.info(self.sketch_id, {"message": f"n8n webhook raw response: {response_text}"}) + + if response.status != 200: + Logger.warn(self.sketch_id, {"message": f"n8n responded with non-200 status: {response.status} - Response: {response_text}"}) + raise Exception(f"n8n responded with {response.status}: {response_text}") + + try: + data = json.loads(response_text) + Logger.info(self.sketch_id, {"message": f"n8n connector received response: {json.dumps(data)}"}) + return data + except json.JSONDecodeError as e: + Logger.warn(self.sketch_id, {"message": f"Failed to parse n8n response as JSON: {str(e)} - Raw response: {response_text}"}) + # Return 
the raw text wrapped in a list of dicts as expected + return [{"raw_response": response_text, "error": "Response was not valid JSON"}] + + except Exception as e: + Logger.warn(self.sketch_id, {"message": f"Error calling n8n webhook: {str(e)}"}) + # Re-raise the exception so the caller knows something went wrong + raise - def postprocess(self, results: list[dict], original_input: list[dict]) -> list[dict]: - Logger.success(self.sketch_id, {"message": "n8n connector results", "results": results}) + def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: + Logger.success(self.sketch_id, {"message": f"n8n connector results: {json.dumps(results)}"}) return results + +# Make types available at module level for easy access +InputType = N8nConnector.InputType +OutputType = N8nConnector.OutputType diff --git a/flowsint-api/app/scanners/organizations/org_to_asn.py b/flowsint-api/app/scanners/organizations/org_to_asn.py index c03e1b7..71701de 100644 --- a/flowsint-api/app/scanners/organizations/org_to_asn.py +++ b/flowsint-api/app/scanners/organizations/org_to_asn.py @@ -1,20 +1,19 @@ import json import socket import subprocess -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Union from app.scanners.base import Scanner from app.types.organization import Organization from app.types.asn import ASN -from app.utils import resolve_type from app.core.logger import Logger -InputType: TypeAlias = List[Organization] -OutputType: TypeAlias = List[ASN] - class OrgToAsnScanner(Scanner): """Takes an organization and returns its corresponding ASN.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Organization] + OutputType = List[ASN] + @classmethod def name(cls) -> str: return "org_to_asn_scanner" @@ -27,38 +26,6 @@ class OrgToAsnScanner(Scanner): def key(cls) -> str: return "name" - @classmethod - def input_schema(cls) -> 
Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Organization type in $defs - organization_def = schema["$defs"].get("Organization") - if not organization_def: - raise ValueError("Organization type not found in schema") - return { - "type": "Organization", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in organization_def["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the ASN type in $defs - asn_def = schema["$defs"].get("ASN") - if not asn_def: - raise ValueError("ASN type not found in schema") - return { - "type": "ASN", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in asn_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -171,4 +138,8 @@ class OrgToAsnScanner(Scanner): }) Logger.graph_append(self.sketch_id, {"message": f"Found for {input_org.name} -> ASN {result_asn.number}"}) - return results \ No newline at end of file + return results + +# Make types available at module level for easy access +InputType = OrgToAsnScanner.InputType +OutputType = OrgToAsnScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/organizations/to_infos.py b/flowsint-api/app/scanners/organizations/to_infos.py index 963790f..a4147bd 100644 --- a/flowsint-api/app/scanners/organizations/to_infos.py +++ b/flowsint-api/app/scanners/organizations/to_infos.py @@ -1,18 +1,17 @@ -from typing import List, Dict, Any, TypeAlias, Union -from pydantic import TypeAdapter +from typing import List, Dict, Any, Union from app.scanners.base import Scanner from app.types.organization import Organization -from app.utils import resolve_type from app.core.logger import Logger from app.tools.organizations.sirene import 
SireneTool -InputType: TypeAlias = List[Organization] -OutputType: TypeAlias = List[Organization] - class OrgToInfosScanner(Scanner): """Enrich Organization with data from SIRENE (France only).""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Organization] + OutputType = List[Organization] + @classmethod def name(cls) -> str: return "to_infos" @@ -25,51 +24,12 @@ class OrgToInfosScanner(Scanner): def key(cls) -> str: return "name" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Organization type in $defs - organization_def = schema["$defs"].get("Organization") - if not organization_def: - raise ValueError("Organization type not found in schema") - return { - "type": "Organization", - "properties": [ - {"name": "name", "type": "string"} - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # the items property contains the Organization type reference - items_schema = schema.get("items", {}) - if "$ref" in items_schema: - # Extract the type name from the $ref (e.g., "#/$defs/Organization" -> "Organization") - ref_path = items_schema["$ref"] - type_name = ref_path.split("/")[-1] - organization_def = schema["$defs"].get(type_name) - if not organization_def: - raise ValueError(f"Type {type_name} not found in schema") - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in organization_def["properties"].items() - ] - } - else: - raise ValueError("Expected $ref in items schema for List type") - - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: if not isinstance(data, list): raise ValueError(f"Expected list input, got {type(data).__name__}") cleaned: InputType = [] for item in data: - if isinstance(item, str) and str!="": + if 
isinstance(item, str) and item != "": cleaned.append(Organization(name=item)) elif isinstance(item, dict) and "name" in item and item["name"] != "": cleaned.append(Organization(**item)) @@ -90,7 +50,7 @@ class OrgToInfosScanner(Scanner): if enriched_org is not None: results.append(enriched_org) except Exception as e: - print(f"Error enriching organization {org.name}: {e}") + Logger.error(self.sketch_id, {"message": f"Error enriching organization {org.name}: {e}"}) return results def enrich_org(self, company: Dict) -> Organization: @@ -422,4 +382,8 @@ class OrgToInfosScanner(Scanner): return results +# Make types available at module level for easy access +InputType = OrgToInfosScanner.InputType +OutputType = OrgToInfosScanner.OutputType + diff --git a/flowsint-api/app/scanners/phones/ignorant.py b/flowsint-api/app/scanners/phones/ignorant.py index af44eab..c08811e 100644 --- a/flowsint-api/app/scanners/phones/ignorant.py +++ b/flowsint-api/app/scanners/phones/ignorant.py @@ -1,36 +1,43 @@ import asyncio -from typing import Dict, Any, List +from typing import Dict, Any, List, Union from app.scanners.base import Scanner from app.utils import is_valid_number +from app.core.logger import Logger import httpx -class IgnorantScanner(Scanner): +class IgnorantScanner(Scanner): + + # Define types as class attributes - base class handles schema generation automatically + InputType = List[str] # Phone numbers as strings + OutputType = List[Dict[str, Any]] # Results as dictionaries + @classmethod - def name(self) -> str: + def name(cls) -> str: return "ignorant_scanner" @classmethod - def category(self) -> str: + def category(cls) -> str: return "phones" @classmethod - def key(self) -> str: + def key(cls) -> str: return "number" - - @classmethod - def input_schema(self) -> Dict[str, str]: - return ["number"] - - @classmethod - def output_schema(self) -> Dict[str, str]: - return ["exists"] - async def scan(self, phones: List[str]) -> List[Dict[str, Any]]: + def preprocess(self, 
data: Union[List[str], List[dict], InputType]) -> InputType: + cleaned: InputType = [] + for item in data: + if isinstance(item, str): + cleaned.append(item) + elif isinstance(item, dict) and "number" in item: + cleaned.append(item["number"]) + return cleaned + + async def scan(self, data: InputType) -> OutputType: """ Performs the Ignorant search for each specified phone number. """ - results = [] - for phone in phones: + results: OutputType = [] + for phone in data: try: cleaned_phone = is_valid_number(phone) if cleaned_phone: @@ -46,6 +53,7 @@ class IgnorantScanner(Scanner): "number": phone, "error": f"Unexpected error in Ignorant scan: {str(e)}" }) + Logger.error(self.sketch_id, {"message": f"Error scanning phone {phone}: {str(e)}"}) return results async def _perform_ignorant_research(self, phone: str) -> Dict[str, Any]: @@ -69,7 +77,10 @@ class IgnorantScanner(Scanner): if response: results.append(response) - return results + return { + "number": phone, + "platforms": results + } except Exception as e: return { @@ -77,8 +88,36 @@ class IgnorantScanner(Scanner): "error": f"Error in Ignorant research: {str(e)}" } - def postprocess(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: """ - Adds additional metadata to the results. + Create Neo4j relationships for found phone accounts. 
""" - return { "output": {"phones": results } } + if not self.neo4j_conn: + return results + + for result in results: + if "error" not in result and "platforms" in result: + # Create phone number node + phone_query = """ + MERGE (phone:phone {number: $number}) + SET phone.sketch_id = $sketch_id, + phone.label = $number, + phone.caption = $number, + phone.type = "phone" + """ + self.neo4j_conn.query(phone_query, { + "number": result["number"], + "sketch_id": self.sketch_id + }) + + # Create platform relationships + for platform_result in result["platforms"]: + if platform_result and isinstance(platform_result, dict): + platform_name = platform_result.get("platform", "unknown") + Logger.graph_append(self.sketch_id, {"message": f"Phone {result['number']} found on {platform_name}"}) + + return results + +# Make types available at module level for easy access +InputType = IgnorantScanner.InputType +OutputType = IgnorantScanner.OutputType diff --git a/flowsint-api/app/scanners/registry.py b/flowsint-api/app/scanners/registry.py index 9cc216e..2d9ff0e 100644 --- a/flowsint-api/app/scanners/registry.py +++ b/flowsint-api/app/scanners/registry.py @@ -6,7 +6,6 @@ from app.scanners.domains.resolve import ResolveScanner from app.scanners.ips.reverse_resolve import ReverseResolveScanner from app.scanners.ips.geolocation import GeolocationScanner from app.scanners.socials.maigret import MaigretScanner -from app.scanners.emails.holehe import HoleheScanner from app.scanners.ips.ip_to_asn import IpToAsnScanner from app.scanners.ips.asn_to_cidrs import AsnToCidrsScanner from app.scanners.ips.cidr_to_ips import CidrToIpsScanner @@ -57,7 +56,8 @@ class ScannerRegistry: "outputs": scanner.output_schema(), "params": {}, "params_schema": scanner.get_params_schema(), - "requires_key": scanner.requires_key(), + "required_params": scanner.required_params(), + "icon": scanner.icon(), } for name, scanner in cls._scanners.items() } @@ -79,7 +79,8 @@ class ScannerRegistry: "outputs": 
scanner.output_schema(), "params": {}, "params_schema": scanner.get_params_schema(), - "requires_key": scanner.requires_key(), + "required_params": scanner.required_params(), + "icon": scanner.icon(), }) return scanners_by_category @@ -96,7 +97,8 @@ class ScannerRegistry: "outputs": scanner.output_schema(), "params": {}, "params_schema": scanner.get_params_schema(), - "requires_key": scanner.requires_key(), + "required_params": scanner.required_params(), + "icon": scanner.icon(), } for _, scanner in cls._scanners.items()] return [{ @@ -109,7 +111,8 @@ class ScannerRegistry: "outputs": scanner.output_schema(), "params": {}, "params_schema": scanner.get_params_schema(), - "requires_key": scanner.requires_key(), + "required_params": scanner.required_params(), + "icon": scanner.icon(), } for _, scanner in cls._scanners.items() if scanner.input_schema()["type"].lower() in ["any", input_type.lower()]] ScannerRegistry.register(ReverseResolveScanner) @@ -118,7 +121,6 @@ ScannerRegistry.register(SubdomainScanner) ScannerRegistry.register(WhoisScanner) ScannerRegistry.register(GeolocationScanner) ScannerRegistry.register(MaigretScanner) -ScannerRegistry.register(HoleheScanner) ScannerRegistry.register(IpToAsnScanner) ScannerRegistry.register(AsnToCidrsScanner) ScannerRegistry.register(CidrToIpsScanner) diff --git a/flowsint-api/app/scanners/socials/maigret.py b/flowsint-api/app/scanners/socials/maigret.py index 47ca3f0..2944120 100644 --- a/flowsint-api/app/scanners/socials/maigret.py +++ b/flowsint-api/app/scanners/socials/maigret.py @@ -1,22 +1,22 @@ import json import subprocess from pathlib import Path -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union -from app.utils import is_valid_username, resolve_type +from app.utils import is_valid_username from app.scanners.base import Scanner from app.types.social import SocialProfile -from pydantic import TypeAdapter from app.core.logger import Logger -InputType: TypeAlias = 
List[SocialProfile] -OutputType: TypeAlias = List[SocialProfile] - false_positives = ["LeagueOfLegends"] class MaigretScanner(Scanner): """Scans usernames for associated social accounts using Maigret.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[SocialProfile] + OutputType = List[SocialProfile] + @classmethod def name(cls) -> str: return "maigret_scanner" @@ -29,32 +29,6 @@ class MaigretScanner(Scanner): def key(cls) -> str: return "username" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: @@ -80,7 +54,7 @@ class MaigretScanner(Scanner): timeout=100 ) except Exception as e: - print(f"[FAILED] Maigret execution failed for {username}: {e}") + Logger.error(self.sketch_id, {"message": f"Maigret execution failed for {username}: {e}"}) return output_file def parse_maigret_output(self, username: str, output_file: Path) -> List[SocialProfile]: @@ -92,7 +66,7 @@ class MaigretScanner(Scanner): with open(output_file, "r") as f: raw_data = json.load(f) except Exception as e: - print(f"[FAILED] Failed to load output file for {username}: {e}") + Logger.error(self.sketch_id, {"message": f"Failed to load output file for {username}: {e}"}) return results for 
platform, profile in raw_data.items(): @@ -134,15 +108,14 @@ class MaigretScanner(Scanner): async def scan(self, data: InputType) -> OutputType: results: OutputType = [] - for ms in data: - if not ms.username: + for profile in data: + if not profile.username: continue - output_file = self.run_maigret(ms.username) - parsed = self.parse_maigret_output(ms.username, output_file) + output_file = self.run_maigret(profile.username) + parsed = self.parse_maigret_output(profile.username, output_file) results.extend(parsed) return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: if not self.neo4j_conn: return results @@ -183,5 +156,9 @@ class MaigretScanner(Scanner): "sketch_id": self.sketch_id }) - return results + return results + +# Make types available at module level for easy access +InputType = MaigretScanner.InputType +OutputType = MaigretScanner.OutputType diff --git a/flowsint-api/app/scanners/socials/sherlock.py b/flowsint-api/app/scanners/socials/sherlock.py index 15c2205..e8c86e9 100644 --- a/flowsint-api/app/scanners/socials/sherlock.py +++ b/flowsint-api/app/scanners/socials/sherlock.py @@ -1,58 +1,30 @@ import subprocess from pathlib import Path -from typing import Dict, Any, List, TypeAlias, Union +from typing import Dict, Any, List, Union from app.utils import is_valid_username -from app.types.social import Social, Social +from app.types.social import Social from app.scanners.base import Scanner -from pydantic import TypeAdapter -from app.utils import is_valid_username, resolve_type -from app.core.logger import logger - - -InputType: TypeAlias = List[Social] -OutputType: TypeAlias = List[Social] +from app.core.logger import Logger class SherlockScanner(Scanner): """Scans the usernames for associated social accounts using Sherlock.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Social] + OutputType = List[Social] + @classmethod - def name(self) -> str: + def 
name(cls) -> str: return "sherlock_scanner" @classmethod - def category(self) -> str: + def category(cls) -> str: return "social_account" @classmethod - def key(self) -> str: + def key(cls) -> str: return "username" - - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - type_name, details = list(schema["$defs"].items())[0] - return { - "type": type_name, - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in details["properties"].items() - ] - } def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] @@ -69,12 +41,13 @@ class SherlockScanner(Scanner): cleaned.append(obj) return cleaned - async def scan(self, usernames: List[str]) -> Dict[str, Any]: + async def scan(self, data: InputType) -> OutputType: """Performs the scan using Sherlock on the list of usernames.""" - results_list = [] # List to store scan results for each username + results: OutputType = [] - for username in usernames: - output_file = Path(f"/tmp/sherlock_{username}.txt") # Output file path + for social in data: + username = social.username + output_file = Path(f"/tmp/sherlock_{username}.txt") try: # Running the Sherlock command to perform the scan result = subprocess.run( @@ -85,15 +58,11 @@ class SherlockScanner(Scanner): ) if result.returncode != 0: - results_list.append({ - "error": f"Sherlock failed for {username}: {result.stderr.strip()}" - }) + Logger.error(self.sketch_id, {"message": f"Sherlock failed for {username}: {result.stderr.strip()}"}) continue if not output_file.exists(): - 
results_list.append({ - "error": f"Sherlock did not produce any output file for {username}." - }) + Logger.error(self.sketch_id, {"message": f"Sherlock did not produce any output file for {username}."}) continue found_accounts = {} @@ -104,18 +73,46 @@ class SherlockScanner(Scanner): platform = line.split("/")[2] # Example: twitter.com found_accounts[platform] = line - results_list.append({ - "username": username, - "output": found_accounts - }) + # Create Social objects for each found account + for platform, url in found_accounts.items(): + results.append(Social( + username=username, + platform=platform, + url=url + )) except subprocess.TimeoutExpired: - results_list.append({"error": f"Sherlock scan for {username} timed out."}) + Logger.error(self.sketch_id, {"message": f"Sherlock scan for {username} timed out."}) except Exception as e: - results_list.append({"error": f"Unexpected error in Sherlock scan for {username}: {str(e)}"}) + Logger.error(self.sketch_id, {"message": f"Unexpected error in Sherlock scan for {username}: {str(e)}"}) - return results_list + return results - def postprocess(self, results: Dict[str, Any]) -> Dict[str, Any]: - """Adds additional metadata to the results.""" - return {"output": results} + def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: + """Create Neo4j relationships for found social accounts.""" + if not self.neo4j_conn: + return results + + for social in results: + # Create or update social account node + social_query = """ + MERGE (social:social {username: $username, platform: $platform}) + SET social.url = $url, + social.sketch_id = $sketch_id, + social.label = $username, + social.caption = $platform, + social.type = "social" + """ + self.neo4j_conn.query(social_query, { + "username": social.username, + "platform": social.platform, + "url": social.url, + "sketch_id": self.sketch_id + }) + Logger.graph_append(self.sketch_id, {"message": f"Found social account: {social.username} on 
{social.platform}"}) + + return results + +# Make types available at module level for easy access +InputType = SherlockScanner.InputType +OutputType = SherlockScanner.OutputType diff --git a/flowsint-api/app/scanners/websites/to_crawler.py b/flowsint-api/app/scanners/websites/to_crawler.py index 3a929a3..edd6133 100644 --- a/flowsint-api/app/scanners/websites/to_crawler.py +++ b/flowsint-api/app/scanners/websites/to_crawler.py @@ -1,21 +1,19 @@ -from typing import List, Dict, Any, TypeAlias, Union, Set +from typing import List, Dict, Any, Union, Set from urllib.parse import urlparse -from app.utils import resolve_type from app.scanners.base import Scanner from app.types.website import Website from app.types.phone import Phone from app.types.email import Email -from pydantic import TypeAdapter from app.core.logger import Logger from app.tools.network.reconcrawl import ReconCrawlTool -InputType: TypeAlias = List[Website] -OutputType: TypeAlias = List[Dict[str, Union[Website, List[Phone], List[Email]]]] - - class WebsiteToCrawler(Scanner): """From website to crawler.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Website] + OutputType = List[Dict[str, Any]] # Simplified output type + @classmethod def name(cls) -> str: return "to_crawler" @@ -28,36 +26,6 @@ class WebsiteToCrawler(Scanner): def key(cls) -> str: return "url" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Website type in $defs - website_def = schema["$defs"].get("Website") - if not website_def: - raise ValueError("Website type not found in schema") - return { - "type": "Website", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # For 
complex output types, we need to create a custom schema - return { - "type": "WebsiteResult", - "properties": [ - {"name": "website", "type": "Website"}, - {"name": "emails", "type": "Email[]"}, - {"name": "phones", "type": "Phone[]"}, - ] - } - def is_same_domain(self, url: str, base_domain: str) -> bool: """Check if URL belongs to the same domain.""" try: @@ -191,4 +159,7 @@ class WebsiteToCrawler(Scanner): }) Logger.graph_append(self.sketch_id, {"message": f"Found phone {phone.number} for website {website_url}"}) - return results \ No newline at end of file + return results + +InputType = WebsiteToCrawler.InputType +OutputType = WebsiteToCrawler.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/websites/to_domain.py b/flowsint-api/app/scanners/websites/to_domain.py index 4829ad8..2f80a07 100644 --- a/flowsint-api/app/scanners/websites/to_domain.py +++ b/flowsint-api/app/scanners/websites/to_domain.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Any, TypeAlias, Union +from typing import List, Dict, Any, Union from urllib.parse import urlparse from app.utils import resolve_type from app.scanners.base import Scanner @@ -7,13 +7,13 @@ from app.types.domain import Domain from pydantic import TypeAdapter from app.core.logger import Logger -InputType: TypeAlias = List[Website] -OutputType: TypeAlias = List[Domain] - - class WebsiteToDomainScanner(Scanner): """From website to domain.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Website] + OutputType = List[Domain] + @classmethod def name(cls) -> str: return "to_domain" @@ -26,117 +26,48 @@ class WebsiteToDomainScanner(Scanner): def key(cls) -> str: return "website" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Website type in $defs - website_def = schema["$defs"].get("Website") - if not website_def: - raise ValueError("Website type 
not found in schema") - return { - "type": "Website", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the Domain type in $defs - domain_def = schema["$defs"].get("Domain") - if not domain_def: - raise ValueError("Domain type not found in schema") - return { - "type": "Domain", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in domain_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: website_obj = None if isinstance(item, str): - # If it's a string, treat it as a URL website_obj = Website(url=item) elif isinstance(item, dict) and "url" in item: - website_obj = Website(**item) + website_obj = Website(url=item["url"]) elif isinstance(item, Website): website_obj = item if website_obj: cleaned.append(website_obj) return cleaned - - def __extract_domain_from_url(self, url: str) -> str: - """Extract domain from URL.""" - try: - parsed = urlparse(str(url)) - domain = parsed.netloc - # Remove port if present - if ':' in domain: - domain = domain.split(':')[0] - return domain - except Exception: - return "" async def scan(self, data: InputType) -> OutputType: - """Extract domain from website.""" results: OutputType = [] for website in data: try: - # Extract domain from the website URL - domain_name = self.__extract_domain_from_url(website.url) + parsed_url = urlparse(website.url) + domain_name = parsed_url.netloc + + # Remove port if present + if ':' in domain_name: + domain_name = domain_name.split(':')[0] + + # Remove www. 
prefix if present + if domain_name.startswith('www.'): + domain_name = domain_name[4:] + if domain_name: - domain = Domain(domain=domain_name) - results.append(domain) + domain_obj = Domain(domain=domain_name) + results.append(domain_obj) + except Exception as e: - print(f"Error processing website {website.url}: {e}") + Logger.error(self.sketch_id, {"message": f"Error extracting domain from website {website.url}: {e}"}) continue - + return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - for i, domain in enumerate(results): - website = original_input[i] if i < len(original_input) else None - - query = """ - MERGE (d:domain {domain: $domain}) - SET d.sketch_id = $sketch_id, - d.label = $domain, - d.type = "domain" - """ - if website: - query += """ - MERGE (w:website {url: $url}) - SET w.sketch_id = $sketch_id, - w.label = $label, - w.type = "website" - MERGE (w)-[:HAS_DOMAIN {sketch_id: $sketch_id}]->(d) - """ - - if self.neo4j_conn: - params = { - "domain": domain.domain, - "sketch_id": self.sketch_id, - } - if website: - params.update({ - "url": str(website.url), - "label": str(website.url), - }) - self.neo4j_conn.query(query, params) - - website_url = str(website.url) if website else "unknown" - payload: Dict = { - "message": f"{website_url} -> {domain.domain}" - } - Logger.graph_append(self.sketch_id, payload) - - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = WebsiteToDomainScanner.InputType +OutputType = WebsiteToDomainScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/scanners/websites/to_webtrackers.py b/flowsint-api/app/scanners/websites/to_webtrackers.py index f7398e0..1d426c0 100644 --- a/flowsint-api/app/scanners/websites/to_webtrackers.py +++ b/flowsint-api/app/scanners/websites/to_webtrackers.py @@ -1,4 +1,4 
@@ -from typing import List, Dict, Any, TypeAlias, Union, Optional +from typing import List, Dict, Any, Union, Optional from app.utils import resolve_type from app.scanners.base import Scanner from app.types.website import Website @@ -10,14 +10,13 @@ from app.core.graph_db import Neo4jConnection from app.core.vault import VaultProtocol from recontrack import TrackingCodeExtractor - -InputType: TypeAlias = List[Website] -OutputType: TypeAlias = List[WebTracker] - - class WebsiteToWebtrackersScanner(Scanner): """From website to webtrackers.""" + # Define types as class attributes - base class handles schema generation automatically + InputType = List[Website] + OutputType = List[WebTracker] + def __init__( self, sketch_id: str, @@ -42,48 +41,14 @@ class WebsiteToWebtrackersScanner(Scanner): def key(cls) -> str: return "website" - @classmethod - def input_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(InputType) - schema = adapter.json_schema() - # Find the Website type in $defs - website_def = schema["$defs"].get("Website") - if not website_def: - raise ValueError("Website type not found in schema") - return { - "type": "Website", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in website_def["properties"].items() - ] - } - - - @classmethod - def output_schema(cls) -> Dict[str, Any]: - adapter = TypeAdapter(OutputType) - schema = adapter.json_schema() - # Find the WebTracker type in $defs - domain_def = schema["$defs"].get("WebTracker") - if not domain_def: - raise ValueError("WebTracker type not found in schema") - return { - "type": "WebTracker", - "properties": [ - {"name": prop, "type": resolve_type(info, schema)} - for prop, info in domain_def["properties"].items() - ] - } - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: cleaned: InputType = [] for item in data: website_obj = None if isinstance(item, str): - # If it's a string, treat it as a URL website_obj = Website(url=item) 
elif isinstance(item, dict) and "url" in item: - website_obj = Website(**item) + website_obj = Website(url=item["url"]) elif isinstance(item, Website): website_obj = item if website_obj: @@ -91,61 +56,33 @@ class WebsiteToWebtrackersScanner(Scanner): return cleaned async def scan(self, data: InputType) -> OutputType: - """Extract domain from website.""" results: OutputType = [] - # Clear the mapping for this scan - self.tracker_website_mapping = [] - + extractor = TrackingCodeExtractor() + for website in data: try: - extractor = TrackingCodeExtractor(website.url) - extractor.fetch() - print(f"↪️ Final URL after redirects: {extractor.final_url}") - extractor.extract_codes() - trackings = extractor.get_results() - for tracking in trackings: - tracker = WebTracker(tracker_id=tracking.code, name=tracking.source) + # Extract tracking codes from the website + tracking_data = extractor.extract(str(website.url)) + + for tracker_info in tracking_data: + tracker = WebTracker( + name=tracker_info.get("name", ""), + tracker_id=tracker_info.get("id", ""), + category=tracker_info.get("category", ""), + website_url=str(website.url) + ) results.append(tracker) - # Store the mapping for postprocess self.tracker_website_mapping.append((tracker, website)) + except Exception as e: - print(f"Error processing website {website.url}: {e}") + Logger.error(self.sketch_id, {"message": f"Error extracting web trackers from {website.url}: {e}"}) continue - + return results - def postprocess(self, results: OutputType, original_input: InputType) -> OutputType: - # Use the stored mapping instead of trying to match by index - for tracker, website in self.tracker_website_mapping: - query = """ - MERGE (d:webtracker {tracker_id: $tracker_id}) - SET d.sketch_id = $sketch_id, - d.label = $tracker_id, - d.type = "webtracker", - d.name = $name - """ - query += """ - MERGE (w:website {url: $url}) - SET w.sketch_id = $sketch_id, - w.label = $label, - w.type = "website" - MERGE (w)-[:HAS_TRACKER {sketch_id: 
$sketch_id}]->(d) - """ - - if self.neo4j_conn: - params = { - "tracker_id": tracker.tracker_id, - "sketch_id": self.sketch_id, - "name": tracker.name, - "url": str(website.url), - "label": str(website.url), - } - self.neo4j_conn.query(query, params) - - website_url = str(website.url) - payload: Dict = { - "message": f"{website_url} -> {tracker.name}: {tracker.tracker_id}" - } - Logger.graph_append(self.sketch_id, payload) - - return results \ No newline at end of file + def postprocess(self, results: OutputType, input_data: InputType = None) -> OutputType: + return results + +# Make types available at module level for easy access +InputType = WebsiteToWebtrackersScanner.InputType +OutputType = WebsiteToWebtrackersScanner.OutputType \ No newline at end of file diff --git a/flowsint-api/app/tools/organizations/sirene.py b/flowsint-api/app/tools/organizations/sirene.py index a0f72f2..a40b21b 100644 --- a/flowsint-api/app/tools/organizations/sirene.py +++ b/flowsint-api/app/tools/organizations/sirene.py @@ -10,6 +10,10 @@ class SireneTool(Tool): def name(cls) -> str: return "sirene" + @classmethod + def version(cls) -> str: + return "1.0.0" + @classmethod def description(cls) -> str: return "The Sirene API allows you to query the Sirene directory of businesses and establishments, managed by Insee." diff --git a/flowsint-api/tests/README.md b/flowsint-api/tests/README.md new file mode 100644 index 0000000..de70f0f --- /dev/null +++ b/flowsint-api/tests/README.md @@ -0,0 +1,7 @@ +# flowsint-api tests + +Run the tests. 
+ +```bash +python -m pytest tests/ -v --tb=short +``` \ No newline at end of file diff --git a/flowsint-api/tests/scanners/domains/resolve.py b/flowsint-api/tests/scanners/domains/resolve.py index bbe1df5..0dc39ae 100644 --- a/flowsint-api/tests/scanners/domains/resolve.py +++ b/flowsint-api/tests/scanners/domains/resolve.py @@ -1,5 +1,8 @@ from app.scanners.domains.resolve import ResolveScanner from app.types.domain import Domain +from app.types.ip import Ip +from typing import List +import pytest scanner = ResolveScanner("sketch_123", "scan_123") @@ -53,7 +56,8 @@ def test_preprocess_multiple_formats(): assert "invalid_domain" not in result_domains assert "example.io" not in result_domains -def test_scan_returns_ip(monkeypatch): +@pytest.mark.asyncio +async def test_scan_returns_ip(monkeypatch): # on crée une fonction mock qui retourne une IP def mock_gethostbyname(domain): return "12.23.34.45" @@ -61,7 +65,7 @@ def test_scan_returns_ip(monkeypatch): monkeypatch.setattr("socket.gethostbyname", mock_gethostbyname) input_data = [Domain(domain="example.com")] - output = scanner.execute(input_data) + output = await scanner.execute(input_data) print(output) assert isinstance(output, list) assert output[0].address == "12.23.34.45" @@ -69,5 +73,75 @@ def test_scan_returns_ip(monkeypatch): def test_schemas(): input_schema = scanner.input_schema() output_schema = scanner.output_schema() - assert input_schema == {'type': 'Domain', 'properties': [{'name': 'domain', 'type': 'string'}, {'name': 'subdomains', 'type': 'array | null'}, {'name': 'ips', 'type': 'array | null'}, {'name': 'whois', 'type': 'Whois | null'}]} - assert output_schema == {'type': 'Ip', 'properties': [{'name': 'address', 'type': 'string'}, {'name': 'latitude', 'type': 'number | null'}, {'name': 'longitude', 'type': 'number | null'}, {'name': 'country', 'type': 'string | null'}, {'name': 'city', 'type': 'string | null'}, {'name': 'isp', 'type': 'string | null'}]} + + # Test the structure and key properties 
rather than exact match + assert input_schema['type'] == 'Domain' + assert isinstance(input_schema['properties'], list) + input_property_names = [prop['name'] for prop in input_schema['properties']] + assert 'domain' in input_property_names + + assert output_schema['type'] == 'Ip' + assert isinstance(output_schema['properties'], list) + output_property_names = [prop['name'] for prop in output_schema['properties']] + assert 'address' in output_property_names + + +class TestResolveInputOutputTypes: + """Test the new InputType/OutputType functionality for ResolveScanner""" + + def test_input_output_types_are_defined(self): + """Test that InputType and OutputType are properly defined""" + assert hasattr(ResolveScanner, 'InputType') + assert hasattr(ResolveScanner, 'OutputType') + assert ResolveScanner.InputType == List[Domain] + assert ResolveScanner.OutputType == List[Ip] + + def test_schemas_use_generate_methods(self): + """Test that schema methods use the new generate methods""" + # These should work without error + input_schema = ResolveScanner.generate_input_schema() + output_schema = ResolveScanner.generate_output_schema() + + assert isinstance(input_schema, dict) + assert isinstance(output_schema, dict) + assert input_schema["type"] == "Domain" + assert output_schema["type"] == "Ip" + + def test_schema_methods_return_same_as_generate_methods(self): + """Test that input_schema() and output_schema() return the same as generate methods""" + assert ResolveScanner.input_schema() == ResolveScanner.generate_input_schema() + assert ResolveScanner.output_schema() == ResolveScanner.generate_output_schema() + + def test_input_schema_properties(self): + """Test input schema has expected properties""" + schema = ResolveScanner.input_schema() + + properties = schema["properties"] + property_names = [p["name"] for p in properties] + + # Domain should have these properties + assert "domain" in property_names + + def test_output_schema_properties(self): + """Test output schema 
has expected properties""" + schema = ResolveScanner.output_schema() + + properties = schema["properties"] + property_names = [p["name"] for p in properties] + + # Ip should have these properties + assert "address" in property_names + + def test_type_accessibility_from_instance(self): + """Test that types are accessible from scanner instance""" + scanner_instance = ResolveScanner("test", "test") + + assert scanner_instance.InputType == List[Domain] + assert scanner_instance.OutputType == List[Ip] + + # Should be able to generate schemas from instance + input_schema = scanner_instance.generate_input_schema() + output_schema = scanner_instance.generate_output_schema() + + assert input_schema["type"] == "Domain" + assert output_schema["type"] == "Ip" diff --git a/flowsint-api/tests/scanners/emails/holehe.py b/flowsint-api/tests/scanners/emails/holehe.py deleted file mode 100644 index 3db3a8d..0000000 --- a/flowsint-api/tests/scanners/emails/holehe.py +++ /dev/null @@ -1,49 +0,0 @@ -from pathlib import Path -from app.scanners.emails.holehe import HoleheScanner -from app.types.email import Email -from app.types.social import Social - -scanner = HoleheScanner("sketch_123", "scan_123") - -def test_unprocessed_valid_emails(): - emails = [ - "toto123@test.com", - "DorianXd78@test.com", - ] - result = scanner.preprocess(emails) - result_emails = [d for d in result] - expected_emails = [Email(email=d) for d in emails] - assert result_emails == expected_emails - -def test_preprocess_invalid_emails(): - emails = [ - Email(email="toto123@test.com"), - Email(email="this-is-not-a-valid-email"), - Email(email="this-is-not-a-valid-email@test"), - ] - result = scanner.preprocess(emails) - - result_emails = [d.email for d in result] - assert "toto123@test.com" in result_emails - assert "this-is-not-a-valid-email" not in result_emails - assert "this-is-not-a-valid-email@test" not in result_emails - -def test_preprocess_multiple_formats(): - emails = [ - {"email": "toto123@test.com"}, - 
{"invalid_key": "toto345@test.com"}, - Email(email="toto789@test.com"), - "MySimpleInvalidEmail", - ] - result = scanner.preprocess(emails) - - result_emails = [d.email for d in result] - assert "toto123@test.com" in result_emails - assert "toto789@test.com" in result_emails - assert "MySimpleInvalidEmail" not in result_emails - assert "toto345@test.com" not in result_emails - -def test_scan(): - output = scanner.execute(["eliott.morcillo@gmail.com"]) - assert isinstance(output, list) - assert isinstance(output[0], Social) diff --git a/flowsint-api/tests/scanners/test_base_scanner.py b/flowsint-api/tests/scanners/test_base_scanner.py new file mode 100644 index 0000000..a3f133b --- /dev/null +++ b/flowsint-api/tests/scanners/test_base_scanner.py @@ -0,0 +1,205 @@ +import pytest +from typing import List, Dict, Any +from unittest.mock import Mock + +from app.scanners.base import Scanner, InvalidScannerParams +from app.types.domain import Domain +from app.types.ip import Ip + + +class MockScanner(Scanner): + """Mock scanner for testing base functionality""" + + # Define InputType and OutputType + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls) -> str: + return "mock_scanner" + + @classmethod + def category(cls) -> str: + return "Test" + + @classmethod + def key(cls) -> str: + return "domain" + + @classmethod + def input_schema(cls) -> Dict[str, Any]: + return cls.generate_input_schema() + + @classmethod + def output_schema(cls) -> Dict[str, Any]: + return cls.generate_output_schema() + + async def scan(self, values: List[str]) -> List[Dict[str, Any]]: + # Mock implementation + return [{"address": "1.2.3.4"}] + + +class IncompleteScanner(Scanner): + """Scanner without InputType/OutputType for testing error cases""" + + @classmethod + def name(cls) -> str: + return "incomplete_scanner" + + @classmethod + def category(cls) -> str: + return "Test" + + @classmethod + def key(cls) -> str: + return "test" + + @classmethod + def 
input_schema(cls) -> Dict[str, Any]: + return cls.generate_input_schema() + + @classmethod + def output_schema(cls) -> Dict[str, Any]: + return cls.generate_output_schema() + + async def scan(self, values: List[str]) -> List[Dict[str, Any]]: + return [] + + +class TestBaseScannerInputOutputTypes: + """Test suite for Scanner InputType/OutputType functionality""" + + def test_input_type_output_type_class_attributes(self): + """Test that InputType and OutputType are properly set as class attributes""" + assert hasattr(MockScanner, 'InputType') + assert hasattr(MockScanner, 'OutputType') + assert MockScanner.InputType == List[Domain] + assert MockScanner.OutputType == List[Ip] + + def test_generate_input_schema_success(self): + """Test that generate_input_schema works correctly with valid InputType""" + schema = MockScanner.generate_input_schema() + + assert isinstance(schema, dict) + assert "type" in schema + assert "properties" in schema + assert schema["type"] == "Domain" + + # Check that properties are correctly extracted + properties = schema["properties"] + assert isinstance(properties, list) + + # Should have domain property + domain_prop = next((p for p in properties if p["name"] == "domain"), None) + assert domain_prop is not None + assert domain_prop["type"] == "string" + + def test_generate_output_schema_success(self): + """Test that generate_output_schema works correctly with valid OutputType""" + schema = MockScanner.generate_output_schema() + + assert isinstance(schema, dict) + assert "type" in schema + assert "properties" in schema + assert schema["type"] == "Ip" + + # Check that properties are correctly extracted + properties = schema["properties"] + assert isinstance(properties, list) + + # Should have address property + address_prop = next((p for p in properties if p["name"] == "address"), None) + assert address_prop is not None + assert address_prop["type"] == "string" + + def test_generate_input_schema_not_implemented_error(self): + """Test that 
generate_input_schema raises error when InputType is not defined""" + with pytest.raises(NotImplementedError) as exc_info: + IncompleteScanner.generate_input_schema() + + assert "InputType must be defined" in str(exc_info.value) + assert "IncompleteScanner" in str(exc_info.value) + + def test_generate_output_schema_not_implemented_error(self): + """Test that generate_output_schema raises error when OutputType is not defined""" + with pytest.raises(NotImplementedError) as exc_info: + IncompleteScanner.generate_output_schema() + + assert "OutputType must be defined" in str(exc_info.value) + assert "IncompleteScanner" in str(exc_info.value) + + def test_input_output_schema_methods_use_generate_methods(self): + """Test that the schema methods properly use the generate methods""" + input_schema = MockScanner.input_schema() + output_schema = MockScanner.output_schema() + + # These should be identical to calling generate methods directly + assert input_schema == MockScanner.generate_input_schema() + assert output_schema == MockScanner.generate_output_schema() + + def test_base_scanner_has_not_implemented_defaults(self): + """Test that base Scanner class has NotImplemented defaults""" + assert Scanner.InputType is NotImplemented + assert Scanner.OutputType is NotImplemented + + def test_inheritance_preserves_input_output_types(self): + """Test that InputType and OutputType are properly inherited""" + + class ChildScanner(MockScanner): + pass + + # Child should inherit the types from MockScanner + assert ChildScanner.InputType == List[Domain] + assert ChildScanner.OutputType == List[Ip] + + # And schema generation should work + input_schema = ChildScanner.generate_input_schema() + output_schema = ChildScanner.generate_output_schema() + + assert input_schema["type"] == "Domain" + assert output_schema["type"] == "Ip" + + def test_scanner_instance_can_access_class_types(self): + """Test that scanner instances can access InputType and OutputType""" + scanner = 
MockScanner("test_sketch", "test_scan") + + assert scanner.InputType == List[Domain] + assert scanner.OutputType == List[Ip] + + # Instance should be able to call generate methods + input_schema = scanner.generate_input_schema() + output_schema = scanner.generate_output_schema() + + assert input_schema["type"] == "Domain" + assert output_schema["type"] == "Ip" + + +class TestScannerFunctionality: + """Test other Scanner base functionality""" + + def test_scanner_initialization(self): + """Test that Scanner initializes correctly""" + scanner = MockScanner("test_sketch", "test_scan") + + assert scanner.sketch_id == "test_sketch" + assert scanner.scan_id == "test_scan" + assert scanner.params == {} + assert scanner.params_schema == [] + + def test_scanner_initialization_with_defaults(self): + """Test Scanner initialization with default values""" + scanner = MockScanner() + + assert scanner.sketch_id == "system" + assert scanner.scan_id == "default" + + @pytest.mark.asyncio + async def test_scanner_execute_method(self): + """Test the execute method workflow""" + scanner = MockScanner("test_sketch", "test_scan") + + result = await scanner.execute(["test.com"]) + + assert isinstance(result, list) + assert len(result) == 1 + assert result[0]["address"] == "1.2.3.4" \ No newline at end of file diff --git a/flowsint-api/tests/scanners/test_crypto_with_vault.py b/flowsint-api/tests/scanners/test_crypto_with_vault.py index 16255cb..2c63ad8 100644 --- a/flowsint-api/tests/scanners/test_crypto_with_vault.py +++ b/flowsint-api/tests/scanners/test_crypto_with_vault.py @@ -156,7 +156,7 @@ class TestCryptoWalletAddressToTransactions: """Test scanner static properties""" assert CryptoWalletAddressToTransactions.name() == "wallet_to_transactions" assert CryptoWalletAddressToTransactions.category() == "CryptoWallet" - assert isinstance(CryptoWalletAddressToTransactions.requires_key(), bool) + assert isinstance(CryptoWalletAddressToTransactions.required_params(), bool) class 
TestCryptoWalletAddressToNFTs: @@ -202,7 +202,7 @@ class TestCryptoWalletAddressToNFTs: """Test scanner static properties""" assert CryptoWalletAddressToNFTs.name() == "wallet_to_nfts" assert CryptoWalletAddressToNFTs.category() == "CryptoWallet" - assert isinstance(CryptoWalletAddressToNFTs.requires_key(), bool) + assert isinstance(CryptoWalletAddressToNFTs.required_params(), bool) class TestBothCryptoScannersIntegration: diff --git a/flowsint-api/tests/scanners/test_input_output_migration.py b/flowsint-api/tests/scanners/test_input_output_migration.py new file mode 100644 index 0000000..0935b78 --- /dev/null +++ b/flowsint-api/tests/scanners/test_input_output_migration.py @@ -0,0 +1,336 @@ +""" +Test migration from old TypeAlias pattern to new InputType/OutputType class attributes. + +This test demonstrates the benefits and proper usage of the new pattern. +""" +import pytest +from typing import List, Dict, Any, TypeAlias +from pydantic import TypeAdapter + +from app.scanners.base import Scanner +from app.types.domain import Domain +from app.types.ip import Ip +from app.types.email import Email +from app.utils import resolve_type + + +class OldPatternScanner(Scanner): + """Example of old pattern using module-level TypeAlias""" + + @classmethod + def name(cls) -> str: + return "old_pattern_scanner" + + @classmethod + def category(cls) -> str: + return "Test" + + @classmethod + def key(cls) -> str: + return "domain" + + @classmethod + def input_schema(cls) -> Dict[str, Any]: + # Old pattern: manually defining TypeAlias and building schema + InputType: TypeAlias = List[Domain] + adapter = TypeAdapter(InputType) + schema = adapter.json_schema() + type_name, details = list(schema["$defs"].items())[0] + return { + "type": type_name, + "properties": [ + {"name": prop, "type": resolve_type(info, schema)} + for prop, info in details["properties"].items() + ] + } + + @classmethod + def output_schema(cls) -> Dict[str, Any]: + # Old pattern: manually defining TypeAlias and 
building schema + OutputType: TypeAlias = List[Ip] + adapter = TypeAdapter(OutputType) + schema = adapter.json_schema() + type_name, details = list(schema["$defs"].items())[0] + return { + "type": type_name, + "properties": [ + {"name": prop, "type": resolve_type(info, schema)} + for prop, info in details["properties"].items() + ] + } + + async def scan(self, values: List[str]) -> List[Dict[str, Any]]: + return [{"address": "1.2.3.4"}] + + +class NewPatternScanner(Scanner): + """Example of new pattern using class attributes with automatic schema generation""" + + # New pattern: just define class attributes - base class handles the rest! + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls) -> str: + return "new_pattern_scanner" + + @classmethod + def category(cls) -> str: + return "Test" + + @classmethod + def key(cls) -> str: + return "domain" + + # No need to implement input_schema() or output_schema() - base class does it automatically! + + # Methods can use InputType/OutputType directly (once made available at module level) + def preprocess(self, data: List[Domain]) -> List[Domain]: + # Using concrete type for clarity in test, but would use InputType in real implementation + cleaned: List[Domain] = [] + for item in data: + if isinstance(item, Domain): + cleaned.append(item) + return cleaned + + async def scan(self, values: List[Domain]) -> List[Ip]: + # Using concrete type for clarity in test, but would use InputType/OutputType in real implementation + results: List[Ip] = [] + for domain in values: + results.append(Ip(address="1.2.3.4")) + return results + +# Make types available at module level (this is what enables clean usage) +NewPatternInputType = NewPatternScanner.InputType +NewPatternOutputType = NewPatternScanner.OutputType + + +class AdvancedNewPatternScanner(Scanner): + """Example showing advanced usage with different types""" + + InputType = List[Email] + OutputType = List[Domain] + + @classmethod + def name(cls) -> str: 
+ return "advanced_pattern_scanner" + + @classmethod + def category(cls) -> str: + return "Test" + + @classmethod + def key(cls) -> str: + return "email" + + # Schema methods automatic! + + async def scan(self, values: List[str]) -> List[Dict[str, Any]]: + return [{"domain": "example.com"}] + + +class TestInputOutputMigrationPattern: + """Test migration from old to new pattern""" + + def test_both_patterns_produce_same_schema(self): + """Test that old and new patterns produce identical schemas""" + old_input_schema = OldPatternScanner.input_schema() + new_input_schema = NewPatternScanner.input_schema() + + old_output_schema = OldPatternScanner.output_schema() + new_output_schema = NewPatternScanner.output_schema() + + # Schemas should be identical + assert old_input_schema == new_input_schema + assert old_output_schema == new_output_schema + + def test_new_pattern_benefits_code_reuse(self): + """Test that new pattern reduces code duplication""" + # With the new pattern, multiple scanners can easily reuse the same logic + + class Scanner1(Scanner): + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls): return "scanner1" + @classmethod + def category(cls): return "Test" + @classmethod + def key(cls): return "domain" + # No need for input_schema() or output_schema() - automatic! + async def scan(self, values): return [] + + class Scanner2(Scanner): + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls): return "scanner2" + @classmethod + def category(cls): return "Test" + @classmethod + def key(cls): return "domain" + # No need for input_schema() or output_schema() - automatic! 
+ async def scan(self, values): return [] + + # Both should produce identical schemas with minimal code + assert Scanner1.input_schema() == Scanner2.input_schema() + assert Scanner1.output_schema() == Scanner2.output_schema() + + def test_new_pattern_type_introspection(self): + """Test that new pattern allows for better type introspection""" + # Can easily check what types a scanner uses + assert NewPatternScanner.InputType == List[Domain] + assert NewPatternScanner.OutputType == List[Ip] + + assert AdvancedNewPatternScanner.InputType == List[Email] + assert AdvancedNewPatternScanner.OutputType == List[Domain] + + # This wasn't easily possible with the old pattern + + def test_new_pattern_inheritance_works(self): + """Test that new pattern works well with inheritance""" + + class BaseDomainScanner(Scanner): + InputType = List[Domain] + OutputType = List[Ip] + + @classmethod + def name(cls): return "base_domain" + @classmethod + def category(cls): return "Test" + @classmethod + def key(cls): return "domain" + # Schema methods automatic! 
+ async def scan(self, values): return [] + + class SpecializedDomainScanner(BaseDomainScanner): + @classmethod + def name(cls): return "specialized_domain" + # Inherits InputType and OutputType + + # Child should inherit the types and schemas + assert SpecializedDomainScanner.InputType == List[Domain] + assert SpecializedDomainScanner.OutputType == List[Ip] + + specialized_input = SpecializedDomainScanner.input_schema() + specialized_output = SpecializedDomainScanner.output_schema() + + assert specialized_input["type"] == "Domain" + assert specialized_output["type"] == "Ip" + + def test_new_pattern_error_handling(self): + """Test that new pattern provides better error handling""" + + class IncompleteDomainScanner(Scanner): + # Forgot to define InputType and OutputType + @classmethod + def name(cls): return "incomplete" + @classmethod + def category(cls): return "Test" + @classmethod + def key(cls): return "domain" + # Base class will try to generate schemas automatically and fail appropriately + async def scan(self, values): return [] + + # Should get clear error messages + with pytest.raises(NotImplementedError) as exc_info: + IncompleteDomainScanner.input_schema() + assert "InputType must be defined" in str(exc_info.value) + assert "IncompleteDomainScanner" in str(exc_info.value) + + with pytest.raises(NotImplementedError) as exc_info: + IncompleteDomainScanner.output_schema() + assert "OutputType must be defined" in str(exc_info.value) + assert "IncompleteDomainScanner" in str(exc_info.value) + + def test_new_pattern_runtime_accessibility(self): + """Test that types are accessible at runtime for dynamic operations""" + + # Can build registries or perform operations based on types + scanners = [NewPatternScanner, AdvancedNewPatternScanner] + + domain_input_scanners = [ + scanner for scanner in scanners + if hasattr(scanner, 'InputType') and scanner.InputType == List[Domain] + ] + + email_input_scanners = [ + scanner for scanner in scanners + if hasattr(scanner, 
'InputType') and scanner.InputType == List[Email] + ] + + assert len(domain_input_scanners) == 1 + assert domain_input_scanners[0] == NewPatternScanner + + assert len(email_input_scanners) == 1 + assert email_input_scanners[0] == AdvancedNewPatternScanner + + def test_new_pattern_with_clean_type_usage(self): + """Test that the new pattern allows clean type usage without quotes""" + scanner = NewPatternScanner("test", "test") + + # Test that we can create data of the expected types + test_domains = [Domain(domain="example.com"), Domain(domain="test.com")] + + # Preprocess should work with clean type annotations + result = scanner.preprocess(test_domains) + assert len(result) == 2 + assert all(isinstance(d, Domain) for d in result) + + @pytest.mark.asyncio + async def test_new_pattern_async_methods(self): + """Test that async methods work correctly with clean type annotations""" + scanner = NewPatternScanner("test", "test") + + test_domains = [Domain(domain="example.com")] + result = await scanner.scan(test_domains) + + assert len(result) == 1 + assert isinstance(result[0], Ip) + assert result[0].address == "1.2.3.4" + + def test_module_level_type_access(self): + """Test that types are properly accessible at module level""" + # These should be available after the class definition + assert NewPatternInputType == List[Domain] + assert NewPatternOutputType == List[Ip] + + # And they should match the class attributes + assert NewPatternInputType == NewPatternScanner.InputType + assert NewPatternOutputType == NewPatternScanner.OutputType + + def test_migration_checklist(self): + """Test that demonstrates a complete migration checklist""" + + # Migration steps: + # 1. Define InputType and OutputType as class attributes + # 2. Remove input_schema() and output_schema() method implementations (base class handles automatically) + # 3. 
Add module-level assignments: InputType = MyScanner.InputType (optional, for clean usage) + + # Verify the new pattern is simpler and more maintainable + new_scanner_benefits = [ + "Just define InputType and OutputType class attributes", + "Automatic schema generation by base class", + "No boilerplate schema methods needed", + "Consistent schema generation across all scanners", + "Clean type usage throughout class methods" + ] + + assert len(new_scanner_benefits) == 5 + + # Verify functionality is preserved and automatic + schema = NewPatternScanner.input_schema() + assert schema["type"] == "Domain" + assert any(prop["name"] == "domain" for prop in schema["properties"]) + + # Verify schemas are generated automatically without manual implementation + assert hasattr(NewPatternScanner, 'input_schema') + assert hasattr(NewPatternScanner, 'output_schema') + + # The base class should be handling the schema generation + input_schema = NewPatternScanner.input_schema() + output_schema = NewPatternScanner.output_schema() + assert input_schema is not None + assert output_schema is not None \ No newline at end of file diff --git a/flowsint-api/tests/scanners/test_registry.py b/flowsint-api/tests/scanners/test_registry.py index c0d32c3..5f8ea6e 100644 --- a/flowsint-api/tests/scanners/test_registry.py +++ b/flowsint-api/tests/scanners/test_registry.py @@ -1,8 +1,6 @@ import pytest from app.scanners.registry import ScannerRegistry from app.scanners.base import Scanner -from unittest.mock import Mock - class TestScannerRegistry: """Test suite for ScannerRegistry functionality""" @@ -28,7 +26,7 @@ class TestScannerRegistry: assert "outputs" in scanner_info assert "params" in scanner_info assert "params_schema" in scanner_info - assert "requires_key" in scanner_info + assert "required_params" in scanner_info # Check that name matches the key assert scanner_info["name"] == name @@ -55,7 +53,8 @@ class TestScannerRegistry: # Test with a known input type domain_scanners = 
ScannerRegistry.list_by_input_type("Domain") - for name, scanner_info in domain_scanners.items(): + assert isinstance(domain_scanners, list) + for scanner_info in domain_scanners: input_type = scanner_info["inputs"]["type"] assert input_type in ["Any", "Domain"] @@ -103,7 +102,6 @@ class TestScannerRegistry: "domain_subdomains_scanner", "to_whois", "ip_geolocation_scanner", - "holehe_scanner", "maigret_scanner" ] @@ -126,11 +124,6 @@ class TestScannerRegistry: """Test that all scanners have valid categories""" scanners = ScannerRegistry.list() - valid_categories = { - "Domain", "IP", "Email", "Social", "Organization", - "Website", "Crypto", "Individual", "ASN", "CIDR" - } - for name, scanner_info in scanners.items(): category = scanner_info["category"] assert isinstance(category, str), f"Scanner '{name}' has invalid category type: {type(category)}" @@ -141,7 +134,7 @@ class TestScannerRegistry: """Test that all scanners have input and output schemas""" scanners = ScannerRegistry.list() - for name, scanner_info in scanners.items(): + for _, scanner_info in scanners.items(): # Check input schema input_schema = scanner_info["inputs"] assert isinstance(input_schema, dict) @@ -154,10 +147,10 @@ class TestScannerRegistry: assert "type" in output_schema assert "properties" in output_schema - def test_scanner_requires_key_is_boolean(self): - """Test that requires_key returns a boolean for all scanners""" + def test_scanner_required_params_is_boolean(self): + """Test that required_params returns a boolean for all scanners""" scanners = ScannerRegistry.list() for name, scanner_info in scanners.items(): - requires_key = scanner_info["requires_key"] - assert isinstance(requires_key, bool), f"Scanner '{name}' requires_key is not boolean: {type(requires_key)}" \ No newline at end of file + required_params = scanner_info["required_params"] + assert isinstance(required_params, bool), f"Scanner '{name}' required_params is not boolean: {type(required_params)}" \ No newline at end of 
file diff --git a/flowsint-api/tests/scanners/test_schema_generation.py b/flowsint-api/tests/scanners/test_schema_generation.py new file mode 100644 index 0000000..887bf16 --- /dev/null +++ b/flowsint-api/tests/scanners/test_schema_generation.py @@ -0,0 +1,148 @@ +""" +Test schema generation for scanners with various InputType and OutputType combinations. +""" +import pytest +from typing import List +from app.scanners.base import Scanner +from app.types.website import Website +from app.types.domain import Domain +from app.types.ip import Ip +from app.scanners.websites.to_domain import WebsiteToDomainScanner +from app.scanners.websites.to_webtrackers import WebsiteToWebtrackersScanner +from app.scanners.websites.to_crawler import WebsiteToCrawler +from app.scanners.domains.to_website import DomainToWebsiteScanner + + +class TestSchemaGeneration: + """Test that schema generation correctly identifies InputType and OutputType.""" + + def test_website_to_domain_scanner_schemas(self): + """Test that WebsiteToDomainScanner correctly shows Website as input type.""" + scanner = WebsiteToDomainScanner + + # Test InputType attribute + assert scanner.InputType == List[Website] + + # Test input schema generation + input_schema = scanner.input_schema() + assert input_schema["type"] == "Website", f"Expected 'Website', got '{input_schema['type']}'" + + # Test output schema generation + output_schema = scanner.output_schema() + assert output_schema["type"] == "Domain", f"Expected 'Domain', got '{output_schema['type']}'" + + def test_website_to_webtrackers_scanner_schemas(self): + """Test that WebsiteToWebtrackersScanner correctly shows Website as input type.""" + scanner = WebsiteToWebtrackersScanner + + # Test InputType attribute + assert scanner.InputType == List[Website] + + # Test input schema generation + input_schema = scanner.input_schema() + assert input_schema["type"] == "Website", f"Expected 'Website', got '{input_schema['type']}'" + + # Test output schema generation + 
output_schema = scanner.output_schema() + assert output_schema["type"] == "WebTracker", f"Expected 'WebTracker', got '{output_schema['type']}'" + + def test_website_to_crawler_scanner_schemas(self): + """Test that WebsiteToCrawler correctly shows Website as input type.""" + scanner = WebsiteToCrawler + + # Test InputType attribute + assert scanner.InputType == List[Website] + + # Test input schema generation + input_schema = scanner.input_schema() + assert input_schema["type"] == "Website", f"Expected 'Website', got '{input_schema['type']}'" + + def test_domain_to_website_scanner_schemas(self): + """Test that DomainToWebsiteScanner correctly shows Domain as input and Website as output.""" + scanner = DomainToWebsiteScanner + + # Test InputType attribute + assert scanner.InputType == List[Domain] + + # Test input schema generation + input_schema = scanner.input_schema() + assert input_schema["type"] == "Domain", f"Expected 'Domain', got '{input_schema['type']}'" + + # Test output schema generation + output_schema = scanner.output_schema() + assert output_schema["type"] == "Website", f"Expected 'Website', got '{output_schema['type']}'" + + def test_all_website_scanners_have_correct_input_types(self): + """Test that all scanners taking Website input show Website in schema.""" + website_input_scanners = [ + (WebsiteToDomainScanner, "Website", "Domain"), + (WebsiteToWebtrackersScanner, "Website", "WebTracker"), + (WebsiteToCrawler, "Website", None), # Unknown output type + ] + + for scanner_class, expected_input, expected_output in website_input_scanners: + input_schema = scanner_class.input_schema() + assert input_schema["type"] == expected_input, \ + f"{scanner_class.__name__}: Expected input '{expected_input}', got '{input_schema['type']}'" + + if expected_output: + output_schema = scanner_class.output_schema() + assert output_schema["type"] == expected_output, \ + f"{scanner_class.__name__}: Expected output '{expected_output}', got '{output_schema['type']}'" + + def 
test_schema_generation_debug_info(self): + """Debug test to see what's actually in the schemas.""" + scanner = WebsiteToDomainScanner + + # Get the raw TypeAdapter schema + from pydantic import TypeAdapter + adapter = TypeAdapter(scanner.InputType) + raw_schema = adapter.json_schema() + + print(f"\n=== Debug Info for {scanner.__name__} ===") + print(f"InputType: {scanner.InputType}") + print(f"Raw schema keys: {list(raw_schema.keys())}") + if "$defs" in raw_schema: + print(f"$defs keys: {list(raw_schema['$defs'].keys())}") + print(f"Schema items: {raw_schema.get('items', 'No items')}") + print(f"Generated input schema: {scanner.input_schema()}") + + # This test always passes, it's just for debugging + assert True + + def test_schema_generation_follows_ref_not_first_def(self): + """ + Regression test for the schema generation bug. + + Before the fix: generate_input_schema() picked the first definition in $defs + (alphabetically "Domain" came before "Website"), so Website scanners incorrectly + showed "Domain" as their input type. + + After the fix: generate_input_schema() follows the $ref in items to get the + correct type name. 
+ """ + scanner = WebsiteToDomainScanner + + # Get the raw schema to understand the structure + from pydantic import TypeAdapter + adapter = TypeAdapter(scanner.InputType) + raw_schema = adapter.json_schema() + + # Verify the raw schema structure that caused the bug + assert "$defs" in raw_schema + assert "Domain" in raw_schema["$defs"] + assert "Website" in raw_schema["$defs"] + assert raw_schema["items"]["$ref"] == "#/$defs/Website" + + # Before fix: list(raw_schema["$defs"].items())[0][0] would be "Domain" (first alphabetically) + first_def_name = list(raw_schema["$defs"].items())[0][0] + assert first_def_name == "Domain" # This would have been the bug + + # After fix: We follow the $ref to get "Website" + ref_type = raw_schema["items"]["$ref"].split("/")[-1] + assert ref_type == "Website" # This is what we should use + + # Verify our fix works correctly + input_schema = scanner.input_schema() + assert input_schema["type"] == "Website", \ + f"Schema generation should follow $ref, not pick first def. 
Got '{input_schema['type']}'" \ No newline at end of file diff --git a/flowsint-app/package.json b/flowsint-app/package.json index 48c6401..68071c5 100644 --- a/flowsint-app/package.json +++ b/flowsint-app/package.json @@ -60,6 +60,7 @@ "@radix-ui/react-toggle-group": "^1.1.10", "@radix-ui/react-tooltip": "^1.2.7", "@react-sigma/core": "^5.0.4", + "@react-sigma/layout-forceatlas2": "^5.0.4", "@tailwindcss/vite": "^3.4.1", "@tanstack/react-query": "^5.79.0", "@tanstack/react-table": "^8.21.3", diff --git a/flowsint-app/src/renderer/public/icons/n8n.svg b/flowsint-app/src/renderer/public/icons/n8n.svg new file mode 100644 index 0000000..82f0a6d --- /dev/null +++ b/flowsint-app/src/renderer/public/icons/n8n.svg @@ -0,0 +1 @@ +n8n \ No newline at end of file diff --git a/flowsint-app/src/renderer/src/components/chat/floating-chat.tsx b/flowsint-app/src/renderer/src/components/chat/floating-chat.tsx index c99f3e8..e8992eb 100644 --- a/flowsint-app/src/renderer/src/components/chat/floating-chat.tsx +++ b/flowsint-app/src/renderer/src/components/chat/floating-chat.tsx @@ -296,7 +296,7 @@ const ChatMessageComponent = ({ message }: { message: ChatMessage }) => { )}> - + ) return ( diff --git a/flowsint-app/src/renderer/src/components/dashboard/active-malware-chart.tsx b/flowsint-app/src/renderer/src/components/dashboard/active-malware-chart.tsx new file mode 100644 index 0000000..9d54c8e --- /dev/null +++ b/flowsint-app/src/renderer/src/components/dashboard/active-malware-chart.tsx @@ -0,0 +1,77 @@ +import { Card } from '@/components/ui/card'; +import { ChartContainer, ChartTooltip, ChartTooltipContent } from '@/components/ui/chart'; +import { PieChart, Pie, Cell, ResponsiveContainer } from 'recharts'; + +const chartData = [ + { name: 'Beacon', value: 24.4, color: 'var(--chart-1)' }, + { name: 'Mirai', value: 17.6, color: 'var(--chart-2)' }, + { name: 'Android', value: 10.7, color: 'var(--chart-3)' }, + { name: 'Trojan', value: 10.2, color: 'var(--chart-4)' }, + { name: 
'RAT', value: 8.1, color: 'var(--chart-5)' }, + { name: 'Mozi', value: 6.8, color: 'var(--primary)' }, + { name: 'APT29', value: 6.5, color: 'var(--accent)' }, + { name: 'Emotet', value: 6.3, color: 'var(--muted-foreground)' }, + { name: 'Linux', value: 4.9, color: 'var(--secondary)' }, + { name: 'Qakbot', value: 4.5, color: 'var(--ring)' }, +]; + +const chartConfig = { + value: { + label: 'Percentage', + }, +}; + +export function ActiveMalwareChart() { + return ( + +
+

Active Malware

+
+ + + + {chartData.map((entry, index) => ( + + ))} + + { + if (active && payload && payload.length) { + const data = payload[0]; + return ( +
+

{data.payload.name}

+

+ {data.value}% +

+
+ ); + } + return null; + }} + /> +
+
+
+ {chartData.map((item, index) => ( +
+
+ {item.name} + {item.value}% +
+ ))} +
+ + ); +} \ No newline at end of file diff --git a/flowsint-app/src/renderer/src/components/dashboard/metrics-card.tsx b/flowsint-app/src/renderer/src/components/dashboard/metrics-card.tsx new file mode 100644 index 0000000..5e1f672 --- /dev/null +++ b/flowsint-app/src/renderer/src/components/dashboard/metrics-card.tsx @@ -0,0 +1,57 @@ +import { Card } from '@/components/ui/card'; +import { TrendingUp, TrendingDown, Database, Network, FileText } from 'lucide-react'; +import { cn } from '@/lib/utils'; + +interface MetricsCardProps { + title: string; + value: string; + trend: { + value: number; + period: string; + isPositive: boolean; + }; + type: 'entities' | 'relationships' | 'reports'; +} + +const iconMap = { + entities: Database, + relationships: Network, + reports: FileText, +}; + +const colorMap = { + entities: 'text-chart-1', + relationships: 'text-chart-2', + reports: 'text-chart-3', +}; + +export function MetricsCard({ title, value, trend, type }: MetricsCardProps) { + const Icon = iconMap[type]; + const TrendIcon = trend.isPositive ? TrendingUp : TrendingDown; + + return ( + +
+
+
+ +
+
+

{title}

+

{value}

+
+
+
+
+ + {trend.value.toLocaleString()} +
+

{trend.period}

+
+
+
+ ); +} \ No newline at end of file diff --git a/flowsint-app/src/renderer/src/components/dashboard/targeted-sectors-chart.tsx b/flowsint-app/src/renderer/src/components/dashboard/targeted-sectors-chart.tsx new file mode 100644 index 0000000..59ce3e1 --- /dev/null +++ b/flowsint-app/src/renderer/src/components/dashboard/targeted-sectors-chart.tsx @@ -0,0 +1,114 @@ +import { Card } from '@/components/ui/card'; +import { ChartContainer, ChartTooltip, ChartTooltipContent } from '@/components/ui/chart'; +import { LineChart, Line, XAxis, YAxis, ResponsiveContainer, Legend } from 'recharts'; + +const chartData = [ + { date: 'Sep 7, 2023', government: 40, finance: 30, manufacturing: 25, telecommunications: 35, defense: 20 }, + { date: 'Sep 14, 2023', government: 80, finance: 65, manufacturing: 45, telecommunications: 55, defense: 40 }, + { date: 'Sep 21, 2023', government: 165, finance: 90, manufacturing: 85, telecommunications: 75, defense: 60 }, + { date: 'Sep 28, 2023', government: 45, finance: 35, manufacturing: 40, telecommunications: 30, defense: 25 }, + { date: 'Oct 5, 2023', government: 85, finance: 55, manufacturing: 60, telecommunications: 45, defense: 35 }, + { date: 'Oct 12, 2023', government: 35, finance: 25, manufacturing: 30, telecommunications: 20, defense: 15 }, +]; + +const chartConfig = { + government: { + label: 'Government and administration', + color: 'var(--chart-1)', + }, + finance: { + label: 'Finance', + color: 'var(--chart-2)', + }, + manufacturing: { + label: 'Manufacturing', + color: 'var(--chart-3)', + }, + telecommunications: { + label: 'Telecommunications', + color: 'var(--chart-4)', + }, + defense: { + label: 'Defense', + color: 'var(--chart-5)', + }, +}; + +export function TargetedSectorsChart() { + return ( + +
+

Targeted Sectors

+
+ + + + + } /> + + + + + + + +
+ {Object.entries(chartConfig).map(([key, config]) => ( +
+
+ {config.label} +
+ ))} +
+ + ); +} \ No newline at end of file diff --git a/flowsint-app/src/renderer/src/components/graphs/empty-state.tsx b/flowsint-app/src/renderer/src/components/graphs/empty-state.tsx index 1dc3288..91faf1c 100644 --- a/flowsint-app/src/renderer/src/components/graphs/empty-state.tsx +++ b/flowsint-app/src/renderer/src/components/graphs/empty-state.tsx @@ -1,19 +1,93 @@ -import { PlusIcon } from 'lucide-react' -import { memo } from 'react' +import { PlusIcon, Zap, GitBranch } from 'lucide-react' +import { memo, useCallback } from 'react' import { Button } from '@/components/ui/button' -import NewActions from './new-actions' +import { useGraphStore } from '@/stores/graph-store' +const EmptyState = memo(() => { + const setOpenMainDialog = useGraphStore(state => state.setOpenMainDialog) -const EmptyState = memo(() => ( -
- Your nodes will be displayed here. - - - -
-)) + const handleOpenNewActionDialog = useCallback(() => { + setOpenMainDialog(true) + }, [setOpenMainDialog]) + + return ( +
+ {/* Animated Graph Illustration */} +
+ + {/* Connections */} + + + + + + + + + {/* Nodes */} + + + + + + + + + {/* Floating particles */} + + + + + + + + + {/* Glow effect */} +
+
+ + {/* Content */} +
+
+

+ Ready to explore connections? +

+

+ Your investigation graph will come to life here. Add nodes, discover relationships, + and uncover hidden patterns in your data. +

+
+ + {/* Feature highlights */} +
+
+ + Network mapping +
+
+ + Real-time analysis +
+
+
+ + {/* Call to action */} +
+ +

+ Add your first node to begin +

+
+
+ ) +}) EmptyState.displayName = "EmptyState" diff --git a/flowsint-app/src/renderer/src/components/graphs/graph-panel.tsx b/flowsint-app/src/renderer/src/components/graphs/graph-panel.tsx index 0ed2e49..57204a9 100644 --- a/flowsint-app/src/renderer/src/components/graphs/graph-panel.tsx +++ b/flowsint-app/src/renderer/src/components/graphs/graph-panel.tsx @@ -7,7 +7,7 @@ import { ArrowDownToLineIcon } from 'lucide-react' import { CreateRelationDialog } from './create-relation' import GraphLoader from './graph-loader' import Loader from '../loader' -import WallEditor from './wall/wall' +// import WallEditor from './wall/wall' import { useGraphControls } from '@/stores/graph-controls-store' import { NodeEditorModal } from './node-editor-modal' import NodesTable from '../table' @@ -18,10 +18,11 @@ import MapPanel from '../map/map-panel' import NewActions from './new-actions' const GraphReactForce = lazy(() => import('./graph-react-force')) const RelationshipsTable = lazy(() => import('@/components/table/relationships-view')) +// const GraphReactSigma = lazy(() => import('./graph-react-sigma')) -const Graph = lazy(() => import('./graph')) +// const Graph = lazy(() => import('./graph')) -const NODE_COUNT_THRESHOLD = 500; +const NODE_COUNT_THRESHOLD = 1000; // Separate component for the drag overlay const DragOverlay = memo(({ isDragging }: { isDragging: boolean }) => ( @@ -131,16 +132,15 @@ const GraphPanel = ({ graphData, isLoading, isRefetching }: GraphPanelProps) =>
}> - {/* */} {nodes?.length > NODE_COUNT_THRESHOLD ? ( <>{view === "table" && } - {["force", "hierarchy"].includes(view) && } + {["force", "hierarchy"].includes(view) && } {view === "map" && } {view === "relationships" && } ) : (<> {view === "force" && } - {view === "hierarchy" && } + {/* {view === "hierarchy" && } */} {view === "table" && } {view === "map" && } {view === "relationships" && } diff --git a/flowsint-app/src/renderer/src/components/graphs/graph-react-force.tsx b/flowsint-app/src/renderer/src/components/graphs/graph-react-force.tsx index c6fde35..1a9edce 100644 --- a/flowsint-app/src/renderer/src/components/graphs/graph-react-force.tsx +++ b/flowsint-app/src/renderer/src/components/graphs/graph-react-force.tsx @@ -21,10 +21,11 @@ interface LabelBounds { nodeSize: number; } -const NODE_COUNT_THRESHOLD = 10; +const NODE_COUNT_THRESHOLD = 1000; const ZOOM_MIN = 0.3; const ZOOM_INTERVAL = 2; const ZOOM_MAX = 10; +const ZOOM_THRESHOLD = 4; const GraphReactForce: React.FC = () => { const nodes = useGraphStore(s => s.nodes) as GraphNode[]; @@ -39,7 +40,7 @@ const GraphReactForce: React.FC = () => { const [currentZoom, setCurrentZoom] = useState(1); const shouldUseSimpleRendering = useMemo(() => { - return nodes.length > NODE_COUNT_THRESHOLD || currentZoom < 2.5; + return nodes.length > NODE_COUNT_THRESHOLD || currentZoom < ZOOM_THRESHOLD; }, [nodes.length, currentZoom]); // Transform data for Force Graph @@ -125,9 +126,22 @@ const GraphReactForce: React.FC = () => { ctx.arc(node.x, node.y, size * 0.65, 0, 2 * Math.PI); ctx.fillStyle = node.nodeColor; ctx.fill(); + + // Add border with same color + ctx.strokeStyle = node.nodeColor; + ctx.lineWidth = 0.75; + ctx.stroke(); }, []); const drawNodeIcon = useCallback((ctx: CanvasRenderingContext2D, node: any, size: number, type: ItemType) => { + // Draw circular border first + ctx.beginPath(); + ctx.arc(node.x, node.y, size * 0.70, 0, 2 * Math.PI); + ctx.strokeStyle = node.nodeColor; + ctx.lineWidth = 0.75; + 
ctx.stroke(); + + // Draw icon on top const img = new Image(); img.src = `/icons/${type}.svg`; ctx.drawImage(img, node.x - size / 2, node.y - size / 2, size, size); diff --git a/flowsint-app/src/renderer/src/components/graphs/graph-react-sigma.tsx b/flowsint-app/src/renderer/src/components/graphs/graph-react-sigma.tsx new file mode 100644 index 0000000..0ed82ac --- /dev/null +++ b/flowsint-app/src/renderer/src/components/graphs/graph-react-sigma.tsx @@ -0,0 +1,140 @@ +import React, { useEffect, useRef, useMemo } from 'react'; +import Graph from 'graphology'; +import Sigma from 'sigma'; +import { GraphEdge, GraphNode, useGraphStore } from '@/stores/graph-store'; +import { useNodesDisplaySettings } from '@/stores/node-display-settings'; +import { useGraphControls } from '@/stores/graph-controls-store'; +import type { ItemType } from '@/stores/node-display-settings'; + +const GraphReactSigma: React.FC = () => { + const containerRef = useRef(null); + const sigmaRef = useRef(null); + const nodes = useGraphStore(s => s.nodes) as GraphNode[]; + const edges = useGraphStore(s => s.edges) as GraphEdge[]; + const getSize = useNodesDisplaySettings(s => s.getSize); + const colors = useNodesDisplaySettings(s => s.colors) as Record; + const toggleNodeSelection = useGraphStore(s => s.toggleNodeSelection); + const clearSelectedNodes = useGraphStore(s => s.clearSelectedNodes); + const setActions = useGraphControls(s => s.setActions); + + console.log(edges) + // Memoize processed data for performance + const processedNodes = useMemo(() => { + return nodes.map(node => ({ + id: node.id, + label: node.data?.label || node.id, + type: node.data?.type as ItemType, + size: getSize(node.data?.type as ItemType), + color: colors[node.data?.type as ItemType] || '#0074D9', + originalNode: node + })); + }, [nodes, getSize, colors]); + + // Initialize Sigma + useEffect(() => { + if (!containerRef.current) return; + + const graph = new Graph(); + const sigma = new Sigma(graph, containerRef.current, 
{ + defaultNodeType: 'circle', + defaultEdgeType: 'line', + renderEdgeLabels: false, + }); + + sigmaRef.current = sigma; + + // Set up zoom actions + setActions({ + zoomIn: () => sigma.getCamera().animatedZoom({ duration: 300 }), + zoomOut: () => sigma.getCamera().animatedUnzoom({ duration: 300 }), + zoomToFit: () => sigma.getCamera().animatedReset({ duration: 300 }), + }); + + // Set up event handlers + sigma.on('clickNode', (event) => { + const originalNode = processedNodes.find(n => n.id === event.node)?.originalNode; + if (originalNode) { + toggleNodeSelection(originalNode, false); + } + }); + + sigma.on('clickStage', () => { + clearSelectedNodes(); + }); + + return () => { + sigma.kill(); + }; + }, [setActions, toggleNodeSelection, clearSelectedNodes, processedNodes]); + + // Update graph data + useEffect(() => { + if (!sigmaRef.current) return; + if (processedNodes.length === 0) return; + const graph = sigmaRef.current.getGraph(); + graph.clear(); + // Add nodes with simple random positioning + processedNodes.forEach(node => { + const x = (Math.random() - 0.5) * 400; + const y = (Math.random() - 0.5) * 400; + + graph.addNode(node.id, { + label: node.label, + size: node.size, + color: node.color, + x: x, + y: y, + }); + }); + + edges.forEach(edge => { + const sourceExists = graph.hasNode(edge.source); + const targetExists = graph.hasNode(edge.target); + if (sourceExists && targetExists) { + graph.addEdge(edge.source, edge.target, { label: edge.label }); + } + }); + // Force a refresh and fit to view + setTimeout(() => { + sigmaRef.current?.getCamera().animatedReset({ duration: 300 }); + sigmaRef.current?.refresh(); + }, 100); + + }, [processedNodes, edges]); + + // Handle container resize + useEffect(() => { + if (!containerRef.current || !sigmaRef.current) return; + + const resizeObserver = new ResizeObserver(() => { + sigmaRef.current?.refresh(); + }); + + resizeObserver.observe(containerRef.current); + + return () => { + resizeObserver.disconnect(); + }; + 
}, []); + + if (!nodes.length) { + return ( +
+ No nodes to display +
+ ); + } + + return ( +
+ ); +}; + +export default GraphReactSigma; \ No newline at end of file diff --git a/flowsint-app/src/renderer/src/components/graphs/toolbar.tsx b/flowsint-app/src/renderer/src/components/graphs/toolbar.tsx index 178078e..de6821e 100644 --- a/flowsint-app/src/renderer/src/components/graphs/toolbar.tsx +++ b/flowsint-app/src/renderer/src/components/graphs/toolbar.tsx @@ -11,7 +11,7 @@ import { ZoomIn, RotateCw, GitPullRequestCreate, - GitFork, + // GitFork, Waypoints, Table, MapPin, @@ -22,7 +22,7 @@ import { sketchService } from "@/api/sketch-service" import { useParams } from "@tanstack/react-router" import { toast } from "sonner" import { cn } from "@/lib/utils" -import { useKeyboardShortcut } from "@/hooks/use-keyboard-shortcut" +// import { useKeyboardShortcut } from "@/hooks/use-keyboard-shortcut" // Tooltip wrapper component to avoid repetition const ToolbarButton = memo(function ToolbarButton({ @@ -72,7 +72,7 @@ export const Toolbar = memo(function Toolbar({ isLoading }: { isLoading: boolean const { confirm } = useConfirm() const refetchGraph = useGraphControls((s) => s.refetchGraph) const clearSelectedNodes = useGraphStore((s) => s.clearSelectedNodes) - const nodesLength = useGraphStore((s) => s.getNodesLength()) + // const nodesLength = useGraphStore((s) => s.getNodesLength()) const handleRefresh = useCallback(() => { try { @@ -106,7 +106,7 @@ export const Toolbar = memo(function Toolbar({ isLoading }: { isLoading: boolean }, [selectedNodes, confirm, removeNodes, clearSelectedNodes, sketchId]) const isMoreThanZero = selectedNodes.length > 0 const isTwo = selectedNodes.length == 2 - const isGraphOnly = nodesLength > 500 + // const isGraphOnly = nodesLength > 500 // const isCosmoOnly = nodesLength > 3000 const handleForceLayout = useCallback(() => { @@ -125,21 +125,21 @@ export const Toolbar = memo(function Toolbar({ isLoading }: { isLoading: boolean setView("relationships") }, [setView]) - const handleDagreLayoutTB = useCallback(() => { - 
setView("hierarchy") - onLayout && onLayout("dagre-tb") - }, [onLayout, setView]) + // const handleDagreLayoutTB = useCallback(() => { + // setView("hierarchy") + // onLayout && onLayout("dagre-tb") + // }, [onLayout, setView]) - const handleDagreLayoutLR = useCallback(() => { - setView("hierarchy") - onLayout && onLayout("dagre-lr") - }, [onLayout, setView]) + // const handleDagreLayoutLR = useCallback(() => { + // setView("hierarchy") + // onLayout && onLayout("dagre-lr") + // }, [onLayout, setView]) - const { isMac } = useKeyboardShortcut({ - key: "y", - ctrlOrCmd: true, - callback: handleDagreLayoutTB - }) + // const { isMac } = useKeyboardShortcut({ + // key: "y", + // ctrlOrCmd: true, + // callback: handleDagreLayoutTB + // }) return (
@@ -176,7 +176,7 @@ export const Toolbar = memo(function Toolbar({ isLoading }: { isLoading: boolean onClick={zoomToFit} /> } - } tooltip={isGraphOnly ? "Graph is too large to render in hierarchy layout" : `Hierarchy (${isMac ? '⌘' : 'ctrl'}+Y)`} onClick={handleDagreLayoutLR} @@ -187,7 +187,7 @@ export const Toolbar = memo(function Toolbar({ isLoading }: { isLoading: boolean tooltip={isGraphOnly ? "Graph is too large to render in hierarchy layout" : `Hierarchy (${isMac ? '⌘' : 'ctrl'}+Y)`} onClick={handleDagreLayoutTB} disabled={isGraphOnly} - /> + /> */} } tooltip={"Graph view"} diff --git a/flowsint-app/src/renderer/src/components/layout/log-panel.tsx b/flowsint-app/src/renderer/src/components/layout/log-panel.tsx index 2d46031..e5cdfc6 100644 --- a/flowsint-app/src/renderer/src/components/layout/log-panel.tsx +++ b/flowsint-app/src/renderer/src/components/layout/log-panel.tsx @@ -140,11 +140,17 @@ export function LogPanel() {
{logs.length === 0 ? ( -
- -

No logs to display

+
+ {/*
+
+
+
+
*/} +

Waiting for investigation activity

+

Events will appear here as they happen

) : ( + logs.map((log, i) => { const config = logLevelConfig[log.type] || defaultConfig const Icon = config.icon diff --git a/flowsint-app/src/renderer/src/components/table/relationships-view.tsx b/flowsint-app/src/renderer/src/components/table/relationships-view.tsx index 94590ce..ac9d050 100644 --- a/flowsint-app/src/renderer/src/components/table/relationships-view.tsx +++ b/flowsint-app/src/renderer/src/components/table/relationships-view.tsx @@ -193,7 +193,7 @@ export default function RelationshipsTable() { if (!relationships || relationships.length === 0) { return ( -
+
diff --git a/flowsint-app/src/renderer/src/components/transforms/editor.tsx b/flowsint-app/src/renderer/src/components/transforms/editor.tsx index c337136..0ed0415 100644 --- a/flowsint-app/src/renderer/src/components/transforms/editor.tsx +++ b/flowsint-app/src/renderer/src/components/transforms/editor.tsx @@ -222,9 +222,10 @@ const TransformEditorFlow = memo(({ initialEdges, initialNodes, theme, transform inputs: scannerData.inputs, outputs: scannerData.outputs, doc: scannerData.doc, - requires_key: scannerData.requires_key, + required_params: scannerData.required_params, params: scannerData.params, - params_schema: scannerData.params_schema + params_schema: scannerData.params_schema, + icon: scannerData.icon }, } const updatedNodes = [...nodes, newNode] diff --git a/flowsint-app/src/renderer/src/components/transforms/params-dialog.tsx b/flowsint-app/src/renderer/src/components/transforms/params-dialog.tsx index c6fcd40..fbac8d3 100644 --- a/flowsint-app/src/renderer/src/components/transforms/params-dialog.tsx +++ b/flowsint-app/src/renderer/src/components/transforms/params-dialog.tsx @@ -10,6 +10,8 @@ import { type Key } from '@/types/key' import { useQuery } from "@tanstack/react-query" import { KeyService } from "@/api/key-service" import { Tabs, TabsList, TabsTrigger, TabsContent } from '../ui/tabs' +import { MemoizedMarkdown } from '../chat/memoized-markdown' +import { cn } from '@/lib/utils' const ParamsDialog = () => { const openParamsDialog = useTransformStore(s => s.openParamsDialog) @@ -42,13 +44,13 @@ const ParamsDialog = () => { const handleSave = useCallback(async () => { if (!selectedNode) return - const updatedNode = { - ...selectedNode, - data: { - ...selectedNode.data, + const updatedNode = { + ...selectedNode, + data: { + ...selectedNode.data, params, - settings - } + settings + } } updateNode(updatedNode) setOpenParamsDialog(false) @@ -61,7 +63,17 @@ const ParamsDialog = () => { Configure {selectedNode.data.class_name} - {selectedNode.data.doc} 
+ {/* */} +
+
+ +
+
@@ -97,7 +109,7 @@ const ParamsDialog = () => { ))}
- +
@@ -115,7 +127,7 @@ const ParamsDialog = () => { onChange={(e) => setSettings({ ...settings, duration: e.target.value })} />
- +
- +
- +
-
{data.class_name}
+
+ {Icon && } +
{data.class_name}
+

{data.doc}

diff --git a/flowsint-app/src/renderer/src/components/transforms/transform-sheet.tsx b/flowsint-app/src/renderer/src/components/transforms/transform-sheet.tsx index d039a08..7563b05 100644 --- a/flowsint-app/src/renderer/src/components/transforms/transform-sheet.tsx +++ b/flowsint-app/src/renderer/src/components/transforms/transform-sheet.tsx @@ -17,6 +17,7 @@ import { transformService } from "@/api/transfrom-service" import { Input } from "../ui/input" import { Alert, AlertDescription } from "../ui/alert" import { categoryColors } from "./scanner-data" +import { useIcon } from "@/hooks/use-icon" const TransformSheet = ({ onLayout }: { onLayout: () => void }) => { const openTransformSheet = useTransformStore(state => state.openTransformSheet) @@ -69,9 +70,10 @@ const TransformSheet = ({ onLayout }: { onLayout: () => void }) => { inputs: scanner.inputs, outputs: scanner.outputs, doc: scanner.doc, - requires_key: scanner.requires_key, + required_params: scanner.required_params, params: scanner.params, - params_schema: scanner.params_schema + params_schema: scanner.params_schema, + icon: scanner.icon }, } setNodes((prev) => ([...prev, newNode])) @@ -93,10 +95,10 @@ const TransformSheet = ({ onLayout }: { onLayout: () => void }) => { - Add connector to {selectedNode?.data.name} + Add connector to {selectedNode?.data.class_name} Choose a transform to launch from the list below. -
+
s.colors) const borderInputColor = colors[scanner.inputs.type.toLowerCase()] const borderOutputColor = colors[scanner.outputs.type.toLowerCase()] + const Icon = scanner.type === "type" ? useIcon(scanner.outputs.type.toLowerCase() as string, null) : scanner.icon ? useIcon(scanner.icon, null) : null return ( @@ -169,7 +172,10 @@ const ScannerItem = memo(({ scanner, onClick }: { scanner: Scanner, onClick: ()
-

{scanner.class_name}

+
+ {Icon && } +

{scanner.class_name}

+

{scanner.doc}

@@ -185,7 +191,7 @@ const ScannerItem = memo(({ scanner, onClick }: { scanner: Scanner, onClick: ()
- {scanner.requires_key && + {scanner.required_params &&
diff --git a/flowsint-app/src/renderer/src/routes/__root.tsx b/flowsint-app/src/renderer/src/routes/__root.tsx index 49d4a78..8f1ebb8 100644 --- a/flowsint-app/src/renderer/src/routes/__root.tsx +++ b/flowsint-app/src/renderer/src/routes/__root.tsx @@ -13,7 +13,7 @@ export const Route = createRootRouteWithContext()({ const { theme } = useTheme() return ( <> - + ) diff --git a/flowsint-app/src/renderer/src/routes/_auth.tsx b/flowsint-app/src/renderer/src/routes/_auth.tsx index 75a7a50..138903d 100644 --- a/flowsint-app/src/renderer/src/routes/_auth.tsx +++ b/flowsint-app/src/renderer/src/routes/_auth.tsx @@ -2,14 +2,12 @@ import { Link, Outlet, createFileRoute, - useNavigate, } from '@tanstack/react-router' import { requireAuth } from '@/lib/auth-utils' import { Button } from '@/components/ui/button' -import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/components/ui/card' -import { AlertTriangle, Home, RefreshCw, ArrowLeft } from 'lucide-react' -import { useState } from 'react' +import { Card, CardDescription, CardFooter, CardHeader, CardTitle } from '@/components/ui/card' +import { AlertTriangle, Home } from 'lucide-react' export const Route = createFileRoute('/_auth')({ beforeLoad: ({ location }) => { diff --git a/flowsint-app/src/renderer/src/types/transform.ts b/flowsint-app/src/renderer/src/types/transform.ts index 8c02e2e..521fe68 100644 --- a/flowsint-app/src/renderer/src/types/transform.ts +++ b/flowsint-app/src/renderer/src/types/transform.ts @@ -34,10 +34,11 @@ export interface Scanner { inputs: ScannerIO outputs: ScannerIO type: string - requires_key: boolean + required_params: boolean params: Record params_schema: ScannerParamSchemaItem[] settings?: Record + icon: string | null } // ================================ diff --git a/flowsint-app/yarn.lock b/flowsint-app/yarn.lock index 12c2d34..a56e00b 100644 --- a/flowsint-app/yarn.lock +++ b/flowsint-app/yarn.lock @@ -1599,6 +1599,20 @@ resolved 
"https://registry.yarnpkg.com/@react-sigma/core/-/core-5.0.4.tgz#8207e340c2103a3aad7871be5a6d31b27d198072" integrity sha512-C0hjr069x9oLlVe7t+Y1vE8Qi9UV1dhvrwxNi96TWxrFcTSHO5ntduj5yKTGnUF4SiIszAz6Xmjga8SHT/836Q== +"@react-sigma/layout-core@^5.0.4": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@react-sigma/layout-core/-/layout-core-5.0.4.tgz#c8c9603d833416c509144c5936f48c0269500280" + integrity sha512-R9Mm59CTwSla6vHXnN+m1TQlJTeyDaeD4agUqJahg/cgWD16oPvvKN5Yhv3Kn+SfZdaTiv5gcoryg0t8/zGX+A== + dependencies: + "@react-sigma/core" "^5.0.4" + +"@react-sigma/layout-forceatlas2@^5.0.4": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@react-sigma/layout-forceatlas2/-/layout-forceatlas2-5.0.4.tgz#b9338ca3ff960d49e5548bcaf7466c38f245960e" + integrity sha512-OLIvSl1lU0nNemWR6iw/56oIDZEOBTd3bJlyIA9Bh2GXA2UhDzdSaksd+GH4gFQ4jYJpuL+VgWGFND2pO6zYbg== + dependencies: + "@react-sigma/layout-core" "^5.0.4" + "@remirror/core-constants@3.0.0": version "3.0.0" resolved "https://registry.yarnpkg.com/@remirror/core-constants/-/core-constants-3.0.0.tgz#96fdb89d25c62e7b6a5d08caf0ce5114370e3b8f"