diff --git a/flowsint-transforms/src/flowsint_transforms/domain/to_history.py b/flowsint-transforms/src/flowsint_transforms/domain/to_history.py index 1cef979..ed3d895 100644 --- a/flowsint-transforms/src/flowsint_transforms/domain/to_history.py +++ b/flowsint-transforms/src/flowsint_transforms/domain/to_history.py @@ -505,18 +505,19 @@ class DomainToHistoryTransform(Transform): # Process email addresses if individual.email_addresses: - for email in individual.email_addresses: - if email and email not in processed_emails: - processed_emails.add(email) + for email_obj in individual.email_addresses: + email_str = email_obj.email + if email_str and email_str not in processed_emails: + processed_emails.add(email_str) Logger.info( self.sketch_id, - {"message": f"[WHOXY] Creating email node: {email}"}, + {"message": f"[WHOXY] Creating email node: {email_str}"}, ) self.create_node( "email", "email", - email, - caption=email, + email_str, + caption=email_str, type="email", ) self.create_relationship( @@ -525,24 +526,25 @@ class DomainToHistoryTransform(Transform): individual.full_name, "email", "email", - email, + email_str, "HAS_EMAIL", ) # Process phone numbers if individual.phone_numbers: - for phone in individual.phone_numbers: - if phone and phone not in processed_phones: - processed_phones.add(phone) + for phone_obj in individual.phone_numbers: + phone_str = phone_obj.number + if phone_str and phone_str not in processed_phones: + processed_phones.add(phone_str) Logger.info( self.sketch_id, - {"message": f"[WHOXY] Creating phone node: {phone}"}, + {"message": f"[WHOXY] Creating phone node: {phone_str}"}, ) self.create_node( "phone", "number", - phone, - caption=phone, + phone_str, + caption=phone_str, type="phone", ) self.create_relationship( @@ -551,7 +553,7 @@ class DomainToHistoryTransform(Transform): individual.full_name, "phone", "number", - phone, + phone_str, "HAS_PHONE", ) diff --git a/flowsint-transforms/src/flowsint_transforms/domain/to_whois.py b/flowsint-transforms/src/flowsint_transforms/domain/to_whois.py index 77cfe3a..681a087 100644 --- a/flowsint-transforms/src/flowsint_transforms/domain/to_whois.py +++ b/flowsint-transforms/src/flowsint_transforms/domain/to_whois.py @@ -1,12 +1,10 @@ -from typing import List, Union +from typing import List import whois -from flowsint_core.utils import is_valid_domain from flowsint_core.core.transform_base import Transform -from flowsint_types.domain import Domain, Domain +from flowsint_types.domain import Domain from flowsint_types.whois import Whois from flowsint_types.email import Email from flowsint_core.core.logger import Logger -from datetime import datetime class WhoisTransform(Transform): @@ -28,22 +26,6 @@ class WhoisTransform(Transform): def key(cls) -> str: return "domain" - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: - cleaned: InputType = [] - for item in data: - domain_obj = None - if isinstance(item, str): - if is_valid_domain(item): - domain_obj = Domain(domain=item) - elif isinstance(item, dict) and "domain" in item: - if is_valid_domain(item["domain"]): - domain_obj = Domain(domain=item["domain"]) - elif isinstance(item, Domain): - domain_obj = item - if domain_obj: - cleaned.append(domain_obj) - return cleaned - async def scan(self, data: InputType) -> OutputType: results: OutputType = [] for domain in data: @@ -85,8 +67,19 @@ class WhoisTransform(Transform): else: expiration_date_str = whois_info.expiration_date.isoformat() + # Extract registry domain ID + registry_domain_id = None + if ( + hasattr(whois_info, "registry_domain_id") + and whois_info.registry_domain_id + ): + registry_domain_id = str(whois_info.registry_domain_id) + elif hasattr(whois_info, "domain_id") and whois_info.domain_id: + registry_domain_id = str(whois_info.domain_id) + whois_obj = Whois( - domain=domain.domain, + domain=domain, + registry_domain_id=registry_domain_id, registrar=( str(whois_info.registrar) if whois_info.registrar else None ), @@ -114,23 +107,27 @@ class WhoisTransform(Transform): continue # Create domain node - self.create_node("domain", "domain", whois_obj.domain, **whois_obj.__dict__) + self.create_node( + "domain", + "domain", + whois_obj.domain.domain, + root=whois_obj.domain.root, + type="domain", + ) # Create whois node - whois_key = f"{whois_obj.domain}_{self.sketch_id}" - whois_label = f"Whois-{whois_obj.domain}" - # Creating unique label - date_format = "%Y-%m-%dT%H:%M:%S" - try: - year = datetime.strptime(whois_obj.creation_date, date_format).year - whois_label = f"{whois_label}-{year}" - except Exception: - continue + whois_key = f"{whois_obj.domain.domain}_{self.sketch_id}" + + if whois_obj.registry_domain_id: + whois_label = whois_obj.registry_domain_id + else: + whois_label = whois_obj.domain.domain self.create_node( "whois", "whois_id", whois_key, - domain=whois_obj.domain, + domain=whois_obj.domain.domain, + registry_domain_id=whois_obj.registry_domain_id, registrar=whois_obj.registrar, org=whois_obj.org, city=whois_obj.city, @@ -146,7 +143,7 @@ class WhoisTransform(Transform): self.create_relationship( "domain", "domain", - whois_obj.domain, + whois_obj.domain.domain, "whois", "whois_id", whois_key, @@ -161,7 +158,7 @@ class WhoisTransform(Transform): whois_obj.org, country=whois_obj.country, founding_date=whois_obj.creation_date, - description=f"Organization from WHOIS data for {whois_obj.domain}", + description=f"Organization from WHOIS data for {whois_obj.domain.domain}", caption=whois_obj.org, type="organization", ) @@ -173,12 +170,12 @@ class WhoisTransform(Transform): whois_obj.org, "domain", "domain", - whois_obj.domain, + whois_obj.domain.domain, "HAS_DOMAIN", ) self.log_graph_message( - f"{whois_obj.domain} -> {whois_obj.org} (organization)" + f"{whois_obj.domain.domain} -> {whois_obj.org} (organization)" ) if whois_obj.email: @@ -196,7 +193,7 @@ class WhoisTransform(Transform): ) self.log_graph_message( - f"WHOIS for {whois_obj.domain} -> registrar: {whois_obj.registrar} org: {whois_obj.org} city: {whois_obj.city} country: {whois_obj.country} creation_date: {whois_obj.creation_date} expiration_date: {whois_obj.expiration_date}" + f"WHOIS for {whois_obj.domain.domain} -> registry_id: {whois_obj.registry_domain_id} registrar: {whois_obj.registrar} org: {whois_obj.org} city: {whois_obj.city} country: {whois_obj.country} creation_date: {whois_obj.creation_date} expiration_date: {whois_obj.expiration_date}" ) return results diff --git a/flowsint-transforms/src/flowsint_transforms/email/to_domains.py b/flowsint-transforms/src/flowsint_transforms/email/to_domains.py index 417e4f1..33c7a56 100644 --- a/flowsint-transforms/src/flowsint_transforms/email/to_domains.py +++ b/flowsint-transforms/src/flowsint_transforms/email/to_domains.py @@ -1,6 +1,6 @@ import os import re -from typing import Any, List, Union, Dict, Set, Optional +from typing import Any, List, Dict, Set, Optional from flowsint_core.core.transform_base import Transform from flowsint_types.domain import Domain from flowsint_types.individual import Individual @@ -68,20 +68,6 @@ class EmailToDomainsTransform(Transform): def key(cls) -> str: return "email" - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: - cleaned: InputType = [] - for item in data: - email_obj = None - if isinstance(item, str): - email_obj = Email(email=item) - elif isinstance(item, dict) and "email" in item: - email_obj = Email(email=item["email"]) - elif isinstance(item, Email): - email_obj = item - if email_obj: - cleaned.append(email_obj) - return cleaned - async def scan(self, data: InputType) -> OutputType: """Find domains related to emails using whoxy api.""" domains: OutputType = [] @@ -360,16 +346,17 @@ class EmailToDomainsTransform(Transform): # Process email addresses if individual.email_addresses: - for email in individual.email_addresses: - if email and email not in processed_emails: - processed_emails.add(email) + for email_obj in individual.email_addresses: + email_str = email_obj.email + if email_str and email_str not in processed_emails: + processed_emails.add(email_str) # Create email node self.create_node( "email", "email", - email, - caption=email, + email_str, + caption=email_str, type="email", ) @@ -380,22 +367,23 @@ class EmailToDomainsTransform(Transform): individual.full_name, "email", "email", - email, + email_str, "HAS_EMAIL", ) # Process phone numbers if individual.phone_numbers: - for phone in individual.phone_numbers: - if phone and phone not in processed_phones: - processed_phones.add(phone) + for phone_obj in individual.phone_numbers: + phone_str = phone_obj.number + if phone_str and phone_str not in processed_phones: + processed_phones.add(phone_str) # Create phone node self.create_node( "phone", "number", - phone, - caption=phone, + phone_str, + caption=phone_str, type="phone", ) @@ -406,7 +394,7 @@ class EmailToDomainsTransform(Transform): individual.full_name, "phone", "number", - phone, + phone_str, "HAS_PHONE", ) diff --git a/flowsint-transforms/src/flowsint_transforms/individual/to_domains.py b/flowsint-transforms/src/flowsint_transforms/individual/to_domains.py index d070ec2..c04b310 100644 --- a/flowsint-transforms/src/flowsint_transforms/individual/to_domains.py +++ b/flowsint-transforms/src/flowsint_transforms/individual/to_domains.py @@ -375,16 +375,17 @@ class IndividualToDomainsTransform(Transform): # Process email addresses if contact_individual.email_addresses: - for email in contact_individual.email_addresses: - if email and email not in processed_emails: - processed_emails.add(email) + for email_obj in contact_individual.email_addresses: + email_str = email_obj.email + if email_str and email_str not in processed_emails: + processed_emails.add(email_str) # Create email node self.create_node( "email", "email", - email, - email=email, + email_str, + email=email_str, ) # Create relationship between individual and email @@ -394,22 +395,23 @@ class IndividualToDomainsTransform(Transform): contact_individual.full_name, "email", "email", - email, + email_str, "HAS_EMAIL", ) # Process phone numbers if contact_individual.phone_numbers: - for phone in contact_individual.phone_numbers: - if phone and phone not in processed_phones: - processed_phones.add(phone) + for phone_obj in contact_individual.phone_numbers: + phone_str = phone_obj.number + if phone_str and phone_str not in processed_phones: + processed_phones.add(phone_str) # Create phone node self.create_node( "phone", "number", - phone, - number=phone, + phone_str, + number=phone_str, ) # Create relationship between individual and phone @@ -419,7 +421,7 @@ class IndividualToDomainsTransform(Transform): contact_individual.full_name, "phone", "number", - phone, + phone_str, "HAS_PHONE", ) diff --git a/flowsint-transforms/src/flowsint_transforms/organization/to_domains.py b/flowsint-transforms/src/flowsint_transforms/organization/to_domains.py index 9a8a0fc..67aa62b 100644 --- a/flowsint-transforms/src/flowsint_transforms/organization/to_domains.py +++ b/flowsint-transforms/src/flowsint_transforms/organization/to_domains.py @@ -1,6 +1,6 @@ import os import re -from typing import Any, List, Union, Dict, Set, Optional +from typing import Any, List, Dict, Set, Optional from flowsint_core.core.transform_base import Transform from flowsint_types.domain import Domain from flowsint_types.organization import Organization @@ -67,20 +67,6 @@ class OrgToDomainsTransform(Transform): def key(cls) -> str: return "name" - def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType: - cleaned: InputType = [] - for item in data: - org_obj = None - if isinstance(item, str): - org_obj = Organization(name=item) - elif isinstance(item, dict) and "name" in item: - org_obj = Organization(name=item["name"]) - elif isinstance(item, Organization): - org_obj = item - if org_obj: - cleaned.append(org_obj) - return cleaned - async def scan(self, data: InputType) -> OutputType: """Find domains related to organizations using whoxy api.""" domains: OutputType = [] @@ -526,18 +512,19 @@ class OrgToDomainsTransform(Transform): # Process email addresses if individual.email_addresses: - for email in individual.email_addresses: - if email and email not in processed_emails: - processed_emails.add(email) + for email_obj in individual.email_addresses: + email_str = email_obj.email + if email_str and email_str not in processed_emails: + processed_emails.add(email_str) Logger.info( self.sketch_id, - {"message": f"[WHOXY] Creating email node: {email}"}, + {"message": f"[WHOXY] Creating email node: {email_str}"}, ) self.create_node( "email", "email", - email, - caption=email, + email_str, + caption=email_str, type="email", ) self.create_relationship( @@ -546,24 +533,25 @@ class OrgToDomainsTransform(Transform): individual.full_name, "email", "email", - email, + email_str, "HAS_EMAIL", ) # Process phone numbers if individual.phone_numbers: - for phone in individual.phone_numbers: - if phone and phone not in processed_phones: - processed_phones.add(phone) + for phone_obj in individual.phone_numbers: + phone_str = phone_obj.number + if phone_str and phone_str not in processed_phones: + processed_phones.add(phone_str) Logger.info( self.sketch_id, - {"message": f"[WHOXY] Creating phone node: {phone}"}, + {"message": f"[WHOXY] Creating phone node: {phone_str}"}, ) self.create_node( "phone", "number", - phone, - caption=phone, + phone_str, + caption=phone_str, type="phone", ) self.create_relationship( @@ -572,7 +560,7 @@ class OrgToDomainsTransform(Transform): individual.full_name, "phone", "number", - phone, + phone_str, "HAS_PHONE", )