mirror of
https://github.com/reconurge/flowsint.git
synced 2026-03-11 17:34:31 -05:00
feat(transforms): remove some usage of preprocess
This commit is contained in:
@@ -505,18 +505,19 @@ class DomainToHistoryTransform(Transform):
|
||||
|
||||
# Process email addresses
|
||||
if individual.email_addresses:
|
||||
for email in individual.email_addresses:
|
||||
if email and email not in processed_emails:
|
||||
processed_emails.add(email)
|
||||
for email_obj in individual.email_addresses:
|
||||
email_str = email_obj.email
|
||||
if email_str and email_str not in processed_emails:
|
||||
processed_emails.add(email_str)
|
||||
Logger.info(
|
||||
self.sketch_id,
|
||||
{"message": f"[WHOXY] Creating email node: {email}"},
|
||||
{"message": f"[WHOXY] Creating email node: {email_str}"},
|
||||
)
|
||||
self.create_node(
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
caption=email,
|
||||
email_str,
|
||||
caption=email_str,
|
||||
type="email",
|
||||
)
|
||||
self.create_relationship(
|
||||
@@ -525,24 +526,25 @@ class DomainToHistoryTransform(Transform):
|
||||
individual.full_name,
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
email_str,
|
||||
"HAS_EMAIL",
|
||||
)
|
||||
|
||||
# Process phone numbers
|
||||
if individual.phone_numbers:
|
||||
for phone in individual.phone_numbers:
|
||||
if phone and phone not in processed_phones:
|
||||
processed_phones.add(phone)
|
||||
for phone_obj in individual.phone_numbers:
|
||||
phone_str = phone_obj.number
|
||||
if phone_str and phone_str not in processed_phones:
|
||||
processed_phones.add(phone_str)
|
||||
Logger.info(
|
||||
self.sketch_id,
|
||||
{"message": f"[WHOXY] Creating phone node: {phone}"},
|
||||
{"message": f"[WHOXY] Creating phone node: {phone_str}"},
|
||||
)
|
||||
self.create_node(
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
caption=phone,
|
||||
phone_str,
|
||||
caption=phone_str,
|
||||
type="phone",
|
||||
)
|
||||
self.create_relationship(
|
||||
@@ -551,7 +553,7 @@ class DomainToHistoryTransform(Transform):
|
||||
individual.full_name,
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
phone_str,
|
||||
"HAS_PHONE",
|
||||
)
|
||||
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
from typing import List, Union
|
||||
from typing import List
|
||||
import whois
|
||||
from flowsint_core.utils import is_valid_domain
|
||||
from flowsint_core.core.transform_base import Transform
|
||||
from flowsint_types.domain import Domain, Domain
|
||||
from flowsint_types.domain import Domain
|
||||
from flowsint_types.whois import Whois
|
||||
from flowsint_types.email import Email
|
||||
from flowsint_core.core.logger import Logger
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class WhoisTransform(Transform):
|
||||
@@ -28,22 +26,6 @@ class WhoisTransform(Transform):
|
||||
def key(cls) -> str:
|
||||
return "domain"
|
||||
|
||||
def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType:
|
||||
cleaned: InputType = []
|
||||
for item in data:
|
||||
domain_obj = None
|
||||
if isinstance(item, str):
|
||||
if is_valid_domain(item):
|
||||
domain_obj = Domain(domain=item)
|
||||
elif isinstance(item, dict) and "domain" in item:
|
||||
if is_valid_domain(item["domain"]):
|
||||
domain_obj = Domain(domain=item["domain"])
|
||||
elif isinstance(item, Domain):
|
||||
domain_obj = item
|
||||
if domain_obj:
|
||||
cleaned.append(domain_obj)
|
||||
return cleaned
|
||||
|
||||
async def scan(self, data: InputType) -> OutputType:
|
||||
results: OutputType = []
|
||||
for domain in data:
|
||||
@@ -85,8 +67,19 @@ class WhoisTransform(Transform):
|
||||
else:
|
||||
expiration_date_str = whois_info.expiration_date.isoformat()
|
||||
|
||||
# Extract registry domain ID
|
||||
registry_domain_id = None
|
||||
if (
|
||||
hasattr(whois_info, "registry_domain_id")
|
||||
and whois_info.registry_domain_id
|
||||
):
|
||||
registry_domain_id = str(whois_info.registry_domain_id)
|
||||
elif hasattr(whois_info, "domain_id") and whois_info.domain_id:
|
||||
registry_domain_id = str(whois_info.domain_id)
|
||||
|
||||
whois_obj = Whois(
|
||||
domain=domain.domain,
|
||||
domain=domain,
|
||||
registry_domain_id=registry_domain_id,
|
||||
registrar=(
|
||||
str(whois_info.registrar) if whois_info.registrar else None
|
||||
),
|
||||
@@ -114,23 +107,27 @@ class WhoisTransform(Transform):
|
||||
continue
|
||||
|
||||
# Create domain node
|
||||
self.create_node("domain", "domain", whois_obj.domain, **whois_obj.__dict__)
|
||||
self.create_node(
|
||||
"domain",
|
||||
"domain",
|
||||
whois_obj.domain.domain,
|
||||
root=whois_obj.domain.root,
|
||||
type="domain",
|
||||
)
|
||||
|
||||
# Create whois node
|
||||
whois_key = f"{whois_obj.domain}_{self.sketch_id}"
|
||||
whois_label = f"Whois-{whois_obj.domain}"
|
||||
# Creating unique label
|
||||
date_format = "%Y-%m-%dT%H:%M:%S"
|
||||
try:
|
||||
year = datetime.strptime(whois_obj.creation_date, date_format).year
|
||||
whois_label = f"{whois_label}-{year}"
|
||||
except Exception:
|
||||
continue
|
||||
whois_key = f"{whois_obj.domain.domain}_{self.sketch_id}"
|
||||
|
||||
if whois_obj.registry_domain_id:
|
||||
whois_label = whois_obj.registry_domain_id
|
||||
else:
|
||||
whois_label = whois_obj.domain.domain
|
||||
self.create_node(
|
||||
"whois",
|
||||
"whois_id",
|
||||
whois_key,
|
||||
domain=whois_obj.domain,
|
||||
domain=whois_obj.domain.domain,
|
||||
registry_domain_id=whois_obj.registry_domain_id,
|
||||
registrar=whois_obj.registrar,
|
||||
org=whois_obj.org,
|
||||
city=whois_obj.city,
|
||||
@@ -146,7 +143,7 @@ class WhoisTransform(Transform):
|
||||
self.create_relationship(
|
||||
"domain",
|
||||
"domain",
|
||||
whois_obj.domain,
|
||||
whois_obj.domain.domain,
|
||||
"whois",
|
||||
"whois_id",
|
||||
whois_key,
|
||||
@@ -161,7 +158,7 @@ class WhoisTransform(Transform):
|
||||
whois_obj.org,
|
||||
country=whois_obj.country,
|
||||
founding_date=whois_obj.creation_date,
|
||||
description=f"Organization from WHOIS data for {whois_obj.domain}",
|
||||
description=f"Organization from WHOIS data for {whois_obj.domain.domain}",
|
||||
caption=whois_obj.org,
|
||||
type="organization",
|
||||
)
|
||||
@@ -173,12 +170,12 @@ class WhoisTransform(Transform):
|
||||
whois_obj.org,
|
||||
"domain",
|
||||
"domain",
|
||||
whois_obj.domain,
|
||||
whois_obj.domain.domain,
|
||||
"HAS_DOMAIN",
|
||||
)
|
||||
|
||||
self.log_graph_message(
|
||||
f"{whois_obj.domain} -> {whois_obj.org} (organization)"
|
||||
f"{whois_obj.domain.domain} -> {whois_obj.org} (organization)"
|
||||
)
|
||||
|
||||
if whois_obj.email:
|
||||
@@ -196,7 +193,7 @@ class WhoisTransform(Transform):
|
||||
)
|
||||
|
||||
self.log_graph_message(
|
||||
f"WHOIS for {whois_obj.domain} -> registrar: {whois_obj.registrar} org: {whois_obj.org} city: {whois_obj.city} country: {whois_obj.country} creation_date: {whois_obj.creation_date} expiration_date: {whois_obj.expiration_date}"
|
||||
f"WHOIS for {whois_obj.domain.domain} -> registry_id: {whois_obj.registry_domain_id} registrar: {whois_obj.registrar} org: {whois_obj.org} city: {whois_obj.city} country: {whois_obj.country} creation_date: {whois_obj.creation_date} expiration_date: {whois_obj.expiration_date}"
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
import re
|
||||
from typing import Any, List, Union, Dict, Set, Optional
|
||||
from typing import Any, List, Dict, Set, Optional
|
||||
from flowsint_core.core.transform_base import Transform
|
||||
from flowsint_types.domain import Domain
|
||||
from flowsint_types.individual import Individual
|
||||
@@ -68,20 +68,6 @@ class EmailToDomainsTransform(Transform):
|
||||
def key(cls) -> str:
|
||||
return "email"
|
||||
|
||||
def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType:
|
||||
cleaned: InputType = []
|
||||
for item in data:
|
||||
email_obj = None
|
||||
if isinstance(item, str):
|
||||
email_obj = Email(email=item)
|
||||
elif isinstance(item, dict) and "email" in item:
|
||||
email_obj = Email(email=item["email"])
|
||||
elif isinstance(item, Email):
|
||||
email_obj = item
|
||||
if email_obj:
|
||||
cleaned.append(email_obj)
|
||||
return cleaned
|
||||
|
||||
async def scan(self, data: InputType) -> OutputType:
|
||||
"""Find domains related to emails using whoxy api."""
|
||||
domains: OutputType = []
|
||||
@@ -360,16 +346,17 @@ class EmailToDomainsTransform(Transform):
|
||||
|
||||
# Process email addresses
|
||||
if individual.email_addresses:
|
||||
for email in individual.email_addresses:
|
||||
if email and email not in processed_emails:
|
||||
processed_emails.add(email)
|
||||
for email_obj in individual.email_addresses:
|
||||
email_str = email_obj.email
|
||||
if email_str and email_str not in processed_emails:
|
||||
processed_emails.add(email_str)
|
||||
|
||||
# Create email node
|
||||
self.create_node(
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
caption=email,
|
||||
email_str,
|
||||
caption=email_str,
|
||||
type="email",
|
||||
)
|
||||
|
||||
@@ -380,22 +367,23 @@ class EmailToDomainsTransform(Transform):
|
||||
individual.full_name,
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
email_str,
|
||||
"HAS_EMAIL",
|
||||
)
|
||||
|
||||
# Process phone numbers
|
||||
if individual.phone_numbers:
|
||||
for phone in individual.phone_numbers:
|
||||
if phone and phone not in processed_phones:
|
||||
processed_phones.add(phone)
|
||||
for phone_obj in individual.phone_numbers:
|
||||
phone_str = phone_obj.number
|
||||
if phone_str and phone_str not in processed_phones:
|
||||
processed_phones.add(phone_str)
|
||||
|
||||
# Create phone node
|
||||
self.create_node(
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
caption=phone,
|
||||
phone_str,
|
||||
caption=phone_str,
|
||||
type="phone",
|
||||
)
|
||||
|
||||
@@ -406,7 +394,7 @@ class EmailToDomainsTransform(Transform):
|
||||
individual.full_name,
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
phone_str,
|
||||
"HAS_PHONE",
|
||||
)
|
||||
|
||||
|
||||
@@ -375,16 +375,17 @@ class IndividualToDomainsTransform(Transform):
|
||||
|
||||
# Process email addresses
|
||||
if contact_individual.email_addresses:
|
||||
for email in contact_individual.email_addresses:
|
||||
if email and email not in processed_emails:
|
||||
processed_emails.add(email)
|
||||
for email_obj in contact_individual.email_addresses:
|
||||
email_str = email_obj.email
|
||||
if email_str and email_str not in processed_emails:
|
||||
processed_emails.add(email_str)
|
||||
|
||||
# Create email node
|
||||
self.create_node(
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
email=email,
|
||||
email_str,
|
||||
email=email_str,
|
||||
)
|
||||
|
||||
# Create relationship between individual and email
|
||||
@@ -394,22 +395,23 @@ class IndividualToDomainsTransform(Transform):
|
||||
contact_individual.full_name,
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
email_str,
|
||||
"HAS_EMAIL",
|
||||
)
|
||||
|
||||
# Process phone numbers
|
||||
if contact_individual.phone_numbers:
|
||||
for phone in contact_individual.phone_numbers:
|
||||
if phone and phone not in processed_phones:
|
||||
processed_phones.add(phone)
|
||||
for phone_obj in contact_individual.phone_numbers:
|
||||
phone_str = phone_obj.number
|
||||
if phone_str and phone_str not in processed_phones:
|
||||
processed_phones.add(phone_str)
|
||||
|
||||
# Create phone node
|
||||
self.create_node(
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
number=phone,
|
||||
phone_str,
|
||||
number=phone_str,
|
||||
)
|
||||
|
||||
# Create relationship between individual and phone
|
||||
@@ -419,7 +421,7 @@ class IndividualToDomainsTransform(Transform):
|
||||
contact_individual.full_name,
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
phone_str,
|
||||
"HAS_PHONE",
|
||||
)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
import re
|
||||
from typing import Any, List, Union, Dict, Set, Optional
|
||||
from typing import Any, List, Dict, Set, Optional
|
||||
from flowsint_core.core.transform_base import Transform
|
||||
from flowsint_types.domain import Domain
|
||||
from flowsint_types.organization import Organization
|
||||
@@ -67,20 +67,6 @@ class OrgToDomainsTransform(Transform):
|
||||
def key(cls) -> str:
|
||||
return "name"
|
||||
|
||||
def preprocess(self, data: Union[List[str], List[dict], InputType]) -> InputType:
|
||||
cleaned: InputType = []
|
||||
for item in data:
|
||||
org_obj = None
|
||||
if isinstance(item, str):
|
||||
org_obj = Organization(name=item)
|
||||
elif isinstance(item, dict) and "name" in item:
|
||||
org_obj = Organization(name=item["name"])
|
||||
elif isinstance(item, Organization):
|
||||
org_obj = item
|
||||
if org_obj:
|
||||
cleaned.append(org_obj)
|
||||
return cleaned
|
||||
|
||||
async def scan(self, data: InputType) -> OutputType:
|
||||
"""Find domains related to organizations using whoxy api."""
|
||||
domains: OutputType = []
|
||||
@@ -526,18 +512,19 @@ class OrgToDomainsTransform(Transform):
|
||||
|
||||
# Process email addresses
|
||||
if individual.email_addresses:
|
||||
for email in individual.email_addresses:
|
||||
if email and email not in processed_emails:
|
||||
processed_emails.add(email)
|
||||
for email_obj in individual.email_addresses:
|
||||
email_str = email_obj.email
|
||||
if email_str and email_str not in processed_emails:
|
||||
processed_emails.add(email_str)
|
||||
Logger.info(
|
||||
self.sketch_id,
|
||||
{"message": f"[WHOXY] Creating email node: {email}"},
|
||||
{"message": f"[WHOXY] Creating email node: {email_str}"},
|
||||
)
|
||||
self.create_node(
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
caption=email,
|
||||
email_str,
|
||||
caption=email_str,
|
||||
type="email",
|
||||
)
|
||||
self.create_relationship(
|
||||
@@ -546,24 +533,25 @@ class OrgToDomainsTransform(Transform):
|
||||
individual.full_name,
|
||||
"email",
|
||||
"email",
|
||||
email,
|
||||
email_str,
|
||||
"HAS_EMAIL",
|
||||
)
|
||||
|
||||
# Process phone numbers
|
||||
if individual.phone_numbers:
|
||||
for phone in individual.phone_numbers:
|
||||
if phone and phone not in processed_phones:
|
||||
processed_phones.add(phone)
|
||||
for phone_obj in individual.phone_numbers:
|
||||
phone_str = phone_obj.number
|
||||
if phone_str and phone_str not in processed_phones:
|
||||
processed_phones.add(phone_str)
|
||||
Logger.info(
|
||||
self.sketch_id,
|
||||
{"message": f"[WHOXY] Creating phone node: {phone}"},
|
||||
{"message": f"[WHOXY] Creating phone node: {phone_str}"},
|
||||
)
|
||||
self.create_node(
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
caption=phone,
|
||||
phone_str,
|
||||
caption=phone_str,
|
||||
type="phone",
|
||||
)
|
||||
self.create_relationship(
|
||||
@@ -572,7 +560,7 @@ class OrgToDomainsTransform(Transform):
|
||||
individual.full_name,
|
||||
"phone",
|
||||
"number",
|
||||
phone,
|
||||
phone_str,
|
||||
"HAS_PHONE",
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user