diff --git a/flowsint-core/src/flowsint_core/core/graph/repository.py b/flowsint-core/src/flowsint_core/core/graph/repository.py index 30e1d28..d1c93df 100644 --- a/flowsint-core/src/flowsint_core/core/graph/repository.py +++ b/flowsint-core/src/flowsint_core/core/graph/repository.py @@ -120,6 +120,7 @@ class Neo4jGraphRepository: MERGE (n:{node_type} {{ nodeLabel: $node_label, sketch_id: $sketch_id }}) ON CREATE SET n.created_at = $created_at SET n += $props + SET n.deleted_at = null RETURN elementId(n) AS id """ @@ -145,9 +146,12 @@ class Neo4jGraphRepository: query = f""" MATCH (from:{from_type} {{nodeLabel: $from_label, sketch_id: $sketch_id}}) + WHERE from.deleted_at IS NULL MATCH (to:{to_type} {{nodeLabel: $to_label, sketch_id: $sketch_id}}) + WHERE to.deleted_at IS NULL MERGE (from)-[r:{rel_label} {{sketch_id: $sketch_id}}]->(to) SET r += $props + SET r.deleted_at = null """ return query, params @@ -395,7 +399,7 @@ class Neo4jGraphRepository: query = """ MATCH (n) - WHERE elementId(n) = $element_id AND n.sketch_id = $sketch_id + WHERE elementId(n) = $element_id AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL SET n += $props RETURN elementId(n) AS id """ @@ -411,14 +415,14 @@ class Neo4jGraphRepository: def delete_nodes(self, node_ids: List[str], sketch_id: str) -> int: """ - Delete nodes by their element IDs. + Soft delete nodes by their element IDs. Args: node_ids: List of Neo4j element IDs sketch_id: Investigation sketch ID (for safety) Returns: - Number of nodes deleted + Number of nodes soft-deleted """ if not self._connection or not node_ids: return 0 @@ -426,26 +430,34 @@ class Neo4jGraphRepository: query = """ UNWIND $node_ids AS node_id MATCH (n) - WHERE elementId(n) = node_id AND n.sketch_id = $sketch_id - DETACH DELETE n - RETURN count(n) as deleted_count + WHERE elementId(n) = node_id AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL + OPTIONAL MATCH (n)-[r]-() + WHERE r.sketch_id = $sketch_id AND r.deleted_at IS NULL + SET n.deleted_at = $deleted_at + SET r.deleted_at = $deleted_at + RETURN count(DISTINCT n) as deleted_count """ result = self._connection.query( - query, {"node_ids": node_ids, "sketch_id": sketch_id} + query, + { + "node_ids": node_ids, + "sketch_id": sketch_id, + "deleted_at": datetime.now(timezone.utc).isoformat(), + }, ) return result[0]["deleted_count"] if result else 0 def delete_relationships(self, relationship_ids: List[str], sketch_id: str) -> int: """ - Delete relationships by their element IDs. + Soft delete relationships by their element IDs. Args: relationship_ids: List of Neo4j element IDs sketch_id: Investigation sketch ID (for safety) Returns: - Number of relationships deleted + Number of relationships soft-deleted """ if not self._connection or not relationship_ids: return 0 @@ -453,37 +465,51 @@ class Neo4jGraphRepository: query = """ UNWIND $relationship_ids AS rel_id MATCH ()-[r]->() - WHERE elementId(r) = rel_id AND r.sketch_id = $sketch_id - DELETE r + WHERE elementId(r) = rel_id AND r.sketch_id = $sketch_id AND r.deleted_at IS NULL + SET r.deleted_at = $deleted_at RETURN count(r) as deleted_count """ result = self._connection.query( - query, {"relationship_ids": relationship_ids, "sketch_id": sketch_id} + query, + { + "relationship_ids": relationship_ids, + "sketch_id": sketch_id, + "deleted_at": datetime.now(timezone.utc).isoformat(), + }, ) return result[0]["deleted_count"] if result else 0 def delete_all_sketch_nodes(self, sketch_id: str) -> int: """ - Delete all nodes and relationships for a sketch. + Soft delete all nodes and relationships for a sketch. Args: sketch_id: Investigation sketch ID Returns: - Number of nodes deleted + Number of nodes soft-deleted """ if not self._connection: return 0 query = """ OPTIONAL MATCH (n {sketch_id: $sketch_id}) - WHERE n IS NOT NULL - DETACH DELETE n - RETURN count(n) as deleted_count + WHERE n IS NOT NULL AND n.deleted_at IS NULL + OPTIONAL MATCH (n)-[r]-() + WHERE r.sketch_id = $sketch_id AND r.deleted_at IS NULL + SET n.deleted_at = $deleted_at + SET r.deleted_at = $deleted_at + RETURN count(DISTINCT n) as deleted_count """ - result = self._connection.query(query, {"sketch_id": sketch_id}) + result = self._connection.query( + query, + { + "sketch_id": sketch_id, + "deleted_at": datetime.now(timezone.utc).isoformat(), + }, + ) return result[0]["deleted_count"] if result else 0 def get_sketch_graph( @@ -506,7 +532,7 @@ class Neo4jGraphRepository: # Use OPTIONAL MATCH to avoid Neo4j warning when sketch_id property doesn't exist yet nodes_query = """ OPTIONAL MATCH (n) - WHERE n.sketch_id = $sketch_id + WHERE n.sketch_id = $sketch_id AND n.deleted_at IS NULL WITH n WHERE n IS NOT NULL RETURN elementId(n) as id, labels(n) as labels, properties(n) as data @@ -524,7 +550,9 @@ class Neo4jGraphRepository: rels_query = """ UNWIND $node_ids AS nid MATCH (a)-[r]->(b) - WHERE elementId(a) = nid AND elementId(b) IN $node_ids + WHERE elementId(a) = nid + AND elementId(b) IN $node_ids + AND r.deleted_at IS NULL RETURN elementId(r) as id, type(r) as type, elementId(a) as source, elementId(b) as target, properties(r) as data """ @@ -544,12 +572,13 @@ class Neo4jGraphRepository: # delete the old relationship and create a new one with the new type. query = f""" MATCH (a)-[r]->(b) - WHERE elementId(r) = $element_id AND r.sketch_id = $sketch_id + WHERE elementId(r) = $element_id AND r.sketch_id = $sketch_id AND r.deleted_at IS NULL WITH a, b, r, properties(r) AS old_props DELETE r CREATE (a)-[r2:`{new_label}`]->(b) SET r2 = old_props SET r2 += $props + SET r2.deleted_at = null RETURN elementId(r2) AS id, type(r2) AS type, @@ -558,7 +587,7 @@ class Neo4jGraphRepository: else: query = """ MATCH ()-[r]->() - WHERE elementId(r) = $element_id AND r.sketch_id = $sketch_id + WHERE elementId(r) = $element_id AND r.sketch_id = $sketch_id AND r.deleted_at IS NULL SET r += $props RETURN elementId(r) AS id, @@ -603,9 +632,10 @@ class Neo4jGraphRepository: rel_props = f"{{{props_str}}}" query = f""" - MATCH (a) WHERE elementId(a) = $from_id - MATCH (b) WHERE elementId(b) = $to_id + MATCH (a) WHERE elementId(a) = $from_id AND a.deleted_at IS NULL + MATCH (b) WHERE elementId(b) = $to_id AND b.deleted_at IS NULL MERGE (a)-[r:`{rel_label}` {rel_props}]->(b) + SET r.deleted_at = null RETURN properties(r) as rel """ @@ -655,7 +685,7 @@ class Neo4jGraphRepository: query = """ UNWIND $positions AS pos MATCH (n) - WHERE elementId(n) = pos.nodeId AND n.sketch_id = $sketch_id + WHERE elementId(n) = pos.nodeId AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL SET n.x = pos.x, n.y = pos.y RETURN count(n) as updated_count """ @@ -684,7 +714,7 @@ class Neo4jGraphRepository: query = """ UNWIND $node_ids AS node_id MATCH (n) - WHERE elementId(n) = node_id AND n.sketch_id = $sketch_id + WHERE elementId(n) = node_id AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL RETURN properties(n) as data """ @@ -726,7 +756,7 @@ class Neo4jGraphRepository: set_clause = ", ".join(f"n.{key} = ${key}" for key in properties.keys()) create_query = f""" MATCH (n) - WHERE elementId(n) = $nodeId AND n.sketch_id = $sketch_id + WHERE elementId(n) = $nodeId AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL SET {set_clause} RETURN elementId(n) as newElementId """ @@ -750,25 +780,33 @@ class Neo4jGraphRepository: MATCH (new) WHERE elementId(new) = $newElementId UNWIND $oldNodeIds AS oldNodeId - MATCH (old) WHERE elementId(old) = oldNodeId AND old.sketch_id = $sketch_id + MATCH (old) WHERE elementId(old) = oldNodeId AND old.sketch_id = $sketch_id AND old.deleted_at IS NULL WITH new, collect(old) as oldNodes UNWIND oldNodes as old MATCH (src)-[r]->(old) - WHERE elementId(src) NOT IN $oldNodeIds AND elementId(src) <> $newElementId + WHERE elementId(src) NOT IN $oldNodeIds + AND elementId(src) <> $newElementId + AND src.deleted_at IS NULL + AND r.deleted_at IS NULL WITH new, src, type(r) as relType, properties(r) as relProps, r MERGE (src)-[newRel:RELATED_TO {sketch_id: $sketch_id}]->(new) SET newRel = relProps + SET newRel.deleted_at = null WITH new, $oldNodeIds as oldNodeIds UNWIND oldNodeIds AS oldNodeId - MATCH (old) WHERE elementId(old) = oldNodeId AND old.sketch_id = $sketch_id + MATCH (old) WHERE elementId(old) = oldNodeId AND old.sketch_id = $sketch_id AND old.deleted_at IS NULL MATCH (old)-[r]->(dst) - WHERE elementId(dst) NOT IN oldNodeIds AND elementId(dst) <> $newElementId + WHERE elementId(dst) NOT IN oldNodeIds + AND elementId(dst) <> $newElementId + AND dst.deleted_at IS NULL + AND r.deleted_at IS NULL WITH new, dst, type(r) as relType, properties(r) as relProps MERGE (new)-[newRel:RELATED_TO {sketch_id: $sketch_id}]->(dst) SET newRel = relProps + SET newRel.deleted_at = null """ self._connection.query( @@ -785,11 +823,19 @@ class Neo4jGraphRepository: delete_query = """ UNWIND $nodeIds AS nodeId MATCH (old) - WHERE elementId(old) = nodeId AND old.sketch_id = $sketch_id - DETACH DELETE old + WHERE elementId(old) = nodeId AND old.sketch_id = $sketch_id AND old.deleted_at IS NULL + OPTIONAL MATCH (old)-[r]-() + WHERE r.sketch_id = $sketch_id AND r.deleted_at IS NULL + SET old.deleted_at = $deleted_at + SET r.deleted_at = $deleted_at """ self._connection.query( - delete_query, {"nodeIds": nodes_to_delete, "sketch_id": sketch_id} + delete_query, + { + "nodeIds": nodes_to_delete, + "sketch_id": sketch_id, + "deleted_at": datetime.now(timezone.utc).isoformat(), + }, ) return new_node_element_id @@ -811,10 +857,13 @@ class Neo4jGraphRepository: query = """ MATCH (n) - WHERE elementId(n) = $node_id AND n.sketch_id = $sketch_id + WHERE elementId(n) = $node_id AND n.sketch_id = $sketch_id AND n.deleted_at IS NULL OPTIONAL MATCH (n)-[r]-(other) - WHERE other.sketch_id = $sketch_id AND other <> n + WHERE other.sketch_id = $sketch_id + AND other <> n + AND other.deleted_at IS NULL + AND r.deleted_at IS NULL RETURN elementId(n) AS center_id, @@ -894,7 +943,7 @@ class Neo4jGraphRepository: """ query = """ OPTIONAL MATCH (n) - WHERE n.sketch_id = $sketch_id AND n IS NOT NULL + WHERE n.sketch_id = $sketch_id AND n.deleted_at IS NULL AND n IS NOT NULL RETURN count(n) as total """ @@ -915,7 +964,11 @@ class Neo4jGraphRepository: """ query = """ OPTIONAL MATCH (n)-[r]->(m) - WHERE n.sketch_id = $sketch_id AND m.sketch_id = $sketch_id + WHERE n.sketch_id = $sketch_id + AND m.sketch_id = $sketch_id + AND n.deleted_at IS NULL + AND m.deleted_at IS NULL + AND r.deleted_at IS NULL RETURN count(r) as total """ diff --git a/flowsint-core/tests/core/graph/in_memory_graph_repository.py b/flowsint-core/tests/core/graph/in_memory_graph_repository.py index a681262..84d4cd1 100644 --- a/flowsint-core/tests/core/graph/in_memory_graph_repository.py +++ b/flowsint-core/tests/core/graph/in_memory_graph_repository.py @@ -45,6 +45,7 @@ class InMemoryGraphRepository: ): # Update existing node self._nodes[element_id].update(node_obj) + self._nodes[element_id]["deleted_at"] = None return element_id # Create new node @@ -53,6 +54,7 @@ class InMemoryGraphRepository: **node_obj, "sketch_id": sketch_id, "created_at": datetime.now(timezone.utc).isoformat(), + "deleted_at": None, "_labels": [node_type] if node_type else ["Node"], } return element_id @@ -65,48 +67,46 @@ class InMemoryGraphRepository: return None if self._nodes[element_id].get("sketch_id") != sketch_id: return None + if self._nodes[element_id].get("deleted_at") is not None: + return None self._nodes[element_id].update(updates) return element_id def delete_nodes(self, node_ids: List[str], sketch_id: str) -> int: - """Delete nodes by their element IDs. Returns count deleted.""" + """Soft delete nodes by their element IDs. Returns count soft-deleted.""" deleted = 0 + deleted_at = datetime.now(timezone.utc).isoformat() for node_id in node_ids: if node_id in self._nodes: if self._nodes[node_id].get("sketch_id") == sketch_id: - # Also delete related edges - edges_to_delete = [ - eid - for eid, edge in self._edges.items() - if edge.get("source") == node_id - or edge.get("target") == node_id - ] - for eid in edges_to_delete: - del self._edges[eid] - del self._nodes[node_id] + if self._nodes[node_id].get("deleted_at") is not None: + continue + + # Also soft delete related edges + for edge in self._edges.values(): + if edge.get("source") == node_id or edge.get("target") == node_id: + if edge.get("sketch_id") == sketch_id and edge.get("deleted_at") is None: + edge["deleted_at"] = deleted_at + + self._nodes[node_id]["deleted_at"] = deleted_at deleted += 1 return deleted def delete_all_sketch_nodes(self, sketch_id: str) -> int: - """Delete all nodes for a sketch. Returns count deleted.""" - to_delete = [ - eid - for eid, data in self._nodes.items() - if data.get("sketch_id") == sketch_id - ] - for eid in to_delete: - del self._nodes[eid] + """Soft delete all nodes for a sketch. Returns count soft-deleted.""" + deleted_at = datetime.now(timezone.utc).isoformat() + deleted_count = 0 - # Also delete related edges - edges_to_delete = [ - eid - for eid, data in self._edges.items() - if data.get("sketch_id") == sketch_id - ] - for eid in edges_to_delete: - del self._edges[eid] + for data in self._nodes.values(): + if data.get("sketch_id") == sketch_id and data.get("deleted_at") is None: + data["deleted_at"] = deleted_at + deleted_count += 1 - return len(to_delete) + for data in self._edges.values(): + if data.get("sketch_id") == sketch_id and data.get("deleted_at") is None: + data["deleted_at"] = deleted_at + + return deleted_count def get_nodes_by_ids( self, node_ids: List[str], sketch_id: str @@ -116,7 +116,7 @@ class InMemoryGraphRepository: for node_id in node_ids: if node_id in self._nodes: node = self._nodes[node_id] - if node.get("sketch_id") == sketch_id: + if node.get("sketch_id") == sketch_id and node.get("deleted_at") is None: result.append({"data": node}) return result @@ -129,6 +129,8 @@ class InMemoryGraphRepository: node_id = pos.get("nodeId") if node_id in self._nodes: if self._nodes[node_id].get("sketch_id") == sketch_id: + if self._nodes[node_id].get("deleted_at") is not None: + continue self._nodes[node_id]["x"] = pos.get("x") self._nodes[node_id]["y"] = pos.get("y") updated += 1 @@ -153,6 +155,8 @@ class InMemoryGraphRepository: for eid, node in self._nodes.items(): if node.get("sketch_id") != sketch_id: continue + if node.get("deleted_at") is not None: + continue if node.get("nodeLabel") == from_label: source_id = eid if node.get("nodeLabel") == to_label: @@ -166,6 +170,7 @@ class InMemoryGraphRepository: "target": target_id, "type": rel_label, "sketch_id": sketch_id, + "deleted_at": None, } def create_relationship_by_element_id( @@ -178,6 +183,10 @@ class InMemoryGraphRepository: """Create a relationship using element IDs.""" if from_element_id not in self._nodes or to_element_id not in self._nodes: return None + if self._nodes[from_element_id].get("deleted_at") is not None: + return None + if self._nodes[to_element_id].get("deleted_at") is not None: + return None element_id = self._generate_element_id("rel") edge_data = { @@ -185,6 +194,7 @@ class InMemoryGraphRepository: "target": to_element_id, "type": rel_label, "sketch_id": sketch_id, + "deleted_at": None, } self._edges[element_id] = edge_data return {"sketch_id": sketch_id} @@ -197,6 +207,8 @@ class InMemoryGraphRepository: return None if self._edges[element_id].get("sketch_id") != sketch_id: return None + if self._edges[element_id].get("deleted_at") is not None: + return None self._edges[element_id].update(rel_obj) return { "id": element_id, @@ -205,12 +217,15 @@ class InMemoryGraphRepository: } def delete_relationships(self, relationship_ids: List[str], sketch_id: str) -> int: - """Delete relationships by their element IDs. Returns count deleted.""" + """Soft delete relationships by their element IDs. Returns count soft-deleted.""" deleted = 0 + deleted_at = datetime.now(timezone.utc).isoformat() for rel_id in relationship_ids: if rel_id in self._edges: if self._edges[rel_id].get("sketch_id") == sketch_id: - del self._edges[rel_id] + if self._edges[rel_id].get("deleted_at") is not None: + continue + self._edges[rel_id]["deleted_at"] = deleted_at deleted += 1 return deleted @@ -226,7 +241,7 @@ class InMemoryGraphRepository: node_ids = set() for eid, data in self._nodes.items(): - if data.get("sketch_id") == sketch_id: + if data.get("sketch_id") == sketch_id and data.get("deleted_at") is None: nodes.append( { "id": eid, @@ -240,7 +255,7 @@ class InMemoryGraphRepository: edges = [] for eid, data in self._edges.items(): - if data.get("sketch_id") == sketch_id: + if data.get("sketch_id") == sketch_id and data.get("deleted_at") is None: if data.get("source") in node_ids and data.get("target") in node_ids: edges.append( { @@ -262,6 +277,8 @@ class InMemoryGraphRepository: center = self._nodes[node_id] if center.get("sketch_id") != sketch_id: return {"nodes": [], "edges": []} + if center.get("deleted_at") is not None: + return {"nodes": [], "edges": []} nodes = {node_id: {"id": node_id, "data": center}} edges = {} @@ -269,6 +286,8 @@ class InMemoryGraphRepository: for eid, edge in self._edges.items(): if edge.get("sketch_id") != sketch_id: continue + if edge.get("deleted_at") is not None: + continue source = edge.get("source") target = edge.get("target") @@ -276,6 +295,8 @@ class InMemoryGraphRepository: if source == node_id: # Outgoing edge if target in self._nodes: + if self._nodes[target].get("deleted_at") is not None: + continue nodes[target] = {"id": target, "data": self._nodes[target]} edges[eid] = { "id": eid, @@ -286,6 +307,8 @@ class InMemoryGraphRepository: elif target == node_id: # Incoming edge if source in self._nodes: + if self._nodes[source].get("deleted_at") is not None: + continue nodes[source] = {"id": source, "data": self._nodes[source]} edges[eid] = { "id": eid, @@ -315,25 +338,30 @@ class InMemoryGraphRepository: if new_node_id and new_node_id in old_node_ids: target_id = new_node_id self._nodes[target_id].update(new_node_data) + self._nodes[target_id]["deleted_at"] = None else: target_id = self._generate_element_id("node") self._nodes[target_id] = { **new_node_data, "sketch_id": sketch_id, "created_at": datetime.now(timezone.utc).isoformat(), + "deleted_at": None, } # Transfer relationships for eid, edge in list(self._edges.items()): + if edge.get("deleted_at") is not None: + continue if edge.get("source") in old_node_ids and edge.get("source") != target_id: edge["source"] = target_id if edge.get("target") in old_node_ids and edge.get("target") != target_id: edge["target"] = target_id - # Delete old nodes (except target) + # Soft delete old nodes (except target) + deleted_at = datetime.now(timezone.utc).isoformat() for node_id in old_node_ids: if node_id != target_id and node_id in self._nodes: - del self._nodes[node_id] + self._nodes[node_id]["deleted_at"] = deleted_at return target_id @@ -458,17 +486,21 @@ class InMemoryGraphRepository: """Get total node count, optionally filtered by sketch_id.""" if sketch_id: return sum( - 1 for n in self._nodes.values() if n.get("sketch_id") == sketch_id + 1 + for n in self._nodes.values() + if n.get("sketch_id") == sketch_id and n.get("deleted_at") is None ) - return len(self._nodes) + return sum(1 for n in self._nodes.values() if n.get("deleted_at") is None) def get_edge_count(self, sketch_id: Optional[str] = None) -> int: """Get total edge count, optionally filtered by sketch_id.""" if sketch_id: return sum( - 1 for e in self._edges.values() if e.get("sketch_id") == sketch_id + 1 + for e in self._edges.values() + if e.get("sketch_id") == sketch_id and e.get("deleted_at") is None ) - return len(self._edges) + return sum(1 for e in self._edges.values() if e.get("deleted_at") is None) def clear(self) -> None: """Clear all data (useful between tests)."""