feat: neo4j migrations

This commit is contained in:
dextmorgn
2026-01-24 19:08:02 +01:00
parent 531bcf970e
commit 1ce2cbd7a5
5 changed files with 548 additions and 4 deletions

View File

@@ -0,0 +1,56 @@
// 001_indexes.cypher
// Foundation indexes for query performance
// All statements are idempotent (IF NOT EXISTS)
// Index for filtering nodes by sketch_id (most common query pattern)
CREATE INDEX idx_sketch_id IF NOT EXISTS FOR (n:domain) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_email IF NOT EXISTS FOR (n:email) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_ip IF NOT EXISTS FOR (n:ip) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_phone IF NOT EXISTS FOR (n:phone) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_username IF NOT EXISTS FOR (n:username) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_organization IF NOT EXISTS FOR (n:organization) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_individual IF NOT EXISTS FOR (n:individual) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_socialaccount IF NOT EXISTS FOR (n:socialaccount) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_asn IF NOT EXISTS FOR (n:asn) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_cidr IF NOT EXISTS FOR (n:cidr) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_cryptowallet IF NOT EXISTS FOR (n:cryptowallet) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_cryptowallettransaction IF NOT EXISTS FOR (n:cryptowallettransaction) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_cryptonft IF NOT EXISTS FOR (n:cryptonft) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_website IF NOT EXISTS FOR (n:website) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_port IF NOT EXISTS FOR (n:port) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_phrase IF NOT EXISTS FOR (n:phrase) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_breach IF NOT EXISTS FOR (n:breach) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_credential IF NOT EXISTS FOR (n:credential) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_device IF NOT EXISTS FOR (n:device) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_document IF NOT EXISTS FOR (n:document) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_file IF NOT EXISTS FOR (n:file) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_malware IF NOT EXISTS FOR (n:malware) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_sslcertificate IF NOT EXISTS FOR (n:sslcertificate) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_location IF NOT EXISTS FOR (n:location) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_affiliation IF NOT EXISTS FOR (n:affiliation) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_alias IF NOT EXISTS FOR (n:alias) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_bankaccount IF NOT EXISTS FOR (n:bankaccount) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_creditcard IF NOT EXISTS FOR (n:creditcard) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_dnsrecord IF NOT EXISTS FOR (n:dnsrecord) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_gravatar IF NOT EXISTS FOR (n:gravatar) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_leak IF NOT EXISTS FOR (n:leak) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_message IF NOT EXISTS FOR (n:message) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_reputationscore IF NOT EXISTS FOR (n:reputationscore) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_riskprofile IF NOT EXISTS FOR (n:riskprofile) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_script IF NOT EXISTS FOR (n:script) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_session IF NOT EXISTS FOR (n:session) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_webtracker IF NOT EXISTS FOR (n:webtracker) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_weapon IF NOT EXISTS FOR (n:weapon) ON (n.sketch_id);
CREATE INDEX idx_sketch_id_whois IF NOT EXISTS FOR (n:whois) ON (n.sketch_id);
// Index for searching by nodeLabel (text search on common types)
CREATE INDEX idx_nodeLabel_domain IF NOT EXISTS FOR (n:domain) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_email IF NOT EXISTS FOR (n:email) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_ip IF NOT EXISTS FOR (n:ip) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_phone IF NOT EXISTS FOR (n:phone) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_username IF NOT EXISTS FOR (n:username) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_individual IF NOT EXISTS FOR (n:individual) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_organization IF NOT EXISTS FOR (n:organization) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_socialaccount IF NOT EXISTS FOR (n:socialaccount) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_website IF NOT EXISTS FOR (n:website) ON (n.nodeLabel);
CREATE INDEX idx_nodeLabel_cryptowallet IF NOT EXISTS FOR (n:cryptowallet) ON (n.nodeLabel);

View File

@@ -0,0 +1,204 @@
/**
* 003_migrate_v1_format.js
*
* Migrates nodes from V1 format to V2 format.
*
* V1 format (old):
* label: "example.com"
* type: "domain"
* created_at: "2026-01-23T18:28:46.048223+00:00"
* domain: "example.com"
* root: true
* sketch_id: "..."
* x, y: coordinates
*
* V2 format (new):
* nodeLabel: "example.com"
* nodeType: "domain"
* nodeMetadata.created_at: "2026-01-23T18:28:46.048223+00:00"
* nodeProperties.domain: "example.com"
* nodeProperties.root: true
* sketch_id: "..."
* x, y: coordinates
*
* This migration is IDEMPOTENT:
* - Only processes nodes that have V1 format (have `label` or `type` but NOT `nodeLabel`)
* - Safe to run multiple times
* - Processes in batches to handle large datasets
*/
// Reserved properties that should NOT be moved to nodeProperties
const RESERVED_PROPERTIES = new Set([
"id",
"x",
"y",
"nodeLabel",
"label",
"nodeType",
"type",
"nodeImage",
"nodeIcon",
"nodeColor",
"nodeSize",
"nodeFlag",
"nodeShape",
"nodeMetadata",
"nodeProperties",
"created_at",
"sketch_id",
]);
// Properties that are part of nodeMetadata
const METADATA_PROPERTIES = new Set(["created_at"]);
const BATCH_SIZE = 500;
/**
* Main migration function
* @param {import('neo4j-driver').Driver} driver
* @param {import('neo4j-driver').Session} session
* @param {boolean} dryRun
* @returns {Promise<string>} Summary message
*/
export async function migrate(driver, session, dryRun) {
// Count nodes needing migration (V1 format: has `type` but no `nodeType`)
const countResult = await session.run(`
MATCH (n)
WHERE n.type IS NOT NULL AND n.nodeType IS NULL
RETURN count(n) AS count
`);
const totalCount = countResult.records[0].get("count").toNumber();
if (totalCount === 0) {
return "No V1 format nodes found - nothing to migrate";
}
console.log(`[INFO] Found ${totalCount} nodes in V1 format to migrate`);
if (dryRun) {
// In dry-run, show sample of what would be migrated
const sampleResult = await session.run(`
MATCH (n)
WHERE n.type IS NOT NULL AND n.nodeType IS NULL
RETURN n, labels(n) AS labels
LIMIT 5
`);
console.log("[DRY-RUN] Sample nodes that would be migrated:");
for (const record of sampleResult.records) {
const node = record.get("n").properties;
const labels = record.get("labels");
console.log(` - [${labels.join(":")}] label="${node.label}", type="${node.type}"`);
}
return `Would migrate ${totalCount} nodes from V1 to V2 format`;
}
// Process in batches
let migratedCount = 0;
let batchNum = 0;
while (migratedCount < totalCount) {
batchNum++;
console.log(
`[INFO] Processing batch ${batchNum} (${migratedCount}/${totalCount} done)`
);
// Fetch a batch of V1 nodes
const batchResult = await session.run(
`
MATCH (n)
WHERE n.type IS NOT NULL AND n.nodeType IS NULL
RETURN elementId(n) AS elementId, n, labels(n) AS labels
LIMIT $limit
`,
{ limit: BATCH_SIZE }
);
if (batchResult.records.length === 0) {
break;
}
// Process each node in the batch
for (const record of batchResult.records) {
const elementId = record.get("elementId");
const node = record.get("n").properties;
// Build the new properties
const updates = buildV2Properties(node);
// Apply the update
await session.run(
`
MATCH (n)
WHERE elementId(n) = $elementId
SET n += $updates
REMOVE n.label, n.type, n.created_at
`,
{ elementId, updates }
);
// Remove old dynamic properties that were moved to nodeProperties
const propsToRemove = Object.keys(node).filter(
(key) =>
!RESERVED_PROPERTIES.has(key) &&
!key.startsWith("nodeProperties.") &&
!key.startsWith("nodeMetadata.")
);
if (propsToRemove.length > 0) {
// Build dynamic REMOVE clause
const removeClause = propsToRemove.map((p) => `n.\`${p}\``).join(", ");
await session.run(
`
MATCH (n)
WHERE elementId(n) = $elementId
REMOVE ${removeClause}
`,
{ elementId }
);
}
migratedCount++;
}
}
return `Migrated ${migratedCount} nodes from V1 to V2 format`;
}
/**
* Builds V2 format properties from V1 node
* @param {Record<string, any>} node - V1 node properties
* @returns {Record<string, any>} - V2 format properties to SET
*/
function buildV2Properties(node) {
const updates = {};
// Map core fields
updates.nodeLabel = node.label || node.nodeLabel || "";
updates.nodeType = node.type || node.nodeType || "";
// Handle created_at -> nodeMetadata.created_at
if (node.created_at) {
updates["nodeMetadata.created_at"] = node.created_at;
} else if (!node["nodeMetadata.created_at"]) {
// Set current timestamp if no created_at exists
updates["nodeMetadata.created_at"] = new Date().toISOString();
}
// Move non-reserved properties to nodeProperties.*
for (const [key, value] of Object.entries(node)) {
// Skip reserved properties
if (RESERVED_PROPERTIES.has(key)) continue;
// Skip properties already in nodeProperties/nodeMetadata namespace
if (key.startsWith("nodeProperties.") || key.startsWith("nodeMetadata.")) {
continue;
}
// Move to nodeProperties
updates[`nodeProperties.${key}`] = value;
}
return updates;
}