/**
 * Chooses the display label for a node.
 *
 * Preference order: `name`, then `gene_name` (each skipped when missing or
 * equal to the "-" placeholder), then `alt_name`, and finally `id`.
 *
 * @param {{properties: Object}} node - Node whose `properties` are inspected.
 * @returns {*} The chosen label.
 */
function getNodeLabel(node) {
  const { name, id, alt_name, gene_name } = node.properties;
  if (name && name !== "-") {
    return name;
  }
  if (gene_name && gene_name !== "-") {
    return gene_name;
  }
  return alt_name || id;
}

/**
 * Classifies a node by its position in a path.
 *
 * @param {string} nodeId - Id of the node being classified.
 * @param {string} sourceId - Id of the first node in the path.
 * @param {boolean} isLastNode - Whether the node is the last protein in the path.
 * @returns {string} "go_source", "source", "go_protein" or "intermediate".
 */
function getNodeType(nodeId, sourceId, isLastNode) {
  // Ids are compared case-insensitively.
  const isSource = nodeId.toUpperCase() === sourceId.toUpperCase();
  if (isSource && isLastNode) {
    return "go_source";
  }
  if (isSource) {
    return "source";
  }
  if (isLastNode) {
    return "go_protein";
  }
  return "intermediate";
}

/**
 * Builds the `{ data: ... }` entry stored in `parsedData.nodes`.
 *
 * @param {{properties: Object}} node - Node to convert.
 * @param {string} nodeType - Value stored as `data.type`.
 * @returns {{data: Object}} The node entry.
 */
function createNodeEntry(node, nodeType) {
  const { id, alt_name, gene_name, degree } = node.properties;
  return {
    data: {
      id,
      label: getNodeLabel(node),
      degree: degree.low,
      alt_name,
      gene_name,
      type: nodeType,
    },
  };
}

/**
 * Adds the nodes and consecutive-pair edges of one path to `parsedData`,
 * skipping anything that was already added by an earlier path.
 *
 * @param {Object} parsedData - Accumulator with `nodes`, `edges`, `nodeList`, `edgeList`.
 * @param {Set} seenNodeIds - Ids already present in `parsedData.nodeList`.
 * @param {Set} seenEdgeKeys - Keys already present in `parsedData.edgeList`.
 * @param {Array} pathNodes - Nodes of the path, in order.
 */
function addPathToGraph(parsedData, seenNodeIds, seenEdgeKeys, pathNodes) {
  if (pathNodes.length === 0) {
    return;
  }

  // The first element of a path is always the source node.
  const sourceId = pathNodes[0].properties.id;
  const lastIndex = pathNodes.length - 1;

  pathNodes.forEach((node, index) => {
    const nodeId = node.properties.id;
    // The first occurrence of a node decides its type; later paths do not override it.
    if (seenNodeIds.has(nodeId)) {
      return;
    }
    seenNodeIds.add(nodeId);
    parsedData.nodeList.push(nodeId);
    parsedData.nodes.push(
      createNodeEntry(node, getNodeType(nodeId, sourceId, index === lastIndex))
    );
  });

  for (let j = 1; j < pathNodes.length; j++) {
    const startNode = pathNodes[j - 1].properties.id;
    const endNode = pathNodes[j].properties.id;
    // Edges are tracked as undirected: one key per pair, whichever orientation came first.
    const edgeKey = startNode + endNode;
    if (!seenEdgeKeys.has(edgeKey) && !seenEdgeKeys.has(endNode + startNode)) {
      seenEdgeKeys.add(edgeKey);
      parsedData.edgeList.push(edgeKey);
      parsedData.edges.push({ data: { source: endNode, target: startNode } });
    }
  }
}

/**
 * Parses path-query records into node and edge lists.
 *
 * For every record, the array at `_fields[4]` is read as a path whose final
 * element is not added to the graph; every other element becomes a node and
 * every consecutive pair becomes an edge.
 *
 * @param {Array} data - Records, each exposing a `_fields` array.
 * @returns {{nodes: Array, edges: Array, nodeList: Array, edgeList: Array, goTerm: Object}}
 *   Parsed graph; `goTerm` holds the `properties` of the last element of the
 *   last field of the first record.
 * @throws {TypeError} If `data` is empty or a record lacks the expected fields.
 */
export function NetworkParserPath(data) {
  const parsedData = { nodes: [], edges: [], nodeList: [], edgeList: [] };
  const seenNodeIds = new Set();
  const seenEdgeKeys = new Set();

  data.forEach((record) => {
    const currentPath = record._fields[4];
    // Drop the trailing element so it never becomes a node or an edge endpoint.
    addPathToGraph(parsedData, seenNodeIds, seenEdgeKeys, currentPath.slice(0, -1));
  });

  const lastField = data[0]._fields[data[0]._fields.length - 1];
  parsedData.goTerm = lastField[lastField.length - 1].properties;

  return parsedData;
}

/**
 * Parser that handles API response data from a Neo4j "all edges in an induced
 * subgraph" query. Adds shared edge information and Protein to GO Term
 * relationship properties to previously parsed network data.
 *
 * - "ProGo" relationships never create an edge; the relationship's
 *   `relationship` property is stored as `go_protein` on every network node
 *   whose id equals the relationship's start id.
 * - "ProPro" / "Reg" relationships whose endpoints already form an edge in
 *   `networkData.edgeList` (in either orientation) are emitted only when the
 *   matching toggle (`ppi` / `regulatory`) is truthy.
 * - "ProPro" / "Reg" relationships that are not in `networkData.edgeList` are
 *   always emitted and tagged `type: "shared"`.
 * - Any other relationship type is ignored.
 *
 * `networkData` is modified in place: `edgeList` and `edges` are replaced.
 *
 * @param {Object} networkData - Output of one of the network parsers.
 * @param {Array} edgeData - Records whose `_fields[0]` exposes `start`, `end` and `segments`.
 * @param {boolean} ppi - Whether in-network "ProPro" edges are emitted.
 * @param {boolean} regulatory - Whether in-network "Reg" edges are emitted.
 * @returns {Object} The same `networkData` object.
 */
// tag::EdgeDataParser
export function EdgeDataParser(networkData, edgeData, ppi, regulatory) {
  // Snapshot of the original edge keys for constant-time membership checks.
  // NOTE: keys are the plain concatenation of both ids; the format is kept
  // because `edgeList` is part of the returned data.
  const networkEdgeKeys = new Set(networkData.edgeList);
  const tempEdgeList = [];
  const tempEdges = [];

  for (const record of edgeData) {
    const path = record._fields[0];
    const startNode = path.start.properties.id;
    const endNode = path.end.properties.id;
    const relationship = path.segments[0].relationship;
    const relType = relationship.type;
    const properties = relationship.properties;

    if (relType === "ProGo") {
      networkData.nodes.forEach((node) => {
        if (node.data.id === startNode) {
          node.data.go_protein = properties.relationship;
        }
      });
      continue;
    }

    if (relType !== "ProPro" && relType !== "Reg") {
      continue;
    }

    // The network edge list stores one key per undirected pair, so both
    // orientations refer to the same in-network edge.
    const inNetwork =
      networkEdgeKeys.has(startNode + endNode) ||
      networkEdgeKeys.has(endNode + startNode);
    const typeEnabled = relType === "ProPro" ? ppi : regulatory;
    if (inNetwork && !typeEnabled) {
      continue;
    }

    const entryData = {
      source: endNode,
      target: startNode,
      relType,
      // First available piece of evidence wins.
      evidence:
        properties.pubmed ||
        properties.link ||
        properties.reference ||
        properties.interaction ||
        "No Evidence",
      dataSource: properties.source,
    };
    // Optional keys are only added when they apply, never as `undefined`.
    if (!inNetwork) {
      entryData.type = "shared";
    }
    if (relType === "Reg") {
      entryData.regType = properties.relationship;
    }

    tempEdgeList.push(startNode + endNode);
    tempEdges.push({ data: entryData });
  }

  networkData.edgeList = tempEdgeList;
  networkData.edges = tempEdges;
  return networkData;
}

/**
 * Parses node-list paths into node and edge lists.
 *
 * Every element of `data` except the last is read as a path (an array of
 * nodes); at most `k` paths are used. Unlike `NetworkParserPath`, every
 * element of a path becomes a node.
 *
 * @param {Array} data - Paths followed by one final entry that carries the GO term.
 * @param {number} k - Maximum number of paths to include.
 * @returns {{nodes: Array, edges: Array, nodeList: Array, edgeList: Array, goTerm: Object}}
 *   Parsed graph; `goTerm` holds `data[data.length - 1][0]._fields[0].properties`.
 * @throws {TypeError} If `data` is empty or its last entry lacks the expected fields.
 */
export function NetworkParserNode(data, k) {
  const parsedData = { nodes: [], edges: [], nodeList: [], edgeList: [] };
  const seenNodeIds = new Set();
  const seenEdgeKeys = new Set();

  // Clamp at zero: a negative bound would make `slice` count from the end.
  const pathCount = Math.max(0, Math.min(k, data.length - 1));
  data.slice(0, pathCount).forEach((currentPath) => {
    addPathToGraph(parsedData, seenNodeIds, seenEdgeKeys, currentPath);
  });

  parsedData.goTerm = data[data.length - 1][0]._fields[0].properties;

  return parsedData;
}