nasa-gcn · Courey · Nov 26, 2024 · Jan 28, 2025 · Feb 18, 2025 · Feb 18, 2025
diff --git a/app/backfill-eventId.js b/app/backfill-eventId.js
@@ -0,0 +1,125 @@
+import { DynamoDBClient, paginateScan } from '@aws-sdk/client-dynamodb'
+import { GetParameterCommand, SSMClient } from '@aws-sdk/client-ssm'
+import { BatchWriteCommand } from '@aws-sdk/lib-dynamodb'
+import { unmarshall } from '@aws-sdk/util-dynamodb'
+import chunk from 'lodash/chunk.js'
+
+// THIS CODE IS CUT AND PASTED FROM THE APP CODE IN /routes/circulars/circulars.lib.ts
+// running as a node script it would not import the module from the file
+const subjectMatchers = [
+  [/GRB[.\s_-]*(\d{6}[a-z|.]\d*)/i, ([, id]) => `GRB ${id.toUpperCase()}`],
+  [/SGR[.\s_-]*(J*\d{4}\.?\d*\+\d{4})/i, ([, id]) => `SGR ${id.toUpperCase()}`],
+  [
+    /SGR[.\s_-]*Swift[.\s_-]*(J*\d{4}\.?\d*\+\d{4})/i,
+    ([, id]) => `SGR Swift ${id.toUpperCase()}`,
+  ],
+  [/IceCube[.\s_-]*(\d{6}[a-z])/i, ([, id]) => `IceCube-${id.toUpperCase()}`],
+  [/ZTF[.\s_-]*(\d{2}[a-z]*)/i, ([, id]) => `ZTF${id.toLowerCase()}`],
+  [/HAWC[.\s_-]*(\d{6}A)/i, ([, id]) => `HAWC-${id.toUpperCase()}`],
+  [
+    /((?:LIGO|Virgo|KAGRA)(?:[/-](?:LIGO|Virgo|KAGRA))*)[-_ \s]?(S|G|GW)(\d{5,}[a-z]*)/i,
+    ([, instrument, flag, id]) => {
+      return `${instrument} ${flag.toUpperCase()}${id.toLowerCase()}`
+    },
+  ],
+  [/ANTARES[.\s_-]*(\d{6}[a-z])/i, ([, id]) => `ANTARES ${id}`.toUpperCase()],
+  [
+    /Baksan\sNeutrino\sObservatory\sAlert[.\s_-]*(\d{6}.\d{2})/i,
+    ([, id]) => `Baksan Neutrino Observatory Alert ${id}`,
+  ],
+  [/EP[.\s_-]*(\d{6}[a-z])/i, ([, id]) => `EP${id}`],
+  [/FRB[.\s_-]*(\d{8}[a-z])/i, ([, id]) => `FRB ${id}`.toUpperCase()],
+]
+
+export function parseEventFromSubject(value) {
+  for (const [regexp, normalize] of subjectMatchers) {
+    const startsWithMatch = RegExp(`^${regexp.source}`).exec(value)
+    if (startsWithMatch) return normalize(startsWithMatch)
+  }
+  for (const [regexp, normalize] of subjectMatchers) {
+    const match = regexp.exec(value)
+    if (match) return normalize(match)
+  }
+}
+// END CUT AND PASTE OF APP CODE FROM /routes/circulars/circulars.lib.ts
+
+async function getTableNameFromSSM(dynamoTableName) {
+  const ssmClient = new SSMClient({ region: 'us-east-1' })
+
+  try {
+    const command = new GetParameterCommand({ Name: dynamoTableName })
+    const response = await ssmClient.send(command)
+
+    if (!response.Parameter?.Value) {
+      throw new Error('dynamoTableName not found in SSM')
+    }
+
+    return response.Parameter.Value
+  } catch (error) {
+    console.error('Error fetching table name from SSM:', error)
+    throw error
+  }
+}
+
+export async function backfillEventIds() {
+  const startTime = new Date()
+  console.log('Starting EVENT ID backfill...', startTime)
+  const writeLimit = 1000
+  const dynamoTableName = '/RemixGcnProduction/tables/circulars'
+  const TableName = await getTableNameFromSSM(dynamoTableName)
+  const client = new DynamoDBClient({ region: 'us-east-1' })
+  const pages = paginateScan({ client }, { TableName })
+  let totalWriteCount = 0
+  let limitHit = false
+
+  for await (const page of pages) {
+    if (limitHit) break
+    const chunked = chunk(page.Items || [], 25)
+
+    for (const chunk of chunked) {
+      if (limitHit) break
+      let writes = []
+
+      for (const record of chunk) {
+        if (limitHit) break
+
+        const circular = unmarshall(record)
+        const parsedEventId = parseEventFromSubject(circular.subject)
+
+        if (!circular.eventId && parsedEventId) {
+          circular.eventId = parsedEventId
+          writes.push(circular)
+        }
+      }
+
+      if (writes.length + totalWriteCount > writeLimit) {
+        const overage = writes.length + totalWriteCount - writeLimit
+        const writesToLimitTo = writes.length - overage
+        writes = writes.slice(0, writesToLimitTo)
+        limitHit = true
+      }
+
+      if (writes.length > 0) {
+        const command = new BatchWriteCommand({
+          RequestItems: {
+            [TableName]: writes.map((circ) => ({
+              PutRequest: {
+                Item: {
+                  ...circ,
+                },
+              },
+            })),
+          },
+        })
+
+        await client.send(command)
+        totalWriteCount = writes.length + totalWriteCount
+      }
+    }
+  }
+  const endTime = new Date()
+  console.log('... End EVENT ID backfill... ', endTime)
+  console.log('Total Event Ids Updated: ', totalWriteCount)
+}
+
+backfillEventIds()