Skip to content

Commit

Permalink
Merge pull request #936 from concord-consortium/186179035-single-quot…
Browse files Browse the repository at this point in the history
…e-strings

Handle escaping of single quote strings in formulas
  • Loading branch information
pjanik authored Oct 12, 2023
2 parents 1bd1252 + 6ac7432 commit 685a44c
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 37 deletions.
46 changes: 23 additions & 23 deletions v3/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion v3/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@
"escape-string-regexp": "^5.0.0",
"leaflet": "^1.9.4",
"lodash": "^4.17.21",
"mathjs": "^11.11.0",
"mathjs": "^11.11.2",
"mobx": "^6.10.2",
"mobx-react-lite": "^4.0.4",
"nanoid": "^4.0.2",
Expand Down
3 changes: 2 additions & 1 deletion v3/src/models/data/formula-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,8 @@ export class FormulaManager {
// In 99% of cases, the two `if` statements above will be sufficient to detect deleted formulas. However, this
// could serve as the ultimate check in case a formula instance was deleted in a different manner.
if (!isAlive(metadata.formula)) {
console.warn("FormulaManager.unregisterDeletedFormulas unregistering a defunct formula that was not detected by the usual means.")
console.warn("FormulaManager.unregisterDeletedFormulas unregistering a defunct formula that " +
"was not detected by the usual means.")
this.unregisterFormula(formulaId)
}
})
Expand Down
53 changes: 49 additions & 4 deletions v3/src/models/data/formula-utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { ICase } from "./data-set-types"
import { CANONICAL_NAME, DisplayNameMap, GLOBAL_VALUE, ILocalAttributeDependency, ILookupDependency, LOCAL_ATTR } from "./formula-types"
import {
CANONICAL_NAME, DisplayNameMap, GLOBAL_VALUE, ILocalAttributeDependency, ILookupDependency, LOCAL_ATTR
} from "./formula-types"
import {
safeSymbolName, customizeDisplayFormula, reverseDisplayNameMap, canonicalToDisplay, makeDisplayNamesSafe,
displayToCanonical, unescapeBacktickString, escapeBacktickString, safeSymbolNameFromDisplayFormula,
getLocalAttrCasesToRecalculate, getLookupCasesToRecalculate, isAttrDefined, parseBasicCanonicalName
getLocalAttrCasesToRecalculate, getLookupCasesToRecalculate, isAttrDefined, parseBasicCanonicalName, formulaIndexOf
} from "./formula-utils"

const displayNameMapExample: DisplayNameMap = {
Expand Down Expand Up @@ -50,6 +52,39 @@ describe("parseBasicCanonicalName", () => {
})
})

describe("formulaIndexOf", () => {
const formula = "foo.bar + 'baz' + \"qux\" + 'qux' + \"baz\""

describe("when name is a symbol, not a string constant", () => {
const isStringConstant = false
it("returns the index of the name in the formula", () => {
const result = formulaIndexOf(formula, "bar", isStringConstant)
expect(result).toEqual({ stringDelimiter: null, nameIndex: 4, finalName: "bar" })
})
})

describe("when name is a string constant", () => {
const isStringConstant = true
it("returns the index of the first found name in the formula if it is a double-quoted string constant", () => {
const name = "qux"
const result = formulaIndexOf(formula, name, isStringConstant)
expect(result).toEqual({ stringDelimiter: "\"", nameIndex: 18, finalName: "\"qux\"" })
})

it("returns the index of the name in the formula if it is a single-quoted string constant", () => {
const name = "baz"
const result = formulaIndexOf(formula, name, isStringConstant)
expect(result).toEqual({ stringDelimiter: "'", nameIndex: 10, finalName: "'baz'" })
})

it("returns -1 if the name is not found in the formula", () => {
const name = "quux"
const result = formulaIndexOf(formula, name, isStringConstant)
expect(result).toEqual({ stringDelimiter: null, nameIndex: -1, finalName: "quux" })
})
})
})

describe("displayToCanonical", () => {
it("converts display formula to canonical formula", () => {
expect(displayToCanonical(
Expand Down Expand Up @@ -225,16 +260,26 @@ describe("canonicalToDisplay", () => {
)).toEqual("mean ( `new\\\\mean\\`attribute 🙃` ) + 'mean'")
})
})
describe("when attribute name is provided as string constant (e.g. lookup functions)", () => {
describe("when attribute name is provided as a double quote string constant (e.g. lookup functions)", () => {
it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => {
expect(canonicalToDisplay(
'lookupByKey("__CANONICAL_NAME__DATA_ROLLER_COASTER", "__CANONICAL_NAME__ATTR_PARK",' +
' "__CANONICAL_NAME__ATTR_TOP_SPEED", __CANONICAL_NAME__LOCAL_ATTR_ATTR_ORDER) * 2',
'lookupByKey("Old Roller Coaster", "Old Park", "Old Top Speed", OldOrder) * 2',
'lookupByKey("Old Roller Coaster", "Old\\"Park", "Old\\"Top\'Speed", OldOrder) * 2',
reverseDisplayNameMap(displayNameMapExample)
)).toEqual('lookupByKey("Roller Coaster", "Park\\"", "Top\\\\Speed\'", Order) * 2')
})
})
describe("when attribute name is provided as a single quote string constant (e.g. lookup functions)", () => {
it("is still converted correctly and names with special characters are NOT enclosed in backticks", () => {
expect(canonicalToDisplay(
"lookupByKey('__CANONICAL_NAME__DATA_ROLLER_COASTER', '__CANONICAL_NAME__ATTR_PARK'," +
" '__CANONICAL_NAME__ATTR_TOP_SPEED', __CANONICAL_NAME__LOCAL_ATTR_ATTR_ORDER) * 2",
"lookupByKey('Old Roller Coaster', 'Old\"Park', 'Old\"Top\\'Speed', OldOrder) * 2",
reverseDisplayNameMap(displayNameMapExample)
)).toEqual("lookupByKey('Roller Coaster', 'Park\"', 'Top\\\\Speed\\'', Order) * 2")
})
})
})

describe("isAttrDefined", () => {
Expand Down
61 changes: 53 additions & 8 deletions v3/src/models/data/formula-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ export const escapeBacktickString = (name: string) =>
name.replace(/\\/g, "\\\\").replace(/`/g, "\\`")

export const escapeDoubleQuoteString = (constant: string) =>
constant.replace(/\\/g, "\\\\").replace(/"/g, "\\\"")
constant.replace(/\\/g, "\\\\").replace(/"/g, '\\"')

export const escapeSingleQuoteString = (constant: string) =>
constant.replace(/\\/g, "\\\\").replace(/'/g, "\\'")

export const safeSymbolName = (name: string) =>
name
Expand Down Expand Up @@ -78,6 +81,31 @@ export const reverseDisplayNameMap = (displayNameMap: DisplayNameMap): Canonical
])
}

// "Names" in formula can refer to symbols, constants or function names. Symbols and function names are easy, nothing
// special to do there. String constants become tricky, as MathJS parser stores only string constant, but it doesn't
// store the string delimiter. So we need to determine the original string delimiter, as it affects necessary escaping.
// It'd be easier if MathJS stored the string delimiter / kind of string constant, but currently it doesn't
// See: https://github.com/josdejong/mathjs/issues/3073#issuecomment-1758267336
export const formulaIndexOf = (formula: string, name: string, isStringConstant: boolean) => {
if (!isStringConstant) {
return { stringDelimiter: null, nameIndex: formula.indexOf(name), finalName: name }
}
const doubleQuoteString = `"${escapeDoubleQuoteString(name)}"`
const singleQuoteString = `'${escapeSingleQuoteString(name)}'`
const dQuoteIndex = formula.indexOf(doubleQuoteString)
const sQuoteIndex = formula.indexOf(singleQuoteString)

// We need to check both indices, as formula can contain both "foobar" and 'foobar'. In such case, we need to
// pick the first one.
if (dQuoteIndex >= 0 && (sQuoteIndex < 0 || dQuoteIndex < sQuoteIndex)) {
return { stringDelimiter: '"', nameIndex: dQuoteIndex, finalName: doubleQuoteString }
}
if (sQuoteIndex >= 0 && (dQuoteIndex < 0 || sQuoteIndex < dQuoteIndex)) {
return { stringDelimiter: "'", nameIndex: sQuoteIndex, finalName: singleQuoteString }
}
return { stringDelimiter: null, nameIndex: -1, finalName: name }
}

export const canonicalToDisplay = (canonical: string, originalDisplay: string, canonicalNameMap: CanonicalNameMap) => {
// Algorithm is as follows:
// 1. Parse original display formula and get all the names that need to be replaced.
Expand All @@ -104,33 +132,50 @@ export const canonicalToDisplay = (canonical: string, originalDisplay: string, c

const namesToReplace: string[] = []
const newNames: string[] = []
const isStringConstantNode: boolean[] = []

parse(originalDisplay).traverse((node: MathNode, path: string, parent: MathNode) => {
isNonFunctionSymbolNode(node, parent) && namesToReplace.push(node.name)
isConstantStringNode(node) && namesToReplace.push(escapeDoubleQuoteString(node.value))
isFunctionNode(node) && namesToReplace.push(node.fn.name)
if (isNonFunctionSymbolNode(node, parent)) {
namesToReplace.push(node.name)
isStringConstantNode.push(false)
}
if (isConstantStringNode(node)) {
namesToReplace.push(node.value)
isStringConstantNode.push(true)
}
if (isFunctionNode(node)) {
namesToReplace.push(node.fn.name)
isStringConstantNode.push(false)
}
})
parse(canonical).traverse((node: MathNode, path: string, parent: MathNode) => {
// Symbol with nonstandard characters need to be wrapped in backticks, while constants don't (as they're already
// wrapped in string quotes).
isNonFunctionSymbolNode(node, parent) && newNames.push(
wrapInBackticksIfNecessary(escapeBacktickString(getDisplayNameFromSymbol(node.name)))
)
isConstantStringNode(node) && newNames.push(escapeDoubleQuoteString(getDisplayNameFromSymbol(node.value)))
isConstantStringNode(node) && newNames.push(getDisplayNameFromSymbol(node.value))
isFunctionNode(node) && newNames.push(node.fn.name)
})

let result = ""
let formulaToProcess = originalDisplay
while (newNames.length > 0) {
const name = namesToReplace.shift()!
const newName = newNames.shift()!
const nameIndex = formulaToProcess.indexOf(name)
const isStringConstant = isStringConstantNode.shift()!
const { nameIndex, stringDelimiter, finalName } = formulaIndexOf(formulaToProcess, name, isStringConstant)
if (nameIndex < 0) {
throw new Error(`canonicalToDisplay: name ${name} not found in formula`)
}
let newName = newNames.shift()!
if (stringDelimiter === "'") {
newName = `'${escapeSingleQuoteString(newName)}'`
}
if (stringDelimiter === '"') {
newName = `"${escapeDoubleQuoteString(newName)}"`
}
result += formulaToProcess.substring(0, nameIndex) + newName
formulaToProcess = formulaToProcess.substring(nameIndex + name.length)
formulaToProcess = formulaToProcess.substring(nameIndex + finalName.length)
}
return result + formulaToProcess
}
Expand Down

0 comments on commit 685a44c

Please sign in to comment.