Skip to content

Commit

Permalink
perf: avoid repeating costly stringifier lookup for each stats row
Browse files Browse the repository at this point in the history
  • Loading branch information
arildm committed Dec 9, 2024
1 parent 4e850a0 commit 20dc444
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Search history is stored as parameters only, not full URLs #118
- Enabled the `noImplicitAny` TypeScript flag for added strictness, and fixed/refactored various parts as a consequence
- The `hitCountHtml` util function now takes the numbers as a tuple
- `reduceStringify()` now returns a stringifier function instead of a formatted string, so it only needs to be called once per attribute rather than once per row

### Fixed

Expand Down
44 changes: 14 additions & 30 deletions app/config/statistics_config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@ import _ from "lodash"
import settings from "@/settings"
import { lemgramToHtml, regescape, saldoToHtml } from "@/util"
import { locAttribute } from "@/i18n"
import { Token } from "@/backend/kwic-proxy"
import { Attribute } from "@/settings/config.types"
import { JQueryStaticExtended } from "@/jquery.types"

type Stringifier = (tokens: string[], ignoreCase?: boolean) => string

Expand Down Expand Up @@ -103,22 +101,19 @@ function reduceCqp(type: string, tokens: string[], ignoreCase: boolean): string
}

// Get a function that produces the html (no linking) representation of the values for the statistics table
export function reduceStringify(type: string, values: string[], structAttr?: Attribute): string {
export function reduceStringify(type: string, structAttr?: Attribute): (values: string[]) => string {
let attrs = settings.corpusListing.getCurrentAttributes()

if (attrs[type] && attrs[type].stats_stringify) {
return customFunctions[attrs[type].stats_stringify!](values)
return customFunctions[attrs[type].stats_stringify!]
}

switch (type) {
case "word":
case "msd":
return values.join(" ")
return (values) => values.join(" ")
case "pos":
var output = _.map(values, function (token) {
return locAttribute(attrs["pos"].translation, token)
}).join(" ")
return output
return (values) => values.map((token) => locAttribute(attrs["pos"].translation, token)).join(" ")
case "saldo":
case "prefix":
case "suffix":
Expand All @@ -134,42 +129,31 @@ export function reduceStringify(type: string, values: string[], structAttr?: Att
stringify = lemgramToHtml
}

const html = _.map(values, function (token) {
if (token === "") return "–"
return stringify(token.replace(/:.*/g, ""), true)
})

return html.join(" ")
return (values) =>
values.map((token) => (token === "" ? "–" : stringify(token.replace(/:.*/g, ""), true))).join(" ")

case "transformer-neighbour":
return values.map((value) => value.replace(/:.*/g, "")).join(" ")
return (values) => values.map((value) => value.replace(/:.*/g, "")).join(" ")

case "deprel":
var output = _.map(values, function (token) {
return locAttribute(attrs["deprel"].translation, token)
}).join(" ")
return output
return (values) => values.map((token) => locAttribute(attrs["deprel"].translation, token)).join(" ")
case "msd_orig": // TODO: OMG this is corpus specific, move out to config ASAP (ASU corpus)
var output = _.map(values, function (token) {
return ($("<span>").text(token) as any).outerHTML()
}).join(" ")
return output
return (values) => values.map((token) => ($("<span>").text(token) as any).outerHTML()).join(" ")
default:
if (attrs[type]) {
// word attributes
return values.join(" ")
return (values) => values.join(" ")
} else {
// structural attributes
var mapped = _.map(values, function (value) {
function stringify(value: string) {
if (value === "") {
return "-"
} else if (structAttr?.translation) {
return locAttribute(structAttr.translation, value)
} else {
return value
}
})
return mapped.join(" ")
return value
}
return (values) => values.map(stringify).join(" ")
}
}
}
5 changes: 3 additions & 2 deletions app/scripts/statistics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
SearchParams,
SlickgridColumn,
} from "./statistics.types"
import { hitCountHtml } from "@/util"
import { fromKeys, hitCountHtml } from "@/util"
import { LangString } from "./i18n/types"
import { getLang, locObj } from "./i18n"
const pieChartImg = require("../img/stats2.png")
Expand Down Expand Up @@ -100,11 +100,12 @@ const createStatisticsService = function () {
// Format the values of the attributes we are reducing by
const cl = settings.corpusListing.subsetFactory(corpora)
const structAttrs = cl.getStructAttrs()
const stringifiers = fromKeys(reduceVals, (attr) => reduceStringify(attr, structAttrs[attr]))
for (const row of e.data) {
if (isTotalRow(row)) continue
for (const type of row.statsValues) {
for (const attr in type) {
row.formattedValue[attr] = reduceStringify(attr, type[attr], structAttrs[attr])
row.formattedValue[attr] = stringifiers[attr](type[attr])
}
}
}
Expand Down

0 comments on commit 20dc444

Please sign in to comment.