Skip to content

Commit

Permalink
refactor(ts): CQP
Browse files Browse the repository at this point in the history
  • Loading branch information
arildm committed Jul 5, 2024
1 parent 15ba098 commit 22eb9a4
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Added

- TypeScript typings for config/settings
- TypeScript typings for CQP queries

### Changed

Expand Down
2 changes: 1 addition & 1 deletion app/scripts/components/statistics.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import settings from "@/settings"
import { html } from "@/util"
import { loc, locObj } from "@/i18n"
import { getCqp } from "../../config/statistics_config.js"
import { expandOperators } from "@/cqp_parser/cqp.js"
import { expandOperators } from "@/cqp_parser/cqp"
import "@/components/corpus-distribution-chart"

angular.module("korpApp").component("statistics", {
Expand Down
92 changes: 60 additions & 32 deletions app/scripts/cqp_parser/cqp.js → app/scripts/cqp_parser/cqp.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
/** @format */
import _ from "lodash"
import moment from "moment"
import moment, { type Moment } from "moment"
import settings from "@/settings"
import { parse } from "./CQPParser"
import { parse as parse_ } from "./CQPParser"
import type { Condition, CqpQuery, DateRange, OperatorKorp } from "./cqp.types"

/** Parse CQP string to syntax tree. */
// Rename to be able to add typing.
const parse: (input: string, options?: never) => CqpQuery = parse_
export { parse }

export function parseDateInterval(op, val, expanded_format) {
let out
/**
* Create CQP expression for a date interval condition.
*
* @param opKorp Operator to use if not using `expanded_format`
* @param val An array like `[fromdate, todate, fromtime, totime]`
* @param expanded_format Whether to convert to standard CQP or keep Korp-specific operators
*/
export function parseDateInterval(opKorp: OperatorKorp, val: DateRange, expanded_format?: boolean) {
let out: string
val = _.invokeMap(val, "toString")
if (!expanded_format) {
return `$date_interval ${op} '${val.join(",")}'`
return `$date_interval ${opKorp} '${val.join(",")}'`
}

const [fromdate, todate, fromtime, totime] = val
Expand All @@ -25,7 +36,7 @@ export function parseDateInterval(op, val, expanded_format) {
text_timeto: totime,
}

op = function (field, operator, valfield) {
function op(field: string, operator: string, valfield?: string) {
val = valfield ? fieldMapping[valfield] : fieldMapping[field]
return `int(_.${field}) ${operator} ${val}`
}
Expand Down Expand Up @@ -62,7 +73,12 @@ export function parseDateInterval(op, val, expanded_format) {
return out
}

export function stringify(cqp_obj, expanded_format) {
/**
* Serialize syntax tree to CQP string.
* @param cqp_obj Syntax tree
* @param expanded_format Whether to convert to standard CQP or keep Korp-specific operators
*/
export function stringify(cqp_obj: CqpQuery, expanded_format?: boolean): string {
if (expanded_format == null) {
expanded_format = false
}
Expand All @@ -80,13 +96,13 @@ export function stringify(cqp_obj, expanded_format) {
continue
}

const outer_and_array = []
const outer_and_array: string[][] = []
for (let and_array of token.and_block) {
const or_array = []
const or_array: string[] = []
for (let { type, op, val, flags } of and_array) {
var out
if (expanded_format) {
;[val, op] = {
;[val, op] = ({
"^=": [val + ".*", "="],
"_=": [`.*${val}.*`, "="],
"&=": [`.*${val}`, "="],
Expand All @@ -104,7 +120,7 @@ export function stringify(cqp_obj, expanded_format) {
not_incontains_contains: [`.*${val}.*`, "not contains"],
ends_with_contains: [`.*${val}`, "contains"],
not_ends_with_contains: [`.*${val}`, "not contains"],
}[op] || [val, op]
}[op] || [val, op]) as [string | DateRange, OperatorKorp]
}

let flagstr = ""
Expand All @@ -115,7 +131,7 @@ export function stringify(cqp_obj, expanded_format) {
if (type === "word" && val === "") {
out = ""
} else if (settings.corpusListing.isDateInterval(type)) {
out = parseDateInterval(op, val, expanded_format)
out = parseDateInterval(op, val as DateRange, expanded_format)
} else {
out = `${type} ${op} \"${val}\"`
}
Expand All @@ -129,7 +145,7 @@ export function stringify(cqp_obj, expanded_format) {
}
}

let or_out = outer_and_array.map((x) => (x.length > 1 ? `(${x.join(" | ")})` : x.join(" | ")))
let or_out: string[] = outer_and_array.map((x) => (x.length > 1 ? `(${x.join(" | ")})` : x.join(" | ")))

if (token.bound) {
or_out = _.compact(or_out)
Expand All @@ -149,46 +165,58 @@ export function stringify(cqp_obj, expanded_format) {
return output.join(" ")
}

export const expandOperators = (cqpstr) => stringify(parse(cqpstr), true)
export const expandOperators = (cqpstr: string) => stringify(parse(cqpstr), true)

export function getTimeInterval(obj) {
let from = []
let to = []
/**
* Find first and last date in any date interval conditions.
* @param obj Syntax tree
* @returns `[from, to]` as Moment objects, or `undefined` if query has no intervals
*/
export function getTimeInterval(obj: CqpQuery): [Moment, Moment] | undefined {
let froms: Moment[] = []
let tos: Moment[] = []
for (let token of obj) {
for (let or_block of token.and_block) {
for (let item of or_block) {
if (item.type === "date_interval") {
from.push(moment(`${item.val[0]}${item.val[2]}`, "YYYYMMDDhhmmss"))
to.push(moment(`${item.val[1]}${item.val[3]}`, "YYYYMMDDhhmmss"))
froms.push(moment(`${item.val[0]}${item.val[2]}`, "YYYYMMDDhhmmss"))
tos.push(moment(`${item.val[1]}${item.val[3]}`, "YYYYMMDDhhmmss"))
}
}
}
}

if (!from.length) {
if (!froms.length) {
return
}
from = _.minBy(from, (mom) => mom.toDate())
to = _.maxBy(to, (mom) => mom.toDate())
const from = _.minBy(froms, (m) => m.toDate())
const to = _.maxBy(tos, (m) => m.toDate())

return [from, to]
}

export function prioSort(cqpObjs) {
const getPrio = function (and_array) {
const numbers = _.map(and_array, (item) => _.indexOf(settings["cqp_prio"], item.type))
/**
* Sort the conditions in each token according to the `cqp_prio` setting.
*/
export function prioSort(cqpObjs: CqpQuery) {
const getPrio = function (or_block: Condition[]) {
const numbers = _.map(or_block, (item) => _.indexOf(settings.cqp_prio, item.type))
return Math.min(...(numbers || []))
}

for (let token of cqpObjs) {
token.and_block = _.sortBy(token.and_block, getPrio).reverse()
token.and_block = (_.sortBy(token.and_block, getPrio) as Condition[][]).reverse()
}

return cqpObjs
}

// assume cqpObj2 to contain fewer tokens than cqpObj1
export function mergeCqpExprs(cqpObj1, cqpObj2) {
/**
* Create intersection of two queries.
*
* The second query is assumed to contain fewer tokens than the first.
*/
export function mergeCqpExprs(cqpObj1: CqpQuery, cqpObj2: CqpQuery) {
for (let i = 0; i < cqpObj2.length; i++) {
const token = cqpObj2[i]
for (let j = 0; j < cqpObj1.length; j++) {
Expand All @@ -202,14 +230,14 @@ export function mergeCqpExprs(cqpObj1, cqpObj2) {
}

/** Check if a query has any wildcards (`[]`) */
export const hasWildcard = (cqpObjs) => cqpObjs.some((token) => stringify([token]).indexOf("[]") === 0)
export const hasWildcard = (cqpObjs: CqpQuery) => cqpObjs.some((token) => stringify([token]).indexOf("[]") === 0)

/** Check if a query has any tokens with repetition */
export const hasRepetition = (cqpObjs) => cqpObjs.some((token) => token.repeat)
export const hasRepetition = (cqpObjs: CqpQuery) => cqpObjs.some((token) => token.repeat)

/** Check if a query has any structure boundaries, e.g. sentence start */
export const hasStruct = (cqpObjs) => cqpObjs.some((token) => token.struct)
export const hasStruct = (cqpObjs: CqpQuery) => cqpObjs.some((token) => token.struct)

/** Determine whether a query will work with the in_order option */
export const supportsInOrder = (cqpObjs) =>
export const supportsInOrder = (cqpObjs: CqpQuery) =>
cqpObjs.length > 1 && !hasWildcard(cqpObjs) && !hasRepetition(cqpObjs) && !hasStruct(cqpObjs)
43 changes: 43 additions & 0 deletions app/scripts/cqp_parser/cqp.types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/** @format */
export type CqpQuery = CqpToken[]

export type CqpToken = {
and_block: Condition[][]
bound?: Record<"lbound" | "rbound", true>
/** `[min]` or `[min, max]` */
repeat?: [number] | [number, number]
struct?: string
start?: number
}

export type Condition = {
type: string
op: OperatorKorp
val: string | DateRange
flags: Record<string, true>
}

/** Should be `[fromdate, todate, fromtime, totime]` */
export type DateRange = (string | number)[]

export type Operator = "=" | "!=" | "contains" | "not contains"

export type OperatorKorp =
| Operator
| "^="
| "_="
| "&="
| "*="
| "!*="
| "rank_contains"
| "not_rank_contains"
| "highest_rank"
| "not_highest_rank"
| "regexp_contains"
| "not_regexp_contains"
| "starts_with_contains"
| "not_starts_with_contains"
| "incontains_contains"
| "not_incontains_contains"
| "ends_with_contains"
| "not_ends_with_contains"

0 comments on commit 22eb9a4

Please sign in to comment.