Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Streamline and test @siteimprove/alfa-cascade #1534

Merged
merged 26 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .changeset/four-parents-jog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"@siteimprove/alfa-cascade": minor
---

**Breaking:** `RuleTree.add` and `RuleTree.Node.add` have been made internal.

These have heavy assumptions on arguments in order to build a correct rule tree and are not intended for external use. Use `Cascade.of` to build correct cascade and rule trees.

In addition, `RuleTree.Node.add` has been moved to an instance method, and its arguments have been simplified.
5 changes: 5 additions & 0 deletions .changeset/pink-walls-remain.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@siteimprove/alfa-selector": minor
---

**Added:** Selectors now contain a "key selector" which is the leftmost simple selector in a compound one, or the rightmost in a complex one.
7 changes: 7 additions & 0 deletions .changeset/tiny-eyes-attack.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@siteimprove/alfa-cascade": minor
---

**Breaking:** `Cascade.get()` now returns a `RuleTree.Node` instead of an `Option`.

`RuleTree` now has a fake root with no declarations. If no rule matches the current element, `Cascade.get(element)` returns that fake root.
45 changes: 30 additions & 15 deletions docs/review/api/alfa-cascade.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export class Cascade implements Serializable {
// Warning: (ae-forgotten-export) The symbol "AncestorFilter" needs to be exported by the entry point index.d.ts
//
// (undocumented)
get(element: Element, context?: Context, filter?: Option<AncestorFilter>): Option<RuleTree.Node>;
get(element: Element, context?: Context, filter?: Option<AncestorFilter>): RuleTree.Node;
// (undocumented)
static of(node: Document | Shadow, device: Device): Cascade;
// (undocumented)
Expand Down Expand Up @@ -51,12 +51,8 @@ export namespace Cascade {

// @public
export class RuleTree implements Serializable {
// (undocumented)
add(rules: Iterable_2<{
rule: Rule;
selector: Selector;
declarations: Iterable_2<Declaration>;
}>): Option<RuleTree.Node>;
// @internal
add(rules: Iterable_2<RuleTree.Item>): RuleTree.Node;
// (undocumented)
static empty(): RuleTree;
// (undocumented)
Expand All @@ -65,12 +61,35 @@ export class RuleTree implements Serializable {

// @public (undocumented)
export namespace RuleTree {
// @internal
export interface Item {
// (undocumented)
declarations: Iterable_2<Declaration>;
// (undocumented)
rule: Rule;
// (undocumented)
selector: Selector;
}
// (undocumented)
export namespace Item {
// (undocumented)
export interface JSON {
// (undocumented)
[key: string]: json.JSON;
// (undocumented)
declarations: Array<Declaration.JSON>;
// (undocumented)
rule: Rule.JSON;
// (undocumented)
selector: Selector.JSON;
}
}
// (undocumented)
export type JSON = Array<Node.JSON>;
// (undocumented)
export class Node implements Serializable {
// (undocumented)
static add(rule: Rule, selector: Selector, declarations: Iterable_2<Declaration>, children: Array<Node>, parent: Option<Node>): Node;
// @internal
add(item: Item): Node;
// (undocumented)
ancestors(): Iterable_2<Node>;
// (undocumented)
Expand All @@ -80,7 +99,7 @@ export namespace RuleTree {
// (undocumented)
inclusiveAncestors(): Iterable_2<Node>;
// (undocumented)
static of(rule: Rule, selector: Selector, declarations: Iterable_2<Declaration>, children: Array<Node>, parent: Option<Node>): Node;
static of({ rule, selector, declarations }: Item, children: Array<Node>, parent: Option<Node>): Node;
// (undocumented)
get parent(): Option<Node>;
// (undocumented)
Expand All @@ -99,11 +118,7 @@ export namespace RuleTree {
// (undocumented)
children: Array<Node.JSON>;
// (undocumented)
declarations: Array<Declaration.JSON>;
// (undocumented)
rule: Rule.JSON;
// (undocumented)
selector: Selector.JSON;
item: Item.JSON;
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions docs/review/api/alfa-selector.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ export class Class extends WithName<"class"> {
// (undocumented)
equals(value: unknown): value is this;
// (undocumented)
protected readonly _key: Option<Class>;
// (undocumented)
matches(element: Element): boolean;
// (undocumented)
static of(name: string): Class;
Expand Down Expand Up @@ -161,6 +163,8 @@ export class Complex extends Selector_2<"complex"> {
// (undocumented)
equals(value: unknown): value is this;
// (undocumented)
protected readonly _key: Option<Id | Class | Type>;
// (undocumented)
get left(): Simple | Compound | Complex;
// (undocumented)
matches(element: Element, context?: Context): boolean;
Expand Down Expand Up @@ -200,6 +204,8 @@ export class Compound extends Selector_2<"compound"> {
// (undocumented)
equals(value: unknown): value is this;
// (undocumented)
protected readonly _key: Option<Id | Class | Type>;
// (undocumented)
get length(): number;
// (undocumented)
matches(element: Element, context?: Context): boolean;
Expand Down Expand Up @@ -296,6 +302,8 @@ export class Id extends WithName<"id"> {
// (undocumented)
equals(value: unknown): value is this;
// (undocumented)
protected readonly _key: Option<Id>;
// (undocumented)
matches(element: Element): boolean;
// (undocumented)
static of(name: string): Id;
Expand Down Expand Up @@ -495,6 +503,8 @@ export class Type extends WithName<"type"> {
// (undocumented)
equals(value: unknown): value is this;
// (undocumented)
protected readonly _key: Option<Type>;
// (undocumented)
matches(element: Element): boolean;
// (undocumented)
get namespace(): Option<string>;
Expand Down
1 change: 0 additions & 1 deletion docs/review/api/alfa-style.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,6 @@ export namespace Style {
export type Computed<N extends Name> = Longhands.Computed<N>;
// (undocumented)
export type Declared<N extends Name> = Longhands.Declared<N>;
// (undocumented)
export function from(element: Element, device: Device, context?: Context): Style;
// (undocumented)
export type Inherited<N extends Name> = Longhands.Inherited<N>;
Expand Down
35 changes: 35 additions & 0 deletions packages/alfa-cascade/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Alfa Cascade

This package builds the cascade which is then used by `@siteimprove/alfa-style` in order to find the cascaded value of each property.

While resolving the cascade is in theory somewhat easy (for each element and property, find the highest precedence declaration), it requires a lot of work. A typical page contains hundreds of elements and possibly thousands of style rules, and we support nearly 150 properties. So, we cannot just brute force our way through this and need structures to help quickly eliminate most of the selectors. This is especially true for descendant and sibling selectors, whose matching requires traversing the DOM tree, potentially at a far away place for something like `main .foo`.

## Ancestor filter

The ancestor filter is a structure to optimize matching of descendant selectors. It is built during a depth-first traversal of the DOM tree. While inspecting each element (and trying to match selectors), we keep a list of the ancestors we've found.

In order to be compact and efficient, we just count the number of each type, class, and id on the path to the element. So, for example, a `div.foo .bar` selector cannot match if there is no `div` type or `.foo` class along the path. We cannot just keep a boolean because we want to be able to update the ancestor filter during the "upward moves" of the traversal, which requires removing elements from it, so we need a precise count to figure out when it reaches 0.

The ancestor filter only allows for guaranteed "won't match" answers, because the type, class and id have been separated for the sake of compactness. For example, a `div.foo .bar` selector won't match if the `div` and `.foo` ancestors are different, but the ancestor filter doesn't hold that information. However, the filter greatly reduces the actual number of elements to test against each descendant selector and thus the amount of work to be done.

## Key selector and Selector map

The other tool to speed up matching of complex (and compound) selectors is the selector map.

Each selector is associated with a _key selector_, which is the part that is going to be matched against the element itself (not against its siblings or ancestors). For complex selectors, the key selector is thus the rightmost selector. For compound selectors, it could be any selector; we take the leftmost one (mostly to limit the risk of the key selector being a pseudo-class or pseudo-element; key selectors are not really built for these).

That is, in a `div.foo .bar` selector, the key selector is `.bar`. Any element that matches the full `div.foo .bar` selector must necessarily be a `.bar` itself (plus some DOM tree context). For anything else, we don't need to look at DOM structure. Similarly, in the `div.foo` selector, the key selector is `div`.

Conversely, an element can only match selectors if it matches their key selector. So, a `<span class="bar baz" id="my-id">` can only match selectors whose key selector is either `span`, `.bar`, `.baz`, or `#my-id`.

The selector map groups selectors by their key selector. Thus, when searching for selectors that may match a given element, we only ask the selector map for selectors that have one of the possible key selectors and greatly reduce the search space.

## Rule tree

The rule tree (actually a forest) is a representation of the association between elements and the list of selectors (actually, rules) that they match, in decreasing precedence (according to cascade sorting).

Using a tree, rather than a separate list for each element, allows sharing the selectors that match several elements and reduces memory usage.

## Cascade

The cascade itself is a rule tree associated with a map from elements to nodes in it. Each element is mapped to its highest precedence selector in the rule tree. Thus, in order to find the cascaded value of any property for a given element, we can simply walk up the rule tree until we find a selector (and associated rule) declaring that property. Since we've walked up the tree from the highest possible precedence to the lowest, this will be the cascaded value, no matter if more rules up the tree also define this property.
3 changes: 3 additions & 0 deletions packages/alfa-cascade/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
"@siteimprove/alfa-refinement": "workspace:^0.70.0",
"@siteimprove/alfa-selector": "workspace:^0.70.0"
},
"devDependencies": {
"@siteimprove/alfa-test": "workspace:^0.70.0"
},
"publishConfig": {
"access": "public",
"registry": "https://npm.pkg.github.com/"
Expand Down
93 changes: 69 additions & 24 deletions packages/alfa-cascade/src/ancestor-filter.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,47 @@
import { Element } from "@siteimprove/alfa-dom";
import { Serializable } from "@siteimprove/alfa-json";
import { Class, Id, Selector, Type } from "@siteimprove/alfa-selector";

import * as json from "@siteimprove/alfa-json";

/**
* The ancestor filter is a data structure used for optimising selector matching
* in the case of descendant selectors. When traversing down through the DOM
* tree during selector matching, the ancestor filter stores information about
* the ancestor elements that are found up the path from the element that is
* currently being visited. Given an element and a descendant selector, we can
* therefore quickly determine if the selector might match an ancestor of the
* current element.
* in the case of descendant selectors.
*
* @remarks
* When traversing down through the DOM tree during selector matching, the
* ancestor filter stores information about the ancestor elements that are
* found up the path from the element that is currently being visited.
* Given an element and a descendant selector, we can therefore quickly
* determine if the selector might match an ancestor of the current element.
*
* The ancestor filter simply counts the number of each ID, class, and type
* amongst the path walked so far. When a descendant selector is encountered, we
* can quickly see if the ancestor filter contains the ID, class, or type of the
* ancestor part, without walking up the full tree again.
*
* The information stored about elements includes their ID, classes, and type
* which are what the majority of selectors make use of. A bucket exists for
* each of these and whenever an element is added to the filter, its associated
* ID, classes, and type are added to the three buckets. The buckets also keep
* count of how many elements in the current path match a given ID, class, or
* type, in order to evict these from the filter when the last element with a
* given ID, class, or type is removed from the filter.
* We need to remember exact counts rather than just existence because the
* initial build of the cascade traverses the tree in depth-first order and
* therefore needs to be able to *remove* items from the filter when going up.
*
* For example, consider the following tree:
* For example, consider the following DOM tree:
*
* section#content
* +-- blockquote
* +-- p.highlight
* +-- b
*
* If we assume that we're currently visiting the `<b>` element, the ancestor
* filter would contain the `section` and `p` types, the `#content` ID,
* and the `.highlight` class. Given a selector `main b`, we can therefore
* reject that the selector would match `<b>` as the ancestor filter does not
* contain an entry for the type `main`.
* For the `<b>` element, the ancestor filter would be:
* \{ ids: [["content", 1]],
* classes: [["highlight", 1]],
* types: [["p", 1], ["section", 1]]\}
* Given a selector `main b`, we can therefore reject that the selector would
* match the `<b>` as the ancestor filter does not contain the type `main`.
*
* However, given a selector `section.highlight b`, the ancestor filter can only
* tell that it **may** match the `<b>` element. In this case, it doesn't. So,
* the filter acts as a quick guaranteed rejection mechanism, but an actual match
* test is needed to have an accurate final result.
*
* NB: None of the operations of the ancestor filter are idempotent to avoid
* keeping track of more information than strictly necessary. This is however
Expand All @@ -43,7 +55,7 @@ import { Class, Id, Selector, Type } from "@siteimprove/alfa-selector";
*
* @internal
*/
export class AncestorFilter {
export class AncestorFilter implements Serializable<AncestorFilter.JSON> {
/**
 * Creates a new ancestor filter through the no-argument constructor.
 *
 * NOTE(review): the constructor is outside this view; presumably the new
 * filter has no recorded ancestors, as the name suggests — confirm.
 */
public static empty(): AncestorFilter {
return new AncestorFilter();
}
Expand Down Expand Up @@ -79,20 +91,40 @@ export class AncestorFilter {
}

public matches(selector: Selector): boolean {
if (selector instanceof Id) {
if (Id.isId(selector)) {
return this._ids.has(selector.name);
}

if (selector instanceof Class) {
if (Class.isClass(selector)) {
return this._classes.has(selector.name);
}

if (selector instanceof Type) {
if (Type.isType(selector)) {
return this._types.has(selector.name);
}

return false;
}

/**
 * Serialises the filter as an object holding the JSON form of each of its
 * three buckets (IDs, classes, and types).
 */
public toJSON(): AncestorFilter.JSON {
return {
ids: this._ids.toJSON(),
classes: this._classes.toJSON(),
types: this._types.toJSON(),
};
}
}

/**
 * Types associated with the serialised form of an ancestor filter.
 *
 * @internal
 */
export namespace AncestorFilter {
/**
 * JSON shape produced by `AncestorFilter#toJSON()`: one serialised bucket
 * for each of IDs, classes, and types.
 */
export interface JSON {
// NOTE(review): presumably this index signature keeps the interface
// assignable to the generic `json.JSON` object type — confirm.
[key: string]: json.JSON;
ids: Bucket.JSON;
classes: Bucket.JSON;
types: Bucket.JSON;
}
}

/**
Expand All @@ -106,8 +138,10 @@ export class AncestorFilter {
* as we only ever compute cascade once for every context, and native maps are
* actually much faster than any bloom filter we might be able to cook up in
* plain JavaScript.
*
* @internal
*/
class Bucket {
export class Bucket implements Serializable<Bucket.JSON> {
/**
 * Creates a new bucket through the no-argument constructor.
 *
 * NOTE(review): the constructor is outside this view; presumably the new
 * bucket holds no entries, as the name suggests — confirm.
 */
public static empty(): Bucket {
return new Bucket();
}
Expand Down Expand Up @@ -143,4 +177,15 @@ class Bucket {
this._entries.set(entry, count - 1);
}
}

/**
 * Serialises the bucket by copying its entries into a new array of
 * `[entry, count]` pairs.
 */
public toJSON(): Bucket.JSON {
return [...this._entries];
}
}

/**
 * Types associated with the serialised form of a bucket.
 *
 * @internal
 */
export namespace Bucket {
/**
 * JSON shape produced by `Bucket#toJSON()`: a list of `[entry, count]` pairs.
 */
export type JSON = Array<[string, number]>;
}
Loading