diff --git a/build/check-images.ts b/build/check-images.ts index 3d6546c2d252..eee498a7a62a 100644 --- a/build/check-images.ts +++ b/build/check-images.ts @@ -10,6 +10,7 @@ import { Document, FileAttachment } from "../content/index.js"; import { FLAW_LEVELS } from "../libs/constants/index.js"; import { findMatchesInText, findMatchesInMarkdown } from "./matches.js"; import * as cheerio from "cheerio"; +import { Element } from "domhandler"; import { Doc } from "../libs/types/document.js"; const { default: sizeOf } = imagesize; @@ -33,7 +34,7 @@ export function checkImageReferences( const checked = new Map(); function addImageFlaw( - $img: cheerio.Cheerio, + $img: cheerio.Cheerio, src: string, { explanation, @@ -235,7 +236,7 @@ export function checkImageWidths( const checked = new Map(); function addStyleFlaw( - $img: cheerio.Cheerio, + $img: cheerio.Cheerio, style: string, suggestion: string ) { diff --git a/build/extract-sections.ts b/build/extract-sections.ts index 10bfb47b0309..7426988b7a28 100644 --- a/build/extract-sections.ts +++ b/build/extract-sections.ts @@ -1,4 +1,5 @@ import * as cheerio from "cheerio"; +import { Element, ParentNode } from "domhandler"; import { ProseSection, Section } from "../libs/types/document.js"; import { extractSpecifications } from "./extract-specifications.js"; @@ -9,20 +10,17 @@ export async function extractSections( ): Promise<[Section[], string[]]> { const flaws: string[] = []; const sections: Section[] = []; - const section = cheerio - .load("
", { - // decodeEntities: false - })("div") - .eq(0); + const section = cheerio.load("
")("div").eq(0); - const body = $("body")[0] as cheerio.ParentNode; - const iterable = [...(body.childNodes as cheerio.Element[])]; + const bodies = $("body"); + const body = bodies[0] as ParentNode; + const iterable = [...(body.childNodes as Element[])]; let c = 0; for (const child of iterable) { if ( - (child as cheerio.Element).tagName === "h2" || - (child as cheerio.Element).tagName === "h3" + (child as Element).tagName === "h2" || + (child as Element).tagName === "h3" ) { if (c) { const [subSections, subFlaws] = await addSections(section.clone()); @@ -164,7 +162,7 @@ export async function extractSections( * }] */ async function addSections( - $: cheerio.Cheerio + $: cheerio.Cheerio ): Promise { const flaws: string[] = []; @@ -207,17 +205,14 @@ async function addSections( */ if (countPotentialSpecialDivs > 1) { const subSections: Section[] = []; - const section = cheerio - .load("
", { - // decodeEntities: false - })("div") - .eq(0); + const section = cheerio.load("
")("div").eq(0); // Loop over each and every "root element" in the node and keep piling // them up in a buffer, until you encounter a `div.bc-data` or `div.bc-specs` then // add that to the stack, clear and repeat. - const div = $[0] as cheerio.ParentNode; - const iterable = [...(div.childNodes as cheerio.Element[])]; + const div = $[0] as ParentNode; + console.log({ div }); + const iterable = [...(div.childNodes as Element[])]; let c = 0; let countSpecialDivsFound = 0; for (const child of iterable) { @@ -290,7 +285,7 @@ async function addSections( } async function _addSingleSpecialSection( - $: cheerio.Cheerio + $: cheerio.Cheerio ): Promise { let id: string | null = null; let title: string | null = null; @@ -373,9 +368,7 @@ async function _addSingleSpecialSection( throw new Error(`Unrecognized special section type '${specialSectionType}'`); } -function _addSectionProse( - $: cheerio.Cheerio -): SectionsAndFlaws { +function _addSectionProse($: cheerio.Cheerio): SectionsAndFlaws { let id: string | null = null; let title: string | null = null; let isH3 = false; diff --git a/build/flaws/broken-links.ts b/build/flaws/broken-links.ts index 30f53aff04a5..14f70cc5d03d 100644 --- a/build/flaws/broken-links.ts +++ b/build/flaws/broken-links.ts @@ -9,6 +9,7 @@ import { } from "../../libs/constants/index.js"; import { isValidLocale } from "../../libs/locale-utils/index.js"; import * as cheerio from "cheerio"; +import { Element } from "domhandler"; import { Doc } from "../../libs/types/document.js"; import { Flaw } from "./index.js"; import { ONLY_AVAILABLE_IN_ENGLISH } from "../../libs/l10n/l10n.js"; @@ -43,7 +44,7 @@ function isHomepageURL(url) { } function mutateLink( - $element: cheerio.Cheerio, + $element: cheerio.Cheerio, suggestion: string = null, enUSFallback: string = null, isSelfLink = false @@ -93,7 +94,7 @@ export function getBrokenLinksFlaws( // A closure function to help making it easier to append flaws function addBrokenLink( - $element: cheerio.Cheerio, + $element: cheerio.Cheerio, index: number, href: string, suggestion: string = null, @@ -137,11 +138,7 @@ export function getBrokenLinksFlaws( }); } - function checkHash( - hash: string, - a: cheerio.Cheerio, - href: string - ) { + function checkHash(hash: string, a: cheerio.Cheerio, href: string) { if (hash.startsWith(":~:")) { // Ignore fragment directives. return; diff --git a/build/flaws/pre-tags.ts b/build/flaws/pre-tags.ts index ea64396a83c7..1f0929257384 100644 --- a/build/flaws/pre-tags.ts +++ b/build/flaws/pre-tags.ts @@ -2,6 +2,7 @@ import { Flaw } from "./index.js"; import { getFirstMatchInText } from "../matches.js"; import * as cheerio from "cheerio"; +import { Element } from "domhandler"; import { Doc } from "../../libs/types/document.js"; const escapeHTML = (s: string) => s @@ -36,7 +37,7 @@ export function getPreTagFlaws( // // This makes it easier to edit the code in raw form. It also makes it less // heavy because any HTML will be replaced with Prism HTML anyway. - function addCodeTagFlaw($pre: cheerio.Cheerio) { + function addCodeTagFlaw($pre: cheerio.Cheerio) { const id = `bad_pre_tags${flaws.length + 1}`; const type = "pre_with_html"; const explanation = `
CODE can be just 
CODE`;
diff --git a/build/flaws/unsafe-html.ts b/build/flaws/unsafe-html.ts
index 41724792d088..795cedb79d03 100644
--- a/build/flaws/unsafe-html.ts
+++ b/build/flaws/unsafe-html.ts
@@ -6,6 +6,7 @@ import {
 } from "../../libs/env/index.js";
 import { findMatchesInText } from "../matches.js";
 import * as cheerio from "cheerio";
+import { Element } from "domhandler";
 import { Doc } from "../../libs/types/document.js";
 
 const safeIFrameSrcs = [
@@ -32,7 +33,7 @@ function getAndMarkupUnsafeHTMLFlaws(
 ) {
   const flaws: Flaw[] = [];
 
-  function addFlaw(element: cheerio.Element, explanation: string) {
+  function addFlaw(element: Element, explanation: string) {
     const id = `unsafe_html${flaws.length + 1}`;
     let html = $.html($(element));
     $(element).replaceWith($("").addClass("unsafe-html").text(html));
diff --git a/build/utils.ts b/build/utils.ts
index 0d49705d4b6b..a630d0550827 100644
--- a/build/utils.ts
+++ b/build/utils.ts
@@ -5,6 +5,7 @@ import path from "node:path";
 import { cwd } from "node:process";
 
 import * as cheerio from "cheerio";
+import { Element } from "domhandler";
 import got from "got";
 import { fileTypeFromBuffer } from "file-type";
 import imagemin from "imagemin";
@@ -153,11 +154,9 @@ export function splitSections(rawHTML) {
   const blocks = [];
   const toc = [];
 
-  const section = cheerio
-    .load("
", { decodeEntities: false })("div") - .eq(0); + const section = cheerio.load("
")("div").eq(0); - const iterable = [...($("#_body")[0] as cheerio.Element).childNodes]; + const iterable = [...($("#_body")[0] as Element).childNodes]; let c = 0; iterable.forEach((child) => { if ("tagName" in child && child.tagName === "h2") { diff --git a/client/scripts/postprocess-client-build.js b/client/scripts/postprocess-client-build.js index 446f27d6420e..61166495d36d 100644 --- a/client/scripts/postprocess-client-build.js +++ b/client/scripts/postprocess-client-build.js @@ -6,7 +6,7 @@ import fs from "node:fs"; import path from "node:path"; -import cheerio from "cheerio"; +import * as cheerio from "cheerio"; import md5File from "md5-file"; export async function hashSomeStaticFilesForClientBuild(buildRoot) { diff --git a/kumascript/src/api/util.ts b/kumascript/src/api/util.ts index 3a5655b4fcb4..db8599cd0398 100644 --- a/kumascript/src/api/util.ts +++ b/kumascript/src/api/util.ts @@ -4,6 +4,7 @@ */ import sanitizeFilename from "sanitize-filename"; import * as cheerio from "cheerio"; +import { Element } from "domhandler"; const H1_TO_H6_TAGS = new Set(["h1", "h2", "h3", "h4", "h5", "h6"]); const HEADING_TAGS = new Set([...H1_TO_H6_TAGS, "hgroup"]); @@ -142,10 +143,7 @@ export class HTMLTool { private $: cheerio.CheerioAPI; constructor(html, pathDescription?: any) { - this.$ = - typeof html == "string" - ? cheerio.load(html, { decodeEntities: true }) - : html; + this.$ = typeof html == "string" ? cheerio.load(html) : html; this.pathDescription = pathDescription; } @@ -177,7 +175,7 @@ export class HTMLTool { // And we ensure all IDs that get added are completely lowercase. $([...INJECT_SECTION_ID_TAGS].join(",")).each((i, element) => { const $element = $(element); - const $first = $element[0] as cheerio.Element; + const $first = $element[0] as Element; const isDt = $first.name === "dt"; // Default is the existing one. Let's see if we need to change it. let id = $element.attr("id"); diff --git a/kumascript/src/info.ts b/kumascript/src/info.ts index 29c311ffbf37..8088ba618e03 100644 --- a/kumascript/src/info.ts +++ b/kumascript/src/info.ts @@ -1,4 +1,4 @@ -import cheerio from "cheerio"; +import * as cheerio from "cheerio"; import * as Parser from "./parser.js"; import { Document, Redirect } from "../../content/index.js"; diff --git a/kumascript/src/live-sample.ts b/kumascript/src/live-sample.ts index 2dcdc9456539..3d23c6b8755f 100644 --- a/kumascript/src/live-sample.ts +++ b/kumascript/src/live-sample.ts @@ -1,4 +1,4 @@ -import cheerio from "cheerio"; +import * as cheerio from "cheerio"; import ejs from "ejs"; import path from "node:path"; diff --git a/kumascript/tests/macros/svginfo.test.ts b/kumascript/tests/macros/svginfo.test.ts index 84626630b41e..8c8536358b28 100644 --- a/kumascript/tests/macros/svginfo.test.ts +++ b/kumascript/tests/macros/svginfo.test.ts @@ -1,6 +1,6 @@ import fs from "node:fs"; import path from "node:path"; -import cheerio from "cheerio"; +import * as cheerio from "cheerio"; import { jest } from "@jest/globals"; import { itMacro, describeMacro, beforeEachMacro } from "./utils.js"; diff --git a/package.json b/package.json index b4132867b4e1..fc196ba5107e 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,7 @@ "accept-language-parser": "^1.5.0", "async": "^3.2.5", "chalk": "^5.3.0", - "cheerio": "^1.0.0-rc.12", + "cheerio": "^1.0.0", "cli-progress": "^3.12.0", "codemirror": "^6.0.1", "compression": "^1.7.4", @@ -92,6 +92,7 @@ "css-tree": "^2.3.1", "dayjs": "^1.11.12", "dexie": "^4.0.8", + "domhandler": "^5.0.3", "dotenv": "^16.4.5", "ejs": "^3.1.10", "express": "^4.19.2", diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index 9eded792f560..98ecae01ea11 100644 --- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1,7 +1,7 @@ import fs from "node:fs"; import path from "node:path"; -import cheerio from "cheerio"; +import * as cheerio from "cheerio"; import imagesize from "image-size"; const { default: sizeOf } = imagesize; diff --git a/yarn.lock b/yarn.lock index 84b4397b3006..1d46d6c582b6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5191,18 +5191,22 @@ cheerio-select@^2.1.0: domhandler "^5.0.3" domutils "^3.0.1" -cheerio@^1.0.0-rc.12: - version "1.0.0-rc.12" - resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" - integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== +cheerio@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0.tgz#1ede4895a82f26e8af71009f961a9b8cb60d6a81" + integrity sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww== dependencies: cheerio-select "^2.1.0" dom-serializer "^2.0.0" domhandler "^5.0.3" - domutils "^3.0.1" - htmlparser2 "^8.0.1" - parse5 "^7.0.0" + domutils "^3.1.0" + encoding-sniffer "^0.2.0" + htmlparser2 "^9.1.0" + parse5 "^7.1.2" parse5-htmlparser2-tree-adapter "^7.0.0" + parse5-parser-stream "^7.1.2" + undici "^6.19.5" + whatwg-mimetype "^4.0.0" "chokidar@>=3.0.0 <4.0.0", chokidar@^3.5.3, chokidar@^3.6.0: version "3.6.0" @@ -6350,7 +6354,7 @@ domhandler@^4.0.0, domhandler@^4.2.0, domhandler@^4.3.1: dependencies: domelementtype "^2.2.0" -domhandler@^5.0.1, domhandler@^5.0.2, domhandler@^5.0.3: +domhandler@^5.0.2, domhandler@^5.0.3: version "5.0.3" resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31" integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w== @@ -6366,14 +6370,14 @@ domutils@^2.5.2, domutils@^2.8.0: domelementtype "^2.2.0" domhandler "^4.2.0" -domutils@^3.0.1: - version "3.0.1" - resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.0.1.tgz#696b3875238338cb186b6c0612bd4901c89a4f1c" - integrity sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q== +domutils@^3.0.1, domutils@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e" + integrity sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA== dependencies: dom-serializer "^2.0.0" domelementtype "^2.3.0" - domhandler "^5.0.1" + domhandler "^5.0.3" dot-case@^3.0.4: version "3.0.4" @@ -6515,6 +6519,14 @@ encodeurl@~1.0.2: resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w== +encoding-sniffer@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz#799569d66d443babe82af18c9f403498365ef1d5" + integrity sha512-ju7Wq1kg04I3HtiYIOrUrdfdDvkyO9s5XM8QAj/bN61Yo/Vb4vgJxy5vi4Yxk01gWHbrofpPtpxM8bKger9jhg== + dependencies: + iconv-lite "^0.6.3" + whatwg-encoding "^3.1.1" + end-of-stream@^1.0.0, end-of-stream@^1.1.0: version "1.4.4" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" @@ -6535,11 +6547,16 @@ entities@^2.0.0: resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55" integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== -entities@^4.2.0, entities@^4.3.0, entities@^4.4.0: +entities@^4.2.0, entities@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/entities/-/entities-4.4.0.tgz#97bdaba170339446495e653cfd2db78962900174" integrity sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA== +entities@^4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + env-editor@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/env-editor/-/env-editor-1.1.0.tgz#bd510b6cb1528a64b17273aaeba272c050e786e9" @@ -8722,15 +8739,15 @@ htmlparser2@^6.1.0: domutils "^2.5.2" entities "^2.0.0" -htmlparser2@^8.0.1: - version "8.0.1" - resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.1.tgz#abaa985474fcefe269bc761a779b544d7196d010" - integrity sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA== +htmlparser2@^9.1.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-9.1.0.tgz#cdb498d8a75a51f739b61d3f718136c369bc8c23" + integrity sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ== dependencies: domelementtype "^2.3.0" - domhandler "^5.0.2" - domutils "^3.0.1" - entities "^4.3.0" + domhandler "^5.0.3" + domutils "^3.1.0" + entities "^4.5.0" http-cache-semantics@3.8.1, http-cache-semantics@>=4.1.1, http-cache-semantics@^4.1.1: version "4.1.1" @@ -11996,6 +12013,13 @@ parse5-htmlparser2-tree-adapter@^7.0.0: domhandler "^5.0.2" parse5 "^7.0.0" +parse5-parser-stream@^7.1.2: + version "7.1.2" + resolved "https://registry.yarnpkg.com/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz#d7c20eadc37968d272e2c02660fff92dd27e60e1" + integrity sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow== + dependencies: + parse5 "^7.0.0" + parse5@^7.0.0, parse5@^7.1.1, parse5@^7.1.2: version "7.1.2" resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32" @@ -15532,6 +15556,11 @@ undici-types@~5.26.4: resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617" integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA== +undici@^6.19.5: + version "6.19.7" + resolved "https://registry.yarnpkg.com/undici/-/undici-6.19.7.tgz#7d4cf26dc689838aa8b6753a3c5c4288fc1e0216" + integrity sha512-HR3W/bMGPSr90i8AAp2C4DM3wChFdJPLrWYpIS++LxS8K+W535qftjt+4MyjNYHeWabMj1nvtmLIi7l++iq91A== + unicode-canonical-property-names-ecmascript@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/unicode-canonical-property-names-ecmascript/-/unicode-canonical-property-names-ecmascript-2.0.0.tgz#301acdc525631670d39f6146e0e77ff6bbdebddc"