Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crowdin.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
files:
- source: /content/**/*.md
translation: /translations/%locale%/%original_path%/%original_file_name%
# See lib/page-data.js for a matching list of prefix exceptions
# Try to keep these in sync when editing in either location.
ignore:
- '/content/README.md'
- '/content/early-access'
Expand Down
2 changes: 1 addition & 1 deletion lib/create-tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url))
const _basePaths = new Map()
// Return a full directory based on __dirname from a specific language directory.
// This function is memoized with a simple global cache object.
function getBasePath(directory) {
export function getBasePath(directory) {
if (!_basePaths.has(directory)) {
_basePaths.set(directory, path.posix.join(__dirname, '..', directory, 'content'))
}
Expand Down
124 changes: 121 additions & 3 deletions lib/page-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,29 @@ import { fileURLToPath } from 'url'
import path from 'path'
import languages from './languages.js'
import { allVersions } from './all-versions.js'
import createTree from './create-tree.js'
import createTree, { getBasePath } from './create-tree.js'
import renderContent from './render-content/index.js'
import loadSiteData from './site-data.js'
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
import Page from './page.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const versions = Object.keys(allVersions)
const enterpriseServerVersions = versions.filter((v) => v.startsWith('enterprise-server@'))
const renderOpts = { textOnly: true, encodeEntities: true }

// These are the exceptions to the rule.
// These URI prefixes should match what you'll find in crowdin.yml.
// If a URI starts with one of these prefixes, it basically means we don't
// bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works
// only in English and we don't bother making `/ja/github/site-policy-deprecated/foo`
// work too.
const TRANSLATION_DRIFT_EXCEPTIONS = [
'github/site-policy-deprecated',
// Early access stuff never has translations.
'early-access',
]

/**
* We only need to initialize pages _once per language_ since pages don't change per version. So we do that
* first since it's the most expensive work. This gets us a nested object with pages attached that we can use
Expand Down Expand Up @@ -152,8 +166,112 @@ export function createMapFromArray(pageList) {
}

export async function loadPageMap(pageList) {
const pages = pageList || (await loadPageList())
return createMapFromArray(pages)
const pages = await correctTranslationOrphans(pageList || (await loadPageList()))
const pageMap = createMapFromArray(pages)
return pageMap
}

// If a translation page exists, that doesn't have an English equivalent,
// remove it.
// If an English page exists, that doesn't have an translation equivalent,
// add it.
// Note, this function is exported purely for the benefit of the unit tests.
export async function correctTranslationOrphans(pageList, basePath = null) {
const englishRelativePaths = new Set()
for (const page of pageList) {
if (page.languageCode === 'en') {
englishRelativePaths.add(page.relativePath)
}
}

// Prime the Map with an empty set for each language prefix.
// It's important that we do this for *every* language rather than
// just populating `nonEnglish` based on those pages that *are* present.
// Otherwise, we won't have an index of all the languages
// that *might* be missing.
const nonEnglish = new Map()
Object.keys(languages)
.filter((lang) => lang !== 'en')
.forEach((languageCode) => {
nonEnglish.set(languageCode, new Set())
})

// By default, when backfilling, we set the `basePath` to be that of
// English. But for the benefit of being able to do unit tests,
// we make this an optional argument. Then, unit tests can use
// its "tests/fixtures" directory.
const englishBasePath = basePath || getBasePath(languages.en.dir)

// Filter out all non-English pages that appear to be excess.
// E.g. if an English doc was renamed from `content/foo.md` to
// `content/bar.md` what will happen is that `translations/*/content/foo.md`
// will still linger around and we want to remove that even if it was
// scooped up from disk.
const newPageList = []
for (const page of pageList) {
if (page.languageCode === 'en') {
// English pages are never considered "excess"
newPageList.push(page)
continue
}

// If this translation page exists in English, keep it but also
// add it to the set of relative paths that is known.
if (englishRelativePaths.has(page.relativePath)) {
nonEnglish.get(page.languageCode).add(page.relativePath)
newPageList.push(page)
continue
}

// All else is considered "excess" and should be excluded from
// the new list of pages. So do nothing.
if (process.env.NODE_ENV === 'development') {
console.log(
`For ${page.languageCode}, omit the page ${page.relativePath} even though it's present on disk.`
)
}
}

const pageLoadPromises = []
for (const relativePath of englishRelativePaths) {
for (const [languageCode, relativePaths] of nonEnglish) {
if (!relativePaths.has(relativePath)) {
// At this point, we've found an English `relativePath` that is
// not used by this language.
// But before we decide to "backfill" it from the English equivalent
// we first need to figure out if it should be excluded.
// The reason for doing this check this late is for the benefit
// of optimization. In general, when the translation pipeline has
// done its magic, this should be very rare, so it's unnecessary
// to do this exception check on every single English relativePath.
if (TRANSLATION_DRIFT_EXCEPTIONS.find((exception) => relativePath.startsWith(exception))) {
continue
}

// The magic right here!
// The trick is that we can't clone instances of class Page. We need
// to create them for this language. But the trick is that we
// use the English relative path so it can have something to read.
// For example, if we have figured out that
// `translations/ja-JP/content/foo.md` doesn't exist, we pretend
// that we can use `foo.md` and the base path of `content/`.
pageLoadPromises.push(
Page.init({
basePath: englishBasePath,
relativePath: relativePath,
languageCode: languageCode,
})
)
if (process.env.NODE_ENV === 'development') {
console.log(`Backfill ${relativePath} for ${languageCode} from English`)
}
}
}
}
const additionalPages = await Promise.all(pageLoadPromises)
newPageList.push(...additionalPages)

return newPageList
}

export default {
Expand Down
60 changes: 60 additions & 0 deletions tests/unit/load-translation-orphans.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import path from 'path'
import { fileURLToPath } from 'url'

import { expect } from '@jest/globals'

import languages from '../../lib/languages.js'
import Page from '../../lib/page.js'
import { loadPageMap, correctTranslationOrphans } from '../../lib/page-data.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))

describe('loading page map with translation orphans', () => {
test('inject missing translations from English', async () => {
const basePath = path.join(__dirname, '../fixtures')
const page = await Page.init({
relativePath: 'page-that-does-not-exist-in-translations-dir.md',
basePath,
languageCode: 'en',
})
console.assert(page, 'page could not be loaded')

const pageList = [page]
const pageMap = await loadPageMap(await correctTranslationOrphans(pageList, basePath))
// It should make a copy of the English into each language
expect(Object.keys(pageMap).length).toBe(Object.keys(languages).length)

// +1 for the test just above, 2 tests per language.
expect.assertions(1 + Object.keys(languages).length * 2)

for (const languageCode of Object.keys(languages)) {
for (const permalink of page.permalinks) {
const translationHref = permalink.href.replace('/en', `/${languageCode}`)
const translationPage = pageMap[translationHref]
expect(translationPage).toBeTruthy()
expect(translationPage.languageCode).toBe(languageCode)
}
}
})

test('remove translation pages that were not in English', async () => {
const basePath = path.join(__dirname, '../fixtures')
const page = await Page.init({
relativePath: 'page-that-does-not-exist-in-translations-dir.md',
basePath,
languageCode: 'en',
})
console.assert(page, 'page could not be loaded')
const orphan = await Page.init({
relativePath: 'article-with-videos.md',
basePath,
languageCode: 'ja',
})
console.assert(orphan, 'page could not be loaded')
const orphanPermalinks = orphan.permalinks.map((p) => p.href)

const pageList = await correctTranslationOrphans([page, orphan], basePath)
const pageMap = await loadPageMap(pageList)
expect(pageMap[orphanPermalinks[0]]).toBeFalsy()
expect(Object.keys(pageMap).length).toBe(Object.keys(languages).length)
})
})
54 changes: 34 additions & 20 deletions tests/unit/pages.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { jest } from '@jest/globals'
import path from 'path'
import { loadPages, loadPageMap } from '../../lib/page-data.js'
import { loadPages, loadPageMap, correctTranslationOrphans } from '../../lib/page-data.js'
import libLanguages from '../../lib/languages.js'
import { liquid } from '../../lib/render-content/index.js'
import patterns from '../../lib/patterns.js'
Expand All @@ -13,13 +13,24 @@ import removeFPTFromPath from '../../lib/remove-fpt-from-path.js'
const languageCodes = Object.keys(libLanguages)
const slugger = new GithubSlugger()

// By default, the tests don't change that each translation has an
// equivalent English page (e.g. `translations/*/content/foo.md`
// expects `content/foo.md`)
// Set the environment variable `TEST_TRANSLATION_MATCHING=true`
// to enable that test.
const testIfRequireTranslationMatching = JSON.parse(
process.env.TEST_TRANSLATION_MATCHING || 'false'
)
? test
: test.skip

describe('pages module', () => {
jest.setTimeout(60 * 1000)

let pages

beforeAll(async () => {
pages = await loadPages()
pages = await correctTranslationOrphans(await loadPages())
})

describe('loadPages', () => {
Expand Down Expand Up @@ -76,8 +87,8 @@ describe('pages module', () => {

const message = `Found ${duplicates.length} duplicate redirect_from ${
duplicates.length === 1 ? 'path' : 'paths'
}.
Ensure that you don't define the same path more than once in the redirect_from property in a single file and across all English files.
}.
Ensure that you don't define the same path more than once in the redirect_from property in a single file and across all English files.
You may also receive this error if you have defined the same children property more than once.\n
${duplicates.join('\n')}`
expect(duplicates.length, message).toBe(0)
Expand Down Expand Up @@ -152,26 +163,29 @@ describe('pages module', () => {
expect(liquidErrors.length, failureMessage).toBe(0)
})

test('every non-English page has a matching English page', async () => {
const englishPaths = chain(walk('content', { directories: false }))
.uniq()
.value()

const nonEnglishPaths = chain(Object.values(libLanguages))
.filter((language) => language.code !== 'en')
.map((language) => walk(`${language.dir}/content`, { directories: false }))
.flatten()
.uniq()
.value()

const diff = difference(nonEnglishPaths, englishPaths)
const failureMessage = `
testIfRequireTranslationMatching(
'every non-English page has a matching English page',
async () => {
const englishPaths = chain(walk('content', { directories: false }))
.uniq()
.value()

const nonEnglishPaths = chain(Object.values(libLanguages))
.filter((language) => language.code !== 'en')
.map((language) => walk(`${language.dir}/content`, { directories: false }))
.flatten()
.uniq()
.value()

const diff = difference(nonEnglishPaths, englishPaths)
const failureMessage = `
Found ${diff.length} non-English pages without a matching English page:\n - ${diff.join('\n - ')}

Remove them with script/i18n/prune-stale-files.js and commit your changes using "git commit --no-verify".
`
expect(diff.length, failureMessage).toBe(0)
})
expect(diff.length, failureMessage).toBe(0)
}
)
})

describe('loadPageMap', () => {
Expand Down