Skip to content

Commit

Permalink
feat(lbac-2194): sitemap avec les offres (#1701)
Browse files Browse the repository at this point in the history
* feat(lbac-2194): sitemap avec les offres

* fix: suppression commentaire

* fix: mise en place du cron + non remplacement si pas de changement

* fix: build
  • Loading branch information
remy-auricoste authored Dec 11, 2024
1 parent 84f49e7 commit e846683
Show file tree
Hide file tree
Showing 17 changed files with 316 additions and 60 deletions.
26 changes: 26 additions & 0 deletions server/src/http/controllers/sitemap.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { zRoutes } from "shared/index"

import dayjs from "@/services/dayjs.service"
import { getSitemap } from "@/services/sitemap.service"

import { Server } from "../server"

/**
 * Registers the `GET /sitemap-offers.xml` route on the given server.
 *
 * The handler loads the stored sitemap through `getSitemap()` and replies
 * with its XML, exposing the sitemap creation date as `last-modified`.
 */
export default function (server: Server) {
  const path = "/sitemap-offers.xml"
  server.get(path, { schema: zRoutes.get[path] }, async (_req, res) => {
    const { created_at, xml } = await getSitemap()
    const headers = {
      "content-type": "text/xml",
      // created_at converted to UTC and serialised with dayjs' toString()
      "last-modified": dayjs(created_at).utc().toString(),
    }
    return res.status(200).headers(headers).send(xml)
  })
}
2 changes: 2 additions & 0 deletions server/src/http/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import optoutRoute from "./controllers/optout.controller"
import partnersRoute from "./controllers/partners.controller"
import reportedCompanyController from "./controllers/reportedCompany.controller"
import rome from "./controllers/rome.controller"
import sitemapController from "./controllers/sitemap.controller"
import trainingLinks from "./controllers/trainingLinks.controller"
import unsubscribeLbaCompany from "./controllers/unsubscribeRecruteurLba.controller"
import updateLbaCompany from "./controllers/updateRecruteurLba.controller"
Expand Down Expand Up @@ -119,6 +120,7 @@ export async function bind(app: Server) {
(subApp, _, done) => {
const typedSubApp = subApp.withTypeProvider<ZodTypeProvider>()
coreRoutes(typedSubApp)
sitemapController(typedSubApp)

/**
* LBACandidat
Expand Down
5 changes: 5 additions & 0 deletions server/src/jobs/jobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { ObjectId } from "mongodb"

import { create as createMigration, status as statusMigration, up as upMigration } from "@/jobs/migrations/migrations"
import { updateReferentielCommune } from "@/services/referentiel/commune/commune.referentiel.service"
import { generateSitemap } from "@/services/sitemap.service"

import { getLoggerWithContext, logger } from "../common/logger"
import { getDatabase } from "../common/utils/mongodbUtils"
Expand Down Expand Up @@ -212,6 +213,10 @@ export async function setupJobProcessor() {
cron_string: "30 22 * * *",
handler: sendContactsToBrevo,
},
"Génération du sitemap pour les offres": {
cron_string: "5 22 * * *",
handler: generateSitemap,
},
},
jobs: {
"remove:duplicates:recruiters": {
Expand Down
6 changes: 6 additions & 0 deletions server/src/jobs/simpleJobDefinitions.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { generateSitemap } from "@/services/sitemap.service"

import anonymizeOldApplications from "./anonymization/anonymizeOldApplications"
import { anonimizeUsers } from "./anonymization/anonymizeUserRecruteurs"
import { anonymizeOldUsers } from "./anonymization/anonymizeUsers"
Expand Down Expand Up @@ -204,4 +206,8 @@ export const simpleJobDefinitions: SimpleJobDefinition[] = [
fct: sendContactsToBrevo,
description: "Envoi à Brevo la liste des contacts",
},
{
fct: generateSitemap,
description: "Génère le sitemap pour les offres",
},
]
88 changes: 88 additions & 0 deletions server/src/services/sitemap.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { ObjectId } from "mongodb"
import { RECRUITER_STATUS } from "shared/constants"
import { IJob, IRecruiter, JOB_STATUS } from "shared/models"
import { ISitemap } from "shared/models/sitemap.model"
import { hashcode } from "shared/utils"
import { generateSitemapFromUrlEntries } from "shared/utils/sitemapUtils"

import { logger } from "@/common/logger"
import { getDbCollection } from "@/common/utils/mongodbUtils"
import { notifyToSlack } from "@/common/utils/slackUtils"
import config from "@/config"
import dayjs from "@/services/dayjs.service"

// Shape of one document returned by the aggregation in generateSitemapXml:
// the recruiter's `updatedAt` plus a single job (not an array, because the
// pipeline unwinds `jobs`) reduced to its `_id` and `job_update_date`.
type AggregateRecruiter = Pick<Omit<IRecruiter, "jobs">, "updatedAt"> & {
jobs: Pick<IJob, "job_update_date" | "_id">
}

/**
 * Builds the sitemap XML listing every active job of every active recruiter.
 *
 * @returns the XML document and the number of job URLs it contains
 */
const generateSitemapXml = async () => {
  const activeJobsPipeline = [
    // first $match narrows to active recruiters having at least one active job
    { $match: { status: RECRUITER_STATUS.ACTIF, "jobs.job_status": JOB_STATUS.ACTIVE } },
    // one document per job
    { $unwind: { path: "$jobs" } },
    // second $match drops the non-active jobs of the recruiters kept above
    { $match: { "jobs.job_status": JOB_STATUS.ACTIVE } },
    { $project: { updatedAt: 1, "jobs.job_update_date": 1, "jobs._id": 1 } },
  ]
  // NOTE(review): the explicit limit looks like a "no limit" safeguard — confirm it is still needed
  const cursor = getDbCollection("recruiters").aggregate(activeJobsPipeline).limit(Number.MAX_SAFE_INTEGER)
  const documents = (await cursor.toArray()) as AggregateRecruiter[]

  const urlEntries = documents.map(({ jobs: { job_update_date, _id }, updatedAt }) => {
    // lastmod is the most recent of the recruiter update date and the job update date
    let lastmod = updatedAt
    if (job_update_date && dayjs(updatedAt).isBefore(job_update_date)) {
      lastmod = job_update_date
    }
    return {
      loc: `${config.publicUrl}/recherche-apprentissage?type=matcha&itemId=${_id}`,
      lastmod,
      changefreq: "daily" as const,
    }
  })

  return { xml: generateSitemapFromUrlEntries(urlEntries), count: documents.length }
}

/**
 * Regenerates the offers sitemap and stores it in the `sitemaps` collection.
 *
 * Nothing is written (and no notification is sent) when a stored sitemap
 * already has the same hashcode as the freshly generated XML.
 * Otherwise the new sitemap replaces the stored one(s), the result is logged
 * and a Slack notification is sent; the notification is flagged as an error
 * when the sitemap is empty or larger than 40 Mo.
 */
export const generateSitemap = async () => {
  const { xml, count } = await generateSitemapXml()
  const hash = hashcode(xml).toString()
  const unchangedSitemap = await getDbCollection("sitemaps").findOne({ hashcode: hash })
  if (unchangedSitemap) {
    return
  }

  const newSitemap: ISitemap = {
    _id: new ObjectId(),
    created_at: new Date(),
    xml: xml,
    hashcode: hash,
  }
  // Insert the new version BEFORE removing the previous one(s): the collection is
  // then never empty, so a concurrent getSitemap() cannot fall into its
  // "no sitemap in database" fallback (regeneration + Slack error) during the swap.
  await getDbCollection("sitemaps").insertOne(newSitemap)
  await getDbCollection("sitemaps").deleteMany({ _id: { $ne: newSitemap._id } })

  // size is approximated from the string length (UTF-16 code units, not bytes)
  const sizeInMo = xml.length / (1024 * 1024)
  const message = `Generated sitemap with ${count} offers. size: ~${sizeInMo.toFixed(1)} Mo. Max 50 Mo`
  logger.info(message)
  await notifyToSlack({
    subject: "job de génération du sitemap des offres",
    message,
    error: count === 0 || sizeInMo > 40,
  })
}

/**
 * Returns the sitemap stored in the `sitemaps` collection.
 *
 * When the collection is empty, a sitemap is generated on the fly, stored,
 * and an error notification is sent to Slack because generation is expected
 * to be done by the job processor.
 */
export const getSitemap = async (): Promise<ISitemap> => {
  const storedSitemap = await getDbCollection("sitemaps").findOne({})
  if (storedSitemap) {
    return storedSitemap
  }

  // should not happen but added for safety
  const { xml } = await generateSitemapXml()
  const fallbackSitemap: ISitemap = {
    _id: new ObjectId(),
    created_at: new Date(),
    xml,
    hashcode: hashcode(xml).toString(),
  }
  await getDbCollection("sitemaps").insertOne(fallbackSitemap)
  await notifyToSlack({
    subject: "job de génération du sitemap des offres",
    message: `Inattendu : génération du sitemap par le backend alors qu il aurait dû être généré par le job processor !`,
    error: true,
  })
  return fallbackSitemap
}
2 changes: 2 additions & 0 deletions shared/models/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import roleManagement360Model from "./roleManagement360.model"
import romeModel from "./rome.model"
import sessionModel from "./session.model"
import siretDiffusibleStatusModel from "./siretDiffusibleStatus.model"
import sitemapModel from "./sitemap.model"
import trafficSourcesModel from "./trafficSources.model"
import unsubscribedLbaCompanyModel from "./unsubscribedRecruteurLba.model"
import unsubscribeOFModel from "./unsubscribeOF.model"
Expand Down Expand Up @@ -93,6 +94,7 @@ const modelDescriptorMap = {
[rawKelioModel.collectionName]: rawKelioModel,
[rawRHAlternanceModel.collectionName]: rawRHAlternanceModel,
[trafficSourcesModel.collectionName]: trafficSourcesModel,
[sitemapModel.collectionName]: sitemapModel,
} as const satisfies Record<string, IModelDescriptor>

export const modelDescriptors = Object.values(modelDescriptorMap) as (typeof modelDescriptorMap)[keyof typeof modelDescriptorMap][] satisfies IModelDescriptor[]
Expand Down
20 changes: 20 additions & 0 deletions shared/models/sitemap.model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { z } from "../helpers/zodWithOpenApi"

import { IModelDescriptor, zObjectId } from "./common"

// Zod schema of a stored sitemap document: the generated XML, its creation
// date and the hashcode of the XML. `.strict()` rejects unknown keys.
export const ZSitemap = z
.object({
_id: zObjectId,
created_at: z.date().describe("Date d'ajout en base de données"),
xml: z.string(),
hashcode: z.string().describe("hashcode du xml"),
})
.strict()

// TypeScript type of a parsed sitemap document, derived from the schema above.
export type ISitemap = z.output<typeof ZSitemap>

// Model descriptor for the "sitemaps" collection: validated by ZSitemap,
// no index declared.
export default {
zod: ZSitemap,
indexes: [],
collectionName: "sitemaps" as const,
} as const satisfies IModelDescriptor
2 changes: 2 additions & 0 deletions shared/routes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { zPartnersRoutes } from "./partners.routes"
import { zRecruiterRoutes } from "./recruiters.routes"
import { zReportedCompanyRoutes } from "./reportedCompany.routes"
import { zRomeRoutes } from "./rome.routes"
import { zSitemapRoutes } from "./sitemap.routes"
import { zTrainingLinksRoutes } from "./trainingLinks.routes"
import { zUnsubscribeRoute } from "./unsubscribe.routes"
import { zUpdateLbaCompanyRoutes } from "./updateLbaCompany.routes"
Expand All @@ -45,6 +46,7 @@ const zRoutesGetP2 = {
...zV1JobsRoutes.get,
...zV1FormationsRoutes.get,
...zApplicationRoutes.get,
...zSitemapRoutes.get,
} as const

const zRoutesGetP3 = {
Expand Down
16 changes: 16 additions & 0 deletions shared/routes/sitemap.routes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { z } from "../helpers/zodWithOpenApi"

import { IRoutesDef } from "./common.routes"

// Route definitions for the sitemap API: a single GET endpoint whose 200
// response is a string (the sitemap XML).
export const zSitemapRoutes = {
get: {
"/sitemap-offers.xml": {
method: "get",
path: "/sitemap-offers.xml",
response: {
"200": z.string(),
},
// NOTE(review): null presumably means the route needs no authentication — confirm against IRoutesDef
securityScheme: null,
},
},
} as const satisfies IRoutesDef
25 changes: 25 additions & 0 deletions shared/utils/sitemapUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/** One `<url>` entry of a sitemap. Only `loc` is mandatory. */
export type SitemapUrlEntry = {
  loc: string
  lastmod?: Date
  changefreq?: "always" | "hourly" | "daily" | "weekly" | "monthly" | "yearly" | "never"
  priority?: number
}

// XML predefined entities, keyed by the character they replace.
const XML_ENTITIES: Record<string, string> = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;",
}

/** Escapes the five XML special characters of `text`. */
const xmlEncode = (text: string): string => text.replace(/[&<>"']/g, (char) => XML_ENTITIES[char] ?? char)

/** Renders a single `<url>` element; optional fields are emitted only when set. */
const renderUrlEntry = ({ loc, lastmod, changefreq, priority }: SitemapUrlEntry): string => {
  const lines = [`<loc>${xmlEncode(loc)}</loc>`]
  if (lastmod) {
    lines.push(`<lastmod>${lastmod.toISOString()}</lastmod>`)
  }
  if (changefreq) {
    lines.push(`<changefreq>${changefreq}</changefreq>`)
  }
  // explicit undefined check so that a priority of 0 is still written
  if (priority !== undefined) {
    lines.push(`<priority>${priority}</priority>`)
  }
  return "<url>" + lines.join("\n") + "\n</url>"
}

/**
 * Generates a sitemap XML document from the given URL entries.
 *
 * @param urlEntries entries to list; `loc` values are XML-escaped
 * @returns the complete `<urlset>` document as a string
 */
export const generateSitemapFromUrlEntries = (urlEntries: SitemapUrlEntry[]) => {
  const documentLines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    urlEntries.map((urlEntry) => renderUrlEntry(urlEntry)).join(""),
    "</urlset>",
  ]
  return documentLines.join("\n")
}
13 changes: 13 additions & 0 deletions shared/utils/stringUtils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
/** Strips diacritics: decomposes the string (NFD) then drops the combining marks U+0300–U+036F. */
export const removeAccents = (str: string) => {
  const decomposed = str.normalize("NFD")
  return decomposed.replace(/[\u0300-\u036f]/g, "")
}
/** Removes every character that has a special meaning in a regular expression. */
export const removeRegexChars = (str: string) => {
  const regexSpecialChars = /[.*+?^${}()|[\]\\]/g
  return str.replace(regexSpecialChars, "")
}

/**
 * Trims the given values, drops the null / undefined / blank ones and joins
 * the remaining ones with a single space.
 *
 * @returns the joined string, or null when nothing is left to join
 */
export const joinNonNullStrings = (values: (string | null | undefined)[]): string | null => {
  const parts: string[] = []
  for (const value of values) {
    const trimmed = value?.trim()
    if (trimmed) {
      parts.push(trimmed)
    }
  }
  return parts.length > 0 ? parts.join(" ") : null
}

// cf https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript
/**
 * Computes a signed 32-bit hash of a string from its UTF-16 code units
 * (hash = hash * 31 + codeUnit, wrapped to 32 bits at every step).
 *
 * @returns the hash as a number; 0 for the empty string
 */
export const hashcode = (str: string) => {
  let hash = 0
  let index = 0
  while (index < str.length) {
    // Math.imul keeps the multiplication in 32-bit integer arithmetic; `| 0` wraps the sum
    hash = (Math.imul(hash, 31) + str.charCodeAt(index)) | 0
    index += 1
  }
  return hash
}
35 changes: 35 additions & 0 deletions ui/app/sitemap-index.xml/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { publicConfig } from "@/config.public"
import { mainSitemapLastModificationDate } from "@/services/generateMainSitemap"
import { getHostFromHeader } from "@/utils/requestUtils"

/** Serves the sitemap index as `text/xml`. */
export async function GET(request: Request) {
  const body = await generateSiteMap(request)
  const headers = { "Content-Type": "text/xml" }
  return new Response(body, { status: 200, headers })
}

/**
 * Builds the sitemap index XML referencing the main sitemap and the offers sitemap.
 *
 * The `<lastmod>` of the offers sitemap is taken from the `last-modified`
 * header returned by the API for `/sitemap-offers.xml`. It is omitted when
 * the header is missing or is not a parsable date.
 *
 * @param request incoming request, used to resolve the public host
 * @returns the sitemap index document as a string
 */
async function generateSiteMap(request: Request) {
  const host = getHostFromHeader(request)
  const response = await fetch(`${publicConfig.apiEndpoint}/sitemap-offers.xml`, {
    cache: "no-cache",
  })

  // headers.get() returns null when the header is absent: `new Date(null)` would
  // silently produce 1970-01-01, and an unparsable value would make toISOString() throw.
  const lastModifiedHeader = response.headers.get("last-modified")
  const offersLastModDate = lastModifiedHeader ? new Date(lastModifiedHeader) : null
  const offersLastMod = offersLastModDate && !Number.isNaN(offersLastModDate.getTime()) ? offersLastModDate.toISOString() : null

  const lines = [
    `<?xml version="1.0" encoding="UTF-8"?>`,
    `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`,
    `<sitemap>`,
    `<loc>${host}/sitemap-main.xml</loc>`,
    `<lastmod>${mainSitemapLastModificationDate.toISOString()}</lastmod>`,
    `</sitemap>`,
    `<sitemap>`,
    `<loc>${host}/sitemap-offers.xml</loc>`,
    // <lastmod> is optional: only emitted when a valid date is known
    ...(offersLastMod ? [`<lastmod>${offersLastMod}</lastmod>`] : []),
    `</sitemap>`,
    `</sitemapindex>`,
  ]
  return lines.join("\n")
}
11 changes: 11 additions & 0 deletions ui/app/sitemap-main.xml/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { generateMainSitemap } from "@/services/generateMainSitemap"

/** Serves the main sitemap as `text/xml`. */
export async function GET(request: Request) {
  const body = generateMainSitemap(request)
  const headers = { "Content-Type": "text/xml" }
  return new Response(body, { status: 200, headers })
}
18 changes: 18 additions & 0 deletions ui/app/sitemap-offers.xml/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { publicConfig } from "@/config.public"

// Disable the Next.js cache for this route: caching is handled by the API.
export const dynamic = "force-dynamic"

/**
 * Proxies the offers sitemap served by the API.
 *
 * The XML body is forwarded as `text/xml`, together with the API's
 * `last-modified` header when it is present. When the API call does not
 * succeed, a 502 is returned instead of re-serving the error body as a
 * valid sitemap with a 200 status.
 */
export async function GET(_request: Request) {
  const response = await fetch(`${publicConfig.apiEndpoint}/sitemap-offers.xml`, {
    cache: "no-cache",
  })
  if (!response.ok) {
    return new Response(null, { status: 502 })
  }

  const xml = await response.text()
  const headers = new Headers({ "content-type": "text/xml" })
  // headers.get() returns null when the header is absent: only forward a real value
  const lastModified = response.headers.get("last-modified")
  if (lastModified !== null) {
    headers.set("last-modified", lastModified)
  }
  return new Response(xml, { status: 200, headers })
}
Loading

0 comments on commit e846683

Please sign in to comment.