-
-
Notifications
You must be signed in to change notification settings - Fork 277
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(search): duplicate elasticsearch reindex cronjob and associated…
… files while migrating to elasticsearch 8
- Loading branch information
1 parent
56063c2
commit 841978e
Showing
3 changed files
with
970 additions
and
0 deletions.
There are no files selected for viewing
77 changes: 77 additions & 0 deletions
77
helm-chart/sefaria-project/templates/cronjob/reindex-elasticsearch-es6.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{{- if .Values.cronJobs.reindexElasticSearch.enabled }}
---
# Temporary duplicate of the ElasticSearch reindex CronJob, used while
# migrating to ElasticSearch 8. It runs the ES6 variant of the reindex script.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ .Values.deployEnv }}-reindex-elastic-search-es6
  labels:
    {{- include "sefaria.labels" . | nindent 4 }}
spec:
  # Cron syntax: 13:20 every Sunday (in the CronJob controller's time zone).
  schedule: "20 13 * * 0"
  jobTemplate:
    spec:
      # Allow a single retry before the Job is marked as failed.
      backoffLimit: 1
      template:
        spec:
          affinity:
            # Keep this memory-hungry pod off any node that runs a pod
            # labelled app=mongo.
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                  - key: app
                    operator: In
                    values:
                    - mongo
                # Must be the well-known node label "kubernetes.io/hostname".
                # The previous value "kubernetes.io.hostname" matches no node
                # label, so the anti-affinity rule was never enforced.
                topologyKey: kubernetes.io/hostname
          containers:
          - name: reindex-elastic-search-es6
            image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
            resources:
              limits:
                memory: 9Gi
              requests:
                memory: 7Gi
            env:
            - name: REDIS_HOST
              value: "redis-{{ .Values.deployEnv }}"
            - name: NODEJS_HOST
              value: "node-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
            - name: VARNISH_HOST
              value: "varnish-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
            # Webhook the reindex script posts to when it fails.
            - name: SLACK_URL
              valueFrom:
                secretKeyRef:
                  name: {{ template "sefaria.secrets.slackWebhook" . }}
                  key: slack-webhook
            envFrom:
            - secretRef:
                name: {{ template "sefaria.secrets.elasticAdmin" . }}
            - secretRef:
                name: {{ .Values.secrets.localSettings.ref }}
                optional: true
            - configMapRef:
                name: local-settings-{{ .Values.deployEnv }}
            - secretRef:
                name: local-settings-secrets-{{ .Values.deployEnv }}
                optional: true
            volumeMounts:
            # Mount only the single settings file, not the whole ConfigMap.
            - mountPath: /app/sefaria/local_settings.py
              name: local-settings
              subPath: local_settings.py
              readOnly: true
            command: ["bash"]
            # Prepare the log file and install numpy before launching the
            # ES6 reindex script.
            args: [
              "-c",
              "mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/reindex_elasticsearch_cronjob_ES6.py"
            ]
          restartPolicy: Never
          volumes:
          - name: local-settings
            configMap:
              name: local-settings-file-{{ .Values.deployEnv }}
              items:
              - key: local_settings.py
                path: local_settings.py
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
{{- end }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
""" | ||
This file is meant to be temporary while we are migrating to elasticsearch 8 | ||
""" | ||
from datetime import datetime | ||
import requests | ||
import traceback | ||
import os | ||
import django | ||
django.setup() | ||
from sefaria.model import * | ||
from sefaria.search_ES6 import index_all | ||
from sefaria.local_settings import SEFARIA_BOT_API_KEY | ||
from sefaria.pagesheetrank import update_pagesheetrank | ||
|
||
""" | ||
Source sheets added after last_sheet_timestamp will be missing from the index process. We want to manually index all | ||
source sheets created after this. Depending on the database being used to index the timestamp will be different. If | ||
running against a production database, last_sheet_timestamp will be the time this script began running. Otherwise, this | ||
value will need to be set to the time at which the last mongo dump was created (assuming the database is using the most | ||
up-to-date mongo dump). | ||
""" | ||
# last_sheet_timestamp = datetime.fromtimestamp(os.path.getmtime("/var/data/sefaria_public/dump/sefaria")).isoformat() | ||
# Full reindex run: refresh the page/sheet rank data, rebuild every index, then
# ask the production site to index any sheets created since the run started.
# Any failure is reported to Slack and re-raised so the job exits non-zero.
try:
    # Captured before the reindex starts so sheets created while it runs are
    # picked up by the follow-up call below.
    last_sheet_timestamp = datetime.now().isoformat()
    update_pagesheetrank()
    index_all()
    r = requests.post(
        "https://www.sefaria.org/admin/index-sheets-by-timestamp",
        data={"timestamp": last_sheet_timestamp, "apikey": SEFARIA_BOT_API_KEY},
    )
    # Treat an HTTP error status as a failure too; previously a failed request
    # whose body did not contain the word "error" was reported as a success.
    if not r.ok or "error" in r.text:
        raise Exception("Error when calling admin/index-sheets-by-timestamp API: " + r.text)
    print("SUCCESS!", r.text)
except Exception:
    tb_str = traceback.format_exc()
    print("Caught exception")
    post_object = {
        "icon_emoji": ":facepalm:",
        "username": "Reindex ElasticSearch",
        "channel": "#engineering-discuss",
        "attachments": [
            {
                "fallback": tb_str,
                "color": "#a30200",
                "pretext": "Cronjob Error",
                "text": tb_str,
            }
        ],
    }
    try:
        # The notification is best-effort: a missing SLACK_URL or a network
        # problem must not replace the original reindex error.
        requests.post(os.environ['SLACK_URL'], json=post_object, timeout=30)
    except Exception:
        print("Failed to post the error report to Slack")
        traceback.print_exc()
    # Bare raise re-raises the original exception with its traceback intact.
    raise
Oops, something went wrong.