Skip to content

Commit

Permalink
Merge pull request #1644 from Sefaria/elasticsearch-8-compat
Browse files Browse the repository at this point in the history
Elasticsearch 8 compat
  • Loading branch information
nsantacruz authored Dec 5, 2023
2 parents c48e90d + 9089003 commit bcc65a7
Show file tree
Hide file tree
Showing 25 changed files with 1,153 additions and 53 deletions.
2 changes: 1 addition & 1 deletion build/ci/integration-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ localSettings:
DEBUG: true
DOMAIN_LANGUAGE: {}
APSCHEDULER_NAME: "apscheduler-{{ .Values.deployEnv }}"
SEARCH_ADMIN: "http://elasticsearch-data:9200"
SEARCH_URL: "http://elasticsearch-data:9200"
TURN_SERVER: ''
USE_CLOUDFLARE: false
FRONT_END_URL: "http://${NAME}.integration.sefaria.org"
Expand Down
5 changes: 3 additions & 2 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ cronJobs:
enabled: true
reindexElasticSearch:
enabled: true
SEARCH_HOST_ES6: "elasticsearch-data"
SEARCH_HOST_ES8: "elasticsearch-es-http.elasticsearch.svc"
topicsIndexing:
enabled: true
trello:
Expand Down Expand Up @@ -206,7 +208,7 @@ localSettings:
}
MONGO_HOST: "mongo"
APSCHEDULER_NAME: "apscheduler-{{ .Values.deployEnv }}"
SEARCH_ADMIN: "http://elasticsearch-data:9200"
SEARCH_URL: "http://elasticsearch-data:9200"
TURN_SERVER: ''
USE_CLOUDFLARE: false
FRONT_END_URL: "http://www.sefaria.org"
Expand All @@ -216,7 +218,6 @@ localSettings:
GLOBAL_WARNING: false
GLOBAL_WARNING_MESSAGE: "Sefaria will be in <b>Read-Only</b> mode for scheduled maintenance from 4:45pm-6:45pm Pacific time. Edits will <b>not</b> be saved during that time."
SITE_PACKAGE: "sites.sefaria"
SEARCH_HOST: elasticsearch.data
DEFAULT_FROM_EMAIL: "Sefaria <[email protected]>"
SERVER_EMAIL: "[email protected]"
MULTISERVER_ENABLED: "True"
Expand Down
2 changes: 1 addition & 1 deletion build/ci/sandbox-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ localSettings:
DEBUG: false
DOMAIN_LANGUAGE: {}
APSCHEDULER_NAME: "apscheduler-{{ .Values.deployEnv }}"
SEARCH_ADMIN: "http://elasticsearch-data:9200"
SEARCH_URL: "http://elasticsearch-data:9200"
TURN_SERVER: ''
USE_CLOUDFLARE: false
FRONT_END_URL: "http://${NAME}.cauldron.sefaria.org"
Expand Down
16 changes: 16 additions & 0 deletions helm-chart/sefaria-project/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ elastic-certificate-{{ .Values.deployEnv }}
{{- end }}
{{- end }}

{{/*
Resolve the name of the Elasticsearch "user" Secret: the value of
.Values.secrets.elasticUser.ref when it is set, otherwise
"elastic-user-<deployEnv>".
*/}}
{{- define "sefaria.secrets.elasticUser" }}
{{- if .Values.secrets.elasticUser.ref -}}
{{- .Values.secrets.elasticUser.ref }}
{{- else -}}
elastic-user-{{ .Values.deployEnv }}
{{- end }}
{{- end }}

{{/*
Resolve the name of the Elasticsearch "admin" Secret: the value of
.Values.secrets.elasticAdmin.ref when it is set, otherwise
"elastic-admin-<deployEnv>".
*/}}
{{- define "sefaria.secrets.elasticAdmin" }}
{{- if .Values.secrets.elasticAdmin.ref -}}
{{- .Values.secrets.elasticAdmin.ref }}
{{- else -}}
elastic-admin-{{ .Values.deployEnv }}
{{- end }}
{{- end }}


{{- define "sefaria.secrets.originTls" }}
{{- if .Values.ingress.secrets.originTls.ref -}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,11 @@ data:
}
SERVER_EMAIL = os.getenv("SERVER_EMAIL")
SEARCH_HOST = "/api/search"
SEARCH_ADMIN = os.getenv("SEARCH_ADMIN")
SEARCH_ADMIN_USER = os.getenv("SEARCH_ADMIN_USER")
SEARCH_ADMIN_PW = os.getenv("SEARCH_ADMIN_PW")
SEARCH_ADMIN_K8S = os.getenv("SEARCH_ADMIN_K8S")
# Build the Elasticsearch URL, embedding basic-auth credentials only when a
# username is configured. The credentials are percent-encoded so characters
# such as "@", ":" or "/" in the username/password cannot corrupt the
# authority part of the URL; a missing password becomes an empty string
# instead of the literal text "None".
from urllib.parse import quote
_es_user = os.getenv("ELASTIC_USERNAME")
_es_password = os.getenv("ELASTIC_PASSWORD", "")
auth_str = f'{quote(_es_user, safe="")}:{quote(_es_password, safe="")}@' if _es_user else ''
SEARCH_URL = f'http://{auth_str}{os.getenv("SEARCH_HOST")}:9200'
SEARCH_INDEX_ON_SAVE = True
SEARCH_INDEX_NAME = "sefaria"
SEARCH_INDEX_NAME_TEXT = 'text' # name of the ElasticSearch index to use
SEARCH_INDEX_NAME_SHEET = 'sheet'
SEARCH_INDEX_NAME_MERGED = 'merged'
TURN_SERVER = os.getenv("TURN_SERVER") #coturn.cauldron.sefaria.org
TURN_SECRET= os.getenv("TURN_SECRET")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ data:
DEBUG: "{{ .Values.localSettings.DEBUG }}"
DOMAIN_LANGUAGE: {{ .Values.localSettings.DOMAIN_LANGUAGE | toJson | quote }}
APSCHEDULER_NAME: {{ tpl .Values.localSettings.APSCHEDULER_NAME . | quote }}
SEARCH_ADMIN: "http://{{ .Values.nginx.SEARCH_HOST }}:9200"
TURN_SERVER: {{ .Values.localSettings.TURN_SERVER | quote }}
USE_CLOUDFLARE: "{{ .Values.localSettings.USE_CLOUDFLARE }}"
FRONT_END_URL: {{ .Values.localSettings.FRONT_END_URL | quote }}
Expand All @@ -26,3 +25,4 @@ data:
SENTRY_ENVIRONMENT: {{ .Values.deployEnv | quote }}
SENTRY_CODE_VERSION: {{ .Values.web.containerImage.tag }}
FAIL_GRACEFULLY: "{{ .Values.localSettings.FAIL_GRACEFULLY }}"
SEARCH_HOST: {{ .Values.nginx.SEARCH_HOST | quote }}
11 changes: 11 additions & 0 deletions helm-chart/sefaria-project/templates/configmap/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ data:
}
}
{{- end }}
entrypoint.sh: |
#!/bin/bash
set -e
# Build the payload for the HTTP Basic "Authorization" header that the nginx
# template adds to proxied Elasticsearch requests. The variables are quoted so
# credentials containing whitespace or glob characters are encoded verbatim,
# and newlines are stripped because base64 wraps long output, which would
# otherwise break the header value.
export ELASTIC_AUTH_HEADER=$(printf '%s' "${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" | base64 | tr -d '\n')
envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${STRAPI_LOCATION},${ELASTIC_AUTH_HEADER}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf
# exec so nginx replaces this shell and receives container signals (e.g.
# SIGTERM on pod shutdown) directly, as the previous inline command did.
exec nginx -c /nginx.conf -g 'daemon off;'
nginx.template.conf: |-
{{- if .Values.instrumentation.enabled }}
load_module /etc/nginx/modules/ngx_http_opentracing_module.so;
Expand Down Expand Up @@ -108,6 +118,7 @@ data:
location /api/search/ {
rewrite ^/(?:api/search)/(.*)$ /$1 break;
proxy_set_header Content-Type application/json; # es 6.0 requires this header
proxy_set_header Authorization "Basic ${ELASTIC_AUTH_HEADER}";
add_header 'Access-Control-Allow-Origin' '';
proxy_pass http://elasticsearch_upstream/;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{{- if .Values.cronJobs.reindexElasticSearch.enabled }}
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: {{ .Values.deployEnv }}-reindex-elastic-search-es6
labels:
{{- include "sefaria.labels" . | nindent 4 }}
spec:
schedule: "20 13 * * 0"
jobTemplate:
spec:
backoffLimit: 1
template:
spec:
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- mongo
# Use the well-known node label "kubernetes.io/hostname" so the anti-affinity
# rule is evaluated per node; "kubernetes.io.hostname" is not a node label.
topologyKey: kubernetes.io/hostname
containers:
- name: reindex-elastic-search-es6
image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}"
resources:
limits:
memory: 9Gi
requests:
memory: 7Gi
env:
# Values keys are case-sensitive: the chart defines "cronJobs", not "cronjobs".
- name: SEARCH_HOST
  value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES6 }}"
- name: REDIS_HOST
value: "redis-{{ .Values.deployEnv }}"
- name: NODEJS_HOST
value: "node-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
- name: VARNISH_HOST
value: "varnish-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
- name: SLACK_URL
valueFrom:
secretKeyRef:
name: {{ template "sefaria.secrets.slackWebhook" . }}
key: slack-webhook
envFrom:
- secretRef:
name: {{ .Values.secrets.localSettings.ref }}
optional: true
- configMapRef:
name: local-settings-{{ .Values.deployEnv }}
- secretRef:
name: local-settings-secrets-{{ .Values.deployEnv }}
optional: true
volumeMounts:
- mountPath: /app/sefaria/local_settings.py
name: local-settings
subPath: local_settings.py
readOnly: true
command: ["bash"]
args: [
"-c",
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/reindex_elasticsearch_cronjob_ES6.py"
]
restartPolicy: Never
volumes:
- name: local-settings
configMap:
name: local-settings-file-{{ .Values.deployEnv }}
items:
- key: local_settings.py
path: local_settings.py
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 2
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
labels:
{{- include "sefaria.labels" . | nindent 4 }}
spec:
schedule: "20 13 * * 0"
schedule: "20 13 * * 2"
jobTemplate:
spec:
backoffLimit: 1
Expand All @@ -32,6 +32,8 @@ spec:
requests:
memory: 7Gi
env:
# Values keys are case-sensitive: the chart defines "cronJobs", not "cronjobs".
- name: SEARCH_HOST
  value: "{{ .Values.cronJobs.reindexElasticSearch.SEARCH_HOST_ES8 }}"
- name: REDIS_HOST
value: "redis-{{ .Values.deployEnv }}"
- name: NODEJS_HOST
Expand All @@ -44,14 +46,16 @@ spec:
name: {{ template "sefaria.secrets.slackWebhook" . }}
key: slack-webhook
envFrom:
- secretRef:
name: {{ template "sefaria.secrets.elasticAdmin" . }}
- secretRef:
name: {{ .Values.secrets.localSettings.ref }}
optional: true
- configMapRef:
name: local-settings-{{ .Values.deployEnv }}
- secretRef:
name: local-settings-secrets-{{ .Values.deployEnv }}
optional: true
- configMapRef:
name: local-settings-{{ .Values.deployEnv }}
volumeMounts:
- mountPath: /app/sefaria/local_settings.py
name: local-settings
Expand All @@ -60,7 +64,7 @@ spec:
command: ["bash"]
args: [
"-c",
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy && /app/run /app/scripts/reindex_elasticsearch_cronjob.py"
"mkdir -p /log && touch /log/sefaria_book_errors.log && pip install numpy elasticsearch==8.8.2 git+https://github.com/Sefaria/[email protected]#egg=elasticsearch-dsl && /app/run /app/scripts/reindex_elasticsearch_cronjob.py"
]
restartPolicy: Never
volumes:
Expand Down
10 changes: 7 additions & 3 deletions helm-chart/sefaria-project/templates/rollout/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ spec:
- name: nginx
image: "{{ .Values.nginx.containerImage.imageRegistry }}:{{ .Values.nginx.containerImage.tag }}"
imagePullPolicy: Always
command: ["bash", "-c"]
# https://serverfault.com/questions/577370/how-can-i-use-environment-variables-in-nginx-conf
args: [ "envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${STRAPI_LOCATION}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf && exec nginx -c /nginx.conf -g 'daemon off;'" ]
command: ["bash", "-c", "/usr/src/entrypoint.sh"]
ports:
- containerPort: 80
- containerPort: 443
Expand All @@ -76,6 +74,9 @@ spec:
name: nginx-conf
subPath: nginx.template.conf
readOnly: true
- mountPath: /usr/src/entrypoint.sh
name: nginx-conf
subPath: entrypoint.sh
{{- if .Values.instrumentation.enabled }}
- mountPath: /etc/nginx/opentracing.json
name: nginx-conf
Expand Down Expand Up @@ -106,6 +107,8 @@ spec:
value: "linker-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
{{- end }}
envFrom:
- secretRef:
name: {{ template "sefaria.secrets.elasticUser" . }}
- configMapRef:
name: local-settings-nginx-{{ .Values.deployEnv }}
optional: true
Expand All @@ -116,6 +119,7 @@ spec:
- name: nginx-conf
configMap:
name: nginx-conf-{{ .Values.deployEnv }}
defaultMode: 0755
- name: robots-txt
configMap:
name: robots-txt-{{ .Values.deployEnv }}
2 changes: 2 additions & 0 deletions helm-chart/sefaria-project/templates/rollout/web.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ spec:
value: k8s.container.name=app,k8s.deployment.name={{ .Values.deployEnv }}-web,k8s.namespace.name={{ .Release.Namespace }},k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)
{{- end }}
envFrom:
- secretRef:
name: {{ template "sefaria.secrets.elasticUser" . }}
- secretRef:
name: {{ .Values.secrets.localSettings.ref }}
optional: true
Expand Down
11 changes: 11 additions & 0 deletions helm-chart/sefaria-project/templates/secret/elastic-admin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{- if .Values.secrets.elasticAdmin.data }}
apiVersion: v1
kind: Secret
metadata:
name: elastic-admin-{{ .Values.deployEnv }}
labels:
deployEnv: "{{ .Values.deployEnv }}"
{{- include "sefaria.labels" . | nindent 4 }}
type: Opaque
stringData: {{ .Values.secrets.elasticAdmin.data | toYaml | nindent 2 }}
{{- end }}
11 changes: 11 additions & 0 deletions helm-chart/sefaria-project/templates/secret/elastic-user.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{- if .Values.secrets.elasticUser.data }}
apiVersion: v1
kind: Secret
metadata:
name: elastic-user-{{ .Values.deployEnv }}
labels:
deployEnv: "{{ .Values.deployEnv }}"
{{- include "sefaria.labels" . | nindent 4 }}
type: Opaque
stringData: {{ .Values.secrets.elasticUser.data | toYaml | nindent 2 }}
{{- end }}
16 changes: 13 additions & 3 deletions helm-chart/sefaria-project/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,7 @@ secrets:
# SEFARIA_DB:
# SEFARIA_DB_USER:
# SEFARIA_DB_PASSWORD:
# SEARCH_ADMIN_USER:
# SEARCH_ADMIN_PW:
# SEARCH_ADMIN_K8S:
# SEARCH_URL:
# TURN_SECRET:
# TURN_USER:
# SEFARIA_BOT_API_KEY:
Expand Down Expand Up @@ -372,6 +370,16 @@ secrets:
# should be commented out and vice-versa.
ref: trello-secret
# data:
elasticUser:
# If you're using a reference to an existing secret then the data: section
# should be commented out and vice-versa.
ref: elastic-user
# data:
elasticAdmin:
# If you're using a reference to an existing secret then the data: section
# should be commented out and vice-versa.
ref: elastic-admin
# data:


# Settings for various cronjobs
Expand All @@ -391,6 +399,8 @@ cronJobs:
enabled: false
reindexElasticSearch:
enabled: false
SEARCH_HOST_ES6: ""
SEARCH_HOST_ES8: ""
topicsIndexing:
enabled: false
trello:
Expand Down
18 changes: 14 additions & 4 deletions reader/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from sefaria.utils.util import text_preview, short_to_long_lang_code, epoch_time
from sefaria.utils.hebrew import hebrew_term, has_hebrew
from sefaria.utils.calendars import get_all_calendar_items, get_todays_calendar_items, get_keyed_calendar_items, get_parasha, get_todays_parasha
from sefaria.settings import STATIC_URL, USE_VARNISH, USE_NODE, NODE_HOST, DOMAIN_LANGUAGES, MULTISERVER_ENABLED, SEARCH_ADMIN, MULTISERVER_REDIS_SERVER, \
from sefaria.settings import STATIC_URL, USE_VARNISH, USE_NODE, NODE_HOST, DOMAIN_LANGUAGES, MULTISERVER_ENABLED, MULTISERVER_REDIS_SERVER, \
MULTISERVER_REDIS_PORT, MULTISERVER_REDIS_DB, DISABLE_AUTOCOMPLETER, ENABLE_LINKER
from sefaria.site.site_settings import SITE_SETTINGS
from sefaria.system.multiserver.coordinator import server_coordinator
Expand Down Expand Up @@ -4200,19 +4200,29 @@ def dummy_search_api(request):


@csrf_exempt
def search_wrapper_api(request):
def search_wrapper_api(request, es6_compat=False):
"""
@param request:
@param es6_compat: True to return API response that's compatible with an Elasticsearch 6 compatible client
@return:
"""
from sefaria.helper.search import get_elasticsearch_client

if request.method == "POST":
if "json" in request.POST:
j = request.POST.get("json") # using form-urlencoded
else:
j = request.body # using content-type: application/json
j = json.loads(j)
es_client = Elasticsearch(SEARCH_ADMIN)
es_client = get_elasticsearch_client()
search_obj = Search(using=es_client, index=j.get("type")).params(request_timeout=5)
search_obj = get_query_obj(search_obj=search_obj, **j)
response = search_obj.execute()
if response.success():
return jsonResponse(response.to_dict(), callback=request.GET.get("callback", None))
response_json = getattr(response.to_dict(), 'body', response.to_dict())
if es6_compat and isinstance(response_json['hits']['total'], dict):
response_json['hits']['total'] = response_json['hits']['total']['value']
return jsonResponse(response_json, callback=request.GET.get("callback", None))
return jsonResponse({"error": "Error with connection to Elasticsearch. Total shards: {}, Shards successful: {}, Timed out: {}".format(response._shards.total, response._shards.successful, response.timed_out)}, callback=request.GET.get("callback", None))
return jsonResponse({"error": "Unsupported HTTP method."}, callback=request.GET.get("callback", None))

Expand Down
Loading

0 comments on commit bcc65a7

Please sign in to comment.