diff --git a/.github/workflows/weekly.yaml b/.github/workflows/weekly.yaml index 4c450566de..63ef8bb0cb 100644 --- a/.github/workflows/weekly.yaml +++ b/.github/workflows/weekly.yaml @@ -3,6 +3,7 @@ name: Weekly Tasks on: schedule: - cron: '0 0 * 6 *' + workflow_dispatch: jobs: build-mongo: @@ -11,11 +12,21 @@ jobs: contents: 'read' id-token: 'write' steps: + - name: Maximize build space + uses: AdityaGarg8/remove-unwanted-software@v4.1 + with: + remove-android: 'true' + remove-dotnet: 'true' + remove-haskell: 'true' + remove-codeql: 'true' + remove-docker-images: 'true' - uses: actions/checkout@v2 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 + with: + buildkitd-config: ./build/standalone-db/buildkit.toml - id: auth name: Authenticate to Google Cloud uses: google-github-actions/auth@v1 @@ -37,14 +48,14 @@ jobs: password: '${{ steps.auth.outputs.access_token }}' - name: Get current date id: date - run: echo "date$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT + run: echo "date=$(date +'%Y%m%d%H%M')" >> $GITHUB_OUTPUT - name: Generate image metadata id: meta uses: docker/metadata-action@v3 with: images: | gcr.io/${{ secrets.DEV_PROJECT }}/sefaria-mongo - us-east1-docker.pkg.dev/${{ secrets.DEV_PROJECT }}/containers/sefaria-${{ matrix.app }}-${{ steps.branch-name.outputs.current_branch }} + us-east1-docker.pkg.dev/${{ secrets.DEV_PROJECT }}/sefaria-public/sefaria-mongo # generate Docker tags based on the following events/attributes tags: | type=sha,enable=true,priority=100,prefix=sha-,suffix=-${{ steps.date.outputs.date }},format=short @@ -52,12 +63,11 @@ jobs: flavor: | latest=true - name: build and push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v6 with: - cache-from: type=registry, ref=sefaria-mongo/cache - cache-to: type=registry, ref=sefaria-mongo/cache, mode=max context: . 
class APIInvalidInputException(Exception):
    """
    Raised when an API receives data in an invalid format.

    The message is kept on the instance (as ``message``) so that it can be
    rendered into a JSON error payload by ``to_json_response``.
    """

    def __init__(self, message):
        # Store the message first, then let the base class record it in ``args``.
        self.message = message
        Exception.__init__(self, message)

    def to_json_response(self):
        """Return an HTTP 400 JSON response that carries this error's message."""
        payload = {"invalid_input_error": self.message}
        return jsonResponse(payload, status=400)
def create_add_to_pool_action(pool_name):
    """
    Build an admin action that adds every selected topic to the pool `pool_name`.

    The returned callable has the (modeladmin, request, queryset) signature that
    Django admin actions use. It is given a name and a description derived from
    `pool_name`, so actions built for different pools can be told apart.
    """
    action_label = f"Add selected topics to '{pool_name}' pool"
    action_name = f"add_to_specific_pool_{pool_name}"

    def add_to_pool(modeladmin, request, queryset):
        try:
            target_pool = TopicPool.objects.get(name=pool_name)
            for selected_topic in queryset:
                selected_topic.pools.add(target_pool)
            success_text = f"Added {queryset.count()} topics to {target_pool.name}"
            modeladmin.message_user(request, success_text, messages.SUCCESS)

        except TopicPool.DoesNotExist:
            # The pool is looked up by name at call time; report instead of crashing.
            failure_text = "The specified pool does not exist."
            modeladmin.message_user(request, failure_text, messages.ERROR)

    add_to_pool.__name__ = action_name
    add_to_pool.short_description = action_label
    return add_to_pool
@admin.register(Topic)
class TopicAdmin(admin.ModelAdmin):
    """
    Admin page for managing which pools each topic belongs to.

    Topics cannot be created or deleted here, and their slug and titles are
    read-only; only pool membership is editable (through the `pools` widget or
    the bulk add/remove actions). Only topics that are in the library pool are
    listed.
    """
    list_display = ('slug', 'en_title', 'he_title', 'is_in_pool_general_en', 'is_in_pool_general_he', 'is_in_pool_torah_tab', 'sefaria_link')
    list_filter = (PoolFilter,)
    filter_horizontal = ('pools',)
    search_fields = ('slug', 'en_title', 'he_title')
    readonly_fields = ('slug', 'en_title', 'he_title')
    actions = [
        create_add_to_pool_action('general_en'),
        create_add_to_pool_action('general_he'),
        create_add_to_pool_action(PoolType.TORAH_TAB.value),
        create_remove_from_pool_action('general_en'),
        create_remove_from_pool_action('general_he'),
        create_remove_from_pool_action(PoolType.TORAH_TAB.value),
    ]

    def has_add_permission(self, request):
        """Topics are never created from this admin."""
        return False

    def has_delete_permission(self, request, obj=None):
        """Topics are never deleted from this admin."""
        return False

    def get_queryset(self, request):
        """Restrict the change list to topics that belong to the library pool."""
        queryset = super().get_queryset(request)
        return queryset.filter(pools__name=PoolType.LIBRARY.value)

    # NOTE: each of the three columns below filters `obj.pools` separately for
    # every displayed row.
    def is_in_pool_general_en(self, obj):
        """Boolean column: is this topic in the 'general_en' pool?"""
        return obj.pools.filter(name='general_en').exists()
    is_in_pool_general_en.boolean = True
    is_in_pool_general_en.short_description = "General Pool EN"

    def is_in_pool_general_he(self, obj):
        """Boolean column: is this topic in the 'general_he' pool?"""
        return obj.pools.filter(name='general_he').exists()
    is_in_pool_general_he.boolean = True
    is_in_pool_general_he.short_description = "General Pool HE"

    def is_in_pool_torah_tab(self, obj):
        """Boolean column: is this topic in the torah tab pool?"""
        return obj.pools.filter(name=PoolType.TORAH_TAB.value).exists()
    is_in_pool_torah_tab.boolean = True
    is_in_pool_torah_tab.short_description = "TorahTab Pool"

    def sefaria_link(self, obj):
        """
        Render a clickable link to the topic's public page, labelled with its slug.

        `format_html` escapes both arguments, so the slug is safe to interpolate
        into the attribute and the link text.
        """
        url = f"https://www.sefaria.org/topics/{obj.slug}"
        # The template needs one placeholder per argument: the href and the label.
        return format_html('<a href="{}" target="_blank">{}</a>', url, obj.slug)
    sefaria_link.short_description = "Sefaria Link"
class SeasonalTopicAdmin(admin.ModelAdmin):
    """
    Shared admin configuration for seasonal ("calendar") topics.

    This class is not registered itself; the language-specific subclasses
    register it and narrow the queryset to one language. `lang` is hidden from
    the form because it is not meant for manual editing.
    """
    exclude = ("lang",)  # not for manual editing
    date_hierarchy = 'start_date'
    ordering = ['-start_date']
    raw_id_fields = ('topic', 'secondary_topic')
    # NOTE(review): the same fields are listed in both raw_id_fields and
    # autocomplete_fields - confirm which widget is intended.
    autocomplete_fields = ('topic', 'secondary_topic')
    search_fields = ('topic__slug', 'topic__en_title', 'topic__he_title', 'secondary_topic__slug')
    list_filter = (
        'start_date',
        'display_start_date_israel',
        'display_start_date_diaspora'
    )
    list_display = (
        'start_date',
        'topic',
        'display_date_prefix',
        'display_date_suffix',
        'secondary_topic',
        'display_start_date_israel',
        'display_end_date_israel',
        'display_start_date_diaspora',
        'display_end_date_diaspora'
    )
    fieldsets = (
        (None, {
            'fields': ('topic', 'secondary_topic', 'start_date')
        }),
        ('Display Date Prefix/Suffix', {
            'fields': ('display_date_prefix', 'display_date_suffix',),
            'description': 'Prefix/Suffix that will be displayed around the secondary topic.',
        }),
        ('Israel Display Dates', {
            'fields': ('display_start_date_israel', 'display_end_date_israel'),
            'description': 'Dates to be displayed to the user of when this topic is "happening". '
                           'E.g. for a holiday, when the holiday occurs. '
                           'When the dates are the same for both Israel and Diaspora, only fill out Israeli dates. '
                           'Similarly, when the start and end dates are the same, only fill out start date.'
        }),
        ('Diaspora Display Dates', {
            'fields': ('display_start_date_diaspora', 'display_end_date_diaspora'),
            'description': 'When the dates are the same for both Israel and Diaspora, only fill out Israeli dates. '
                           'Similarly, when the start and end dates are the same, only fill out start date.'
        }),
    )

    def formfield_for_foreignkey(self, db_field, request, **kwargs):
        """Give the two topic foreign keys friendlier labels (and help text) on the form."""
        field_overrides = {
            "topic": {
                "label": "Topic slug",
                "help_text": "Use the magnifying glass button to select a topic.",
            },
            "secondary_topic": {
                "label": "Secondary topic slug",
            },
        }
        # Any other foreign key is passed through untouched.
        kwargs.update(field_overrides.get(db_field.name, {}))
        return super().formfield_for_foreignkey(db_field, request, **kwargs)

    def save_model(self, request, obj, form, change):
        """
        Run the model's clean method before delegating to the default save.
        """
        obj.clean()
        super().save_model(request, obj, form, change)
# Initial schema for the django_topics app: creates the SeasonalTopic, Topic,
# TopicOfTheDay and TopicPool tables, then wires up the relations between them.
class Migration(migrations.Migration):

    initial = True

    dependencies = [
    ]

    operations = [
        # SeasonalTopic is created without its foreign keys; they are added
        # further down, once the Topic model exists.
        migrations.CreateModel(
            name='SeasonalTopic',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('start_date', models.DateField()),
                ('display_start_date_israel', models.DateField(blank=True, null=True)),
                ('display_end_date_israel', models.DateField(blank=True, null=True)),
                ('display_start_date_diaspora', models.DateField(blank=True, null=True)),
                ('display_end_date_diaspora', models.DateField(blank=True, null=True)),
            ],
            options={
                'verbose_name': 'Seasonal Topic',
                'verbose_name_plural': 'Seasonal Topics',
            },
        ),
        # At this point Topic has an auto `id` primary key and a unique slug.
        migrations.CreateModel(
            name='Topic',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('slug', models.CharField(max_length=255, unique=True)),
                ('en_title', models.CharField(blank=True, default='', max_length=255)),
                ('he_title', models.CharField(blank=True, default='', max_length=255)),
            ],
        ),
        migrations.CreateModel(
            name='TopicOfTheDay',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('start_date', models.DateField()),
                ('topic', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='topic_of_the_day', to='django_topics.Topic')),
            ],
            options={
                'verbose_name': 'Topic of the Day',
                'verbose_name_plural': 'Topics of the Day',
            },
        ),
        migrations.CreateModel(
            name='TopicPool',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=255, unique=True)),
            ],
        ),
        # Many-to-many between topics and pools (reverse accessor: `topics`).
        migrations.AddField(
            model_name='topic',
            name='pools',
            field=models.ManyToManyField(blank=True, related_name='topics', to='django_topics.TopicPool'),
        ),
        # Optional secondary topic on a seasonal topic.
        migrations.AddField(
            model_name='seasonaltopic',
            name='secondary_topic',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='seasonal_secondary_topic', to='django_topics.Topic'),
        ),
        # Required primary topic on a seasonal topic.
        migrations.AddField(
            model_name='seasonaltopic',
            name='topic',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='seasonal_topic', to='django_topics.Topic'),
        ),
        # A given topic may appear at most once per start date in each table.
        migrations.AlterUniqueTogether(
            name='topicoftheday',
            unique_together=set([('topic', 'start_date')]),
        ),
        migrations.AlterUniqueTogether(
            name='seasonaltopic',
            unique_together=set([('topic', 'start_date')]),
        ),
    ]
# Switches Topic's primary key from the auto-generated `id` column to `slug`.
class Migration(migrations.Migration):

    dependencies = [
        ('django_topics', '0002_auto_20241121_0617'),
    ]

    operations = [
        # Drop the surrogate key first...
        migrations.RemoveField(
            model_name='topic',
            name='id',
        ),
        # ...then promote `slug` to primary key.
        migrations.AlterField(
            model_name='topic',
            name='slug',
            field=models.CharField(max_length=255, primary_key=True, serialize=False),
        ),
    ]
# Tightens SeasonalTopic.lang: restricts it to the two language choices
# ('en' / 'he') and sets max_length to 2.
class Migration(migrations.Migration):

    dependencies = [
        ('django_topics', '0004_auto_20241126_2359'),
    ]

    operations = [
        migrations.AlterField(
            model_name='seasonaltopic',
            name='lang',
            field=models.CharField(choices=[('en', 'English'), ('he', 'Hebrew')], max_length=2),
        ),
    ]
# Adds per-language proxy models for TopicOfTheDay, renames the SeasonalTopic
# proxies' admin labels, and adds a `lang` column to TopicOfTheDay.
class Migration(migrations.Migration):

    dependencies = [
        ('django_topics', '0006_seasonaltopicenglish_seasonaltopichebrew'),
    ]

    operations = [
        # Proxy models declare no fields of their own (`fields` is empty and
        # 'proxy' is True); their base is the topicoftheday model.
        migrations.CreateModel(
            name='TopicOfTheDayEnglish',
            fields=[
            ],
            options={
                'verbose_name': 'Landing Page - Topic of the Day (EN)',
                'verbose_name_plural': 'Landing Page - Topic of the Day (EN)',
                'proxy': True,
                'indexes': [],
            },
            bases=('django_topics.topicoftheday',),
        ),
        migrations.CreateModel(
            name='TopicOfTheDayHebrew',
            fields=[
            ],
            options={
                'verbose_name': 'Landing Page - Topic of the Day (HE)',
                'verbose_name_plural': 'Landing Page - Topic of the Day (HE)',
                'proxy': True,
                'indexes': [],
            },
            bases=('django_topics.topicoftheday',),
        ),
        # Relabel the seasonal-topic proxies.
        migrations.AlterModelOptions(
            name='seasonaltopicenglish',
            options={'verbose_name': 'Landing Page - Calendar (EN)', 'verbose_name_plural': 'Landing Page - Calendar (EN)'},
        ),
        migrations.AlterModelOptions(
            name='seasonaltopichebrew',
            options={'verbose_name': 'Landing Page - Calendar (HE)', 'verbose_name_plural': 'Landing Page - Calendar (HE)'},
        ),
        # The 'en' default is supplied for this migration only; with
        # preserve_default=False it is not kept on the field afterwards.
        migrations.AddField(
            model_name='topicoftheday',
            name='lang',
            field=models.CharField(choices=[('en', 'English'), ('he', 'Hebrew')], default='en', max_length=2),
            preserve_default=False,
        ),
    ]
class SeasonalTopic(models.Model):
    """
    A topic featured from `start_date` onwards, optionally with a secondary
    topic and with user-facing "display" dates for Israel and the diaspora.

    `clean()` fills in display dates that were left blank (end from start,
    diaspora from Israel) and validates that each start/end pair is coherent.
    """
    topic = models.ForeignKey(
        Topic,
        on_delete=models.CASCADE,
        related_name='seasonal_topic'
    )
    secondary_topic = models.ForeignKey(
        Topic,
        on_delete=models.CASCADE,
        related_name='seasonal_secondary_topic',
        blank=True,
        null=True,
        help_text="Secondary topic which will be displayed alongside `topic`. E.g. `topic` is 'Teshuva' then secondary topic could be 'Yom Kippur'."
    )
    start_date = models.DateField(help_text="Start date of when this will appear. End date is implied by when the next Seasonal Topic is displayed.")
    display_start_date_israel = models.DateField(blank=True, null=True)
    display_end_date_israel = models.DateField(blank=True, null=True)
    display_start_date_diaspora = models.DateField(blank=True, null=True)
    display_end_date_diaspora = models.DateField(blank=True, null=True)
    display_date_prefix = models.CharField(max_length=255, blank=True, null=True)
    display_date_suffix = models.CharField(max_length=255, blank=True, null=True)
    lang = models.CharField(max_length=2, choices=[('en', 'English'), ('he', 'Hebrew')])

    class Meta:
        # NOTE(review): uniqueness does not include `lang`, so an English and a
        # Hebrew entry cannot share the same topic and start date - confirm
        # that this is intended.
        unique_together = ('topic', 'start_date')
        verbose_name = "Landing Page - Calendar"
        verbose_name_plural = "Landing Page - Calendar"

    def populate_field_based_on_field(self, field, reference_field):
        """Copy `reference_field` into `field` when `field` is empty and the reference is set."""
        if not getattr(self, field, None) and getattr(self, reference_field, None):
            setattr(self, field, getattr(self, reference_field))

    def validate_start_end_dates(self, start_date_field, end_date_field):
        """
        Check that a start/end pair of date fields is coherent.

        Passes when there is no end date (with or without a start date), and
        when the start is on or before the end.

        Raises:
            ValidationError: if an end date is set without a start date, or if
                the start date is after the end date.
        """
        start_date = getattr(self, start_date_field, None)
        end_date = getattr(self, end_date_field, None)
        if not end_date:
            # No data at all, or a start date only: there is nothing to compare.
            # (Comparing a date against None would raise TypeError.)
            return
        if not start_date:
            raise ValidationError(f"End date field '{end_date_field}' defined without start date.")
        if start_date > end_date:
            raise ValidationError(f"Start date field '{start_date_field}' cannot be after end date.")

    def clean(self):
        """
        Fill in blank display dates, validate them, and strip prefix/suffix whitespace.

        Raises:
            ValidationError: if a diaspora start date is given without an Israel
                start date, or if a start/end pair is incoherent.
        """
        # Order matters: first a missing end date defaults to its own start
        # date, then missing diaspora dates default to the Israel dates.
        self.populate_field_based_on_field('display_end_date_israel', 'display_start_date_israel')
        self.populate_field_based_on_field('display_end_date_diaspora', 'display_start_date_diaspora')
        self.populate_field_based_on_field('display_start_date_diaspora', 'display_start_date_israel')
        self.populate_field_based_on_field('display_end_date_diaspora', 'display_end_date_israel')
        if not self.display_start_date_israel and self.display_start_date_diaspora:
            raise ValidationError("If diaspora date is defined, Israel date must also be defined.")
        self.validate_start_end_dates('display_start_date_israel', 'display_end_date_israel')
        self.validate_start_end_dates('display_start_date_diaspora', 'display_end_date_diaspora')
        if self.display_date_prefix:
            self.display_date_prefix = self.display_date_prefix.strip()
        if self.display_date_suffix:
            self.display_date_suffix = self.display_date_suffix.strip()

    def __str__(self):
        return f"{self.topic.slug} ({self.start_date})"
class TopicOfTheDay(models.Model):
    """
    Schedules a topic to be featured starting on `start_date`, for one language.

    A topic can be scheduled at most once per start date.
    """
    topic = models.ForeignKey(Topic, on_delete=models.CASCADE, related_name='topic_of_the_day')
    start_date = models.DateField()
    lang = models.CharField(
        max_length=2,
        choices=[('en', 'English'), ('he', 'Hebrew')],
    )

    class Meta:
        verbose_name = "Landing Page - Topic of the Day"
        verbose_name_plural = "Landing Page - Topic of the Day"
        unique_together = ('topic', 'start_date')

    def __str__(self):
        # Rendered as "<topic slug> (<start date>)".
        return "{} ({})".format(self.topic.slug, self.start_date)
b/docs/openAPI.json index 213320032b..198651e12c 100644 --- a/docs/openAPI.json +++ b/docs/openAPI.json @@ -4183,7 +4183,6 @@ }, "categoryDescription": {}, "numSources": 1767, - "good_to_promote": true, "description_published": true, "data_source": "sefaria", "primaryTitle": { @@ -4605,7 +4604,6 @@ "isTopLevelDisplay": true, "displayOrder": 30, "numSources": 2937, - "good_to_promote": true, "primaryTitle": { "en": "Prayer", "he": "תפילה" @@ -4637,7 +4635,6 @@ "_temp_id": "תורה" }, "numSources": 2333, - "good_to_promote": true, "primaryTitle": { "en": "Torah", "he": "תורה" @@ -4780,7 +4777,6 @@ }, "categoryDescription": {}, "numSources": 1967, - "good_to_promote": true, "description_published": true, "data_source": "sefaria", "primaryTitle": { @@ -4883,7 +4879,6 @@ "categoryDescription": {}, "displayOrder": 0, "numSources": 1662, - "good_to_promote": true, "description_published": true, "data_source": "sefaria", "image": { @@ -5132,7 +5127,6 @@ } }, "numSources": 7, - "good_to_promote": true, "primaryTitle": { "en": "Metushelach", "he": "מתושלח" @@ -9731,7 +9725,6 @@ "categoryDescription": {}, "displayOrder": 0, "numSources": 1662, - "good_to_promote": true, "description_published": true, "data_source": "sefaria", "image": { @@ -10041,9 +10034,6 @@ "format": "int32", "type": "integer" }, - "good_to_promote": { - "type": "boolean" - }, "description_published": { "type": "boolean" }, @@ -10059,7 +10049,7 @@ } } }, - "example": "{\n\"slug\": \"metushelach\",\n\"titles\": [\n{\n\"text\": \"Metushelach\",\n\"lang\": \"en\",\n\"primary\": true,\n\"transliteration\": true\n},\n{\n\"text\": \"מתושלח\",\n\"lang\": \"he\",\n\"primary\": true\n},\n{\n\"text\": \"Methuselah\",\n\"lang\": \"en\"\n},\n{\n\"text\": \"Methushelach\",\n\"lang\": \"en\"\n}\n],\n\"subclass\": \"person\",\n\"alt_ids\": {\n\"_temp_id\": \"מתושלח\",\n\"wikidata\": \"Q156290\"\n},\n\"properties\": {\n\"enWikiLink\": {\n\"value\": \"https://en.wikipedia.org/wiki/Methuselah\",\n\"dataSource\": 
\"wikidata\"\n},\n\"heWikiLink\": {\n\"value\": \"https://he.wikipedia.org/wiki/מתושלח\",\n\"dataSource\": \"wikidata\"\n},\n\"deWikiLink\": {\n\"value\": \"https://de.wikipedia.org/wiki/Methusalem\",\n\"dataSource\": \"wikidata\"\n},\n\"esWikiLink\": {\n\"value\": \"https://es.wikipedia.org/wiki/Matusalén\",\n\"dataSource\": \"wikidata\"\n},\n\"frWikiLink\": {\n\"value\": \"https://fr.wikipedia.org/wiki/Mathusalem\",\n\"dataSource\": \"wikidata\"\n},\n\"ruWikiLink\": {\n\"value\": \"https://ru.wikipedia.org/wiki/Мафусал_(потомок_Сифа)\",\n\"dataSource\": \"wikidata\"\n}\n},\n\"numSources\": 7,\n\"good_to_promote\": true,\n\"primaryTitle\": {\n\"en\": \"Metushelach\",\n\"he\": \"מתושלח\"\n}\n}" + "example": "{\n\"slug\": \"metushelach\",\n\"titles\": [\n{\n\"text\": \"Metushelach\",\n\"lang\": \"en\",\n\"primary\": true,\n\"transliteration\": true\n},\n{\n\"text\": \"מתושלח\",\n\"lang\": \"he\",\n\"primary\": true\n},\n{\n\"text\": \"Methuselah\",\n\"lang\": \"en\"\n},\n{\n\"text\": \"Methushelach\",\n\"lang\": \"en\"\n}\n],\n\"subclass\": \"person\",\n\"alt_ids\": {\n\"_temp_id\": \"מתושלח\",\n\"wikidata\": \"Q156290\"\n},\n\"properties\": {\n\"enWikiLink\": {\n\"value\": \"https://en.wikipedia.org/wiki/Methuselah\",\n\"dataSource\": \"wikidata\"\n},\n\"heWikiLink\": {\n\"value\": \"https://he.wikipedia.org/wiki/מתושלח\",\n\"dataSource\": \"wikidata\"\n},\n\"deWikiLink\": {\n\"value\": \"https://de.wikipedia.org/wiki/Methusalem\",\n\"dataSource\": \"wikidata\"\n},\n\"esWikiLink\": {\n\"value\": \"https://es.wikipedia.org/wiki/Matusalén\",\n\"dataSource\": \"wikidata\"\n},\n\"frWikiLink\": {\n\"value\": \"https://fr.wikipedia.org/wiki/Mathusalem\",\n\"dataSource\": \"wikidata\"\n},\n\"ruWikiLink\": {\n\"value\": \"https://ru.wikipedia.org/wiki/Мафусал_(потомок_Сифа)\",\n\"dataSource\": \"wikidata\"\n}\n},\n\"numSources\": 7,\n\"primaryTitle\": {\n\"en\": \"Metushelach\",\n\"he\": \"מתושלח\"\n}\n}" }, "url": { "description": "The `Ref` in a format appropriate for a 
URL, with spaces replaced with `.` etc. ", @@ -10105,7 +10095,6 @@ }, "categoryDescription": {}, "numSources": 217, - "good_to_promote": true, "description_published": true, "primaryTitle": { "en": "Hillel", @@ -10140,10 +10129,6 @@ "description": "A description of the category of this topic", "type": "string" }, - "good_to_promote": { - "description": "A topic which will be included in our results from the `random-by-topic` endpoint. ", - "type": "boolean" - }, "numSources": { "description": "The number of text sources associated with a topic. ", "type": "integer", @@ -10192,7 +10177,6 @@ }, "categoryDescription": {}, "numSources": 120, - "good_to_promote": true, "description_published": true, "data_source": "sefaria", "primaryTitle": { diff --git a/helm-chart/sefaria-project/templates/configmap/gunicorn.yaml b/helm-chart/sefaria-project/templates/configmap/gunicorn.yaml index ce7c0159cf..86644a6c18 100644 --- a/helm-chart/sefaria-project/templates/configmap/gunicorn.yaml +++ b/helm-chart/sefaria-project/templates/configmap/gunicorn.yaml @@ -25,6 +25,7 @@ data: {{- end }} loglevel = "warning" + preload_app = True {{- if .Values.instrumentation.enabled }} def post_fork(server, worker): @@ -33,6 +34,10 @@ data: {{- end }} + def on_starting(server): + from reader.startup import init_library_cache + init_library_cache() + def combined_logformat(logger, name, event_dict): if event_dict.get('logger') == "gunicorn.access": message = event_dict['event'] diff --git a/helm-chart/sefaria-project/templates/rollout/task.yaml b/helm-chart/sefaria-project/templates/rollout/task.yaml index 3f9a233e59..af5e9cf922 100644 --- a/helm-chart/sefaria-project/templates/rollout/task.yaml +++ b/helm-chart/sefaria-project/templates/rollout/task.yaml @@ -82,7 +82,7 @@ spec: - name: SLACK_URL valueFrom: secretKeyRef: - name: { { template "sefaria.secrets.slackWebhook" . } } + name: {{ template "sefaria.secrets.slackWebhook" . 
}} key: slack-webhook envFrom: {{- if .Values.tasks.enabled }} diff --git a/helm-chart/sefaria-project/templates/rollout/web.yaml b/helm-chart/sefaria-project/templates/rollout/web.yaml index 8b5837a08c..14dd148c6e 100644 --- a/helm-chart/sefaria-project/templates/rollout/web.yaml +++ b/helm-chart/sefaria-project/templates/rollout/web.yaml @@ -76,7 +76,7 @@ spec: - name: web image: "{{ .Values.web.containerImage.imageRegistry }}:{{ .Values.web.containerImage.tag }}" imagePullPolicy: Always - args: [ "python manage.py migrate && gunicorn sefaria.wsgi --access-logfile - --error-logfile - --timeout 300 --threads {{ .Values.web.resources.web.gunicornThreadCount }} --worker-tmp-dir /dev/shm -b 0.0.0.0:80" ] + args: [ "python manage.py migrate && gunicorn sefaria.wsgi --access-logfile - --error-logfile - --timeout 420 --threads {{ .Values.web.resources.web.gunicornThreadCount }} --worker-tmp-dir /dev/shm -b 0.0.0.0:80" ] env: # WEB_CONCURRENCY is used for determining the number of server workers - name: WEB_CONCURRENCY diff --git a/reader/management/commands/__init__.py b/reader/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/reader/management/commands/runserver.py b/reader/management/commands/runserver.py new file mode 100644 index 0000000000..090485cf19 --- /dev/null +++ b/reader/management/commands/runserver.py @@ -0,0 +1,14 @@ +# Using staticfiles as the base class in order to not overwrite its custom runserver logic +from django.contrib.staticfiles.management.commands.runserver import Command as RunserverCommand +from reader.startup import init_library_cache +import structlog +logger = structlog.get_logger(__name__) + + +class Command(RunserverCommand): + + def get_handler(self, *args, **options): + handler = super(Command, self).get_handler(*args, **options) + logger.info("Starting reader application") + init_library_cache() + return handler diff --git a/reader/startup.py b/reader/startup.py new file mode 100644 index 
0000000000..c6c3a468fc --- /dev/null +++ b/reader/startup.py @@ -0,0 +1,41 @@ + + +def init_library_cache(): + import django + django.setup() + import structlog + logger = structlog.get_logger(__name__) + + from sefaria.model.text import library + from sefaria.system.multiserver.coordinator import server_coordinator + from django.conf import settings + logger.info("Initializing library objects.") + logger.info("Initializing TOC Tree") + library.get_toc_tree() + + logger.info("Initializing Shared Cache") + library.init_shared_cache() + + if not settings.DISABLE_AUTOCOMPLETER: + logger.info("Initializing Full Auto Completer") + library.build_full_auto_completer() + + logger.info("Initializing Ref Auto Completer") + library.build_ref_auto_completer() + + logger.info("Initializing Lexicon Auto Completers") + library.build_lexicon_auto_completers() + + logger.info("Initializing Cross Lexicon Auto Completer") + library.build_cross_lexicon_auto_completer() + + logger.info("Initializing Topic Auto Completer") + library.build_topic_auto_completer() + + if settings.ENABLE_LINKER: + logger.info("Initializing Linker") + library.build_linker('he') + + if server_coordinator: + server_coordinator.connect() + logger.info("Initialization Complete") diff --git a/reader/views.py b/reader/views.py index 2f8d64b286..7159b8569b 100644 --- a/reader/views.py +++ b/reader/views.py @@ -55,7 +55,7 @@ from sefaria.site.site_settings import SITE_SETTINGS from sefaria.system.multiserver.coordinator import server_coordinator from sefaria.system.decorators import catch_error_as_json, sanitize_get_params, json_response_decorator -from sefaria.system.exceptions import InputError, PartialRefInputError, BookNameError, NoVersionFoundError, DictionaryEntryNotFoundError +from sefaria.system.exceptions import InputError, PartialRefInputError, BookNameError, NoVersionFoundError, DictionaryEntryNotFoundError, ComplexBookLevelRefError from sefaria.system.cache import django_cache from 
sefaria.system.database import db from sefaria.helper.search import get_query_obj @@ -90,39 +90,6 @@ import structlog logger = structlog.get_logger(__name__) -# # # -# Initialized cache library objects that depend on sefaria.model being completely loaded. -logger.info("Initializing library objects.") -logger.info("Initializing TOC Tree") -library.get_toc_tree() - -logger.info("Initializing Shared Cache") -library.init_shared_cache() - -if not DISABLE_AUTOCOMPLETER: - logger.info("Initializing Full Auto Completer") - library.build_full_auto_completer() - - logger.info("Initializing Ref Auto Completer") - library.build_ref_auto_completer() - - logger.info("Initializing Lexicon Auto Completers") - library.build_lexicon_auto_completers() - - logger.info("Initializing Cross Lexicon Auto Completer") - library.build_cross_lexicon_auto_completer() - - logger.info("Initializing Topic Auto Completer") - library.build_topic_auto_completer() - -if ENABLE_LINKER: - logger.info("Initializing Linker") - library.build_linker('he') - -if server_coordinator: - server_coordinator.connect() -# # # - def render_template(request, template_name='base.html', app_props=None, template_context=None, content_type=None, status=None, using=None): """ @@ -1451,8 +1418,11 @@ def _get_text(oref, versionEn=versionEn, versionHe=versionHe, commentary=comment return text if not multiple or abs(multiple) == 1: - text = _get_text(oref, versionEn=versionEn, versionHe=versionHe, commentary=commentary, context=context, pad=pad, - alts=alts, wrapLinks=wrapLinks, layer_name=layer_name) + try: + text = _get_text(oref, versionEn=versionEn, versionHe=versionHe, commentary=commentary, context=context, pad=pad, + alts=alts, wrapLinks=wrapLinks, layer_name=layer_name) + except Exception as e: + return jsonResponse({'error': str(e)}, status=400) return jsonResponse(text, cb) else: # Return list of many sections @@ -3234,6 +3204,16 @@ def topic_graph_api(request, topic): return jsonResponse(response, 
callback=request.GET.get("callback", None)) +@catch_error_as_json +def topic_pool_api(request, pool_name): + from django_topics.models import Topic as DjangoTopic + n_samples = int(request.GET.get("n")) + order = request.GET.get("order", "random") + topic_slugs = DjangoTopic.objects.sample_topic_slugs(order, pool_name, n_samples) + response = [Topic.init(slug).contents() for slug in topic_slugs] + return jsonResponse(response, callback=request.GET.get("callback", None)) + + @staff_member_required def reorder_topics(request): topics = json.loads(request.POST["json"]).get("topics", []) @@ -4229,8 +4209,9 @@ def random_by_topic_api(request): """ Returns Texts API data for a random text taken from popular topic tags """ + from django_topics.models import PoolType cb = request.GET.get("callback", None) - random_topic = get_random_topic(good_to_promote=True) + random_topic = get_random_topic(PoolType.TORAH_TAB.value) if random_topic is None: return random_by_topic_api(request) random_source = get_random_topic_source(random_topic) @@ -4628,9 +4609,9 @@ def android_asset_links_json(request): }] ) -def application_health_api(request): +def rollout_health_api(request): """ - Defines the /healthz and /health-check API endpoints which responds with + Defines the /healthz-rollout API endpoint which responds with 200 if the application is ready for requests, 500 if the application is not ready for requests """ @@ -4644,9 +4625,9 @@ def application_health_api_nonlibrary(request): return http.HttpResponse("Healthy", status="200") -def rollout_health_api(request): +def application_health_api(request): """ - Defines the /healthz-rollout API endpoint which responds with + Defines the /healthz API endpoint which responds with 200 if the services Django depends on, Redis, Multiserver, and NodeJs are available. 
500 if any of the aforementioned services are not available diff --git a/scripts/migrations/migrate_good_to_promote_to_topic_pools.py b/scripts/migrations/migrate_good_to_promote_to_topic_pools.py new file mode 100644 index 0000000000..7b10f547d4 --- /dev/null +++ b/scripts/migrations/migrate_good_to_promote_to_topic_pools.py @@ -0,0 +1,83 @@ +import django +from django.db import IntegrityError, DataError + +django.setup() +from sefaria.model import TopicSet, RefTopicLinkSet +from django_topics.models.topic import Topic +from django_topics.models.pool import TopicPool, PoolType + + +def add_to_torah_tab_pool(): + print('Adding topics to torah tab pool') + pool = TopicPool.objects.get(name=PoolType.TORAH_TAB.value) + ts = TopicSet({'good_to_promote': True}) + for topic in ts: + t = Topic.objects.get(slug=topic.slug) + t.pools.add(pool) + + +def add_to_library_pool(): + print('Adding topics to library pool') + pool = TopicPool.objects.get(name=PoolType.LIBRARY.value) + ts = TopicSet({'subclass': 'author'}) + for topic in ts: + t = Topic.objects.get(slug=topic.slug) + t.pools.add(pool) + links = RefTopicLinkSet({'is_sheet': False, 'linkType': 'about'}) + topic_slugs = {link.toTopic for link in links} + for slug in topic_slugs: + try: + t = Topic.objects.get(slug=slug) + t.pools.add(pool) + except Topic.DoesNotExist: + print('Could not find topic with slug {}'.format(slug)) + + +def add_to_sheets_pool(): + print('Adding topics to sheets pool') + pool = TopicPool.objects.get(name=PoolType.SHEETS.value) + links = RefTopicLinkSet({'is_sheet': True, 'linkType': 'about'}) + topic_slugs = {link.toTopic for link in links} + for slug in topic_slugs: + try: + t = Topic.objects.get(slug=slug) + t.pools.add(pool) + except Topic.DoesNotExist: + print('Could not find topic with slug {}'.format(slug)) + + +def delete_all_data(): + print("Delete data") + Topic.pools.through.objects.all().delete() + Topic.objects.all().delete() + TopicPool.objects.all().delete() + + +def add_topics(): 
+ print('Adding topics') + for topic in TopicSet({}): + try: + Topic.objects.create(slug=topic.slug, en_title=topic.get_primary_title('en'), he_title=topic.get_primary_title('he')) + except IntegrityError: + print('Duplicate topic', topic.slug) + except DataError: + print('Data error with topic', topic.slug) + + +def add_pools(): + print('Adding pools') + for pool_name in [PoolType.LIBRARY.value, PoolType.SHEETS.value, 'general_en', 'general_he', PoolType.TORAH_TAB.value]: + TopicPool.objects.create(name=pool_name) + + +def run(): + delete_all_data() + add_topics() + add_pools() + add_to_torah_tab_pool() + add_to_library_pool() + add_to_sheets_pool() + + +if __name__ == "__main__": + run() diff --git a/sefaria/helper/linker.py b/sefaria/helper/linker.py index 22f1e4ab67..12e767d0bf 100644 --- a/sefaria/helper/linker.py +++ b/sefaria/helper/linker.py @@ -2,15 +2,56 @@ import json import spacy import structlog +from cerberus import Validator from sefaria.model.linker.ref_part import TermContext, RefPartType from sefaria.model.linker.ref_resolver import PossiblyAmbigResolvedRef from sefaria.model import text, library from sefaria.model.webpage import WebPage from sefaria.system.cache import django_cache -from typing import List, Union, Optional, Tuple +from api.api_errors import APIInvalidInputException +from typing import List, Optional, Tuple logger = structlog.get_logger(__name__) +FIND_REFS_POST_SCHEMA = { + "text": { + "type": "dict", + "required": True, + "schema": { + "title": {"type": "string", "required": True}, + "body": {"type": "string", "required": True}, + }, + }, + "metaDataForTracking": { + "type": "dict", + "required": False, + "schema": { + "url": {"type": "string", "required": False}, + "description": {"type": "string", "required": False}, + "title": {"type": "string", "required": False}, + }, + }, + "lang": { + "type": "string", + "allowed": ["he", "en"], + "required": False, + }, + "version_preferences_by_corpus": { + "type": "dict", + "required": 
False, + "nullable": True, + "keysrules": {"type": "string"}, + "valuesrules": { + "type": "dict", + "schema": { + "type": "string", + "keysrules": {"type": "string"}, + "valuesrules": {"type": "string"}, + }, + }, + }, +} + def load_spacy_model(path: str) -> spacy.Language: import re, tarfile @@ -64,13 +105,12 @@ class _FindRefsText: body: str lang: str - # def __post_init__(self): - # from sefaria.utils.hebrew import is_mostly_hebrew - # self.lang = 'he' if is_mostly_hebrew(self.body) else 'en' - def _unpack_find_refs_request(request): + validator = Validator(FIND_REFS_POST_SCHEMA) post_body = json.loads(request.body) + if not validator.validate(post_body): + raise APIInvalidInputException(validator.errors) meta_data = post_body.get('metaDataForTracking') return _create_find_refs_text(post_body), _create_find_refs_options(request.GET, post_body), meta_data diff --git a/sefaria/helper/tests/linker_test.py b/sefaria/helper/tests/linker_test.py index e4afc30574..8779429e77 100644 --- a/sefaria/helper/tests/linker_test.py +++ b/sefaria/helper/tests/linker_test.py @@ -10,6 +10,7 @@ from sefaria.model.text import Ref, TextChunk from sefaria.model.webpage import WebPage from sefaria.settings import ENABLE_LINKER +from api.api_errors import APIInvalidInputException if not ENABLE_LINKER: pytest.skip("Linker not enabled", allow_module_level=True) @@ -80,6 +81,12 @@ def mock_request_post_data_without_meta_data(mock_request_post_data: dict) -> di return mock_request_post_data +@pytest.fixture +def mock_request_invalid_post_data(mock_request_post_data: dict) -> dict: + mock_request_post_data['text'] = 'plain text' + return mock_request_post_data + + def make_mock_request(post_data: dict) -> WSGIRequest: factory = RequestFactory() request = factory.post('/api/find-refs', data=json.dumps(post_data), content_type='application/json') @@ -109,6 +116,11 @@ def mock_request_without_meta_data(mock_request_post_data_without_meta_data: dic return 
make_mock_request(mock_request_post_data_without_meta_data) +@pytest.fixture +def mock_request_invalid(mock_request_invalid_post_data: dict) -> WSGIRequest: + return make_mock_request(mock_request_invalid_post_data) + + @pytest.fixture def mock_webpage() -> WebPage: # Note, the path of WebPage matches the path of the import we want to patch @@ -162,6 +174,13 @@ def test_make_find_refs_response_without_meta_data(self, mock_request_without_me mock_webpage.add_hit.assert_not_called() mock_webpage.save.assert_not_called() + def test_make_find_refs_response_invalid_post_data(self, mock_request_invalid: dict, + mock_webpage: Mock): + with pytest.raises(APIInvalidInputException) as exc_info: + response = linker.make_find_refs_response(mock_request_invalid) + # assert that the 'text' field had a validation error + assert 'text' in exc_info.value.args[0] + class TestUnpackFindRefsRequest: def test_unpack_find_refs_request(self, mock_request: WSGIRequest): @@ -198,8 +217,8 @@ def mock_get_linker(self, spacy_model: spacy.Language): with patch.object(library, 'get_linker') as mock_get_linker: mock_linker = Mock() mock_get_linker.return_value = mock_linker - mock_linker.link.return_value = LinkedDoc('', [], []) - mock_linker.link_by_paragraph.return_value = LinkedDoc('', [], []) + mock_linker.link.return_value = LinkedDoc('', [], [], []) + mock_linker.link_by_paragraph.return_value = LinkedDoc('', [], [], []) yield mock_get_linker def test_make_find_refs_response_linker_v3(self, mock_get_linker: WSGIRequest, diff --git a/sefaria/helper/topic.py b/sefaria/helper/topic.py index d6f8aa92bc..05f8d3cb4f 100644 --- a/sefaria/helper/topic.py +++ b/sefaria/helper/topic.py @@ -280,16 +280,17 @@ def curated_primacy(order_dict, lang): return (bord.get('numDatasource', 0) * bord.get('tfidf', 0)) - (aord.get('numDatasource', 0) * aord.get('tfidf', 0)) -def get_random_topic(good_to_promote=True) -> Optional[Topic]: - query = {"good_to_promote": True} if good_to_promote else {} - 
random_topic_dict = list(db.topics.aggregate([ - {"$match": query}, - {"$sample": {"size": 1}} - ])) - if len(random_topic_dict) == 0: +def get_random_topic(pool=None) -> Optional[Topic]: + """ + :param pool: name of the pool from which to select the topic. If `None`, all topics are considered. + :return: Returns a random topic from the database. If you provide `pool`, then the selection is limited to topics in that pool. + """ + from django_topics.models import Topic as DjangoTopic + random_topic_slugs = DjangoTopic.objects.sample_topic_slugs('random', pool, limit=1) + if len(random_topic_slugs) == 0: return None - return Topic(random_topic_dict[0]) + return Topic.init(random_topic_slugs[0]) def get_random_topic_source(topic:Topic) -> Optional[Ref]: @@ -728,15 +729,16 @@ def calculate_other_ref_scores(ref_topic_map): return num_datasource_map, langs_available, comp_date_map, order_id_map -def update_ref_topic_link_orders(sheet_source_links, sheet_topic_links): - other_ref_topic_links = list(RefTopicLinkSet({"is_sheet": False, "generatedBy": {"$ne": TopicLinkHelper.generated_by_sheets}})) - ref_topic_links = other_ref_topic_links + sheet_source_links +def update_ref_topic_link_orders(source_links, sheet_topic_links): + """ - topic_tref_score_map, ref_topic_map = calculate_mean_tfidf(ref_topic_links) + @param source_links: Links between sources and topics (as opposed to sheets and topics) + @param sheet_topic_links: Links between sheets and topics + """ + topic_tref_score_map, ref_topic_map = calculate_mean_tfidf(source_links) num_datasource_map, langs_available, comp_date_map, order_id_map = calculate_other_ref_scores(ref_topic_map) pr_map, pr_seg_map = calculate_pagerank_scores(ref_topic_map) sheet_cache = {} - intra_topic_link_cache = {} def get_sheet_order(topic_slug, sheet_id): if sheet_id in sheet_cache: @@ -796,7 +798,7 @@ def get_sheet_order(topic_slug, sheet_id): } all_ref_topic_links_updated = [] - all_ref_topic_links = sheet_topic_links + ref_topic_links 
+ all_ref_topic_links = sheet_topic_links + source_links for l in tqdm(all_ref_topic_links, desc='update link orders'): if l.is_sheet: setattr(l, 'order', get_sheet_order(l.toTopic, int(l.ref.replace("Sheet ", "")))) @@ -960,23 +962,14 @@ def calculate_popular_writings_for_authors(top_n, min_pr): "order": {"custom_order": rd['pagesheetrank']} }).save() - def recalculate_secondary_topic_data(): - # run before everything else because this creates new links - calculate_popular_writings_for_authors(100, 300) + source_links = RefTopicLinkSet({'is_sheet': False}) + sheet_links = [RefTopicLink(l) for l in generate_sheet_topic_links()] - sheet_source_links, sheet_related_links, sheet_topic_links = generate_all_topic_links_from_sheets() - related_links = update_intra_topic_link_orders(sheet_related_links) - all_ref_links = update_ref_topic_link_orders(sheet_source_links, sheet_topic_links) + related_links = update_intra_topic_link_orders(IntraTopicLinkSet()) + all_ref_links = update_ref_topic_link_orders(source_links.array(), sheet_links) - # now that we've gathered all the new links, delete old ones and insert new ones - RefTopicLinkSet({"generatedBy": TopicLinkHelper.generated_by_sheets}).delete() RefTopicLinkSet({"is_sheet": True}).delete() - IntraTopicLinkSet({"generatedBy": TopicLinkHelper.generated_by_sheets}).delete() - print(f"Num Ref Links {len(all_ref_links)}") - print(f"Num Intra Links {len(related_links)}") - print(f"Num to Update {len(list(filter(lambda x: getattr(x, '_id', False), all_ref_links + related_links)))}") - print(f"Num to Insert {len(list(filter(lambda x: not getattr(x, '_id', False), all_ref_links + related_links)))}") db.topic_links.bulk_write([ UpdateOne({"_id": l._id}, {"$set": {"order": l.order}}) @@ -984,9 +977,6 @@ def recalculate_secondary_topic_data(): InsertOne(l.contents(for_db=True)) for l in (all_ref_links + related_links) ]) - add_num_sources_to_topics() - make_titles_unique() - def set_all_slugs_to_primary_title(): # reset all slugs 
to their primary titles, if they have drifted away diff --git a/sefaria/model/abstract.py b/sefaria/model/abstract.py index 2057c0e91a..1195b086be 100644 --- a/sefaria/model/abstract.py +++ b/sefaria/model/abstract.py @@ -244,16 +244,12 @@ def _validate(self): " not in " + ",".join(self.required_attrs) + " or " + ",".join(self.optional_attrs)) return False """ - for attr, schema in self.attr_schemas.items(): - v = Validator(schema) - try: - value = getattr(self, attr) - if not v.validate(value): - raise InputError(v.errors) - except AttributeError: - # not checking here if value exists, that is done above. - # assumption is if value doesn't exist, it's optional - pass + schema = self.attr_schemas + for key in schema: + schema[key]['allow_unknown'] = schema[key].get('allow_unknown', False) # allow unknowns only in the root + v = Validator(schema, allow_unknown=True) + if not v.validate(self._saveable_attrs()): + raise InputError(v.errors) return True def _normalize(self): diff --git a/sefaria/model/dependencies.py b/sefaria/model/dependencies.py index eb842a16bb..2a186610f2 100644 --- a/sefaria/model/dependencies.py +++ b/sefaria/model/dependencies.py @@ -57,7 +57,7 @@ def process_version_title_change_in_search(ver, **kwargs): text_index = library.get_index(ver.title) delete_version(text_index, kwargs.get("old"), ver.language) for ref in text_index.all_segment_refs(): - TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False) + TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, ver.languageFamilyName, ver.isPrimary) # Version Title Change diff --git a/sefaria/model/portal.py b/sefaria/model/portal.py index 36984ceaad..c10e6dde34 100644 --- a/sefaria/model/portal.py +++ b/sefaria/model/portal.py @@ -20,100 +20,51 @@ class Portal(abst.SluggedAbstractMongoRecord): "organization" ] attr_schemas = { - "about": { - "title": { - "type": "dict", - "required": True, - "schema": { - "en": {"type": "string", "required": 
True}, - "he": {"type": "string", "required": True} - } - }, - "title_url": {"type": "string"}, - "image_uri": {"type": "string"}, - "image_caption": { - "type": "dict", - "schema": { - "en": {"type": "string"}, - "he": {"type": "string"} - } - }, - "description": { - "type": "dict", - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - }, - "mobile": { - "title": { - "type": "dict", - "required": True, - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - "description": { - "type": "dict", - "schema": { - "en": {"type": "string"}, - "he": {"type": "string"} - } - }, - "android_link": {"type": "string"}, - "ios_link": {"type": "string"} - }, - "organization": { - "title": { - "type": "dict", - "required": True, - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - "description": { - "type": "dict", - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - }, - "newsletter": { - "title": { - "type": "dict", - "required": True, - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - "description": { - "type": "dict", - "schema": { - "en": {"type": "string", "required": True}, - "he": {"type": "string", "required": True} - } - }, - "title_url": {"type": "string"}, - "api_schema": { - "type": "dict", - "schema": { - "http_method": {"type": "string", "required": True}, - "payload": { - "type": "dict", - "schema": { - "first_name_key": {"type": "string"}, - "last_name_key": {"type": "string"}, - "email_key": {"type": "string"} - } - }, - } - } - } + 'about': {'type': 'dict', + 'schema': {'title': {'type': 'dict', + 'required': True, + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}, + 'title_url': {'type': 'string'}, + 
'image_uri': {'type': 'string'}, + 'image_caption': {'type': 'dict', + 'schema': {'en': {'type': 'string'}, 'he': {'type': 'string'}}}, + 'description': {'type': 'dict', + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}}}, + 'mobile': {'type': 'dict', + 'schema': {'title': {'type': 'dict', + 'required': True, + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}, + 'description': {'type': 'dict', + 'schema': {'en': {'type': 'string'}, 'he': {'type': 'string'}}}, + 'android_link': {'type': 'string'}, + 'ios_link': {'type': 'string'}}}, + 'organization': {'type': 'dict', + 'schema': {'title': {'type': 'dict', + 'required': True, + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}, + 'description': {'type': 'dict', + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}}}, + 'newsletter': {'type': 'dict', + 'schema': {'title': {'type': 'dict', + 'required': True, + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}, + 'description': {'type': 'dict', + 'schema': {'en': {'type': 'string', 'required': True}, + 'he': {'type': 'string', 'required': True}}}, + 'title_url': {'type': 'string'}, + 'api_schema': {'type': 'dict', + 'schema': {'http_method': {'type': 'string', 'required': True}, + 'payload': {'type': 'dict', + 'schema': { + 'first_name_key': {'type': 'string'}, + 'last_name_key': {'type': 'string'}, + 'email_key': {'type': 'string'}}}}}}} } def _validate(self): diff --git a/sefaria/model/tests/topic_test.py b/sefaria/model/tests/topic_test.py index 56345624f1..47c8ceb5e5 100644 --- a/sefaria/model/tests/topic_test.py +++ b/sefaria/model/tests/topic_test.py @@ -3,7 +3,7 @@ from sefaria.model.text import Ref from sefaria.system.database import db from sefaria.system.exceptions import SluggedMongoRecordMissingError 
-from sefaria.helper.topic import update_topic +from django_topics.models import Topic as DjangoTopic, TopicPool def make_topic(slug): @@ -105,6 +105,13 @@ def topic_graph_to_merge(): db.sheets.delete_one({"id": 1234567890}) +@pytest.fixture(scope='module') +def topic_pool(): + pool = TopicPool.objects.create(name='test-pool') + yield pool + pool.delete() + + class TestTopics(object): def test_graph_funcs(self, topic_graph): @@ -155,6 +162,38 @@ def test_merge(self, topic_graph_to_merge): {"slug": '30', 'asTyped': 'thirty'} ] + t40 = Topic.init('40') + assert t40 is None + DjangoTopic.objects.get(slug='20') + with pytest.raises(DjangoTopic.DoesNotExist): + DjangoTopic.objects.get(slug='40') + + def test_change_title(self, topic_graph): + ts = topic_graph['topics'] + dt1 = DjangoTopic.objects.get(slug=ts['1'].slug) + assert dt1.en_title == ts['1'].get_primary_title('en') + ts['1'].title_group.add_title('new title', 'en', True, True) + ts['1'].save() + dt1 = DjangoTopic.objects.get(slug=ts['1'].slug) + assert dt1.en_title == ts['1'].get_primary_title('en') + + def test_pools(self, topic_graph, topic_pool): + ts = topic_graph['topics'] + t1 = ts['1'] + assert len(t1.get_pools()) == 0 + t1.add_pool(topic_pool.name) + assert t1.get_pools() == [topic_pool.name] + + # dont add duplicates + t1.add_pool(topic_pool.name) + assert t1.get_pools() == [topic_pool.name] + + assert t1.has_pool(topic_pool.name) + t1.remove_pool(topic_pool.name) + assert len(t1.get_pools()) == 0 + # dont error when removing non-existant pool + t1.remove_pool(topic_pool.name) + def test_sanitize(self): t = Topic() t.slug = "sdfsdg