Skip to content

Commit

Permalink
Show similar mods on mod page
Browse files Browse the repository at this point in the history
  • Loading branch information
HebaruSan committed Dec 17, 2021
1 parent 79fb0da commit f4072a3
Show file tree
Hide file tree
Showing 10 changed files with 316 additions and 1 deletion.
6 changes: 6 additions & 0 deletions KerbalStuff/blueprints/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
ModList
from ..search import search_mods, search_users, typeahead_mods, get_mod_score
from ..thumbnail import thumb_path_from_background_path
from ..celery import update_mod_similarities

api = Blueprint('api', __name__)

Expand Down Expand Up @@ -545,6 +546,8 @@ def accept_grant_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
mod = _get_mod(mod_id)
author = _get_mod_pending_author(mod)
author.accepted = True
db.commit()
update_mod_similarities.delay([mod.id])
notify_ckan(mod, 'co-author-added')
return {'error': False}, 200

Expand Down Expand Up @@ -581,6 +584,8 @@ def revoke_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
author = [a for a in mod.shared_authors if a.user == new_user][0]
mod.shared_authors = [a for a in mod.shared_authors if a.user != current_user]
db.delete(author)
db.commit()
update_mod_similarities.delay([mod.id])
notify_ckan(mod, 'co-author-removed')
return {'error': False}, 200

Expand Down Expand Up @@ -698,6 +703,7 @@ def create_mod() -> Tuple[Dict[str, Any], int]:
db.commit()
mod.score = get_mod_score(mod)
db.commit()
update_mod_similarities.delay([mod.id])
set_game_info(game)
send_to_ckan(mod)
return {
Expand Down
5 changes: 5 additions & 0 deletions KerbalStuff/blueprints/mods.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Featured, Media, GameVersion, Game, Following
from ..search import get_mod_score
from ..thumbnail import thumb_path_from_background_path
from ..celery import update_mod_similarities

mods = Blueprint('mods', __name__)

Expand Down Expand Up @@ -295,6 +296,8 @@ def edit_mod(mod_id: int, mod_name: str) -> Union[str, werkzeug.wrappers.Respons
if not mod.published:
newly_published = True
mod.published = True
db.commit()
update_mod_similarities.delay([mod.id])
if ckan is None:
ckan = False
else:
Expand Down Expand Up @@ -499,6 +502,8 @@ def publish(mod_id: int, mod_name: str) -> werkzeug.wrappers.Response:
mod.published = True
mod.updated = datetime.now()
mod.score = get_mod_score(mod)
db.commit()
update_mod_similarities.delay([mod.id])
send_to_ckan(mod)
return redirect(url_for("mods.mod", mod_id=mod.id, mod_name=mod.name))

Expand Down
9 changes: 9 additions & 0 deletions KerbalStuff/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .objects import Mod
from .search import get_mod_score
from .ckan import import_ksp_versions_from_ckan
from .similarity import update_similar_mods

app = Celery("tasks", broker=_cfg("redis-connection"))

Expand Down Expand Up @@ -118,6 +119,14 @@ def ckan_version_import() -> None:
if game_id > 0:
import_ksp_versions_from_ckan(game_id)


@app.task
@with_session
def update_mod_similarities(mod_ids: List[int]) -> None:
    """Celery task: rebuild the stored similarity rows of the given mods.

    :param mod_ids: IDs of the mods whose similar-mod rows should be refreshed.
                    IDs that no longer match a mod are skipped.
    """
    for mod_id in mod_ids:
        mod = Mod.query.get(mod_id)
        # The task runs after the request that queued it, so the mod may have
        # been deleted in the meantime; Query.get returns None in that case
        if mod is not None:
            update_similar_mods(mod)


# to debug this:
# * add PTRACE capability to celery container via docker-compose.yaml
# celery:
Expand Down
48 changes: 47 additions & 1 deletion KerbalStuff/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os.path
from datetime import datetime
import re
from typing import Optional
from typing import Optional, Dict, Set

import bcrypt
from flask import url_for
Expand All @@ -14,6 +14,7 @@

from . import thumbnail
from .database import Base
from .str_similarity import meaningful_words, words_similarity


class Following(Base): # type: ignore
Expand Down Expand Up @@ -235,6 +236,7 @@ class Mod(Base): # type: ignore
followings = relationship('Following', back_populates='mod')
# List of users that follow this mods
followers = association_proxy('followings', 'user')
similar_mods = association_proxy('similarities', 'other_mod')

def background_thumb(self) -> Optional[str]:
return thumbnail.get_or_create(self)
Expand All @@ -251,6 +253,20 @@ def background_url(self, protocol: Optional[str], cdn_domain: Optional[str]) ->
else:
return url_for('mods.mod_background', mod_id=self.id, mod_name=self.name)

def get_author_names(self) -> Set[str]:
    """Return the usernames of the mod's owner and of all its shared authors.

    The set is built on first use and cached on the instance, so repeated
    comparisons against many other mods do not rebuild it.
    """
    self._author_names: Set[str]
    if not hasattr(self, '_author_names'):
        # Shared author rows reference their user through .user,
        # so the username has to be read from that user object
        self._author_names = {self.user.username,
                              *(a.user.username for a in self.shared_authors)}
    return self._author_names

def get_words(self, prop_name: str) -> Set[str]:
    """Return the meaningful words of one of this object's text properties.

    Only parse the strings once to speed up mass-compares: results are
    cached per property name on the instance.

    :param prop_name: Name of the attribute whose text should be parsed
    :return: The set produced by meaningful_words for that text
    """
    if not hasattr(self, '_words'):
        self._words: Dict[str, Set[str]] = {}
    if prop_name not in self._words:
        # A property that exists but is unset (None) is treated like empty
        # text; the getattr default alone only covers a missing attribute
        text = getattr(self, prop_name, None) or ''
        self._words[prop_name] = meaningful_words(text)
    return self._words[prop_name]

def __repr__(self) -> str:
return '<Mod %r %r>' % (self.id, self.name)

Expand Down Expand Up @@ -300,6 +316,36 @@ def __repr__(self) -> str:
return '<SharedAuthor %r>' % self.user_id


class ModSimilarity(Base):  # type: ignore
    """Row of the mod_similarity table: how similar other_mod is to main_mod.

    The score is 0.1 times the overlap of the two mods' author names plus
    the overlap of their meaningful words in each property of WORD_PROPS.
    """
    __tablename__ = 'mod_similarity'
    __table_args__ = (PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'), )
    similarity = Column(Float(precision=5), nullable=False)
    main_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    # Exposes Mod.similarities, ordered from most to least similar
    main_mod = relationship(
        'Mod',
        foreign_keys=main_mod_id,
        backref=backref('similarities',
                        passive_deletes=True,
                        order_by=similarity.desc()),
    )
    other_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    other_mod = relationship('Mod', foreign_keys=other_mod_id)

    Index('ix_mod_similarity_main_mod_similarity', main_mod_id, similarity.desc())

    # Text properties of a mod whose words take part in the comparison
    WORD_PROPS = ['name', 'short_description', 'description']

    def __init__(self, main_mod: Mod, other_mod: Mod) -> None:
        """Compute the similarity of two mods; only their IDs are stored."""
        self.main_mod_id = main_mod.id
        self.other_mod_id = other_mod.id
        author_overlap = words_similarity(main_mod.get_author_names(),
                                          other_mod.get_author_names())
        text_overlap = sum(words_similarity(main_mod.get_words(prop_name),
                                            other_mod.get_words(prop_name))
                           for prop_name in self.WORD_PROPS)
        # Shared authors count for much less than shared words
        self.similarity = 0.1 * author_overlap + text_overlap

    def __repr__(self) -> str:
        return f'<Mod Similarity {self.main_mod_id} {self.other_mod_id}>'


class DownloadEvent(Base): # type: ignore
__tablename__ = 'downloadevent'
id = Column(Integer, primary_key=True)
Expand Down
47 changes: 47 additions & 0 deletions KerbalStuff/similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from heapq import nlargest
from typing import List

from .objects import Mod, ModSimilarity


def find_most_similar(mod: Mod, how_many: int = 6) -> List[ModSimilarity]:
    """Score every other published mod of the same game against ``mod``.

    :param mod: The mod to find similar mods for
    :param how_many: Maximum number of results
    :return: Up to ``how_many`` unsaved ModSimilarity objects with a
             similarity above zero, ordered from most to least similar
    """
    candidates = Mod.query.filter(Mod.published,
                                  Mod.game_id == mod.game_id,
                                  Mod.id != mod.id)
    scored = (ModSimilarity(mod, other_mod) for other_mod in candidates)
    # Zero similarity means nothing at all in common, so skip those
    overlapping = (mod_sim for mod_sim in scored if mod_sim.similarity > 0)
    # nlargest already returns its result sorted from largest to smallest,
    # so no further sorting is needed
    return nlargest(how_many, overlapping,
                    key=lambda mod_sim: mod_sim.similarity)


def update_similar_mods(mod: Mod, how_many: int = 6) -> None:
    """Bring the stored similarity rows of ``mod`` in line with its current state.

    An unpublished mod keeps no similarity rows. For a published mod, rows
    that are no longer among the most similar are deleted, rows that still
    are get their score refreshed (together with the mirrored row of the
    other mod, if one exists), and newly similar mods get a new row.

    :param mod: The mod whose similarity rows should be updated
    :param how_many: Maximum number of similar mods to keep
    """
    if not mod.published:
        # Delete the rows directly rather than emptying mod.similarities:
        # de-associating a row would make the ORM try to blank out
        # main_mod_id, which is non-nullable and part of the primary key
        ModSimilarity.query\
            .filter(ModSimilarity.main_mod_id == mod.id)\
            .delete()
        return

    most_similar = find_most_similar(mod, how_many)
    keep_ids = {mod_sim.other_mod_id for mod_sim in most_similar}
    # (main_mod_id, other_mod_id) is the primary key, so other_mod_id is
    # unique among the rows of one main mod
    current = {mod_sim.other_mod_id: mod_sim for mod_sim in mod.similarities}

    # Remove rows for mods that are no longer among the most similar
    for other_mod_id, mod_sim in current.items():
        if other_mod_id not in keep_ids:
            ModSimilarity.query\
                .filter(ModSimilarity.main_mod_id == mod_sim.main_mod_id,
                        ModSimilarity.other_mod_id == mod_sim.other_mod_id)\
                .delete()

    for mod_sim in most_similar:
        existing = current.get(mod_sim.other_mod_id)
        if existing is None:
            # Add new rows for newly similar mods
            mod.similarities.append(mod_sim)
            continue
        # Update existing rows for mods that are still similar
        existing.similarity = mod_sim.similarity
        # Update the row with swapped IDs, if any
        for other_sim in existing.other_mod.similarities:
            if other_sim.other_mod_id == mod_sim.main_mod_id:
                other_sim.similarity = mod_sim.similarity
55 changes: 55 additions & 0 deletions KerbalStuff/str_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
from typing import Set, Iterable


# Split words on one or more non-alphanumerics
# (only ASCII letters and digits count as part of a word)
WORD_SPLIT = re.compile(r'[^a-zA-Z0-9]+')

# Split up pieces of StudlyCapsStrings
# Zero-width lookahead: matches the position just before each ASCII capital
# letter without consuming it, e.g. "deltaV" is cut between "delta" and "V".
# NOTE: re.split only splits on empty matches since Python 3.7.
STUDLY_SPLIT = re.compile(r'(?=[A-Z])')

# English words that do not convey meaning about the context
# We care about things like "rocket" and "propellant" and "deltaV"
# All entries are lower case because meaningful_words lower-cases
# its words before subtracting this set.
MEANINGLESS = {
    # Articles and demonstratives
    'the', 'an', 'this', 'these', 'that', 'those',
    # Conjunctions and connectives
    'and', 'or', 'but', 'however',
    'as', 'such', 'than', 'there',
    # Pronouns
    'me', 'my', 'we', 'us', 'our',
    'you', 'your', 'he', 'him', 'she', 'her', 'it',
    'they', 'them',
    # Prepositions
    'to', 'from', 'in', 'on', 'for', 'with', 'of', 'into', 'at', 'by',
    'what', 'because', 'then',
    # Common verbs and auxiliaries
    'is', 'be', 'been', 'are', 'get', 'getting', 'has', 'have', 'come',
    'do', 'does',
    'will', 'make', 'work', 'also', 'more',
    'should', 'so', 'some', 'like', 'likely', 'can', 'seems',
    'really', 'very', 'each', 'yup', 'which',
    # Leftovers of contractions such as "we've" and "you're"
    've', 're',
    # Other filler words
    'accommodate', 'manner', 'therefore', 'ever', 'probably', 'almost',
    'something',
    # Generic modding terms
    'mod', 'pack', 'contains', 'ksp',
    # Fragments of URLs
    'http', 'https', 'www', 'youtube', 'imgur', 'com',
    'github', 'githubusercontent',
    'forum', 'kerbalspaceprogram', 'index', 'thread', 'topic', 'php',
    # Game name and common mod name suffixes
    'kerbal', 'space', 'continued', 'revived', 'updated', 'redux',
    'inc', 'plus',
}


def split_with_acronyms(s: str) -> Iterable[str]:
    """Yield the words of ``s``, followed by the StudlyCaps pieces of each word.

    Every word is yielded whole first, so a name like "deltaV" stays
    available as-is in addition to its pieces.
    """
    pieces = WORD_SPLIT.split(s)
    for piece in pieces:
        yield piece
    for piece in pieces:
        for fragment in STUDLY_SPLIT.split(piece):
            yield fragment


def meaningful_words(s: str) -> Set[str]:
    """Return the lower-cased words of ``s`` that are worth comparing.

    Words of a single character, purely numeric words and the entries
    of MEANINGLESS are left out.
    """
    candidates = {word.lower()
                  for word in split_with_acronyms(s)
                  if len(word) > 1 and not word.isnumeric()}
    return candidates - MEANINGLESS


def words_similarity(words1: Set[str], words2: Set[str]) -> float:
    """Return the share of words that occur in both sets.

    This is the size of the intersection divided by the size of the union,
    or 0 if both sets are empty.
    """
    all_words = words1 | words2
    if not all_words:
        return 0
    shared = words1.intersection(words2)
    return len(shared) / len(all_words)
47 changes: 47 additions & 0 deletions alembic/versions/2021_12_15_23_06_02-bbcce95b6e79.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Create mod_similarity table
Revision ID: bbcce95b6e79
Revises: 3fb8a6e2e0a5
Create Date: 2021-12-16 05:06:06.312797
"""

# revision identifiers, used by Alembic.
revision = 'bbcce95b6e79'
down_revision = '3fb8a6e2e0a5'

from alembic import op
from alembic.op import create_table, drop_table
import sqlalchemy as sa

from KerbalStuff.celery import update_mod_similarities

Base = sa.ext.declarative.declarative_base()

class Mod(Base):  # type: ignore
    # Minimal stand-in for the 'mod' table, declaring only the columns
    # this migration reads: the ID and the published flag
    __tablename__ = 'mod'
    id = sa.Column(sa.Integer, primary_key=True)
    published = sa.Column(sa.Boolean, default=False)


def upgrade() -> None:
    """Create the mod_similarity table and its index, then queue its initial population."""
    create_table(
        'mod_similarity',
        sa.Column('main_mod_id', sa.Integer(), nullable=False),
        sa.Column('other_mod_id', sa.Integer(), nullable=False),
        sa.Column('similarity', sa.Float(precision=5), nullable=False),
        sa.ForeignKeyConstraint(['main_mod_id'], ['mod.id'], ondelete='CASCADE'),
        sa.ForeignKeyConstraint(['other_mod_id'], ['mod.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'),
    )
    op.create_index('ix_mod_similarity_main_mod_similarity',
                    'mod_similarity',
                    ['main_mod_id', sa.text('similarity DESC')],
                    unique=False)

    # Ask Celery to build the similarity rows for existing published mods
    session = sa.orm.Session(bind=op.get_bind())
    published_rows = session.query(Mod)\
        .filter(Mod.published)\
        .with_entities(Mod.id)
    # Each result row is a one-element tuple holding the mod ID
    published_ids = [mod_id for mod_id, in published_rows]
    update_mod_similarities.delay(published_ids)

def downgrade() -> None:
    """Undo upgrade(): drop the index, then the mod_similarity table itself."""
    op.drop_index('ix_mod_similarity_main_mod_similarity',
                  table_name='mod_similarity')
    op.drop_table('mod_similarity')
15 changes: 15 additions & 0 deletions templates/mod.html
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,21 @@ <h3>Export Raw Stats</h3>
</div>
</div>
</div>
{% if mod.similar_mods %}
<div class="well">
<div class="container main-cat">
<h3>Similar-ish Mods</h3>
</div>
</div>
<div class="container">
<div class="row">
{% set similar_mods = mod.similar_mods[:6] -%}
{%- for mod in similar_mods -%}
{%- include "mod-box.html" -%}
{%- endfor %}
</div>
</div>
{% endif %}
{% if editable %}
<div class="modal fade" id="confirm-delete" tabindex="-1" role="dialog" aria-labelledby="confirm-delete" aria-hidden="true">
<div class="modal-dialog">
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/fake_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
config[env]['protocol'] = 'https'
config[env]['domain'] = 'tests.spacedock.info'
config[env]['ksp-game-id'] = '1'
config[env]['profile-dir'] = ''

dummy = ''
Loading

0 comments on commit f4072a3

Please sign in to comment.