Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show similar mods on mod page #438

Open
wants to merge 1 commit into
base: alpha
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions KerbalStuff/blueprints/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
ModList
from ..search import search_mods, search_users, typeahead_mods, get_mod_score
from ..thumbnail import thumb_path_from_background_path
from ..celery import update_mod_similarities

api = Blueprint('api', __name__)

Expand Down Expand Up @@ -545,6 +546,8 @@ def accept_grant_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
mod = _get_mod(mod_id)
author = _get_mod_pending_author(mod)
author.accepted = True
db.commit()
update_mod_similarities.delay([mod.id])
notify_ckan(mod, 'co-author-added')
return {'error': False}, 200

Expand Down Expand Up @@ -581,6 +584,8 @@ def revoke_mod(mod_id: int) -> Tuple[Dict[str, Any], int]:
author = [a for a in mod.shared_authors if a.user == new_user][0]
mod.shared_authors = [a for a in mod.shared_authors if a.user != current_user]
db.delete(author)
db.commit()
update_mod_similarities.delay([mod.id])
notify_ckan(mod, 'co-author-removed')
return {'error': False}, 200

Expand Down Expand Up @@ -698,6 +703,7 @@ def create_mod() -> Tuple[Dict[str, Any], int]:
db.commit()
mod.score = get_mod_score(mod)
db.commit()
update_mod_similarities.delay([mod.id])
set_game_info(game)
send_to_ckan(mod)
return {
Expand Down
5 changes: 5 additions & 0 deletions KerbalStuff/blueprints/mods.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Featured, Media, GameVersion, Game, Following
from ..search import get_mod_score
from ..thumbnail import thumb_path_from_background_path
from ..celery import update_mod_similarities

mods = Blueprint('mods', __name__)

Expand Down Expand Up @@ -295,6 +296,8 @@ def edit_mod(mod_id: int, mod_name: str) -> Union[str, werkzeug.wrappers.Respons
if not mod.published:
newly_published = True
mod.published = True
db.commit()
update_mod_similarities.delay([mod.id])
if ckan is None:
ckan = False
else:
Expand Down Expand Up @@ -499,6 +502,8 @@ def publish(mod_id: int, mod_name: str) -> werkzeug.wrappers.Response:
mod.published = True
mod.updated = datetime.now()
mod.score = get_mod_score(mod)
db.commit()
update_mod_similarities.delay([mod.id])
send_to_ckan(mod)
return redirect(url_for("mods.mod", mod_id=mod.id, mod_name=mod.name))

Expand Down
9 changes: 9 additions & 0 deletions KerbalStuff/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .objects import Mod
from .search import get_mod_score
from .ckan import import_ksp_versions_from_ckan
from .similarity import update_similar_mods

app = Celery("tasks", broker=_cfg("redis-connection"))

Expand Down Expand Up @@ -118,6 +119,14 @@ def ckan_version_import() -> None:
if game_id > 0:
import_ksp_versions_from_ckan(game_id)


@app.task
@with_session
def update_mod_similarities(mod_ids: List[int]) -> None:
    """Recompute the stored "similar mods" rows for each of the given mods.

    IDs that no longer resolve to a mod are skipped: the task runs some time
    after it was queued, so a mod may have been deleted in the meantime, in
    which case ``Mod.query.get`` returns ``None``.

    :param mod_ids: primary keys of the mods whose similarities to rebuild
    """
    for mod_id in mod_ids:
        mod = Mod.query.get(mod_id)
        if mod is None:
            # Nothing to update for a mod that no longer exists
            continue
        update_similar_mods(mod)


# to debug this:
# * add PTRACE capability to celery container via docker-compose.yaml
# celery:
Expand Down
48 changes: 47 additions & 1 deletion KerbalStuff/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os.path
from datetime import datetime
import re
from typing import Optional
from typing import Optional, Dict, Set

import bcrypt
from flask import url_for
Expand All @@ -14,6 +14,7 @@

from . import thumbnail
from .database import Base
from .str_similarity import meaningful_words, words_similarity


class Following(Base): # type: ignore
Expand Down Expand Up @@ -235,6 +236,7 @@ class Mod(Base): # type: ignore
followings = relationship('Following', back_populates='mod')
# List of users that follow this mod
followers = association_proxy('followings', 'user')
similar_mods = association_proxy('similarities', 'other_mod')

def background_thumb(self) -> Optional[str]:
return thumbnail.get_or_create(self)
Expand All @@ -251,6 +253,20 @@ def background_url(self, protocol: Optional[str], cdn_domain: Optional[str]) ->
else:
return url_for('mods.mod_background', mod_id=self.id, mod_name=self.name)

def get_author_names(self) -> Set[str]:
    """Return the usernames of the owner and all shared authors.

    The set is computed on first use and then cached on the instance, so
    later calls return the same set without re-reading the relationships.
    """
    if hasattr(self, '_author_names'):
        return self._author_names
    names = {self.user.username}
    # NOTE(review): assumes every entry of shared_authors exposes
    # `username` directly - confirm against the shared author model.
    names.update(author.username for author in self.shared_authors)
    self._author_names: Set[str] = names
    return self._author_names

def get_words(self, prop_name: str) -> Set[str]:
    """Return the meaningful words of one text property of this mod.

    Only parse the strings once to speed up mass-compares: the result is
    cached per property name on the instance. A property that is missing
    or unset (``None``) yields the words of the empty string instead of
    handing a non-string to ``meaningful_words``.

    :param prop_name: name of the attribute whose text should be parsed
    :return: the cached word set for that attribute
    """
    if not hasattr(self, '_words'):
        self._words: Dict[str, Set[str]] = {}
    if prop_name not in self._words:
        # getattr's default only covers a missing attribute; an attribute
        # that exists but holds None has to be normalized as well.
        text = getattr(self, prop_name, None) or ''
        self._words[prop_name] = meaningful_words(text)
    return self._words[prop_name]

def __repr__(self) -> str:
return '<Mod %r %r>' % (self.id, self.name)

Expand Down Expand Up @@ -300,6 +316,36 @@ def __repr__(self) -> str:
return '<SharedAuthor %r>' % self.user_id


class ModSimilarity(Base): # type: ignore
    """Similarity score of one mod (``main_mod``) towards another (``other_mod``).

    Rows are keyed by the pair of mod IDs. The score is the sum of the word
    overlap of each text property in ``WORD_PROPS`` plus the overlap of the
    author names weighted by 0.1.
    """
    __tablename__ = 'mod_similarity'
    __table_args__ = (PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'), )
    similarity = Column(Float(precision=5), nullable=False)
    main_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    # Exposes Mod.similarities, ordered from most to least similar
    main_mod = relationship('Mod',
                            foreign_keys=main_mod_id,
                            backref=backref('similarities',
                                            passive_deletes=True,
                                            order_by=similarity.desc()))
    other_mod_id = Column(Integer, ForeignKey('mod.id', ondelete='CASCADE'), nullable=False)
    other_mod = relationship('Mod', foreign_keys=other_mod_id)

    Index('ix_mod_similarity_main_mod_similarity', main_mod_id, similarity.desc())

    # Text properties of a mod that contribute to the score
    WORD_PROPS = ['name', 'short_description', 'description']

    def __init__(self, main_mod: Mod, other_mod: Mod) -> None:
        """Compute the score of ``other_mod`` relative to ``main_mod``.

        Only the ID columns are assigned here, not the relationship
        attributes.
        """
        self.main_mod_id = main_mod.id
        self.other_mod_id = other_mod.id
        author_score = 0.1 * words_similarity(main_mod.get_author_names(),
                                              other_mod.get_author_names())
        text_score = sum(words_similarity(main_mod.get_words(prop_name),
                                          other_mod.get_words(prop_name))
                         for prop_name in self.WORD_PROPS)
        self.similarity = author_score + text_score

    def __repr__(self) -> str:
        return f'<Mod Similarity {self.main_mod_id} {self.other_mod_id}>'


class DownloadEvent(Base): # type: ignore
__tablename__ = 'downloadevent'
id = Column(Integer, primary_key=True)
Expand Down
47 changes: 47 additions & 0 deletions KerbalStuff/similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from heapq import nlargest
from typing import List

from .objects import Mod, ModSimilarity


def find_most_similar(mod: Mod, how_many: int = 6) -> List[ModSimilarity]:
    """Score the other published mods of the same game and return the best matches.

    :param mod: the mod to find similar mods for
    :param how_many: maximum number of results to return
    :return: up to ``how_many`` newly constructed ModSimilarity objects with
             a positive score, ordered from most to least similar
    """
    def score(mod_sim: ModSimilarity) -> float:
        return mod_sim.similarity

    candidates = Mod.query.filter(Mod.published,
                                  Mod.game_id == mod.game_id,
                                  Mod.id != mod.id)
    scored = (ModSimilarity(mod, other_mod) for other_mod in candidates)
    # Zero similarity means nothing at all in common, so skip those
    related = (mod_sim for mod_sim in scored if mod_sim.similarity > 0)
    # nlargest already returns its result ordered from largest to smallest,
    # so sorting it again is unnecessary.
    return nlargest(how_many, related, key=score)


def update_similar_mods(mod: Mod, how_many: int = 6) -> None:
    """Bring the stored similarity rows of ``mod`` in line with its best matches.

    An unpublished mod keeps no rows. For a published mod, rows pointing at
    mods that are no longer among the ``how_many`` most similar are deleted,
    rows that are still relevant get their score refreshed (together with the
    row with swapped IDs, if the other mod has one), and rows for newly
    similar mods are added.

    :param mod: the mod whose ``similarities`` should be refreshed
    :param how_many: maximum number of similar mods to keep
    """
    if not mod.published:
        # Delete the rows instead of assigning an empty list: emptying the
        # collection would make the ORM try to null out main_mod_id, which is
        # part of the primary key and not nullable.
        ModSimilarity.query\
            .filter(ModSimilarity.main_mod_id == mod.id)\
            .delete()
        return

    most_similar = find_most_similar(mod, how_many)
    wanted_ids = {mod_sim.other_mod_id for mod_sim in most_similar}

    # Existing rows that stay, keyed by the ID of the other mod
    kept = {}
    for existing in mod.similarities:
        if existing.other_mod_id in wanted_ids:
            kept.setdefault(existing.other_mod_id, existing)
        else:
            # Remove rows for mods that are no longer among the most similar
            ModSimilarity.query\
                .filter(ModSimilarity.main_mod_id == existing.main_mod_id,
                        ModSimilarity.other_mod_id == existing.other_mod_id)\
                .delete()

    for mod_sim in most_similar:
        existing = kept.get(mod_sim.other_mod_id)
        if existing is None:
            # Add new rows for newly similar mods
            mod.similarities.append(mod_sim)
            continue
        # Update existing rows for mods that are still similar
        existing.similarity = mod_sim.similarity
        # Update the row with swapped IDs, if any
        for reverse_sim in existing.other_mod.similarities:
            if reverse_sim.other_mod_id == mod_sim.main_mod_id:
                reverse_sim.similarity = mod_sim.similarity
55 changes: 55 additions & 0 deletions KerbalStuff/str_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import re
from typing import Set, Iterable


# Split words on one or more non-alphanumerics.
# Only ASCII letters and digits count as word characters here, so any other
# character (punctuation, whitespace, underscore, non-ASCII) is a separator.
WORD_SPLIT = re.compile(r'[^a-zA-Z0-9]+')

# Split up pieces of StudlyCapsStrings.
# The zero-width lookahead matches right before every ASCII uppercase letter,
# so a word is cut in front of each capital.
STUDLY_SPLIT = re.compile(r'(?=[A-Z])')

# English words that do not convey meaning about the context.
# We care about things like "rocket" and "propellant" and "deltaV".
# Besides common filler words this also lists URL fragments and terms like
# 'mod', 'ksp' or 'kerbal'. Entries are lowercase because meaningful_words()
# lowercases every word before subtracting this set.
MEANINGLESS = {
'the', 'an', 'this', 'these', 'that', 'those',
'and', 'or', 'but', 'however',
'as', 'such', 'than', 'there',
'me', 'my', 'we', 'us', 'our',
'you', 'your', 'he', 'him', 'she', 'her', 'it',
'they', 'them',
'to', 'from', 'in', 'on', 'for', 'with', 'of', 'into', 'at', 'by',
'what', 'because', 'then',
'is', 'be', 'been', 'are', 'get', 'getting', 'has', 'have', 'come',
'do', 'does',
'will', 'make', 'work', 'also', 'more',
'should', 'so', 'some', 'like', 'likely', 'can', 'seems',
'really', 'very', 'each', 'yup', 'which',
've', 're',
'accommodate', 'manner', 'therefore', 'ever', 'probably', 'almost',
'something',
'mod', 'pack', 'contains', 'ksp',
'http', 'https', 'www', 'youtube', 'imgur', 'com',
'github', 'githubusercontent',
'forum', 'kerbalspaceprogram', 'index', 'thread', 'topic', 'php',
'kerbal', 'space', 'continued', 'revived', 'updated', 'redux',
'inc', 'plus',
}


def split_with_acronyms(s: str) -> Iterable[str]:
    """Yield the words of ``s``, followed by the StudlyCaps pieces of each word.

    Every word is first yielded unchanged, then all words are yielded again
    cut in front of each capital letter. The output can therefore contain
    duplicates and empty strings.
    """
    tokens = WORD_SPLIT.split(s)
    for token in tokens:
        yield token
    for token in tokens:
        for piece in STUDLY_SPLIT.split(token):
            yield piece


def meaningful_words(s: str) -> Set[str]:
    """Return the lowercased words of ``s`` that carry meaning.

    Words of a single character, purely numeric words and everything listed
    in MEANINGLESS are dropped.
    """
    lowered = {word.lower()
               for word in split_with_acronyms(s)
               if len(word) > 1 and not word.isnumeric()}
    return lowered - MEANINGLESS


def words_similarity(words1: Set[str], words2: Set[str]) -> float:
    """Return the share of words the two sets have in common.

    This is the size of the intersection divided by the size of the union
    (the Jaccard index), a value between 0.0 and 1.0.

    :param words1: first set of words
    :param words2: second set of words
    :return: 0.0 if both sets are empty, otherwise the overlap ratio
    """
    all_words = words1 | words2
    if not all_words:
        # Avoid dividing by zero, and keep the result a float as annotated
        return 0.0
    return len(words1 & words2) / len(all_words)
47 changes: 47 additions & 0 deletions alembic/versions/2021_12_15_23_06_02-bbcce95b6e79.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Create mod_similarity table

Revision ID: bbcce95b6e79
Revises: 3fb8a6e2e0a5
Create Date: 2021-12-16 05:06:06.312797

"""

# revision identifiers, used by Alembic.
revision = 'bbcce95b6e79'
down_revision = '3fb8a6e2e0a5'

from alembic import op
from alembic.op import create_table, drop_table
import sqlalchemy as sa

from KerbalStuff.celery import update_mod_similarities

Base = sa.ext.declarative.declarative_base()

class Mod(Base): # type: ignore
    """Minimal mapping of the ``mod`` table for use inside this migration.

    Declares only the two columns that upgrade() reads: the primary key
    and the published flag.
    """
    __tablename__ = 'mod'
    id = sa.Column(sa.Integer, primary_key=True)
    published = sa.Column(sa.Boolean, default=False)


def upgrade() -> None:
    """Create the mod_similarity table and its index, then queue the initial fill."""
    op.create_table('mod_similarity',
                    sa.Column('main_mod_id', sa.Integer(), nullable=False),
                    sa.Column('other_mod_id', sa.Integer(), nullable=False),
                    sa.Column('similarity', sa.Float(precision=5), nullable=False),
                    sa.ForeignKeyConstraint(['main_mod_id'], ['mod.id'], ondelete='CASCADE'),
                    sa.ForeignKeyConstraint(['other_mod_id'], ['mod.id'], ondelete='CASCADE'),
                    sa.PrimaryKeyConstraint('main_mod_id', 'other_mod_id', name='pk_mods'))
    op.create_index('ix_mod_similarity_main_mod_similarity',
                    'mod_similarity', ['main_mod_id', sa.text('similarity DESC')], unique=False)

    # Ask Celery to build the similarity rows for existing published mods
    # NOTE(review): the task is queued while the migration is still running -
    # confirm a worker cannot pick it up before the new table is committed.
    session = sa.orm.Session(bind=op.get_bind())
    published_rows = session.query(Mod).filter(Mod.published).with_entities(Mod.id)
    published_ids = [mod_id for (mod_id,) in published_rows]
    update_mod_similarities.delay(published_ids)

def downgrade() -> None:
    """Remove the similarity index first and then the mod_similarity table."""
    table = 'mod_similarity'
    op.drop_index('ix_mod_similarity_main_mod_similarity', table_name=table)
    op.drop_table(table)
15 changes: 15 additions & 0 deletions templates/mod.html
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,21 @@ <h3>Export Raw Stats</h3>
</div>
</div>
</div>
{# "Similar-ish Mods" section: rendered only when mod.similar_mods is not empty.
   Shows at most the first six entries, including mod-box.html once per entry.
   The loop variable is named `mod`, presumably because mod-box.html reads a
   variable of that name - confirm against that template. #}
{% if mod.similar_mods %}
<div class="well">
<div class="container main-cat">
<h3>Similar-ish Mods</h3>
</div>
</div>
<div class="container">
<div class="row">
{% set similar_mods = mod.similar_mods[:6] -%}
{%- for mod in similar_mods -%}
{%- include "mod-box.html" -%}
{%- endfor %}
</div>
</div>
{% endif %}
{% if editable %}
<div class="modal fade" id="confirm-delete" tabindex="-1" role="dialog" aria-labelledby="confirm-delete" aria-hidden="true">
<div class="modal-dialog">
Expand Down
Loading