# surprise_recs.py
# From a repository forked from pgvector/pgvector-python.
from pgvector.sqlalchemy import Vector
from sqlalchemy import create_engine, insert, select, text, Integer
from sqlalchemy.orm import declarative_base, mapped_column, Session
from surprise import Dataset, SVD
# Engine for the local example database, using the psycopg driver.
engine = create_engine('postgresql+psycopg://localhost/pgvector_example')

# Make sure the vector extension is installed before any Vector column is
# created. engine.begin() opens a transaction and commits it when the block
# exits without an error.
with engine.begin() as conn:
    conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))

# Declarative base class that the ORM models in this script inherit from.
Base = declarative_base()
class User(Base):
    """Row of the ``user`` table: an integer id plus a factor vector."""

    __tablename__ = 'user'

    # Primary key.
    id = mapped_column(Integer, primary_key=True)
    # pgvector column with 20 dimensions.
    factors = mapped_column(Vector(20))
class Item(Base):
    """Row of the ``item`` table: an integer id plus a factor vector."""

    __tablename__ = 'item'

    # Primary key.
    id = mapped_column(Integer, primary_key=True)
    # pgvector column with 20 dimensions.
    factors = mapped_column(Vector(20))
# Start from a clean schema on every run: drop the tables registered on Base
# (if present), then create them again.
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)
# Fit an unbiased SVD model with 20 latent factors on the full built-in
# 'ml-100k' ratings dataset (no train/test split).
data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
algo = SVD(biased=False, n_factors=20)
algo.fit(trainset)

# Build one insert row per user and per item. The trainset iterates over its
# inner ids; each is mapped back to the dataset's raw id for the primary key
# and used to index the learned factor matrices (pu for users, qi for items).
# NOTE(review): raw ids loaded from a file are presumably strings, so the
# Integer id columns would rely on the driver/database to coerce them - confirm.
users = [
    {'id': trainset.to_raw_uid(inner_uid), 'factors': algo.pu[inner_uid]}
    for inner_uid in trainset.all_users()
]
items = [
    {'id': trainset.to_raw_iid(inner_iid), 'factors': algo.qi[inner_iid]}
    for inner_iid in trainset.all_items()
]
# Run all database work inside a context-managed session so the underlying
# connection is released even if one of the queries raises. Nothing is
# committed: the inserted rows are visible only inside this session's
# transaction, which is rolled back when the session closes.
with Session(engine) as session:
    # Bulk-insert the user and item factor rows (one dict per row).
    session.execute(insert(User), users)
    session.execute(insert(Item), items)

    # User-based recommendations: the five items ranked first when ordering
    # by pgvector's max_inner_product against the factors of user 1.
    user = session.get(User, 1)
    items = session.scalars(
        select(Item)
        .order_by(Item.factors.max_inner_product(user.factors))
        .limit(5)
    )
    print('user-based recs:', [rec.id for rec in items])

    # Item-based recommendations: the five items closest to item 50 by
    # cosine distance, excluding item 50 itself.
    item = session.get(Item, 50)
    items = session.scalars(
        select(Item)
        .filter(Item.id != item.id)
        .order_by(Item.factors.cosine_distance(item.factors))
        .limit(5)
    )
    print('item-based recs:', [rec.id for rec in items])