Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Club Vector Embeddings #180

Merged
merged 15 commits into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ require (
require (
github.com/awnumar/memcall v0.2.0 // indirect
github.com/awnumar/memguard v0.22.4 // indirect
github.com/h2non/gock v1.2.0 // indirect
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
)

require (
Expand Down
5 changes: 5 additions & 0 deletions backend/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/h2non/gock v1.2.0 h1:K6ol8rfrRkUOefooBC8elXoaNGYkpp7y2qcxGG6BzUE=
github.com/h2non/gock v1.2.0/go.mod h1:tNhoxHYW2W42cYkYb1WqzdbYIieALC99kpYr7rH/BQk=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw=
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/huandu/go-assert v1.1.6 h1:oaAfYxq9KNDi9qswn/6aE0EydfxSa+tWZC1KabNitYs=
Expand Down Expand Up @@ -98,6 +102,7 @@ github.com/mcnijman/go-emailaddress v1.1.1 h1:AGhgVDG3tCDaL0/Vc6erlPQjDuDN3dAT7r
github.com/mcnijman/go-emailaddress v1.1.1/go.mod h1:5whZrhS8Xp5LxO8zOD35BC+b76kROtsh+dPomeRt/II=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4=
github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
Expand Down
51 changes: 51 additions & 0 deletions backend/src/embeddings/openai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package embeddings

import (
"bytes"
"encoding/json"
"fmt"
"github.com/GenerateNU/sac/backend/src/errors"

Check failure on line 7 in backend/src/embeddings/openai.go

View workflow job for this annotation

GitHub Actions / Lint

File is not `goimports`-ed (goimports)
"net/http"
"os"
)

func CreateEmbeddingVector(infoForEmbedding string) ([]float32, *errors.Error) {
apiKey := os.Getenv("SAC_OPENAI_API_KEY")

InfoPayload := map[string]interface{}{
"input": infoForEmbedding,
"model": "text-embedding-ada-002",
}

InfoBody, _ := json.Marshal(InfoPayload)
requestInfoBody := bytes.NewBuffer(InfoBody)

req, err := http.NewRequest("POST", fmt.Sprintf("https://api.openai.com/v1/embeddings"), requestInfoBody)

Check failure on line 23 in backend/src/embeddings/openai.go

View workflow job for this annotation

GitHub Actions / Lint

S1039: unnecessary use of fmt.Sprintf (gosimple)
Fixed Show fixed Hide fixed
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
req.Header.Set("content-type", "application/json")

resp, err := http.DefaultClient.Do(req)
defer resp.Body.Close()

Check failure on line 28 in backend/src/embeddings/openai.go

View workflow job for this annotation

GitHub Actions / Lint

httpresponse: using resp before checking for errors (govet)
Fixed Show fixed Hide fixed

if err != nil {
return nil, &errors.FailedToVectorizeClub
}

type ResponseBody struct {
Data []struct {
Embedding []float32 `json:"embedding"`
} `json:"data"`
}

embeddingResultBody := ResponseBody{}
err = json.NewDecoder(resp.Body).Decode(&embeddingResultBody)
if err != nil {
return nil, &errors.FailedToVectorizeClub
}

if len(embeddingResultBody.Data) < 1 {
return nil, &errors.FailedToVectorizeClub
}

return embeddingResultBody.Data[0].Embedding, nil
}
145 changes: 145 additions & 0 deletions backend/src/embeddings/pinecone.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package embeddings

import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"os"

"github.com/GenerateNU/sac/backend/src/errors"
"github.com/GenerateNU/sac/backend/src/types"
)

// Pinecone connection settings, read from the environment when this package's
// variables are initialized.
// NOTE(review): values placed in the environment after initialization (e.g. by
// a config loader run from main) would not be seen here — confirm load order.
var (
// indexHost is the base URL prefixed to the Pinecone request paths.
indexHost = os.Getenv("SAC_PINECONE_INDEX_HOST")
// apiKey is sent as the "Api-Key" header on Pinecone requests.
apiKey = os.Getenv("SAC_PINECONE_API_KEY")
)

// UpsertToPinecone embeds item and upserts the resulting vector to the Pinecone
// index at indexHost, under the namespace reported by item.Namespace().
//
// It returns nil on success. It returns errors.FailedToUpsertPinecone if the
// item cannot be embedded, the payload cannot be encoded, the request cannot be
// built or sent, or Pinecone responds with a status other than 200.
func UpsertToPinecone(item types.Embeddable) *errors.Error {
	embedding, embedErr := item.Embed()
	if embedErr != nil || embedding == nil {
		return &errors.FailedToUpsertPinecone
	}

	upsertBody, err := json.Marshal(map[string]interface{}{
		"vectors":   []types.Embedding{*embedding},
		"namespace": item.Namespace(),
	})
	if err != nil {
		return &errors.FailedToUpsertPinecone
	}

	req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s/vectors/upsert", indexHost), bytes.NewBuffer(upsertBody))
	if err != nil {
		return &errors.FailedToUpsertPinecone
	}

	req.Header.Set("Api-Key", apiKey)
	req.Header.Set("accept", "application/json")
	req.Header.Set("content-type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return &errors.FailedToUpsertPinecone
	}
	// Close the body on every path so the underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return &errors.FailedToUpsertPinecone
	}

	return nil
}

// DeleteFromPinecone removes the vector identified by item.EmbeddingId() from
// the namespace item.Namespace() of the Pinecone index at indexHost.
//
// It returns nil on success. It returns errors.FailedToDeletePinecone if the
// payload cannot be encoded, the request cannot be built or sent, or Pinecone
// responds with a status other than 200.
func DeleteFromPinecone(item types.Embeddable) *errors.Error {
	deleteBody, err := json.Marshal(map[string]interface{}{
		// Delete only the listed id, never the whole namespace.
		"deleteAll": false,
		"ids":       []string{item.EmbeddingId()},
		"namespace": item.Namespace(),
	})
	if err != nil {
		return &errors.FailedToDeletePinecone
	}

	req, err := http.NewRequest(http.MethodPost, fmt.Sprintf("%s/vectors/delete", indexHost), bytes.NewBuffer(deleteBody))
	if err != nil {
		return &errors.FailedToDeletePinecone
	}

	req.Header.Set("Api-Key", apiKey)
	req.Header.Set("accept", "application/json")
	req.Header.Set("content-type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return &errors.FailedToDeletePinecone
	}
	// Close the body on every path so the underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return &errors.FailedToDeletePinecone
	}

	return nil
}

func SearchPinecone(item types.Embeddable, topKResults int) ([]string, *errors.Error) {
embeddingResult, _err := item.Embed()
if _err != nil {
return []string{}, _err
}

searchBody, _ := json.Marshal(map[string]interface{}{
"includeValues": false,
"includeMetadata": false,
"topK": topKResults,
"vector": embeddingResult.Values,
"namespace": item.Namespace(),
})

requestBody := bytes.NewBuffer(searchBody)

req, err := http.NewRequest("POST", fmt.Sprintf("%s/query", indexHost), requestBody)
if err != nil {
return []string{}, &errors.FailedToSearchPinecone
}

req.Header.Set("Api-Key", apiKey)
req.Header.Set("accept", "application/json")
req.Header.Set("content-type", "application/json")

resp, err := http.DefaultClient.Do(req)
defer resp.Body.Close()

Check failure on line 114 in backend/src/embeddings/pinecone.go

View workflow job for this annotation

GitHub Actions / Lint

httpresponse: using resp before checking for errors (govet)
Fixed Show fixed Hide fixed

if err != nil {
return []string{}, &errors.FailedToSearchPinecone
}

if resp.StatusCode != 200 {
return []string{}, &errors.FailedToSearchPinecone
}

type SearchPineconeResults struct {
Matches []struct {
Id string `json:"id"`
Score float32 `json:"score"`
Values []float32 `json:"values"`
} `json:"matches"`
Namespace string `json:"namespace"`
}

results := SearchPineconeResults{}
err = json.NewDecoder(resp.Body).Decode(&results)
if err != nil {
return []string{}, &errors.FailedToSearchPinecone
}

var resultsToReturn []string
for i := 0; i < len(results.Matches); i += 1 {
resultsToReturn = append(resultsToReturn, results.Matches[i].Id)
}

return resultsToReturn, nil
}
4 changes: 4 additions & 0 deletions backend/src/errors/club.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ var (
StatusCode: fiber.StatusInternalServerError,
Message: "failed to get admin ids",
}
// FailedToVectorizeClub reports, with a 500 status, that a club could not be
// turned into an embedding vector.
FailedToVectorizeClub = Error{
StatusCode: fiber.StatusInternalServerError,
Message: "failed to vectorize club",
}
)
18 changes: 18 additions & 0 deletions backend/src/errors/pinecone.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package errors

import "github.com/gofiber/fiber/v2"

// Errors for failed Pinecone operations. Each carries a 500 status.
var (
// FailedToUpsertPinecone reports that an upsert to Pinecone did not succeed.
FailedToUpsertPinecone = Error{
StatusCode: fiber.StatusInternalServerError,
Message: "failed to upsert to pinecone",
}
// FailedToDeletePinecone reports that a delete from Pinecone did not succeed.
FailedToDeletePinecone = Error{
StatusCode: fiber.StatusInternalServerError,
Message: "failed to delete from pinecone",
}
// FailedToSearchPinecone reports that a search on Pinecone did not succeed.
FailedToSearchPinecone = Error{
StatusCode: fiber.StatusInternalServerError,
Message: "failed to search on pinecone",
}
)
2 changes: 1 addition & 1 deletion backend/src/middleware/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (m *MiddlewareService) Authorize(requiredPermissions ...types.Permission) f
return errors.FailedToParseAccessToken.FiberError(c)
}

userPermissions := types.GetPermissions(models.UserRole(*role))
userPermissions := models.GetPermissions(models.UserRole(*role))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GetPermissions/permissions in general is not a model, possibly not a type--can be put in the auth folder instead


for _, requiredPermission := range requiredPermissions {
if !slices.Contains(userPermissions, requiredPermission) {
Expand Down
23 changes: 23 additions & 0 deletions backend/src/models/club.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package models

import (
"github.com/GenerateNU/sac/backend/src/embeddings"
"github.com/GenerateNU/sac/backend/src/errors"
"github.com/GenerateNU/sac/backend/src/types"
"github.com/google/uuid"
"gorm.io/gorm"
)
Expand Down Expand Up @@ -84,3 +87,23 @@
tx.Model(&c).Update("num_members", c.NumMembers-1)
return
}

// EmbeddingId returns the club's ID rendered as a string, which is the id the
// club's embedding is stored under.
func (c *Club) EmbeddingId() string {
	clubID := c.ID
	return clubID.String()
}

// Namespace returns the namespace that club embeddings are stored in; it is
// the same for every club.
func (c *Club) Namespace() string {
	const clubNamespace = "clubs"
	return clubNamespace
}

func (c *Club) Embed() (*types.Embedding, *errors.Error) {
var clubInfoForEmbedding string

Check failure on line 100 in backend/src/models/club.go

View workflow job for this annotation

GitHub Actions / Lint

S1021: should merge variable declaration with assignment on next line (gosimple)
clubInfoForEmbedding = c.Name + " " + c.Name + " " + c.Name + " " + c.Name + " " + c.Description
embeddingVector, err := embeddings.CreateEmbeddingVector(clubInfoForEmbedding)

if err != nil {
return nil, err
}

return &types.Embedding{Id: c.ID.String(), Values: embeddingVector}, nil
}
37 changes: 36 additions & 1 deletion backend/src/models/user.go
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above

Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package models

import "github.com/google/uuid"
import (
"github.com/GenerateNU/sac/backend/src/types"
"github.com/google/uuid"
)

type UserRole string

Expand Down Expand Up @@ -84,3 +87,35 @@ type LoginUserResponseBody struct {
type CreateUserTagsBody struct {
Tags []uuid.UUID `json:"tags" validate:"required"`
}

// rolePermissions maps each user role to the permissions granted to it.
// Roles without an entry have no permissions.
var rolePermissions = map[UserRole][]types.Permission{
	// Super holds every permission listed here, including the *ReadAll variants.
	Super: {
		types.UserRead, types.UserReadAll, types.UserWrite, types.UserDelete,
		types.TagRead, types.TagCreate, types.TagWrite, types.TagDelete,
		types.ClubRead, types.ClubCreate, types.ClubWrite, types.ClubDelete,
		types.PointOfContactRead, types.PointOfContactCreate, types.PointOfContactWrite, types.PointOfContactDelete,
		types.CommentRead, types.CommentCreate, types.CommentWrite, types.CommentDelete,
		types.EventRead, types.EventCreate, types.EventWrite, types.EventDelete,
		types.ContactRead, types.ContactCreate, types.ContactWrite, types.ContactDelete,
		types.CategoryRead, types.CategoryCreate, types.CategoryWrite, types.CategoryDelete,
		types.NotificationRead, types.NotificationCreate, types.NotificationWrite, types.NotificationDelete,
		// UserReadAll is already granted in the first row, so it is not repeated here.
		types.TagReadAll, types.ClubReadAll, types.PointOfContactReadAll, types.CommentReadAll,
		types.EventReadAll, types.ContactReadAll, types.CategoryReadAll, types.NotificationReadAll,
	},
	// Student is limited to the single-resource read permissions.
	Student: {
		types.UserRead,
		types.TagRead,
		types.ClubRead,
		types.PointOfContactRead,
		types.CommentRead,
		types.EventRead,
		types.ContactRead,
		types.CategoryRead,
		types.NotificationRead,
	},
}

// GetPermissions looks up the permissions granted to role in rolePermissions.
// A role with no entry yields a nil slice.
func GetPermissions(role UserRole) []types.Permission {
	permissions, found := rolePermissions[role]
	if !found {
		return nil
	}
	return permissions
}
26 changes: 26 additions & 0 deletions backend/src/types/embeddable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package types

import (
"github.com/GenerateNU/sac/backend/src/errors"
)

// Embedding holds the data needed for an embedding vector. The type is designed
// to mirror how Pinecone's API represents vectors, so it can be serialized
// directly into request payloads.
type Embedding struct {
// Id is the id this embedding should be upserted with. It should be the same
// value as produced by Embeddable.EmbeddingId(); it is duplicated here to
// simplify the Pinecone upload code, which expects both id and values in the
// upsert payload.
Id string `json:"id"`
// Values is the vector that should be upserted.
Values []float32 `json:"values"`
}

// Embeddable represents a value that can be transformed into an embedding
// vector (e.g. for use in a vector database).
type Embeddable interface {
// EmbeddingId returns the id this embeddable value should be upserted with.
EmbeddingId() string
// Namespace returns the namespace this embeddable value should be upserted to.
Namespace() string
// Embed returns the embedding vector this embeddable value should be upserted
// as, or an error if the vector could not be produced.
Embed() (*Embedding, *errors.Error)
}
Loading
Loading