From 92a431ac3ccf85634acd02f1a9b1f63c61732575 Mon Sep 17 00:00:00 2001 From: Medcl Date: Sun, 10 Nov 2024 20:09:28 +0800 Subject: [PATCH] feat: init implement for indexing api (#13) --- README.md | 102 +++++++++++++++++++++++++ coco.yml | 2 +- modules/coco.go | 11 +-- modules/indexing/document.go | 142 +++++++++++++++++++++++++++++++++++ modules/indexing/init.go | 19 +++++ 5 files changed, 267 insertions(+), 9 deletions(-) create mode 100644 modules/indexing/document.go diff --git a/README.md b/README.md index a26cb29..9d9cbda 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,15 @@ ollama pull mistral:latest ollama pull nomic-embed-text:latest ``` + +### OCR Server + +Coco use a [OCR Server](https://github.com/otiai10/ocrserver) to processing image files. + +``` +docker run -p 8080:8080 otiai10/ocrserver +``` + ### Easysearch Install Easysearch @@ -175,3 +184,96 @@ curl -H 'Content-Type: application/json' -XPOST http://localhost:2900/chat/cs "found": true } ``` + +## Indexing API Reference + +### Index a Document + +```shell +//request +curl -H 'Content-Type: application/json' -XPOST http://localhost:2900/document/ -d'{ "source": "google_drive", "category": "report", "categories": ["business", "quarterly_reports"], "cover": "https://example.com/images/report_cover.jpg", "title": "Q3 Business Report", "summary": "An overview of the company financial performance for Q3.", "type": "PDF", "lang": "en", "content": "This quarters revenue increased by 15%, driven by strong sales in the APAC region...", "thumbnail": "https://example.com/images/report_thumbnail.jpg", "owner": "jdoe", "tags": ["finance", "quarterly", "business", "report"], "url": "https://drive.google.com/file/d/abc123/view", "size": 1048576, "metadata": { "version": "1.2", "department": "Finance", "last_reviewed": "2024-10-20" }, "last_updated_by": { "user": { "username": "jdoe", "userid": "user123" }, "timestamp": "2024-11-01T15:30:00Z" } }' + +//response +{ + "_id": "cso9vr3q50k38nobvmcg", + "result": "created" +} +``` + +### Get a Document + +```shell +//request +curl -XGET http://localhost:2900/document/cso9vr3q50k38nobvmcg + +//response +{ + "_id": "cso9vr3q50k38nobvmcg", + "_source": { + "id": "cso9vr3q50k38nobvmcg", + "created": "2024-11-10T19:58:36.009086+08:00", + "updated": "2024-11-10T19:58:36.009092+08:00", + "source": "google_drive", + "category": "report", + "categories": [ + "business", + "quarterly_reports" + ], + "cover": "https://example.com/images/report_cover.jpg", + "title": "Q3 Business Report", + "summary": "An overview of the company financial performance for Q3.", + "type": "PDF", + "lang": "en", + "content": "This quarters revenue increased by 15%, driven by strong sales in the APAC region...", + "thumbnail": "https://example.com/images/report_thumbnail.jpg", + "owner": "jdoe", + "tags": [ + "finance", + "quarterly", + "business", + "report" + ], + "url": "https://drive.google.com/file/d/abc123/view", + "size": 1048576, + "metadata": { + "department": "Finance", + "last_reviewed": "2024-10-20", + "version": "1.2" + }, + "last_updated_by": { + "user": { + "username": "jdoe", + "userid": "user123" + }, + "timestamp": "2024-11-01T15:30:00Z" + } + }, + "found": true +} +``` + +### Update a Document + +```shell +//request +curl -H 'Content-Type: application/json' -XPUT http://localhost:2900/document/cso9vr3q50k38nobvmcg -d'{ "source": "google_drive", "category": "report", "categories": ["business", "quarterly_reports"], "cover": "https://example.com/images/report_cover.jpg", "title": "Q3 Business Report", "summary": "An overview of the company financial performance for Q3.", "type": "PDF", "lang": "en", "content": "This quarters revenue increased by 15%, driven by strong sales in the APAC region...", "thumbnail": "https://example.com/images/report_thumbnail.jpg", "owner": "jdoe", "tags": ["finance", "quarterly", "business", "report"], "url": "https://drive.google.com/file/d/abc123/view", "size": 1048576, "metadata": { "version": "1.2", "department": "Finance", "last_reviewed": "2024-10-20" }, "last_updated_by": { "user": { "username": "jdoe", "userid": "user123" }, "timestamp": "2024-11-01T15:30:00Z" } }' + +//response +{ + "_id": "cso9vr3q50k38nobvmcg", + "result": "updated" +} +``` + +### Delete a Document + +```shell +//request +curl -XDELETE http://localhost:2900/document/cso9vr3q50k38nobvmcg + +//response +{ + "_id": "cso9vr3q50k38nobvmcg", + "result": "deleted" +} +``` \ No newline at end of file diff --git a/coco.yml b/coco.yml index 33e9d2b..d352a85 100644 --- a/coco.yml +++ b/coco.yml @@ -37,7 +37,7 @@ web: network: binding: $[[env.SERV_BINDING]] tls: - enabled: true + enabled: false skip_insecure_verify: true default_domain: "api.coco.rs" auto_issue: diff --git a/modules/coco.go b/modules/coco.go index 8533e0a..4bc2984 100644 --- a/modules/coco.go +++ b/modules/coco.go @@ -18,14 +18,9 @@ type Coco struct { } func (this *Coco) Setup() { - err := orm.RegisterSchemaWithIndexName(assistant.Session{}, "session") - if err != nil { - panic(err) - } - err = orm.RegisterSchemaWithIndexName(assistant.ChatMessage{}, "message") - if err != nil { - panic(err) - } + orm.MustRegisterSchemaWithIndexName(assistant.Session{}, "session") + orm.MustRegisterSchemaWithIndexName(common.Document{}, "document") + orm.MustRegisterSchemaWithIndexName(assistant.ChatMessage{}, "message") cocoConfig := common.Config{ OllamaConfig: common.OllamaConfig{ diff --git a/modules/indexing/document.go b/modules/indexing/document.go new file mode 100644 index 0000000..403b042 --- /dev/null +++ b/modules/indexing/document.go @@ -0,0 +1,142 @@ +/* Copyright © INFINI LTD. All rights reserved. + * Web: https://infinilabs.com + * Email: hello#infini.ltd */ + +package indexing + +import ( + "infini.sh/coco/modules/common" + httprouter "infini.sh/framework/core/api/router" + "infini.sh/framework/core/orm" + "infini.sh/framework/core/util" + "net/http" +) + +func (h *APIHandler) createDoc(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + var obj = &common.Document{} + err := h.DecodeJSON(req, obj) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + err = orm.Create(nil, obj) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + h.WriteJSON(w, util.MapStr{ + "_id": obj.ID, + "result": "created", + }, 200) + +} + +func (h *APIHandler) getDoc(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + id := ps.MustGetParameter("doc_id") + + obj := common.Document{} + obj.ID = id + + exists, err := orm.Get(&obj) + if !exists || err != nil { + h.WriteJSON(w, util.MapStr{ + "_id": id, + "found": false, + }, http.StatusNotFound) + return + } + + h.WriteJSON(w, util.MapStr{ + "found": true, + "_id": id, + "_source": obj, + }, 200) +} + +func (h *APIHandler) updateDoc(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + id := ps.MustGetParameter("doc_id") + obj := common.Document{} + + obj.ID = id + exists, err := orm.Get(&obj) + if !exists || err != nil { + h.WriteJSON(w, util.MapStr{ + "_id": id, + "result": "not_found", + }, http.StatusNotFound) + return + } + + id = obj.ID + create := obj.Created + + obj = common.Document{} + err = h.DecodeJSON(req, &obj) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + //protect + obj.ID = id + obj.Created = create + err = orm.Update(nil, &obj) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + h.WriteJSON(w, util.MapStr{ + "_id": obj.ID, + "result": "updated", + }, 200) +} + +func (h *APIHandler) deleteDoc(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + id := ps.MustGetParameter("doc_id") + + obj := common.Document{} + obj.ID = id + + exists, err := orm.Get(&obj) + if !exists || err != nil { + h.WriteJSON(w, util.MapStr{ + "_id": id, + "result": "not_found", + }, http.StatusNotFound) + return + } + + err = orm.Delete(nil, &obj) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + h.WriteJSON(w, util.MapStr{ + "_id": obj.ID, + "result": "deleted", + }, 200) +} + +func (h *APIHandler) searchDocs(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + + var err error + q := orm.Query{} + q.RawQuery,err=h.GetRawBody(req) + + //TODO handle url query args + + err, res := orm.Search(&common.Document{}, &q) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + _, err = h.Write(w, res.Raw) + if err != nil { + h.Error(w, err) + } +} diff --git a/modules/indexing/init.go b/modules/indexing/init.go index 2cc52e7..29d8f2f 100644 --- a/modules/indexing/init.go +++ b/modules/indexing/init.go @@ -3,3 +3,22 @@ * Email: hello#infini.ltd */ package indexing + +import ( + "infini.sh/framework/core/api" +) + +type APIHandler struct { + api.Handler +} + +func init() { + handler := APIHandler{} + + //for internal document management, security should be enabled + api.HandleAPIMethod(api.POST, "/document/", handler.createDoc) + api.HandleAPIMethod(api.GET, "/document/:doc_id", handler.getDoc) + api.HandleAPIMethod(api.PUT, "/document/:doc_id", handler.updateDoc) + api.HandleAPIMethod(api.DELETE, "/document/:doc_id", handler.deleteDoc) + api.HandleAPIMethod(api.GET, "/document/_search", handler.searchDocs) +}