Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: refactoring hugo_site connector to support multi datasource #56

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions coco.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,6 @@ connector:
interval: 60s
queue:
name: indexing_documents
urls:
- "https://pizza.rs/index.json"
- "https://infinilabs.cn/index.json"
- "https://blog.infinilabs.com/index.json"

##background jobs
pipeline:
Expand Down
4 changes: 2 additions & 2 deletions docs/content.en/docs/references/document.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Below is the field description for the document.
curl -H 'Content-Type: application/json' -XPOST http://localhost:9000/document/ -d '{
"source": {
"type":"connector",
"name":"google_drive",
"name":"My Hugo Site",
"id":"e806831dacc3",
},
"category": "report",
Expand Down Expand Up @@ -119,7 +119,7 @@ curl -XGET http://localhost:9000/document/cso9vr3q50k38nobvmcg
"updated": "2024-11-10T19:58:36.009092+08:00",
"source": {
"type":"connector",
"name":"google_drive",
"name":"My Hugo Site",
"id":"e806831dacc3",
}
...OMITTED...
Expand Down
3 changes: 1 addition & 2 deletions modules/coco.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,15 @@ import (
"infini.sh/coco/modules/assistant"
_ "infini.sh/coco/modules/assistant"
"infini.sh/coco/modules/common"
_ "infini.sh/coco/modules/connector"
_ "infini.sh/coco/modules/indexing"
_ "infini.sh/coco/modules/search"
_ "infini.sh/coco/modules/connector"
"infini.sh/framework/core/env"
"infini.sh/framework/core/global"
"infini.sh/framework/core/orm"
)

type Coco struct {

}

func (this *Coco) Setup() {
Expand Down
10 changes: 5 additions & 5 deletions modules/common/datasource.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ package common
type DataSource struct {
CombinedFullText

Type string `json:"type,omitempty" elastic_mapping:"type:{type:keyword}"` // Type of the datasource, eg: connector
Name string `json:"name,omitempty" elastic_mapping:"name:{type:keyword}"` // Display name of this datasource
Type string `json:"type,omitempty" elastic_mapping:"type:{type:keyword}"` // Type of the datasource, eg: connector
Name string `json:"name,omitempty" elastic_mapping:"name:{type:keyword}"` // Display name of this datasource

Connector ConnectorConfig `json:"connector,omitempty" elastic_mapping:"connector:{type:keyword}"`
Connector ConnectorConfig `json:"connector,omitempty" elastic_mapping:"connector:{type:keyword}"`
}

type ConnectorConfig struct {
ConnectorID string `json:"id,omitempty" elastic_mapping:"id:{type:keyword}"` // Connector ID for the datasource
ConnectorID string `json:"id,omitempty" elastic_mapping:"id:{type:keyword}"` // Connector ID for the datasource
Config map[string]interface{} `json:"config,omitempty" elastic_mapping:"config:{enabled:false}"` // Configs for this Connector
}
}
12 changes: 6 additions & 6 deletions modules/common/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ type RichLabel struct {

type DataSourceReference struct {
Type string `json:"type,omitempty" elastic_mapping:"type:{type:keyword}"` // Type of the datasource, eg: connector
Name string `json:"name,omitempty" elastic_mapping:"name:{type:keyword}"` // Source of the document (e.g., "Github", "Google Drive", "Dropbox")
ID string `json:"id,omitempty" elastic_mapping:"id:{type:keyword}"` // ID of this the datasource, eg: google_drive
Name string `json:"name,omitempty" elastic_mapping:"name:{type:keyword}"` // Name of the datasource (e.g., "My Github", "My Google Drive", "My Dropbox")
ID string `json:"id,omitempty" elastic_mapping:"id:{type:keyword}"` // ID of the datasource, eg: 8ca2fe8cf5027b0f1b5f932b429e38c3
}

type Document struct {
Expand All @@ -27,14 +27,14 @@ type Document struct {

Type string `json:"type,omitempty" elastic_mapping:"type:{type:keyword,copy_to:combined_fulltext}"` // Document type, such as PDF, Docx, etc.

Category string `json:"category,omitempty" elastic_mapping:"category:{type:keyword,copy_to:combined_fulltext}"` // Primary category of the document (e.g., "report", "article")
Subcategory string `json:"subcategory,omitempty" elastic_mapping:"subcategory:{type:keyword,copy_to:combined_fulltext}"` // Secondary category of the document (e.g., "report", "article")
Category string `json:"category,omitempty" elastic_mapping:"category:{type:keyword,copy_to:combined_fulltext}"` // Primary category of the document (e.g., "report", "article")
Subcategory string `json:"subcategory,omitempty" elastic_mapping:"subcategory:{type:keyword,copy_to:combined_fulltext}"` // Secondary category of the document (e.g., "report", "article")

//use categories for very complex hierarchy categories
Categories []string `json:"categories,omitempty" elastic_mapping:"categories:{type:keyword,copy_to:combined_fulltext}"` // Full hierarchy of categories, useful for detailed classification
Categories []string `json:"categories,omitempty" elastic_mapping:"categories:{type:keyword,copy_to:combined_fulltext}"` // Full hierarchy of categories, useful for detailed classification

//use rich_categories for icon need to display for each category
RichCategories []RichLabel `json:"rich_categories,omitempty" elastic_mapping:"rich_categories:{type:object}"` // Full hierarchy of categories, useful for detailed classification, with icon decoration
RichCategories []RichLabel `json:"rich_categories,omitempty" elastic_mapping:"rich_categories:{type:object}"` // Full hierarchy of categories, useful for detailed classification, with icon decoration

Title string `json:"title,omitempty" elastic_mapping:"title:{type:text,copy_to:combined_fulltext,fields:{keyword: {type: keyword}, pinyin: {type: text, analyzer: pinyin_analyzer}}}"` // Document title
Summary string `json:"summary,omitempty" elastic_mapping:"summary:{type:text,copy_to:combined_fulltext}"` // Brief summary or description of the document
Expand Down
5 changes: 2 additions & 3 deletions modules/common/fulltext.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@ package common

import "infini.sh/framework/core/orm"


type CombinedFullText struct {
orm.ORMObjectBase // Embedding ORM base for persistence-related fields
CombinedFullText string `json:"-" elastic_mapping:"combined_fulltext:{type:text,index_prefixes:{},index_phrases:true, analyzer:combined_text_analyzer }"`
orm.ORMObjectBase // Embedding ORM base for persistence-related fields
CombinedFullText string `json:"-" elastic_mapping:"combined_fulltext:{type:text,index_prefixes:{},index_phrases:true, analyzer:combined_text_analyzer }"`

Metadata map[string]interface{} `json:"metadata,omitempty" elastic_mapping:"metadata:{type:object}"` // Additional accessible metadata (e.g., file version, permissions)
Payload map[string]interface{} `json:"payload,omitempty" elastic_mapping:"payload:{enabled:false}"` // Additional store-only metadata (e.g., file binary data)
Expand Down
16 changes: 7 additions & 9 deletions modules/connector/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"time"
)


func (h *APIHandler) create(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
var obj = &common.Connector{}
err := h.DecodeJSON(req, obj)
Expand Down Expand Up @@ -61,11 +60,11 @@ func (h *APIHandler) update(w http.ResponseWriter, req *http.Request, ps httprou
id := ps.MustGetParameter("id")
obj := common.Connector{}

replace:=h.GetBoolOrDefault(req,"replace",false)
replace := h.GetBoolOrDefault(req, "replace", false)

var err error
var create *time.Time
if !replace{
var create *time.Time
if !replace {
obj.ID = id
exists, err := orm.Get(&obj)
if !exists || err != nil {
Expand All @@ -77,9 +76,9 @@ func (h *APIHandler) update(w http.ResponseWriter, req *http.Request, ps httprou
}
id = obj.ID
create = obj.Created
}else{
t:=time.Now()
create=&t
} else {
t := time.Now()
create = &t
}

obj = common.Connector{}
Expand Down Expand Up @@ -135,7 +134,7 @@ func (h *APIHandler) search(w http.ResponseWriter, req *http.Request, ps httprou

var err error
q := orm.Query{}
q.RawQuery,err=h.GetRawBody(req)
q.RawQuery, err = h.GetRawBody(req)

//TODO handle url query args

Expand All @@ -150,4 +149,3 @@ func (h *APIHandler) search(w http.ResponseWriter, req *http.Request, ps httprou
h.Error(w, err)
}
}

30 changes: 13 additions & 17 deletions modules/connector/datasource.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ func (h *APIHandler) createDatasource(w http.ResponseWriter, req *http.Request,
return
}

if obj.Type=="connector"{
if obj.Type == "connector" {

if obj.Connector.ConnectorID==""{
if obj.Connector.ConnectorID == "" {
panic("invalid connector")
}

//check connector
connector:=common.Connector{}
connector.ID=obj.Connector.ConnectorID
exists,err:=orm.Get(&connector)
if !exists||err!=nil{
connector := common.Connector{}
connector.ID = obj.Connector.ConnectorID
exists, err := orm.Get(&connector)
if !exists || err != nil {
panic("invalid connector")
}

Expand All @@ -51,7 +51,6 @@ func (h *APIHandler) createDatasource(w http.ResponseWriter, req *http.Request,
h.WriteError(w, "invalid datasource", http.StatusInternalServerError)
}


func (h *APIHandler) deleteDatasource(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
id := ps.MustGetParameter("id")

Expand Down Expand Up @@ -79,7 +78,6 @@ func (h *APIHandler) deleteDatasource(w http.ResponseWriter, req *http.Request,
}, 200)
}


func (h *APIHandler) getDatasource(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
id := ps.MustGetParameter("id")

Expand All @@ -106,11 +104,11 @@ func (h *APIHandler) updateDatasource(w http.ResponseWriter, req *http.Request,
id := ps.MustGetParameter("id")
obj := common.DataSource{}

replace:=h.GetBoolOrDefault(req,"replace",false)
replace := h.GetBoolOrDefault(req, "replace", false)

var err error
var create *time.Time
if !replace{
var create *time.Time
if !replace {
obj.ID = id
exists, err := orm.Get(&obj)
if !exists || err != nil {
Expand All @@ -122,9 +120,9 @@ func (h *APIHandler) updateDatasource(w http.ResponseWriter, req *http.Request,
}
id = obj.ID
create = obj.Created
}else{
t:=time.Now()
create=&t
} else {
t := time.Now()
create = &t
}

obj = common.DataSource{}
Expand All @@ -149,12 +147,11 @@ func (h *APIHandler) updateDatasource(w http.ResponseWriter, req *http.Request,
}, 200)
}


func (h *APIHandler) searchDatasource(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {

var err error
q := orm.Query{}
q.RawQuery,err=h.GetRawBody(req)
q.RawQuery, err = h.GetRawBody(req)

//TODO handle url query args

Expand All @@ -169,4 +166,3 @@ func (h *APIHandler) searchDatasource(w http.ResponseWriter, req *http.Request,
h.Error(w, err)
}
}

18 changes: 10 additions & 8 deletions plugins/connectors/hugo_site/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
package hugo_site

type HugoDocument struct {
Category string `json:"category"` // The main category of the document
Content string `json:"content"` // The content description
Subcategory string `json:"subcategory"` // The subcategory of the document
Summary string `json:"summary"` // A brief summary
Tags []string `json:"tags"` // Tags associated with the document
Title string `json:"title"` // The title of the document
URL string `json:"url"` // The URL for the document reference
Category string `json:"category,omitempty"` // The main category of the document
Content string `json:"content,omitempty"` // The content description
Subcategory string `json:"subcategory,omitempty"` // The subcategory of the document
Summary string `json:"summary,omitempty"` // A brief summary
Tags []string `json:"tags,omitempty"` // Tags associated with the document
Title string `json:"title,omitempty"` // The title of the document
URL string `json:"url,omitempty"` // The URL for the document reference
Created string `json:"created,omitempty"`
Updated string `json:"updated,omitempty"`
Lang string `json:"lang,omitempty"`
}

Loading
Loading