diff --git a/coco.yml b/coco.yml index bdc65e9..7756841 100644 --- a/coco.yml +++ b/coco.yml @@ -199,6 +199,15 @@ connector: indexing_users: true include_private_book: false include_private_doc: false + hugo_site: + enabled: true + interval: 60s + queue: + name: indexing_documents + urls: + - "https://pizza.rs/index.json" + - "https://infinilabs.cn/index.json" + - "https://blog.infinilabs.com/index.json" ##background jobs pipeline: diff --git a/plugins/connectors/hugo_site/json.go b/plugins/connectors/hugo_site/json.go new file mode 100644 index 0000000..bea2fcd --- /dev/null +++ b/plugins/connectors/hugo_site/json.go @@ -0,0 +1,16 @@ +/* Copyright © INFINI LTD. All rights reserved. + * Web: https://infinilabs.com + * Email: hello#infini.ltd */ + +package hugo_site + +type HugoDocument struct { + Category string `json:"category"` // The main category of the document + Content string `json:"content"` // The content description + Subcategory string `json:"subcategory"` // The subcategory of the document + Summary string `json:"summary"` // A brief summary + Tags []string `json:"tags"` // Tags associated with the document + Title string `json:"title"` // The title of the document + URL string `json:"url"` // The URL for the document reference +} + diff --git a/plugins/connectors/hugo_site/plugin.go b/plugins/connectors/hugo_site/plugin.go new file mode 100644 index 0000000..ea07a62 --- /dev/null +++ b/plugins/connectors/hugo_site/plugin.go @@ -0,0 +1,156 @@ +/* Copyright © INFINI LTD. All rights reserved. + * Web: https://infinilabs.com + * Email: hello#infini.ltd */ + +package hugo_site + +import ( + "context" + "fmt" + log "github.com/cihub/seelog" + "infini.sh/coco/modules/common" + "infini.sh/framework/core/api" + "infini.sh/framework/core/env" + "infini.sh/framework/core/errors" + "infini.sh/framework/core/global" + "infini.sh/framework/core/module" + "infini.sh/framework/core/queue" + "infini.sh/framework/core/task" + "infini.sh/framework/core/util" + "net/url" + "strings" + "time" +) + +type Plugin struct { + api.Handler + + Enabled bool `config:"enabled"` + Interval string `config:"interval"` + SkipInvalidToken bool `config:"skip_invalid_token"` + Urls []string `config:"urls"` + Queue *queue.QueueConfig `config:"queue"` +} + +func (this *Plugin) Setup() { + ok, err := env.ParseConfig("connector.hugo_site", &this) + if ok && err != nil && global.Env().SystemConfig.Configs.PanicOnConfigError { + panic(err) + } + + if !this.Enabled { + return + } + + if this.Queue == nil { + this.Queue = &queue.QueueConfig{Name: "indexing_documents"} + } + + //api.HandleAPIMethod(api.GET, "/connector/google_drive/connect", this.connect) + //api.HandleAPIMethod(api.POST, "/connector/google_drive/reset", this.reset) + //api.HandleAPIMethod(api.GET, "/connector/google_drive/oauth_redirect", this.oAuthRedirect) + +} + +func (this *Plugin) Start() error { + + if this.Enabled { + task.RegisterScheduleTask(task.ScheduleTask{ + ID: util.GetUUID(), + Group: "connectors", + Singleton: true, + Interval: util.GetDurationOrDefault(this.Interval, time.Second*30).String(), + Description: "indexing hugo json docs", + Task: func(ctx context.Context) { + for _, url := range this.Urls { + log.Infof("fetch hugo url: %v", url) + + res,err:=util.HttpGet(url) + if err!=nil{ + panic(err) + } + + if res.Body!=nil{ + var documents []HugoDocument + + // Unmarshal JSON into the slice + err := util.FromJSONBytes(res.Body,&documents) + if err != nil { + panic(errors.Errorf("Failed to parse JSON: %v", err)) + } + + // Output the parsed data + for i, v := range documents { + doc:=common.Document{Source: common.DataSource{Type: "connector",Name: "hugo_site"}} + doc.Type="web_page" + doc.Icon="web" + doc.Title=v.Title + doc.Content=v.Content + doc.Category=v.Category + doc.Subcategory=v.Subcategory + doc.Summary=v.Summary + doc.Tags=v.Tags + v2,er:=getFullURL(url,v.URL) + if er!=nil{ + panic(er) + } + doc.URL=v2 + log.Infof("Document %d: %+v %v", i+1, doc.Title, doc.URL) + doc.ID = util.MD5digest(fmt.Sprintf("%v-%v-%v", "test", "hugo-site", doc.URL)) + + data := util.MustToJSONBytes(doc) + + if global.Env().IsDebug { + log.Tracef(string(data)) + } + + err := queue.Push(queue.SmartGetOrInitConfig(this.Queue), data) + if err != nil { + panic(err) + } + } + + } + + } + + }, + }) + + } + + return nil +} + +// Function to construct the full URL using only the domain from the seed URL +func getFullURL(seedURL, relativePath string) (string, error) { + // Parse the seed URL + parsedURL, err := url.Parse(seedURL) + if err != nil { + return "", fmt.Errorf("invalid seed URL: %w", err) + } + + // Extract the domain (scheme and host) + domain := fmt.Sprintf("%s://%s", parsedURL.Scheme, parsedURL.Host) + + // Remove any leading "/" from relativePath to avoid duplication + relativePath = strings.TrimPrefix(relativePath, "/") + + // Combine the domain with the relative path + fullURL := fmt.Sprintf("%s/%s", domain, relativePath) + + return fullURL, nil +} + +func (this *Plugin) Stop() error { + return nil +} + +func (this *Plugin) Name() string { + return "hugo_site" +} + +func init() { + module.RegisterUserPlugin(&Plugin{SkipInvalidToken: true}) +} +