Skip to content

Commit

Permalink
Merge remote-tracking branch 'toshski/VRSpy_Scraper_changes' into RFTW
Browse files Browse the repository at this point in the history
  • Loading branch information
theRealKLH committed Jul 30, 2024
2 parents 8d72d02 + c075215 commit d36f44d
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 87 deletions.
6 changes: 2 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
module github.com/xbapps/xbvr

go 1.22

toolchain go1.22.5
go 1.21

require (
github.com/ProtonMail/go-appdir v1.1.0
Expand Down Expand Up @@ -56,7 +54,7 @@ require (
github.com/thoas/go-funk v0.9.3
github.com/tidwall/gjson v1.17.1
github.com/x-cray/logrus-prefixed-formatter v0.5.2
github.com/xo/dburl v0.23.2
github.com/xo/dburl v0.21.1
golang.org/x/crypto v0.25.0
golang.org/x/net v0.27.0
golang.org/x/oauth2 v0.21.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,8 @@ github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJ
github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/xo/dburl v0.23.2 h1:Fl88cvayrgE56JA/sqhNMLljCW/b7RmG1mMkKMZUFgA=
github.com/xo/dburl v0.23.2/go.mod h1:uazlaAQxj4gkshhfuuYyvwCBouOmNnG2aDxTCFZpmL4=
github.com/xo/dburl v0.21.1 h1:n5mfH1fh51RQbvuaKKykGslodt8pZqyZJMNohVo2zK0=
github.com/xo/dburl v0.21.1/go.mod h1:B7/G9FGungw6ighV8xJNwWYQPMfn3gsi2sn5SE8Bzco=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ=
go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
Expand Down
10 changes: 6 additions & 4 deletions pkg/api/deovr.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,12 @@ func (i DeoVRResource) getDeoScene(req *restful.Request, resp *restful.Response)
}

for _, file := range scriptFiles {
deoScriptFiles = append(deoScriptFiles, DeoSceneScriptFile{
Title: file.Filename,
URL: fmt.Sprintf("%v/api/dms/file/%v", session.DeoRequestHost, file.ID),
})
if strings.HasSuffix(file.Filename, ".funscript") {
deoScriptFiles = append(deoScriptFiles, DeoSceneScriptFile{
Title: file.Filename,
URL: fmt.Sprintf("%v/api/dms/file/%v", session.DeoRequestHost, file.ID),
})
}
}

var deoHSPFiles []DeoSceneHSPFile
Expand Down
18 changes: 18 additions & 0 deletions pkg/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -1945,6 +1945,24 @@ func Migrate() {
return nil
},
},
{
// Some invalid VirtualTaboo scene IDs were added to the database; this migration removes them.
// It targets scenes whose scene_id is exactly "virtualtaboo-" (presumably the scraper
// prefix with an empty site ID - TODO confirm), detaches their files, then deletes the scenes.
ID: "0078-remove-invalid-virtualtaboo-scenes",
// NOTE(review): the tx argument is not used in this function; every query goes through
// the db handle declared outside this entry - confirm that is intended.
Migrate: func(tx *gorm.DB) error {
var scenes []models.Scene
// Load the invalid scenes; the result of Find is not checked for an error.
db.Where("scene_id = ?", "virtualtaboo-").Find(&scenes)

for _, obj := range scenes {
// Best effort: an error from GetFiles is discarded and the loop simply sees no files.
files, _ := obj.GetFiles()
for _, file := range files {
// Reset the file's scene link to 0 before the scene row is deleted below.
file.SceneID = 0
file.Save()
}
}

// Delete the invalid scene rows; only this delete's error is reported to the migrator.
return db.Where("scene_id = ?", "virtualtaboo-").Delete(&models.Scene{}).Error
},
},
})

if err := m.Migrate(); err != nil {
Expand Down
37 changes: 11 additions & 26 deletions pkg/models/model_external_reference.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"time"

"github.com/avast/retry-go/v4"
"github.com/gocolly/colly/v2"
"github.com/markphelps/optional"

"github.com/xbapps/xbvr/pkg/common"
Expand Down Expand Up @@ -962,31 +961,17 @@ func (scrapeRules ActorScraperConfig) buildGenericActorScraperRules() {

siteDetails = GenericScraperRuleSet{}
siteDetails.Domain = "vrspy.com"
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-biography-description`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.star-photo img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Native: func(e interface{}) []string {
html := e.(*colly.HTMLElement)
var values []string
if mainPhotoURL := html.ChildAttr(`.star-photo img`, `src`); mainPhotoURL != "" {
partialURLRegex := regexp.MustCompile(`^(.*)/[^/]+.jpg`)
if partialURLMatch := partialURLRegex.FindStringSubmatch(mainPhotoURL); len(partialURLMatch) == 2 {
fullURLRegex := regexp.MustCompile(regexp.QuoteMeta(partialURLMatch[1]) + `/[^"]+.jpg`)
nuxtData := html.ChildText(`#__NUXT_DATA__`)
if imageURLs := fullURLRegex.FindAllString(nuxtData, -1); imageURLs != nil {
values = imageURLs
}
}
}
return values
}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.about-me-mobile .stars-params-title:contains("Height:") + .stars-params-value`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.about-me-mobile .stars-params-title:contains("Weight:") + .stars-params-value`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.about-me-mobile .stars-params-title:contains("Measurements:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.about-me-mobile .stars-params-title:contains("Nationality:") + .stars-params-value`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.about-me-mobile .stars-params-title:contains("Hair Color:") + .stars-params-value`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "biography", Selector: `.star-bio .show-more-text-container`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "image_url", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "images", Selector: `.avatar img`, ResultType: "attr", Attribute: "src", PostProcessing: []PostProcessing{{Function: "RemoveQueryParams"}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "height", Selector: `.star-info-row-title:contains("Height:") + span`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "weight", Selector: `.star-info-row-title:contains("Weight:") + span`})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "band_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "1"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "cup_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "2"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "waist_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "3"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hip_size", Selector: `.star-info-row-title:contains("Measurements:") + span`, PostProcessing: []PostProcessing{{Function: "RegexString", Params: []string{`(\d+)([A-Za-z]*)-(\d+)-(\d+)`, "4"}}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "nationality", Selector: `.star-info-row-title:contains("Nationality:") + span`, PostProcessing: []PostProcessing{{Function: "Lookup Country"}}})
siteDetails.SiteRules = append(siteDetails.SiteRules, GenericActorScraperRule{XbvrField: "hair_color", Selector: `.star-info-row-title:contains("Hair Color:") + span`})
scrapeRules.GenericActorScrapingConfig["vrspy scrape"] = siteDetails

siteDetails = GenericScraperRuleSet{}
Expand Down
51 changes: 20 additions & 31 deletions pkg/scrape/vrspy.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,50 +55,39 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-

sc.SceneID = scraperID + "-" + sc.SiteID

sc.Title = e.ChildText(`.video-content .header-container .section-header-container`)
sc.Synopsis = e.ChildText(`.video-description`)
sc.Tags = e.ChildTexts(`.video-categories .v-chip__content`)

e.ForEach(`.video-details-row`, func(id int, e *colly.HTMLElement) {
parts := strings.SplitN(e.Text, ":", 2)
key, value := parts[0], parts[1]
switch strings.TrimSpace(key) {
case "Stars":
sc.ActorDetails = make(map[string]models.ActorDetails)
e.ForEach(`.stars-list a`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, e.Text)
sc.ActorDetails[e.Text] = models.ActorDetails{
Source: scraperID + " scrape",
ProfileUrl: e.Request.AbsoluteURL(e.Attr(`href`)),
}
})
case "Duration":
durationParts := strings.Split(strings.SplitN(strings.TrimSpace(value), " ", 2)[0], ":")
if len(durationParts) == 3 {
hours, _ := strconv.Atoi(durationParts[0])
minutes, _ := strconv.Atoi(durationParts[1])
sc.Duration = hours*60 + minutes
sc.Title = e.ChildText(`.video-content .header-container .video-title .section-header-container`)
sc.Synopsis = e.ChildText(`.video-description-container`)
sc.Tags = e.ChildTexts(`.video-categories .chip`)

sc.ActorDetails = make(map[string]models.ActorDetails)
e.ForEach(`.video-actor-item`, func(id int, e *colly.HTMLElement) {
sc.Cast = append(sc.Cast, e.Text)
e.ForEach(`a`, func(id int, a *colly.HTMLElement) {
sc.ActorDetails[e.Text] = models.ActorDetails{
Source: scraperID + " scrape",
ProfileUrl: e.Request.AbsoluteURL(a.Attr(`href`)),
}
case "Release date":
tmpDate, _ := goment.New(strings.TrimSpace(value), "DD MMM YYYY")
sc.Released = tmpDate.Format("YYYY-MM-DD")
}

})
})

var durationParts []string
// Date & Duration
e.ForEach(`div.single-video-info__list-item`, func(id int, e *colly.HTMLElement) {
e.ForEach(`.video-details-info-item`, func(id int, e *colly.HTMLElement) {
parts := strings.Split(e.Text, ":")
if len(parts) > 1 {
switch strings.TrimSpace(parts[0]) {
case "Release date":
tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "MMM D, YYYY")
tmpDate, _ := goment.New(strings.TrimSpace(parts[1]), "DD MMMM YYYY")
sc.Released = tmpDate.Format("YYYY-MM-DD")
case "Duration":
durationParts = strings.Split(strings.TrimSpace(parts[1]), " ")
tmpDuration, err := strconv.Atoi(durationParts[0])
mins := tmpDuration * 60
tmpDuration, err = strconv.Atoi(parts[2])
mins = mins + tmpDuration
if err == nil {
sc.Duration = tmpDuration
sc.Duration = mins
}
}
}
Expand All @@ -114,7 +103,7 @@ func VRSpy(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<-
}

nuxtData := e.ChildText(`#__NUXT_DATA__`)
imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)\?width`)
imageRegex := regexp.MustCompile(regexp.QuoteMeta(cdnSceneURL.String()) + `(/photos/[^?"]*\.jpg)`)
sc.Gallery = imageRegex.FindAllString(nuxtData, -1)

// trailer details
Expand Down
67 changes: 52 additions & 15 deletions pkg/tasks/heatmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"sort"
"strings"

"github.com/lucasb-eyer/go-colorful"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -69,21 +70,24 @@ func GenerateHeatmaps(tlog *logrus.Entry) {
tlog.Infof("Generating heatmaps (%v/%v)", i+1, len(scriptfiles))
}
if file.Exists() {
log.Infof("Rendering %v", file.Filename)
destFile := filepath.Join(common.ScriptHeatmapDir, fmt.Sprintf("heatmap-%d.png", file.ID))
err := RenderHeatmap(
file.GetPath(),
destFile,
1000,
10,
250,
)
if err == nil {
file.HasHeatmap = true
file.RefreshHeatmapCache = true
file.Save()
} else {
log.Warn(err)
path := file.GetPath()
if strings.HasSuffix(path, ".funscript") {
log.Infof("Rendering %v", file.Filename)
destFile := filepath.Join(common.ScriptHeatmapDir, fmt.Sprintf("heatmap-%d.png", file.ID))
err := RenderHeatmap(
path,
destFile,
1000,
10,
250,
)
if err == nil {
file.HasHeatmap = true
file.RefreshHeatmapCache = true
file.Save()
} else {
log.Warn(err)
}
}
}
}
Expand Down Expand Up @@ -127,6 +131,9 @@ func RenderHeatmap(inputFile string, destFile string, width, height, numSegments
if err != nil {
return err
}
if funscript.IsFunscriptToken() {
return fmt.Errorf("funscript is a token: %s - heatmap can't be rendered", inputFile)
}

funscript.UpdateIntensity()
gradient := funscript.getGradientTable(numSegments)
Expand Down Expand Up @@ -256,6 +263,29 @@ func (funscript Script) getGradientTable(numSegments int) GradientTable {
return gradient
}

// IsFunscriptToken reports whether the script matches the "token" signature
// checked for here rather than being a regular, renderable funscript.
//
// A script is treated as a token when all of the following hold:
//   - it has between 1 and 100 actions,
//   - after ordering the actions by Pos, the first action's At equals
//     136740671 modulo the number of actions, and
//   - the ordered Pos values form a run of consecutive integers.
//
// The receiver is not modified; the ordering is done on a private copy.
// An empty script is never a token.
func (funscript *Script) IsFunscriptToken() bool {
	n := len(funscript.Actions)
	// n == 0 must be rejected up front: it would otherwise cause a modulus by
	// zero and an out-of-range index below.
	if n == 0 || n > 100 {
		return false
	}

	actions := make([]Action, n)
	copy(actions, funscript.Actions)
	// The comparator must read from the slice being sorted (the copy); indexing
	// the untouched original yields an inconsistent order once elements move.
	sort.SliceStable(actions, func(i, j int) bool { return actions[i].Pos < actions[j].Pos })

	// NOTE(review): 136740671 is the magic value the token format is keyed on;
	// its origin is not visible in this file.
	if actions[0].At != 136740671%int64(n) {
		return false
	}

	for i := 1; i < n; i++ {
		if actions[i].Pos != actions[i-1].Pos+1 {
			return false
		}
	}
	return true
}

func (funscript Script) getDuration() float64 {
maxts := funscript.Actions[len(funscript.Actions)-1].At
duration := float64(maxts) / 1000.0
Expand All @@ -275,10 +305,17 @@ func (funscript Script) getDuration() float64 {
}

// getFunscriptDuration loads the script at path and returns the value of its
// getDuration method.
//
// It returns 0 and an error when path does not end in ".funscript", when
// LoadFunscriptData fails, or when the loaded script is reported as a token by
// IsFunscriptToken.
func getFunscriptDuration(path string) (float64, error) {
	if isFunscript := strings.HasSuffix(path, ".funscript"); !isFunscript {
		return 0.0, fmt.Errorf("not a funscript: %s", path)
	}

	script, loadErr := LoadFunscriptData(path)
	switch {
	case loadErr != nil:
		// Pass the loader's error through unchanged.
		return 0.0, loadErr
	case script.IsFunscriptToken():
		return 0.0, fmt.Errorf("funscript is a token: %s", path)
	}

	return script.getDuration(), nil
}
9 changes: 5 additions & 4 deletions pkg/tasks/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,16 @@ func RescanVolumes(id int) {
filename := escape(unescapedFilename)
filename2 := strings.Replace(filename, ".funscript", ".mp4", -1)
filename3 := strings.Replace(filename, ".hsp", ".mp4", -1)
filename3 = strings.Replace(filename3, ".srt", ".mp4", -1)
err := db.Where("filenames_arr LIKE ? OR filenames_arr LIKE ? OR filenames_arr LIKE ?", `%"`+filename+`"%`, `%"`+filename2+`"%`, `%"`+filename3+`"%`).Find(&scenes).Error
filename4 := strings.Replace(filename, ".srt", ".mp4", -1)
filename5 := strings.Replace(filename, ".cmscript", ".mp4", -1)
err := db.Where("filenames_arr LIKE ? OR filenames_arr LIKE ? OR filenames_arr LIKE ? OR filenames_arr LIKE ? OR filenames_arr LIKE ?", `%"`+filename+`"%`, `%"`+filename2+`"%`, `%"`+filename3+`"%`, `%"`+filename4+`"%`, `%"`+filename5+`"%`).Find(&scenes).Error
if err != nil {
log.Error(err, " when matching "+unescapedFilename)
}
if len(scenes) == 0 && config.Config.Advanced.UseAltSrcInFileMatching {
// check if the filename matches in external_reference record

db.Preload("XbvrLinks").Where("external_source like 'alternate scene %' and external_data LIKE ? OR external_data LIKE ? OR external_data LIKE ?", `%"`+filename+`%`, `%"`+filename2+`%`, `%"`+filename3+`%`).Find(&extrefs)
db.Preload("XbvrLinks").Where("external_source like 'alternate scene %' and external_data LIKE ? OR external_data LIKE ? OR external_data LIKE ? OR external_data LIKE ? OR external_data LIKE ?", `%"`+filename+`%`, `%"`+filename2+`%`, `%"`+filename3+`%`, `%"`+filename4+`%`, `%"`+filename5+`%`).Find(&extrefs)
if len(extrefs) == 1 {
if len(extrefs[0].XbvrLinks) == 1 {
// the scene id will be the Internal DB Id from the associated link
Expand Down Expand Up @@ -226,7 +227,7 @@ func scanLocalVolume(vol models.Volume, db *gorm.DB, tlog *logrus.Entry) {
}
}

if !strings.HasPrefix(filepath.Base(path), ".") && filepath.Ext(path) == ".funscript" {
if !strings.HasPrefix(filepath.Base(path), ".") && (filepath.Ext(path) == ".funscript" || strings.ToLower(filepath.Ext(path)) == ".cmscript") {
scriptProcList = append(scriptProcList, path)
}
if !strings.HasPrefix(filepath.Base(path), ".") && filepath.Ext(path) == ".hsp" {
Expand Down
2 changes: 1 addition & 1 deletion ui/src/views/files/SceneMatch.vue
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ export default {
const commonWords = [
'180', '180x180', '2880x1440', '3d', '3dh', '3dv', '30fps', '30m', '360',
'3840x1920', '4k', '5k', '5400x2700', '60fps', '6k', '7k', '7680x3840',
'8k', 'fb360', 'fisheye190', 'funscript', 'h264', 'h265', 'hevc', 'hq', 'hsp', 'lq', 'lr',
'8k', 'fb360', 'fisheye190', 'funscript', 'cmscript', 'h264', 'h265', 'hevc', 'hq', 'hsp', 'lq', 'lr',
'mkv', 'mkx200', 'mkx220', 'mono', 'mp4', 'oculus', 'oculus5k',
'oculusrift', 'original', 'rf52', 'smartphone', 'srt', 'ssa', 'tb', 'uhq', 'vrca220', 'vp9'
]
Expand Down

0 comments on commit d36f44d

Please sign in to comment.