From 35d2a7489ad4ba5bb4022370f001a5827b8c7a39 Mon Sep 17 00:00:00 2001
From: Tomas Hanacek
Date: Wed, 31 Jan 2024 01:20:09 +0100
Subject: [PATCH] feat: nsfw inscription check

---
 indexer/.gitignore                            |   3 +-
 indexer/Makefile                              |   2 +-
 indexer/go.mod                                |   8 +-
 indexer/go.sum                                |  11 +-
 indexer/src/indexer/indexer.go                |   9 +-
 .../src/indexer/metaprotocol/inscription.go   |   9 +-
 indexer/src/indexer/models/inscription.go     |   1 +
 indexer/src/nsfw/predictor.go                 | 263 ++++++++++++++++++
 indexer/src/nsfw/worker.go                    | 101 +++++++
 9 files changed, 400 insertions(+), 7 deletions(-)
 create mode 100644 indexer/src/nsfw/predictor.go
 create mode 100644 indexer/src/nsfw/worker.go

diff --git a/indexer/.gitignore b/indexer/.gitignore
index ff84dbbb..8a5be68e 100644
--- a/indexer/.gitignore
+++ b/indexer/.gitignore
@@ -1,2 +1,3 @@
 bin
-.env
\ No newline at end of file
+.env
+model
\ No newline at end of file
diff --git a/indexer/Makefile b/indexer/Makefile
index e8ab8b6b..61c8f67c 100644
--- a/indexer/Makefile
+++ b/indexer/Makefile
@@ -18,7 +18,7 @@ dependencies: ## Install dependencies for the service
 	go mod tidy
 
 build: ## Build the binary for the service
-	CGO_ENABLED=0 go build -o ./bin/${APP_NAME} ./src/*.go
+	go build -o ./bin/${APP_NAME} ./src/*.go
 
 run: build ## Build and run the service binary
 	./bin/${APP_NAME}
diff --git a/indexer/go.mod b/indexer/go.mod
index e1eb045c..41ac30dd 100644
--- a/indexer/go.mod
+++ b/indexer/go.mod
@@ -9,9 +9,13 @@ require (
 	github.com/cosmos/cosmos-sdk v0.45.0
 	github.com/cosmos/ibc-go v1.0.0
 	github.com/crypto-org-chain/chain-main/v3 v3.0.0-croeseid
+	github.com/galeone/tensorflow/tensorflow/go v0.0.0-20220620094824-6bb01e3a58fa
+	github.com/galeone/tfgo v0.0.0-20220622151904-fc7b7ccad83b
+	github.com/joho/godotenv v1.5.1
 	github.com/kelseyhightower/envconfig v1.4.0
 	github.com/leodido/go-urn v1.2.4
 	github.com/sirupsen/logrus v1.9.3
+	golang.org/x/image v0.15.0
 	gorm.io/datatypes v1.2.0
 	gorm.io/driver/postgres v1.5.2
 	gorm.io/gorm v1.25.5
@@ -45,6 +49,7 @@ require (
 	github.com/dustin/go-humanize v1.0.0 // indirect
 	github.com/dvsekhvalnov/jose2go v0.0.0-20200901110807-248326c1351b // indirect
 	github.com/fsnotify/fsnotify v1.4.9 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
 	github.com/go-kit/kit v0.10.0 // indirect
 	github.com/go-logfmt/logfmt v0.5.0 // indirect
 	github.com/go-sql-driver/mysql v1.7.1 // indirect
@@ -74,7 +79,6 @@ require (
 	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af // indirect
 	github.com/jmhodges/levigo v1.0.0 // indirect
-	github.com/joho/godotenv v1.5.1 // indirect
 	github.com/keybase/go-keychain v0.0.0-20190712205309-48d3d31d256d // indirect
 	github.com/libp2p/go-buffer-pool v0.0.2 // indirect
 	github.com/magiconair/properties v1.8.5 // indirect
@@ -123,7 +127,7 @@ require (
 	golang.org/x/tools v0.10.0 // indirect
 	google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71 // indirect
 	google.golang.org/grpc v1.42.0 // indirect
-	google.golang.org/protobuf v1.27.1 // indirect
+	google.golang.org/protobuf v1.28.1 // indirect
 	gopkg.in/ini.v1 v1.62.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/indexer/go.sum b/indexer/go.sum
index e1dd51ae..70ded00c 100644
--- a/indexer/go.sum
+++ b/indexer/go.sum
@@ -300,6 +300,12 @@ github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/galeone/tensorflow/tensorflow/go v0.0.0-20220620094824-6bb01e3a58fa h1:UsqNDZ0Olkhk+9wFchvc1JCQn7Bq6e694uQ+hGBlJcM=
+github.com/galeone/tensorflow/tensorflow/go v0.0.0-20220620094824-6bb01e3a58fa/go.mod h1:nHvVZJgJuQ0V2Xe4BqhTeCKQSMWDRI/gDkN8UxAANtU=
+github.com/galeone/tfgo v0.0.0-20220622151904-fc7b7ccad83b h1:TGgxstITwmNG7+OI95FNQI5wsYkkOSiOE8xOi0QjccU=
+github.com/galeone/tfgo v0.0.0-20220622151904-fc7b7ccad83b/go.mod h1:KqgpdfIYdJEusyCqP9uUcnwvSAp7m37ML5lh6YPlUh4=
 github.com/gballet/go-libpcsclite v0.0.0-20190607065134-2772fd86a8ff/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
@@ -1013,6 +1019,8 @@ golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMk
 golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5/go.mod h1:4M0jN8W1tt0AVLNr8HDosyJCDCDuyL9N9+3m7wDWgKw=
 golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
 golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.15.0 h1:kOELfmgrmJlw4Cdb7g/QGuB3CvDrXbqEIww/pNtNBm8=
+golang.org/x/image v0.15.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -1443,8 +1451,9 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
 google.golang.org/protobuf v1.25.1-0.20200805231151-a709e31e5d12/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
 google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
+google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/indexer/src/indexer/indexer.go b/indexer/src/indexer/indexer.go
index 95ed9c27..f1e676ca 100644
--- a/indexer/src/indexer/indexer.go
+++ b/indexer/src/indexer/indexer.go
@@ -18,6 +18,7 @@ import (
 	"github.com/donovansolms/cosmos-inscriptions/indexer/src/indexer/metaprotocol"
 	"github.com/donovansolms/cosmos-inscriptions/indexer/src/indexer/models"
 	"github.com/donovansolms/cosmos-inscriptions/indexer/src/indexer/types"
+	"github.com/donovansolms/cosmos-inscriptions/indexer/src/nsfw"
 	"github.com/kelseyhightower/envconfig"
 	"github.com/leodido/go-urn"
 	"github.com/sirupsen/logrus"
@@ -48,6 +49,7 @@ type Indexer struct {
 	metaprotocols map[string]metaprotocol.Processor
 	stopChannel   chan bool
 	db            *gorm.DB
+	nsfw          *nsfw.Worker
 	wg            sync.WaitGroup
 }
 
@@ -71,8 +73,11 @@ func New(
 
 	}
 
+	nsfwWorker := nsfw.NewWorker(log)
+	nsfwWorker.Start("./model")
+
 	metaprotocols := make(map[string]metaprotocol.Processor)
-	metaprotocols["inscription"] = metaprotocol.NewInscriptionProcessor(config.ChainID, db)
+	metaprotocols["inscription"] = metaprotocol.NewInscriptionProcessor(config.ChainID, db, nsfwWorker)
 	metaprotocols["cft20"] = metaprotocol.NewCFT20Processor(config.ChainID, db)
 	metaprotocols["marketplace"] = metaprotocol.NewMarketplaceProcessor(config.ChainID, db)
 
@@ -87,6 +92,7 @@ func New(
 		logger:        log,
 		stopChannel:   make(chan bool),
 		db:            db,
+		nsfw:          nsfwWorker,
 	}, nil
 }
 
@@ -109,6 +115,7 @@ func (i *Indexer) Stop() error {
 	i.logger.Info("Stopping indexer")
 	i.stopChannel <- true
 	i.stopChannel <- true
+	i.nsfw.Stop()
 	return nil
 }
 
diff --git a/indexer/src/indexer/metaprotocol/inscription.go b/indexer/src/indexer/metaprotocol/inscription.go
index 8833783b..d7a2672f 100644
--- a/indexer/src/indexer/metaprotocol/inscription.go
+++ b/indexer/src/indexer/metaprotocol/inscription.go
@@ -15,6 +15,7 @@ import (
 	"github.com/aws/aws-sdk-go/service/s3/s3manager"
 	"github.com/donovansolms/cosmos-inscriptions/indexer/src/indexer/models"
 	"github.com/donovansolms/cosmos-inscriptions/indexer/src/indexer/types"
+	"github.com/donovansolms/cosmos-inscriptions/indexer/src/nsfw"
 	"github.com/kelseyhightower/envconfig"
 	"github.com/leodido/go-urn"
 	"gorm.io/datatypes"
@@ -45,6 +46,7 @@ type InscriptionMetadata struct {
 type Inscription struct {
 	chainID    string
 	db         *gorm.DB
+	nsfwWorker *nsfw.Worker
 	s3Endpoint string
 	s3Region   string
 	s3Bucket   string
@@ -56,7 +58,7 @@ type Inscription struct {
 	s3Token    string
 }
 
-func NewInscriptionProcessor(chainID string, db *gorm.DB) *Inscription {
+func NewInscriptionProcessor(chainID string, db *gorm.DB, nsfwWorker *nsfw.Worker) *Inscription {
 	// Parse config environment variables for self
 	var config InscriptionConfig
 
@@ -68,6 +70,7 @@ func NewInscriptionProcessor(chainID string, db *gorm.DB) *Inscription {
 	return &Inscription{
 		chainID:    chainID,
 		db:         db,
+		nsfwWorker: nsfwWorker,
 		s3Endpoint: config.S3Endpoint,
 		s3Region:   config.S3Region,
 		s3Bucket:   config.S3Bucket,
@@ -143,6 +146,9 @@ func (protocol *Inscription) Process(transactionModel models.Transaction, protoc
 		return fmt.Errorf("unable to store content '%s'", err)
 	}
 
+	// check if content is explicit
+	isExplicit := <-protocol.nsfwWorker.Add(content)
+
 	inscriptionModel := models.Inscription{
 		ChainID:          parsedURN.ChainID,
 		Height:           transactionModel.Height,
@@ -155,6 +161,7 @@ func (protocol *Inscription) Process(transactionModel models.Transaction, protoc
 		Metadata:         datatypes.JSON(metadata),
 		ContentPath:      contentPath,
 		ContentSizeBytes: uint64(len(content)),
+		IsExplicit:       isExplicit,
 		DateCreated:      transactionModel.DateCreated,
 	}
 
diff --git a/indexer/src/indexer/models/inscription.go b/indexer/src/indexer/models/inscription.go
index 9b27fb9e..59097604 100644
--- a/indexer/src/indexer/models/inscription.go
+++ b/indexer/src/indexer/models/inscription.go
@@ -19,6 +19,7 @@ type Inscription struct {
 	Metadata         datatypes.JSON `gorm:"column:metadata"`
 	ContentPath      string         `gorm:"column:content_path"`
 	ContentSizeBytes uint64         `gorm:"column:content_size_bytes"`
+	IsExplicit       bool           `gorm:"column:is_explicit"`
 	DateCreated      time.Time      `gorm:"column:date_created"`
 }
 
diff --git a/indexer/src/nsfw/predictor.go b/indexer/src/nsfw/predictor.go
new file mode 100644
index 00000000..94d01fbb
--- /dev/null
+++ b/indexer/src/nsfw/predictor.go
@@ -0,0 +1,263 @@
+// Package nsfw classifies inscription content as explicit or safe with a
+// TensorFlow image-classification model.
+package nsfw
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"image/png"
+
+	tf "github.com/galeone/tensorflow/tensorflow/go"
+	"github.com/galeone/tensorflow/tensorflow/go/op"
+	tg "github.com/galeone/tfgo"
+	"golang.org/x/image/webp"
+)
+
+const (
+	// ImageDimensions is the width and height, in pixels, that every image is
+	// resized to before it is handed to the model.
+	ImageDimensions = 224
+
+	// Scores above these thresholds mark content as explicit.
+	pornThreshold   = 0.3
+	sexyThreshold   = 0.4
+	hentaiThreshold = 0.3
+
+	// categoryCount is the number of scores Predict reads per image.
+	categoryCount = 5
+)
+
+// Predictor runs the classification model on encoded images.
+type Predictor struct {
+	model *tg.Model
+}
+
+// Prediction holds the score the model assigned to each category.
+type Prediction struct {
+	Drawings float32
+	Hentai   float32
+	Neutral  float32
+	Porn     float32
+	Sexy     float32
+}
+
+// NewPredictor wraps an already loaded model.
+func NewPredictor(model *tg.Model) *Predictor {
+	return &Predictor{
+		model: model,
+	}
+}
+
+// NewPredictorFromPath loads the saved model stored at modelPath.
+//
+// tfgo's LoadModel panics when the model cannot be loaded, so the panic is
+// recovered and reported through the error result callers already check.
+func NewPredictorFromPath(modelPath string) (predictor *Predictor, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			predictor = nil
+			err = fmt.Errorf("loading model from %q: %v", modelPath, r)
+		}
+	}()
+
+	return NewPredictor(tg.LoadModel(modelPath, []string{"serve"}, nil)), nil
+}
+
+// Predict classifies img, whose encoding is named by imageFormat ("png",
+// "jpeg", "jpg", "jpe", "gif", "bmp" or "webp").
+//
+// It returns an error when the image cannot be decoded or when the model
+// fails or produces an unexpected output. tfgo signals model failures by
+// panicking; those panics are converted into errors here.
+func (p *Predictor) Predict(img []byte, imageFormat string) (prediction *Prediction, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			prediction = nil
+			err = fmt.Errorf("running model: %v", r)
+		}
+	}()
+
+	if p == nil || p.model == nil {
+		return nil, errors.New("predictor has no model")
+	}
+
+	tensor, err := createTensorFromImage(img, imageFormat)
+	if err != nil {
+		return nil, err
+	}
+
+	results := p.model.Exec([]tf.Output{
+		p.model.Op("StatefulPartitionedCall", 0),
+	}, map[tf.Output]*tf.Tensor{
+		p.model.Op("serving_default_input", 0): tensor,
+	})
+	if len(results) == 0 {
+		return nil, errors.New("model returned no output")
+	}
+
+	scores, ok := results[0].Value().([][]float32)
+	if !ok || len(scores) == 0 || len(scores[0]) < categoryCount {
+		return nil, errors.New("model returned an unexpected output")
+	}
+
+	vals := scores[0]
+	return &Prediction{
+		Drawings: vals[0],
+		Hentai:   vals[1],
+		Neutral:  vals[2],
+		Porn:     vals[3],
+		Sexy:     vals[4],
+	}, nil
+}
+
+// Describe formats the scores as percentages for logging.
+func (p Prediction) Describe() string {
+	return fmt.Sprintf(
+		"[Drawing: %.2f%% , Hentai: %.2f%%, Porn: %.2f%%, Sexy: %.2f%%, Neutral: %.2f%%]",
+		p.Drawings*100, p.Hentai*100, p.Porn*100, p.Sexy*100, p.Neutral*100)
+}
+
+// IsNSFW reports whether any explicit category exceeds its threshold.
+func (p Prediction) IsNSFW() bool {
+	return p.Porn > pornThreshold || p.Sexy > sexyThreshold || p.Hentai > hentaiThreshold
+}
+
+// Image is a TensorFlow graph under construction that decodes and
+// preprocesses one encoded image.
+type Image struct {
+	Path   *op.Scope // scope the graph is built in
+	Input  tf.Output // placeholder fed with the encoded image bytes
+	Output tf.Output // current end of the preprocessing pipeline
+}
+
+// Scale linearly maps the values onto [min, max], taking the observed
+// minimum and maximum over the first three axes of the tensor.
+// DivNoNan keeps the result finite when that minimum equals the maximum,
+// as happens for a single-colour image.
+func (image *Image) Scale(min, max float32) *Image {
+	if image.Output.DataType() != tf.Float {
+		image.Output = tg.Cast(image.Path, image.Output, tf.Float)
+	}
+
+	scaleScope := image.Path.SubScope("scale")
+	axes := []int32{0, 1, 2}
+
+	minVal := op.Min(scaleScope.SubScope("Min"), image.Output, op.Const(scaleScope.SubScope("reductionIndices"), axes), op.MinKeepDims(false))
+	maxVal := op.Max(scaleScope.SubScope("Max"), image.Output, op.Const(scaleScope.SubScope("reductionIndices"), axes), op.MaxKeepDims(false))
+	scaled := op.DivNoNan(scaleScope.SubScope("Div"),
+		op.Mul(scaleScope.SubScope("Mul"),
+			op.Sub(scaleScope.SubScope("Sub"), image.Output, minVal),
+			op.Const(scaleScope.SubScope("scaleRange"), max-min)),
+		op.Sub(scaleScope.SubScope("Sub"), maxVal, minVal))
+	// Shift by the lower bound so the range starts at min rather than 0.
+	image.Output = op.Add(scaleScope.SubScope("Add"), scaled, op.Const(scaleScope.SubScope("offset"), min))
+	return image
+}
+
+// Resize resamples the image to width x height pixels.
+func (image *Image) Resize(width, height int32) *Image {
+	resizeScope := image.Path.SubScope("resizeArea")
+	// ResizeArea takes its size as [new_height, new_width].
+	size := op.Const(resizeScope.SubScope("size"), []int32{height, width})
+	image.Output = op.ResizeArea(resizeScope.SubScope("ResizeArea"), image.Output, size)
+	return image
+}
+
+// NewImage builds the graph that decodes an image of the given format into
+// a batch holding one RGB image scaled to [0, 1]. It returns nil when the
+// format is not supported.
+func NewImage(imageFormat string) *Image {
+	s := op.NewScope()
+	input := op.Placeholder(s, tf.String)
+
+	var batch tf.Output
+	switch imageFormat {
+	case "png":
+		batch = addBatchAxis(s, op.DecodePng(s, input, op.DecodePngChannels(3)))
+	case "bmp":
+		batch = addBatchAxis(s, op.DecodeBmp(s, input, op.DecodeBmpChannels(3)))
+	case "jpeg", "jpg", "jpe":
+		batch = addBatchAxis(s, op.DecodeJpeg(s.SubScope("DecodeJpeg"), input, op.DecodeJpegChannels(3)))
+	case "gif":
+		// DecodeGif already yields a 4-D [frames, height, width, 3] tensor;
+		// adding another axis would make it 5-D, which ResizeArea rejects.
+		// Only the first frame is kept so the batch size stays at one.
+		batch = op.Slice(s.SubScope("FirstFrame"), op.DecodeGif(s, input),
+			op.Const(s.SubScope("begin"), []int32{0, 0, 0, 0}),
+			op.Const(s.SubScope("size"), []int32{1, -1, -1, -1}))
+	default:
+		return nil
+	}
+
+	img := &Image{Path: s, Output: op.Identity(s.SubScope("Identity"), batch), Input: input}
+	img.Scale(0, 1)
+	return img
+}
+
+// addBatchAxis turns a decoded [height, width, channels] image into a batch
+// of one.
+func addBatchAxis(s *op.Scope, decoded tf.Output) tf.Output {
+	return op.ExpandDims(s.SubScope("ExpandDims"), decoded, op.Const(s.SubScope("axis"), []int32{0}))
+}
+
+// webpToPng re-encodes a WebP image as PNG, a format NewImage can decode.
+func webpToPng(image []byte) ([]byte, error) {
+	img, err := webp.Decode(bytes.NewReader(image))
+	if err != nil {
+		return nil, fmt.Errorf("decoding webp: %w", err)
+	}
+
+	var buf bytes.Buffer
+	if err := png.Encode(&buf, img); err != nil {
+		return nil, fmt.Errorf("encoding png: %w", err)
+	}
+	return buf.Bytes(), nil
+}
+
+// createTensorFromImage decodes image and returns it as a normalized
+// ImageDimensions x ImageDimensions tensor ready to be fed to the model.
+func createTensorFromImage(image []byte, imageFormat string) (*tf.Tensor, error) {
+	if imageFormat == "webp" {
+		converted, err := webpToPng(image)
+		if err != nil {
+			return nil, err
+		}
+		image, imageFormat = converted, "png"
+	}
+
+	// Reject unsupported formats before copying the image into a tensor.
+	img := NewImage(imageFormat)
+	if img == nil {
+		return nil, fmt.Errorf("unknown image format %s", imageFormat)
+	}
+	img.Resize(ImageDimensions, ImageDimensions)
+
+	graph, err := img.Path.Finalize()
+	if err != nil {
+		return nil, err
+	}
+
+	tensor, err := tf.NewTensor(string(image))
+	if err != nil {
+		return nil, err
+	}
+
+	session, err := tf.NewSession(graph, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer session.Close()
+
+	normalized, err := session.Run(
+		map[tf.Output]*tf.Tensor{img.Input: tensor},
+		[]tf.Output{img.Output},
+		nil)
+	if err != nil {
+		return nil, err
+	}
+	if len(normalized) == 0 {
+		return nil, errors.New("preprocessing returned no output")
+	}
+	return normalized[0], nil
+}
diff --git a/indexer/src/nsfw/worker.go b/indexer/src/nsfw/worker.go
new file mode 100644
index 00000000..5f525014
--- /dev/null
+++ b/indexer/src/nsfw/worker.go
@@ -0,0 +1,101 @@
+package nsfw
+
+import (
+	"strings"
+	"sync"
+
+	"github.com/gabriel-vasile/mimetype"
+	"github.com/sirupsen/logrus"
+)
+
+// task is one classification request and the channel its verdict is sent on.
+type task struct {
+	image []byte
+	reply chan bool
+}
+
+// Worker classifies images one at a time on a background goroutine.
+type Worker struct {
+	work     chan task
+	quitChan chan struct{}
+	stopOnce sync.Once
+	logger   *logrus.Entry
+}
+
+// NewWorker creates a worker that logs through log. Add blocks until Start
+// has been called.
+func NewWorker(log *logrus.Entry) *Worker {
+	return &Worker{
+		work:     make(chan task),
+		quitChan: make(chan struct{}),
+		logger: log.WithFields(logrus.Fields{
+			"worker": "nsfw",
+		}),
+	}
+}
+
+// Start loads the model at modelPath on a new goroutine and then serves
+// requests until Stop is called. A model that cannot be loaded is logged
+// with Fatal.
+func (w *Worker) Start(modelPath string) {
+	go func() {
+		predictor, err := NewPredictorFromPath(modelPath)
+		if err != nil {
+			w.logger.WithError(err).Fatal("unable to create predictor")
+		}
+
+		for {
+			select {
+			case t := <-w.work:
+				t.reply <- w.classify(predictor, t.image)
+			case <-w.quitChan:
+				w.logger.Info("worker stopping")
+				return
+			}
+		}
+	}()
+}
+
+// Stop tells the worker goroutine to exit. It is safe to call repeatedly, and
+// requests added afterwards are answered with false instead of blocking.
+func (w *Worker) Stop() {
+	w.stopOnce.Do(func() {
+		close(w.quitChan)
+	})
+}
+
+// Add queues image for classification and returns a channel that delivers
+// exactly one value: true when the content is explicit. Every call gets its
+// own channel, so concurrent callers cannot receive each other's verdicts.
+func (w *Worker) Add(image []byte) <-chan bool {
+	reply := make(chan bool, 1)
+	select {
+	case w.work <- task{image: image, reply: reply}:
+	case <-w.quitChan:
+		// The worker is gone; answer right away rather than block forever.
+		reply <- false
+	}
+	return reply
+}
+
+// classify detects the content type of image and runs the model on it.
+// Content that cannot be classified is reported as not explicit.
+func (w *Worker) classify(predictor *Predictor, image []byte) bool {
+	ext := mimetype.Detect(image).Extension()
+	if i := strings.LastIndex(ext, "."); i >= 0 {
+		ext = ext[i+1:]
+	}
+	ext = strings.ToLower(ext)
+
+	w.logger.Debugf("New Task with ext: %s", ext)
+
+	explicit := false
+	prediction, err := predictor.Predict(image, ext)
+	if err != nil {
+		// Unsupported content types end up here, so this stays at debug level.
+		w.logger.WithError(err).Debug("content not classified")
+	} else {
+		explicit = prediction.IsNSFW()
+	}
+
+	w.logger.Infof("Task finished: %s, IsExplicit: %t", ext, explicit)
+	return explicit
+}