Skip to content

Commit

Permalink
Merge pull request #1 from cluebotng/feature/more-work
Browse files Browse the repository at this point in the history
More experimentation
  • Loading branch information
DamianZaremba authored Dec 12, 2024
2 parents cb37503 + a179b13 commit 61ac5bc
Show file tree
Hide file tree
Showing 21 changed files with 241 additions and 213 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ jobs:
uses: ncipollo/release-action@v1
with:
token: "${{ secrets.GITHUB_TOKEN }}"
artifacts: cbng
artifacts: botng
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.idea
*.iml
cbng.log*
botng.log*
vendor/
config.yaml
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Not supported:
* `oftenvandalized.txt` - This was used to emit messages into IRC, but hasn't been used
since 2018 (80cab4)
* `irc.wikimedia.org` - In favour of the HTTP event stream (which backs the IRC relay)
* `titles.txt` - This is replaced by a MySQL table

TODO
----
Expand Down
93 changes: 22 additions & 71 deletions cluebot.sql
Original file line number Diff line number Diff line change
@@ -1,82 +1,33 @@
-- MySQL dump 10.11
--
-- Host: localhost Database: cluebot_enwiki
-- ------------------------------------------------------
-- Server version 5.0.32-Debian_7etch3-log
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--
-- Table structure for table `beaten`
--
DROP TABLE IF EXISTS `beaten`;
CREATE TABLE `beaten`
CREATE TABLE IF NOT EXISTS `beaten`
(
`id` int(11) NOT NULL auto_increment,
`id` int(11) NOT NULL auto_increment,
`timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
`article` varchar(256) NOT NULL,
`diff` varchar(512) NOT NULL,
`user` varchar(256) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
--
-- Table structure for table `trr`
--
DROP TABLE IF EXISTS `trr`;
CREATE TABLE `trr`

CREATE TABLE IF NOT EXISTS `vandalism`
(
`id` int(11) NOT NULL auto_increment,
`timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
`user` varchar(256) NOT NULL,
`title` varchar(256) NOT NULL,
`url` varchar(512) NOT NULL,
`revid` int(11) NOT NULL,
`md5` char(32) default NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=488749 DEFAULT CHARSET=latin1;
--
-- Table structure for table `vandalism`
--
DROP TABLE IF EXISTS `vandalism`;
CREATE TABLE `vandalism`
(
`id` int(11) NOT NULL auto_increment,
`timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
`user` varchar(256) NOT NULL,
`article` varchar(256) NOT NULL,
`heuristic` varchar(64) NOT NULL,
`regex` varchar(2048) default NULL,
`reason` varchar(512) NOT NULL,
`diff` varchar(512) NOT NULL,
`old_id` int(11) NOT NULL,
`new_id` int(11) NOT NULL,
`reverted` tinyint(1) NOT NULL,
`id` int(11) NOT NULL auto_increment,
`timestamp` timestamp NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
`user` varchar(256) NOT NULL,
`article` varchar(256) NOT NULL,
`heuristic` varchar(64) NOT NULL,
`regex` varchar(2048) default NULL,
`reason` varchar(512) NOT NULL,
`diff` varchar(512) NOT NULL,
`old_id` int(11) NOT NULL,
`new_id` int(11) NOT NULL,
`reverted` tinyint(1) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
--
-- Table structure for table `cluster_node`
--
DROP TABLE IF EXISTS `cluster_node`;
CREATE TABLE `cluster_node`

CREATE TABLE IF NOT EXISTS `last_revert`
(
`node` varchar(256) NOT NULL,
`port` int(11) NOT NULL,
`type` varchar(256) NOT NULL,
PRIMARY KEY (`type`)
) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-- Dump completed on 2008-01-14 2:31:27
`title` varchar(256) NOT NULL,
`user` varchar(256) NOT NULL,
`time` int NOT NULL,
PRIMARY KEY (`title`, `user`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
13 changes: 6 additions & 7 deletions config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,22 @@ wikipedia:
username: ClueBot_NG

irc:
server: irc.freenode.org
server: irc.libera.chat
port: 6697
username: CBNGRelay
password: xxxxxxxx
channel:
spam: wikipedia-en-cbngfeed2
revert: wikipedia-en-cbngrevertfeed2
debug: wikipedia-en-cbngdebug2

sql:
replica:
username: root
host: 127.0.0.1
port: 3306
schema: enwiki_p
- username: root
host: 127.0.0.1
port: 3306
schema: enwiki_p
cluebot:
username: root
host: 127.0.0.1
port: 3306
schema: cbng
schema: cbng
26 changes: 23 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ import (
"github.com/spf13/pflag"
"gopkg.in/natefinch/lumberjack.v2"
"net/http"
"os"
"sync"
"time"
)

func RunMetricPoller(wg *sync.WaitGroup, toReplicationWatcher, toPageMetadataLoader, toPageRecentEditCountLoader, toPageRecentRevertCountLoader, toUserEditCountLoader, toUserWarnsCountLoader, toUserDistinctPagesCountLoader, toRevisionLoader, toScoringProcessor, toRevertProcessor chan *model.ProcessEvent, r *relay.Relays) {
func RunMetricPoller(wg *sync.WaitGroup, toPageMetadataLoader, toPageRecentEditCountLoader, toPageRecentRevertCountLoader, toUserEditCountLoader, toUserWarnsCountLoader, toUserDistinctPagesCountLoader, toRevisionLoader, toScoringProcessor, toRevertProcessor chan *model.ProcessEvent, r *relay.Relays, db *database.DatabaseConnection) {
wg.Add(1)
defer wg.Done()

Expand All @@ -41,6 +42,18 @@ func RunMetricPoller(wg *sync.WaitGroup, toReplicationWatcher, toPageMetadataLoa
metrics.IrcNotificationsPending.With(prometheus.Labels{"channel": "debug"}).Set(float64(r.GetPendingDebugMessages()))
metrics.IrcNotificationsPending.With(prometheus.Labels{"channel": "revert"}).Set(float64(r.GetPendingRevertMessages()))
metrics.IrcNotificationsPending.With(prometheus.Labels{"channel": "spam"}).Set(float64(r.GetPendingSpamMessages()))

db.UpdateMetrics()
}
}

// RunDatabasePurger calls db.ClueBot.PurgeOldRevertTimes once per hour, driven
// by a time.Ticker. The loop has no exit condition, so the function only ends
// when the process does.
//
// NOTE(review): wg.Add(1) executes inside this function rather than before the
// `go` statement that launches it; a Wait that starts before this line runs
// would not account for this goroutine — confirm against the caller.
func RunDatabasePurger(wg *sync.WaitGroup, db *database.DatabaseConnection) {
	wg.Add(1)
	defer wg.Done()

	ticker := time.NewTicker(time.Hour)
	for {
		// Block until the next hourly tick, then purge.
		<-ticker.C
		db.ClueBot.PurgeOldRevertTimes()
	}
}

Expand Down Expand Up @@ -80,8 +93,13 @@ func main() {
logrus.FieldKeyMsg: "message",
},
})

logFile := "botng.log"
if value, ok := os.LookupEnv("BOTNG_LOG"); ok {
logFile = value
}
logrus.AddHook(helpers.NewLogFileHook(&lumberjack.Logger{
Filename: "cbng.log",
Filename: logFile,
MaxBackups: 31,
MaxAge: 1,
Compress: true,
Expand Down Expand Up @@ -127,6 +145,7 @@ func main() {

r := relay.NewRelays(&wg, useIrcRelay, configuration.Irc.Server, configuration.Irc.Port, configuration.Irc.Username, configuration.Irc.Password, configuration.Irc.Channel)
db := database.NewDatabaseConnection(configuration)
defer db.Disconnect()

// Processing channels
toReplicationWatcher := make(chan *model.ProcessEvent, 10000)
Expand All @@ -141,7 +160,8 @@ func main() {
toScoringProcessor := make(chan *model.ProcessEvent, 10000)
toRevertProcessor := make(chan *model.ProcessEvent, 10000)

go RunMetricPoller(&wg, toReplicationWatcher, toPageMetadataLoader, toPageRecentEditCountLoader, toPageRecentRevertCountLoader, toUserEditCountLoader, toUserWarnsCountLoader, toUserDistinctPagesCountLoader, toRevisionLoader, toScoringProcessor, toRevertProcessor, r)
go RunMetricPoller(&wg, toPageMetadataLoader, toPageRecentEditCountLoader, toPageRecentRevertCountLoader, toUserEditCountLoader, toUserWarnsCountLoader, toUserDistinctPagesCountLoader, toRevisionLoader, toScoringProcessor, toRevertProcessor, r, db)
go RunDatabasePurger(&wg, db)

go feed.ConsumeHttpChangeEvents(&wg, configuration, toReplicationWatcher)
go processor.ReplicationWatcher(&wg, configuration, db, ignoreReplicationDelay, toReplicationWatcher, toPageMetadataLoader)
Expand Down
5 changes: 3 additions & 2 deletions pkg/cbng/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
)

var ReleaseTag = "development"
var RecentRevertThreshold = int64(86400)

type BotConfiguration struct {
Owner string
Expand Down Expand Up @@ -41,7 +42,7 @@ type ReplicaSqlConfiguration struct {
}

type SqlConfiguration struct {
Replica ReplicaSqlConfiguration
Replica []ReplicaSqlConfiguration
Cluebot CluebotSqlConfiguration
}

Expand Down Expand Up @@ -103,7 +104,7 @@ func NewConfiguration() *Configuration {
configuration := Configuration{}

var configPath string
if val, ok := os.LookupEnv("BOT_CFG"); ok {
if val, ok := os.LookupEnv("BOTNG_CFG"); ok {
configPath = val
}
if configPath != "" {
Expand Down
43 changes: 28 additions & 15 deletions pkg/cbng/database/cluebot/cluebot.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,21 @@ func (ci *CluebotInstance) GenerateVandalismId(logger *logrus.Entry, ctx context
_, span := metrics.OtelTracer.Start(ctx, "database.cluebot.GenerateVandalismId")
defer span.End()

var vandalismId int64

db := ci.getDatabaseConnection()
defer db.Close()

res, err := db.Exec("INSERT INTO `vandalism` (`id`,`user`,`article`,`heuristic`,`reason`,`diff`,`old_id`,`new_id`,`reverted`) VALUES (NULL, ?, ?, '', ?, ?, ?, ?, 0)", user, title, reason, diffUrl, previousId, currentId)
if err != nil {
logger.Errorf("Error running query: %v", err)
span.SetStatus(codes.Error, err.Error())
return vandalismId, err
return 0, err
}
if vandalismId, err := res.LastInsertId(); err != nil {

vandalismId, err := res.LastInsertId()
if err != nil {
logger.Errorf("Failed to get insert id: %v", err)
span.SetStatus(codes.Error, err.Error())
return vandalismId, err
return 0, err
}

logger.Debugf("Generated id %v", vandalismId)
Expand Down Expand Up @@ -153,13 +153,10 @@ func (ci *CluebotInstance) GetLastRevertTime(l *logrus.Entry, ctx context.Contex
defer span.End()

var revertTime int64
timeoutCtx, cancel := context.WithTimeout(context.Background(), time.Millisecond*300)
defer cancel()

db := ci.getDatabaseConnection()
defer db.Close()

rows, err := db.QueryContext(timeoutCtx, "SELECT `time` FROM `last_revert` WHERE title=? AND user=?", title, user)
rows, err := db.Query("SELECT `time` FROM `last_revert` WHERE title=? AND user=?", title, user)
if err != nil {
logger.Infof("Error running query: %v", err)
span.SetStatus(codes.Error, err.Error())
Expand All @@ -178,7 +175,7 @@ func (ci *CluebotInstance) GetLastRevertTime(l *logrus.Entry, ctx context.Contex
return revertTime
}

func (ci *CluebotInstance) SaveRevertTime(l *logrus.Entry, ctx context.Context, title, user string) int64 {
func (ci *CluebotInstance) SaveRevertTime(l *logrus.Entry, ctx context.Context, title, user string) error {
logger := l.WithFields(logrus.Fields{
"function": "database.cluebot.SaveRevertTime",
"args": map[string]interface{}{
Expand All @@ -190,17 +187,15 @@ func (ci *CluebotInstance) SaveRevertTime(l *logrus.Entry, ctx context.Context,
defer span.End()

var revertTime int64
timeoutCtx, cancel := context.WithTimeout(context.Background(), time.Millisecond*300)
defer cancel()

db := ci.getDatabaseConnection()
defer db.Close()

rows, err := db.QueryContext(timeoutCtx, "INSERT INTO `last_revert` (`title`, `user`, `time`) "+
rows, err := db.Query("INSERT INTO `last_revert` (`title`, `user`, `time`) "+
"VALUES (?, ?, ?) ON DUPLICATE KEY UPDATE `time`=`time`", title, user, time.Now().UTC().Unix())
if err != nil {
logger.Infof("Error running query: %v", err)
span.SetStatus(codes.Error, err.Error())
return err
} else {
defer rows.Close()
if !rows.Next() {
Expand All @@ -209,9 +204,27 @@ func (ci *CluebotInstance) SaveRevertTime(l *logrus.Entry, ctx context.Context,
if err := rows.Scan(&revertTime); err != nil {
logger.Errorf("Error reading rows for query: %v", err)
span.SetStatus(codes.Error, err.Error())
return err
}
}
}

return revertTime
return nil
}

// PurgeOldRevertTimes deletes every `last_revert` row whose `time` is older
// than config.RecentRevertThreshold plus 10 seconds, measured against the
// current UTC Unix time. A failed delete is logged as a warning and recorded
// on the tracing span; nothing is returned to the caller.
func (ci *CluebotInstance) PurgeOldRevertTimes() {
	log := logrus.WithFields(logrus.Fields{
		"function": "database.cluebot.PurgeOldRevertTimes",
	})
	_, span := metrics.OtelTracer.Start(context.Background(), "database.cluebot.PurgeOldRevertTimes")
	defer span.End()

	conn := ci.getDatabaseConnection()
	defer conn.Close()

	// Rows with a Unix timestamp strictly below this value are removed.
	cutoff := time.Now().UTC().Unix() - (config.RecentRevertThreshold + 10)
	if _, err := conn.Exec("DELETE FROM `last_revert` WHERE `time` < ?", cutoff); err != nil {
		log.Warnf("Error purging database: %v", err)
		span.SetStatus(codes.Error, err.Error())
	}
}
8 changes: 8 additions & 0 deletions pkg/cbng/database/datasource.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,11 @@ func NewDatabaseConnection(configuration *config.Configuration) *DatabaseConnect
}
return &c
}

// Disconnect tears down the replica side of the connection by calling
// Replica.DisconnectFromDatabase. The ClueBot handle is not touched here.
// NOTE(review): presumably ClueBot connections are opened and closed per
// call, so no teardown is needed for them — confirm.
func (dbc *DatabaseConnection) Disconnect() {
dbc.Replica.DisconnectFromDatabase()
}

// UpdateMetrics delegates to Replica.UpdateMetrics; only the replica side is
// refreshed by this call.
// NOTE(review): which metrics are updated is defined in the replica package
// and is not visible here — confirm before relying on specifics.
func (dbc *DatabaseConnection) UpdateMetrics() {
dbc.Replica.UpdateMetrics()
}
Loading

0 comments on commit 61ac5bc

Please sign in to comment.