diff --git a/README.md b/README.md index 39ad786..87667b7 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ Project maintained by [Nebula Labs](https://about.utdnebula.com). - The input data is considered **immutable** by the parsing stage. This means the parsers should never modify the data being fed into them. #### - The `uploader` directory contains the uploader that sends the parsed data to the Nebula API MongoDB database. This is the final stage of the data pipeline. - The uploader(s) are concerned solely with pushing parsed data to the database. Data, at this point, is assumed to be valid and ready for use. +#### - The `generator` directory contains code to create data from scratch. + - This is part of a separate pipeline (generator > uploader instead of scraper > parser > uploader) for data that does not come from an external source. ### Contributing @@ -28,7 +30,7 @@ Please visit our [Discord](https://discord.utdnebula.com) and talk to us if you' ### Development -Documentation for the project will be created soon, but for more information please visit our [Discord](https://discord.com/invite/tcpcnfxmeQ). +Documentation for the project will be created soon, but for more information please visit our [Discord](https://discord.utdnebula.com). To build the project, simply clone the repository and then either: - Run `make` in the root (top level) directory (for systems with `make` installed, i.e. most Linux distros, MacOS) diff --git a/generators/letters.go b/generators/letters.go new file mode 100644 index 0000000..75f4fcd --- /dev/null +++ b/generators/letters.go @@ -0,0 +1,99 @@ +/* + This file contains the code for the letters generator. 
+*/ + +package generators + +import ( + "fmt" + "log" + "math/rand" + "os" + "time" + + "github.com/UTDNebula/api-tools/utils" + "github.com/UTDNebula/nebula-api/api/schema" +) + +var NUM_LETTERS int = 21 + +func GenerateLetters(outDir string) { + // Make output folder + err := os.MkdirAll(outDir, 0777) + if err != nil { + panic(err) + } + + // Define tiles and weights: https://en.wikipedia.org/wiki/Scrabble_letter_distributions + tiles := []rune{ + 'K', 'J', 'X', 'Q', 'Z', // x1 + 'B', 'C', 'M', 'P', 'F', 'H', 'V', 'W', 'Y', // x2 + 'G', // x3 + 'L', 'S', 'U', 'D', // x4 + 'N', 'R', 'T', // x6 + 'O', // x8 + 'A', 'I', // x9 + 'E', // x12 + } + weights := []int{ + 1, 1, 1, 1, 1, // x1 + 2, 2, 2, 2, 2, 2, 2, 2, 2, // x2 + 3, // x3 + 4, 4, 4, 4, // x4 + 6, 6, 6, // x6 + 8, // x8 + 9, 9, // x9 + 12, // x12 + } + + // Seed random number generator + localRand := rand.New(rand.NewSource(time.Now().UnixNano())) + + // Precompute cumulative distribution + totalWeight := 0 + for _, w := range weights { + totalWeight += w + } + cumulative := make([]int, len(weights)) + sum := 0 + for i, w := range weights { + sum += w + cumulative[i] = sum + } + + // Function to draw a random tile based on weights + drawTile := func() rune { + r := localRand.Intn(totalWeight) + 1 + for i, c := range cumulative { + if r <= c { + return tiles[i] + } + } + return tiles[len(tiles)-1] // fallback + } + + // Draw NUM_LETTERS random tiles + result := make([]rune, NUM_LETTERS) + for i := 0; i < NUM_LETTERS; i++ { + result[i] = drawTile() + } + + // Get the date + loc, err := time.LoadLocation("America/Chicago") + if err != nil { + log.Fatalf("Error loading location: %v", err) + } + utcNow := time.Now().UTC() + today := utcNow.In(loc).Format("2006-01-02") + + // Format output + output := []schema.Letters{{ + Date: today, + Letters: string(result), + }} + + log.Print("Generated letters!") + + // Write letters to output file + utils.WriteJSON(fmt.Sprintf("%s/letters.json", outDir), output) +} diff 
--git a/go.mod b/go.mod index 0972c04..849e98d 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.23.0 require ( github.com/PuerkitoBio/goquery v1.8.1 - github.com/UTDNebula/nebula-api/api v0.0.0-20250509234134-d7ea14f304f8 + github.com/UTDNebula/nebula-api/api v0.0.0-20250811163350-91640280991d github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8 github.com/chromedp/chromedp v0.12.1 github.com/google/go-cmp v0.7.0 diff --git a/main.go b/main.go index 5df58b7..1a87dfb 100644 --- a/main.go +++ b/main.go @@ -7,6 +7,7 @@ import ( "os" "time" + "github.com/UTDNebula/api-tools/generators" "github.com/UTDNebula/api-tools/parser" "github.com/UTDNebula/api-tools/scrapers" "github.com/UTDNebula/api-tools/uploader" @@ -51,6 +52,10 @@ func main() { csvDir := flag.String("csv", "./grade-data", "Alongside -parse, specifies the path to the directory of CSV files containing grade data.") skipValidation := flag.Bool("skipv", false, "Alongside -parse, signifies that the post-parsing validation should be skipped. 
Be careful with this!") + // Flags for generating + generate := flag.Bool("generate", false, "Puts the tool into generate mode.") + letters := flag.Bool("letters", false, "Alongside -generate or -upload, signifies that letters for games should be generated/uploaded.") + // Flags for uploading data upload := flag.Bool("upload", false, "Puts the tool into upload mode.") replace := flag.Bool("replace", false, "Alongside -upload, specifies that uploaded data should replace existing data rather than being merged.") @@ -127,12 +132,19 @@ func main() { default: parser.Parse(*inDir, *outDir, *csvDir, *skipValidation) } + case *generate: + switch { + case *letters: + generators.GenerateLetters(*outDir) + } case *upload: switch { case *events: uploader.UploadEvents(*inDir) case *mapFlag: uploader.UploadMapLocations(*inDir) + case *letters: + uploader.UploadLetters(*inDir, *replace) default: uploader.Upload(*inDir, *replace, *staticOnly) } diff --git a/runners/daily.sh b/runners/daily.sh index 0a4c30a..201a567 100644 --- a/runners/daily.sh +++ b/runners/daily.sh @@ -8,3 +8,7 @@ ./api-tools -headless -verbose -scrape -astra ./api-tools -headless -verbose -parse -astra ./api-tools -headless -verbose -upload -events + +# generate and upload letters +./api-tools -headless -verbose -generate -letters +./api-tools -headless -verbose -upload -letters diff --git a/uploader/gamesUploader.go b/uploader/gamesUploader.go new file mode 100644 index 0000000..7837b57 --- /dev/null +++ b/uploader/gamesUploader.go @@ -0,0 +1,84 @@ +/* + This file is responsible for handling uploading of game data to MongoDB. +*/ + +package uploader + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strings" + "time" + + "github.com/UTDNebula/nebula-api/api/schema" + "github.com/joho/godotenv" + "go.mongodb.org/mongo-driver/bson" +) + +// Note that this uploader assumes that the collection names match the names of these files, which they should. 
+// If the names of these collections ever change, the file names should be updated accordingly. + +var lettersFile string = "letters.json" + +func UploadLetters(inDir string, replace bool) { + + //Load env vars + if err := godotenv.Load(); err != nil { + log.Panic("Error loading .env file") + } + + //Connect to mongo + client := connectDB() + + // Get 5 minute context + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + // Open data file for reading + fptr, err := os.Open(fmt.Sprintf("%s/"+lettersFile, inDir)) + if err != nil { + if os.IsNotExist(err) { + log.Printf("File not found. Skipping %s", lettersFile) + return + } + log.Panic(err) + } + defer fptr.Close() + + if replace { + UploadData[schema.Letters](client, ctx, fptr, false) + } else { + // Get date to upload + var docs []schema.Letters + decoder := json.NewDecoder(fptr) + err := decoder.Decode(&docs) + if err != nil { + log.Panic(err) + } + if len(docs) != 1 { + log.Println("0 or 2+ entries found in JSON, skipping upload.") + return + } + today := docs[0].Date + + // Check if date already exists + fileName := fptr.Name()[strings.LastIndex(fptr.Name(), "/")+1 : len(fptr.Name())-5] + collection := getCollection(client, fileName) + filter := bson.M{"date": today} + count, err := collection.CountDocuments(ctx, filter) + if err != nil { + log.Panicf("Error checking for existing puzzle: %v", err) + } + if count > 0 { + log.Printf("Puzzle for %s already exists. 
Skipping upload.", today) + return + } + + fptr.Seek(0, io.SeekStart) + UploadData[schema.Letters](client, ctx, fptr, false) + } +} diff --git a/uploader/pipelines/merge_stage_generator.go b/uploader/pipelines/merge_stage_generator.go new file mode 100644 index 0000000..4e78893 --- /dev/null +++ b/uploader/pipelines/merge_stage_generator.go @@ -0,0 +1,21 @@ +package pipelines + +import ( + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" +) + +// Pipeline for merging temp collection into existing collection +func MergeStageGenerator(fileName string, matchFilters []string) mongo.Pipeline { + return mongo.Pipeline{ + bson.D{{Key: "$unset", Value: "_id"}}, + bson.D{ + {Key: "$merge", Value: bson.D{ + {Key: "into", Value: fileName}, + {Key: "on", Value: matchFilters}, + {Key: "whenMatched", Value: "replace"}, + {Key: "whenNotMatched", Value: "insert"}, + }}, + }, + } +} diff --git a/uploader/uploader.go b/uploader/uploader.go index 85dac86..ac2e166 100644 --- a/uploader/uploader.go +++ b/uploader/uploader.go @@ -147,68 +147,71 @@ func UploadData[T any](client *mongo.Client, ctx context.Context, fptr *os.File, } } else { - log.Panicf("Uploading without the -replace flag is not currently supported.") - /* - // If a temp collection already exists, drop it - tempCollection := getCollection(client, "temp") - err = tempCollection.Drop(ctx) - if err != nil { - log.Panic(err) - } + if fileName != "letters" { + log.Panicf("Uploading without the -replace flag is not currently supported for anything but letters.") + } - // Create a temporary collection - err := client.Database("combinedDB").CreateCollection(ctx, "temp") - if err != nil { - log.Panic(err) - } + // If a temp collection already exists, drop it + tempCollection := getCollection(client, "temp") + err = tempCollection.Drop(ctx) + if err != nil { + log.Panic(err) + } - // Get the temporary collection - tempCollection = getCollection(client, "temp") + // Create a temporary collection + err := 
client.Database("combinedDB").CreateCollection(ctx, "temp") + if err != nil { + log.Panic(err) + } - // Convert your documents to []interface{} - docsInterface := make([]interface{}, len(docs)) - for i := range docs { - docsInterface[i] = docs[i] - } + // Get the temporary collection + tempCollection = getCollection(client, "temp") - // Add all documents decoded from the file into the temporary collection - opts := options.InsertMany().SetOrdered(false) - _, err = tempCollection.InsertMany(ctx, docsInterface, opts) - if err != nil { - log.Panic(err) - } + // Convert your documents to []interface{} + docsInterface := make([]interface{}, len(docs)) + for i := range docs { + docsInterface[i] = docs[i] + } - // Create a merge aggregate pipeline - // Matched documents from the temporary collection will replace matched documents from the Mongo collection - // Unmatched documents from the temporary collection will be inserted into the Mongo collection - var matchFilters []string - switch fileName { - case "courses": - matchFilters = []string{"catalog_year", "course_number", "subject_prefix"} - case "professors": - matchFilters = []string{"first_name", "last_name"} - case "sections": - matchFilters = []string{"section_number", "course_reference", "academic_session"} - default: - log.Panic("Unrecognizable filename: " + fileName) - } + // Add all documents decoded from the file into the temporary collection + opts := options.InsertMany().SetOrdered(false) + _, err = tempCollection.InsertMany(ctx, docsInterface, opts) + if err != nil { + log.Panic(err) + } - // The documents will be added/merged into the collection with the same name as the file - // The filters for the merge aggregate pipeline are based on the file name - mergeStage := bson.D{primitive.E{Key: "$merge", Value: bson.D{primitive.E{Key: "into", Value: fileName}, primitive.E{Key: "on", Value: matchFilters}, primitive.E{Key: "whenMatched", Value: "replace"}, primitive.E{Key: "whenNotMatched", Value: "insert"}}}} + 
// Create a merge aggregate pipeline + // Matched documents from the temporary collection will replace matched documents from the Mongo collection (besides _id field) + // Unmatched documents from the temporary collection will be inserted into the Mongo collection + var matchFilters []string + switch fileName { + case "courses": + matchFilters = []string{"catalog_year", "course_number", "subject_prefix"} + case "professors": + matchFilters = []string{"first_name", "last_name"} + case "sections": + matchFilters = []string{"section_number", "course_reference", "academic_session"} + case "letters": + matchFilters = []string{"date"} + default: + log.Panic("Unrecognizable filename: " + fileName) + } - // Execute aggregate pipeline - _, err = tempCollection.Aggregate(ctx, mongo.Pipeline{mergeStage}) - if err != nil { - log.Panic(err) - } + // The documents will be added/merged into the collection with the same name as the file + // The filters for the merge aggregate pipeline are based on the file name + mergeStage := pipelines.MergeStageGenerator(fileName, matchFilters) - // Drop the temporary collection - err = tempCollection.Drop(ctx) - if err != nil { - log.Panic(err) - } - */ + // Execute aggregate pipeline + _, err = tempCollection.Aggregate(ctx, mergeStage) + if err != nil { + log.Panic(err) + } + + // Drop the temporary collection + err = tempCollection.Drop(ctx) + if err != nil { + log.Panic(err) + } } log.Println("Done uploading " + fileName + ".json!")