diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a31c3df --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode +main \ No newline at end of file diff --git a/ASSIGNMENT.md b/ASSIGNMENT.md new file mode 100644 index 0000000..6df56a5 --- /dev/null +++ b/ASSIGNMENT.md @@ -0,0 +1,52 @@ +#Degrees of Separation + +With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. + +Write a Go program that behaves the following way: + +``` +$ degrees amitabh-bachchan robert-de-niro + +Degrees of Separation: 3 + +1. Movie: The Great Gatsby +Supporting Actor: Amitabh Bachchan +Actor: Leonardo DiCaprio + +2. Movie: The Wolf of Wall Street +Actor: Leonardo DiCaprio +Director: Martin Scorsese + +3. Movie: Taxi Driver +Director: Martin Scorsese +Actor: Robert De Niro +``` + +Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. +All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. + +Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` + +To solve the example above, your solution would fetch at least the following: + +http://data.moviebuff.com/amitabh-bachchan + +http://data.moviebuff.com/the-great-gatsby + +http://data.moviebuff.com/leonardo-dicaprio + +http://data.moviebuff.com/the-wolf-of-wall-street + +http://data.moviebuff.com/martin-scorsese + +http://data.moviebuff.com/taxi-driver + +##Notes +* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. 
Luckily, Go has some very elegant idioms for rate limiting :) +* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. + +Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc. + +To submit a solution, fork this repo and send a Pull Request on Github. + +For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5d44b4b --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +.PHONY: build run dev + +build: + go build -o ./cmd/main ./cmd + +run: + go run ./cmd/main.go + +running: + CompileDaemon -build="go build -o ./cmd/main ./cmd" -command=./cmd/main \ No newline at end of file diff --git a/README.md b/README.md index 6df56a5..f026848 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,133 @@ -#Degrees of Separation +# ๐ŸŽฌ Degrees of Separation - Movie Industry Connections -With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. +This Go application finds the degrees of separation between two people in the movie industry using data from Moviebuff. It implements an efficient graph traversal algorithm with concurrent data fetching to determine the shortest path between two industry professionals through their movie collaborations. 
-Write a Go program that behaves the following way: +## ✨ Features +- 🚀 **Concurrent Data Fetching**: Efficiently fetches data from external APIs using goroutines +- 💾 **In-Memory Caching**: Implements a thread-safe caching mechanism for person and movie data +- 🛡️ **Rate Limiting**: Protects against API throttling with built-in rate limiting +- 📊 **Performance Monitoring**: Includes pprof endpoints for runtime analysis +- ⚡ **Graceful Error Handling**: Robust error handling for API failures and invalid inputs +- ⚙️ **Configuration via Environment Variables**: Flexible configuration through environment variables +- 🔄 **Resource Management**: Proper channel and goroutine lifecycle management + +## 🏗️ Architecture + +### 🌐 Data Fetching +- Uses worker pools for concurrent data fetching from external APIs +- Implements separate workers for person and movie data +- Controlled concurrency with predefined worker counts + +### 📦 Caching +- Thread-safe in-memory cache using maps +- Implements `sync.RWMutex` for concurrent read/write operations +- Caches both person and movie data after fetching + +### 📈 Performance & Monitoring +- Pprof endpoints for runtime profiling and debugging +- Periodic logging of goroutine statistics +- Rate limiting to prevent API throttling + +### 🛠️ Error Handling & Resource Management +- Context-based cancellation for cleanup +- Proper channel closing mechanisms +- Graceful error handling for API failures +- Existence validation (of target person) to prevent long unnecessary searches + +## 🔌 API Endpoints + +### GET /seperation +Query Parameters: +- `from`: Moviebuff URL of the first person +- `to`: Moviebuff URL of the second person + +Example: +``` +GET /seperation?from=amitabh-bachchan&to=robert-de-niro ``` -$ degrees amitabh-bachchan robert-de-niro -Degrees of Separation: 3 +Response: +```json +{ + "seperation": 3 +} +``` -1.
Movie: The Great Gatsby -Supporting Actor: Amitabh Bachchan -Actor: Leonardo DiCaprio +## ⚙️ Configuration -2. Movie: The Wolf of Wall Street -Actor: Leonardo DiCaprio -Director: Martin Scorsese +The application can be configured using the following environment variables: -3. Movie: Taxi Driver -Director: Martin Scorsese -Actor: Robert De Niro +- `PORT`: Server port (default: 3001) +- `PPROF_PORT`: Port for pprof endpoints +- `LOG_LEVEL`: Logging level (debug/info) +- `RATE_LIMIT`: API rate limit per minute +- `PERSON_DATA_FETCH_WORKERS_COUNT`: Number of concurrent person data fetchers +- `MOVIE_DATA_FETCH_WORKERS_COUNT`: Number of concurrent movie data fetchers + +## 📥 Getting Started + +1. Clone the repository: +```bash +git clone https://github.com/AbdulRahimOM/challenge2015.git +cd challenge2015 ``` -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. -All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. +2. Install dependencies: +```bash +go mod download +``` -Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` +3. Copy the environment file and configure: +```bash +cp no-secrets.env .env +# Note: the application currently loads no-secrets.env directly, so edit that file with your preferred settings (or keep it as it is to run in default settings) +``` -To solve the example above, your solution would fetch at least the following: +## 🚀 Running the Application -http://data.moviebuff.com/amitabh-bachchan +1. Set up environment variables (optional) +2. Run the application: +```bash +go run cmd/main.go +``` -http://data.moviebuff.com/the-great-gatsby +## 💪 Performance Considerations -http://data.moviebuff.com/leonardo-dicaprio +1.
**Concurrent Data Fetching** + - 🔄 Optimized worker pools for API requests + - 👥 Separate workers for person and movie data -http://data.moviebuff.com/the-wolf-of-wall-street +2. **Caching** + - 📦 In-memory caching reduces API calls + - 🔒 Thread-safe read/write operations -http://data.moviebuff.com/martin-scorsese +3. **Resource Management** + - ⚡ Context-based cancellation + - 🧹 Proper cleanup of resources + - 🛡️ Rate limiting to prevent throttling + +## 📊 Monitoring + +### 🔍 Pprof Endpoints +Access pprof endpoints at: +``` +http://localhost:{PPROF_PORT}/debug/pprof/ +``` -http://data.moviebuff.com/taxi-driver +Available profiles: +- 🧵 Goroutine +- 💾 Heap +- 🔄 Thread +- 🚫 Block +- 📈 CPU profile -##Notes -* If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) -* There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. +## 🔮 Future Improvements +- ⏰ Add cache expiration mechanism (Relevant, as new movies and persons are added) +- 🔗 Show connection chain along with degree of separation -Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your langauage without bringing in MySQL/Postgres/MongoDB/Redis/Etc. +## 📚 Dependencies -To submit a solution, fork this repo and send a Pull Request on Github.
+- ๐Ÿš€ [Fiber](github.com/gofiber/fiber/v2) - Web framework +- ๐Ÿ“ฆ Standard Go libraries for concurrency and HTTP operations -For any questions or clarifications, raise an issue on this repo and we'll answer your questions as fast as we can. diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..e6a3446 --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,56 @@ +package main + +import ( + "test/internal/config" + "test/internal/tracer" + "time" + + "net/http" + _ "net/http/pprof" + + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/log" + "github.com/gofiber/fiber/v2/middleware/limiter" +) + +func main() { + setLogLevel() + + app := fiber.New() + app.Use(limiter.New(limiter.Config{ + Max: config.RateLimit, + Expiration: 1 * time.Minute, + })) + + app.Get("/seperation", func(c *fiber.Ctx) error { + from := c.Query("from") + to := c.Query("to") + if from == "" || to == "" { + return c.Status(fiber.StatusBadRequest).JSON(map[string]string{"error": "from and to query params are required"}) + } + + seperation, err := tracer.FindSeperation(from, to) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(map[string]string{"error": err.Error()}) + } + + return c.Status(fiber.StatusOK).JSON(map[string]interface{}{ + "seperation": seperation, + }) + }) + go func() { + log.Fatal(http.ListenAndServe(":"+config.PprofPort, nil)) //for pprof, as fiber doesn't use net/http. 
+ }() + log.Fatal(app.Listen(":" + config.Port)) +} + +func setLogLevel() { + switch config.LogLevel { + case "debug", "DEBUG": + log.SetLevel(log.LevelDebug) + case "info", "INFO": + log.SetLevel(log.LevelInfo) + default: + log.SetLevel(log.LevelInfo) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ceaa784 --- /dev/null +++ b/go.mod @@ -0,0 +1,24 @@ +module test + +go 1.23.2 + +require ( + github.com/gofiber/fiber/v2 v2.52.6 + github.com/joho/godotenv v1.5.1 +) + +require ( + github.com/andybalholm/brotli v1.1.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c // indirect + github.com/rivo/uniseg v0.2.0 // indirect + github.com/tinylib/msgp v1.2.5 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + github.com/valyala/tcplisten v1.0.0 // indirect + golang.org/x/sys v0.28.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..a1ff890 --- /dev/null +++ b/go.sum @@ -0,0 +1,33 @@ +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/gofiber/fiber/v2 v2.52.6 h1:Rfp+ILPiYSvvVuIPvxrBns+HJp8qGLDnLJawAu27XVI= +github.com/gofiber/fiber/v2 v2.52.6/go.mod h1:YEcBbO/FB+5M1IZNBP9FO3J9281zgPAreiI1oqg8nDw= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= 
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c h1:dAMKvw0MlJT1GshSTtih8C2gDs04w8dReiOGXrGLNoY= +github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/tinylib/msgp v1.2.5 h1:WeQg1whrXRFiZusidTQqzETkRpGjFjcIhW6uqWH09po= +github.com/tinylib/msgp v1.2.5/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= +github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8= +github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..6d4ecb2 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,81 @@ +package config + +import ( + "fmt" + "strconv" + + "os" + + "github.com/gofiber/fiber/v2/log" + "github.com/joho/godotenv" +) + +const envPath = "no-secrets.env" + +var ( + Port string // PORT + PprofPort string // PPROF_PORT + RateLimit int // RATE_LIMIT + LogLevel string // LOG_LEVEL + PersonDataFetchWorkersCount int // PERSON_DATA_FETCH_WORKERS_COUNT + MovieDataFetchWorkersCount int // MOVIE_DATA_FETCH_WORKERS_COUNT + LogGoroutineCount bool // LOG_GOROUTINE_COUNT +) + +func init() { + LoadEnv() +} + +func LoadEnv() { + fmt.Println("Loading .env file...") + err := godotenv.Load(envPath) + if err != nil { + log.Fatal("Error loading .env file. err", err) + } + + Port = getEnvString("PORT", "3001") + PprofPort = getEnvString("PPROF_PORT", "6060") + LogLevel = getEnvString("LOG_LEVEL", "INFO") + RateLimit = getEnvInt("RATE_LIMIT", 10) + PersonDataFetchWorkersCount = getEnvInt("PERSON_DATA_FETCH_WORKERS_COUNT", 10) + MovieDataFetchWorkersCount = getEnvInt("MOVIE_DATA_FETCH_WORKERS_COUNT", 10) + LogGoroutineCount = getEnvBool("LOG_GOROUTINE_COUNT", false) + fmt.Println("Load .env file completed") +} + +func getEnvString(key, defaultValue string) string { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. Using default value for ", key) + return defaultValue + } + return str +} + +func getEnvInt(key string, defaultValue int) int { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. 
Using default value for ", key) + return defaultValue + } + value, err := strconv.Atoi(str) + if err != nil { + log.Info("Error parsing environment variable. Using default value for ", key) + return defaultValue + } + return value +} + +func getEnvBool(key string, defaultValue bool) bool { + str := os.Getenv(key) + if str == "" { + log.Info("Environment variable not found. Using default value for ", key) + return defaultValue + } + value, err := strconv.ParseBool(str) + if err != nil { + log.Info("Error parsing environment variable. Using default value for ", key) + return defaultValue + } + return value +} \ No newline at end of file diff --git a/internal/data/cache.go b/internal/data/cache.go new file mode 100644 index 0000000..ca05d42 --- /dev/null +++ b/internal/data/cache.go @@ -0,0 +1,53 @@ +package data + +import ( + "sync" +) + +type cache struct { + PersonMutex sync.RWMutex + MovieMutex sync.RWMutex + Persons map[string]*Person + Movies map[string]*Movie +} + +var CachedData cache + +func init() { + CachedData.Persons = make(map[string]*Person) + CachedData.Movies = make(map[string]*Movie) +} + +func (c *cache) GetCachedPerson(personURL string) *Person { + c.PersonMutex.RLock() + defer c.PersonMutex.RUnlock() + return c.Persons[personURL] +} + +func (c *cache) GetCachedMovie(movieURL string) *Movie { + c.MovieMutex.RLock() + defer c.MovieMutex.RUnlock() + return c.Movies[movieURL] +} + +func (c *cache) CachePerson(personURL string, person *Person) { + if person == nil { + return + } + c.PersonMutex.Lock() + defer c.PersonMutex.Unlock() + if _, ok := c.Persons[personURL]; ok { + c.Persons[personURL] = person + } +} + +func (c *cache) CacheMovie(movieURL string, movie *Movie) { + if movie == nil { + return + } + c.MovieMutex.Lock() + defer c.MovieMutex.Unlock() + if _, ok := c.Movies[movieURL]; ok { + c.Movies[movieURL] = movie + } +} diff --git a/internal/data/check.go b/internal/data/check.go new file mode 100644 index 0000000..e3fad0a --- /dev/null +++ 
b/internal/data/check.go @@ -0,0 +1,28 @@ +package data + +import ( + "fmt" + "net/http" + _ "test/internal/status" //to use GetStatistics function +) + +func CheckPersonExistence(personURL string) (bool, error) { + if CachedData.GetCachedPerson(personURL) != nil { + return true, nil + } + + // If not in cache, check if it exists in external API + url := fmt.Sprintf("http://data.moviebuff.com/%s", personURL) + + resp, err := http.Head(url) // Use HEAD instead of GET to check existence + if err != nil { + return false, fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + return true, nil + } else { + return false, nil + } +} diff --git a/internal/data/external_fetch.go b/internal/data/external_fetch.go new file mode 100644 index 0000000..1a4c03c --- /dev/null +++ b/internal/data/external_fetch.go @@ -0,0 +1,62 @@ +package data + +import ( + "encoding/json" + "fmt" + "io" + "net/http" +) + +func FetchMovieDataFromExternalAPI(movieURL string) (*Movie, error) { + url := fmt.Sprintf("http://data.moviebuff.com/%s", movieURL) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("received non-200 response: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %v", err) + } + + var movie Movie + err = json.Unmarshal(body, &movie) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON: %v", err) + } + + return &movie, nil +} + +func FetchPersonDataFromExternalAPI(personURL string) (*Person, error) { + url := fmt.Sprintf("http://data.moviebuff.com/%s", personURL) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch data: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, 
fmt.Errorf("received non-200 response: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %v", err) + } + + var person Person + err = json.Unmarshal(body, &person) + if err != nil { + return nil, fmt.Errorf("failed to parse JSON: %v", err) + } + + return &person, nil +} diff --git a/internal/data/type.go b/internal/data/type.go new file mode 100644 index 0000000..d218149 --- /dev/null +++ b/internal/data/type.go @@ -0,0 +1,28 @@ +package data + +type Person struct { + // Name string `json:"name"` + // URL string `json:"url"` + // Type string `json:"type"` + MovieRoles []MovieRole `json:"movies"` +} + +type MovieRole struct { + // Name string `json:"name"` + URL string `json:"url"` + // Role string `json:"role"` +} + +type Movie struct { + // Name string `json:"name"` + // URL string `json:"url"` + // Type string `json:"type"` + Cast []CastCrew `json:"cast"` + Crew []CastCrew `json:"crew"` +} + +type CastCrew struct { + // Name string `json:"name"` + URL string `json:"url"` + // Role string `json:"role"` +} diff --git a/internal/status/statistics.go b/internal/status/statistics.go new file mode 100644 index 0000000..400ec4e --- /dev/null +++ b/internal/status/statistics.go @@ -0,0 +1,37 @@ +package status + +import ( + "fmt" + "runtime" + "test/internal/config" + "time" +) + +func init() { + if config.LogGoroutineCount { + go func() { + for { + time.Sleep(3 * time.Second) + GetStatistics() + } + }() + } +} + +func GetStatistics() { + + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + + fmt.Println("=====================================") + fmt.Println("Number of Goroutines:", runtime.NumGoroutine(), "@", time.Now().Format("15:04:05")) + // fmt.Println("Heap Allocation (bytes):", memStats.HeapAlloc) + // fmt.Println("Stack System (bytes):", memStats.StackSys) + // fmt.Println("Stack In Use (bytes):", memStats.StackInuse) + + // buf := make([]byte, 10*1024) + // n 
:= runtime.Stack(buf, true) + // fmt.Println("result:", string(buf[:n])) + + // fmt.Println("=====================================") +} diff --git a/internal/tracer/execute.go b/internal/tracer/execute.go new file mode 100644 index 0000000..9249f87 --- /dev/null +++ b/internal/tracer/execute.go @@ -0,0 +1,186 @@ +package tracer + +import ( + "context" + "fmt" + "sync" + "test/internal/data" +) + +func FindSeperation(p1URL string, targetPerson string) (int, error) { + + if p1URL == targetPerson { + return 0, nil + } + + //check if target person exists to avoid infinite search + exists, err := data.CheckPersonExistence(targetPerson) + if err != nil { + return 0, fmt.Errorf("error checking target person existence: %v", err) + } + if !exists { + return 0, fmt.Errorf("target person not found") + } + + var ( + personURLQueue = []string{p1URL} + visitedPersons = make(map[string]bool) + visitedMovies = make(map[string]bool) + ) + + for seperation := 2; len(personURLQueue) > 0; seperation++ { + found, newPersonURLQueue := findTargetOrNextPersonList(personURLQueue, targetPerson, visitedPersons, visitedMovies) + if found { + return seperation, nil + } + personURLQueue = newPersonURLQueue + } + + return -1, fmt.Errorf("seperation not found") + +} + +func findTargetOrNextPersonList(personURLQueue []string, targetPerson string, visitedPersons map[string]bool, visitedMovies map[string]bool) (bool, []string) { + var ( + personChan = make(chan *data.Person, 10) + movieChan = make(chan *data.Movie, 10) + movieUrlChan = make(chan string, 100) + ctx = context.Background() + ) + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + go startFetchingPersons(personURLQueue, personChan, ctx) + for _, personURL := range personURLQueue { + visitedPersons[personURL] = true + } + + //receive person data and send movie urls + go func() { + defer close(movieUrlChan) + for { + select { + case personData, ok := <-personChan: + if !ok { + return + } + for _, movieRole := range 
personData.MovieRoles { + if _, isVisited := visitedMovies[movieRole.URL]; isVisited { + continue + } else { + visitedMovies[movieRole.URL] = true + + select { + case movieUrlChan <- movieRole.URL: + case <-ctx.Done(): + return + } + } + } + case <-ctx.Done(): + return + } + } + }() + + //call startMovieFetching to fetch movie data and send person urls of cast and crew + go startMovieFetching(movieUrlChan, movieChan, ctx) + + newPersonURLQueue := []string{} + + //add unvisited persons to new queue + for movieData := range movieChan { + for _, cast := range movieData.Cast { + if cast.URL == targetPerson { + return true, nil + } + if _, isVisited := visitedPersons[cast.URL]; isVisited { + continue + } + newPersonURLQueue = append(newPersonURLQueue, cast.URL) + } + + for _, crew := range movieData.Crew { + if _, isVisited := visitedPersons[crew.URL]; isVisited { + continue + } + if crew.URL == targetPerson { + return true, nil + } + newPersonURLQueue = append(newPersonURLQueue, crew.URL) + } + } + + //reset personURLQueue to newPersonURLQueue + return false, newPersonURLQueue +} + +func startFetchingPersons(personURLs []string, personChan chan *data.Person, ctx context.Context) { + + wg := sync.WaitGroup{} + defer close(personChan) + defer wg.Wait() + for _, personURL := range personURLs { + if person := data.CachedData.GetCachedPerson(personURL); person != nil { + select { + case personChan <- person: + continue + case <-ctx.Done(): + return + } + } + + wg.Add(1) + select { + case <-ctx.Done(): + return + default: + externalFetcher.PersonRequestChan <- personRequest{ + personURL: personURL, + replyChan: personChan, + ctx: ctx, + wg: &wg, + } + } + } +} + +// initiateMovieFetcher +func startMovieFetching(movieURLsChan chan string, movieChan chan *data.Movie, ctx context.Context) { + + wg := sync.WaitGroup{} + defer close(movieChan) + defer wg.Wait() + + for { + select { + case movieURL, ok := <-movieURLsChan: + if !ok { + return + } + if movie := 
data.CachedData.GetCachedMovie(movieURL); movie != nil { + select { + case movieChan <- movie: + continue + case <-ctx.Done(): + return + } + } + wg.Add(1) + select { + case <-ctx.Done(): + return + default: + externalFetcher.MovieRequestChan <- movieRequest{ + movieURL: movieURL, + replyChan: movieChan, + ctx: ctx, + wg: &wg, + } + } + case <-ctx.Done(): + return + } + } +} diff --git a/internal/tracer/workers.go b/internal/tracer/workers.go new file mode 100644 index 0000000..7fb0263 --- /dev/null +++ b/internal/tracer/workers.go @@ -0,0 +1,118 @@ +package tracer + +import ( + "context" + "sync" + "test/internal/config" + "test/internal/data" + + "github.com/gofiber/fiber/v2/log" +) + +var ( + fetchPersonWorkersCount = config.PersonDataFetchWorkersCount + fetchMovieWorkersCount = config.MovieDataFetchWorkersCount +) + +type personRequest struct { + personURL string + replyChan chan *data.Person + ctx context.Context + wg *sync.WaitGroup +} + +type movieRequest struct { + movieURL string + replyChan chan *data.Movie + ctx context.Context + wg *sync.WaitGroup +} + +var externalFetcher = struct { + PersonRequestChan chan personRequest + MovieRequestChan chan movieRequest +}{ + PersonRequestChan: make(chan personRequest, 3*fetchPersonWorkersCount), + MovieRequestChan: make(chan movieRequest, 3*fetchMovieWorkersCount), +} + +// initiate workers +func init() { + for range fetchPersonWorkersCount { + go fetchPersonWorker(externalFetcher.PersonRequestChan) + } + for range fetchMovieWorkersCount { + go fetchMovieWorker(externalFetcher.MovieRequestChan) + } +} + +// fetchPersonWorkers +func fetchPersonWorker(personRequestChan chan personRequest) { + for req := range personRequestChan { + fetchAndCachePerson(req.personURL, req.wg, req.replyChan, req.ctx) + } +} + +func fetchAndCachePerson(personURL string, wg *sync.WaitGroup, replyChan chan *data.Person, ctx context.Context) { + defer wg.Done() + + // Again recheck if the person is cached (while the URL was in queue) + if 
person := data.CachedData.GetCachedPerson(personURL); person != nil { + select { + case replyChan <- person: + return + case <-ctx.Done(): + return + } + } + person, err := data.FetchPersonDataFromExternalAPI(personURL) + if err != nil { + log.Debugf("failed to fetch person data: %v\n", err) + return + } + + data.CachedData.CachePerson(personURL, person) + + select { + case replyChan <- person: + return + case <-ctx.Done(): + return + } +} + +// fetchMovieWorkers +func fetchMovieWorker(requestChan chan movieRequest) { + for req := range requestChan { + fetchAndCacheMovie(req.movieURL, req.wg, req.replyChan, req.ctx) + } +} + +func fetchAndCacheMovie(movieURL string, wg *sync.WaitGroup, replyChan chan *data.Movie, ctx context.Context) { + defer wg.Done() + + //Again recheck if the movie is cached (while the URL was in queue) + if movie := data.CachedData.GetCachedMovie(movieURL); movie != nil { + select { + case replyChan <- movie: + return + case <-ctx.Done(): + return + } + } + + movie, err := data.FetchMovieDataFromExternalAPI(movieURL) + if err != nil { + log.Debugf("failed to fetch movie data: %v\n", err) + return + } + + data.CachedData.CacheMovie(movieURL, movie) + + select { + case replyChan <- movie: + return + case <-ctx.Done(): + return + } + } diff --git a/no-secrets.env b/no-secrets.env new file mode 100644 index 0000000..c55c2f1 --- /dev/null +++ b/no-secrets.env @@ -0,0 +1,8 @@ +# Public, because no secrets here +PORT=3001 # Or any other port you want to run. Default: 3001 +PPROF_PORT=3002 # Port for pprof. Default: 6060 +RATE_LIMIT=10 # Number of requests per minute. Default: 10 +PERSON_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch person data. Default:10 +MOVIE_DATA_FETCH_WORKERS_COUNT=100 # Number of workers to fetch movie data. Default:10 +LOG_LEVEL=INFO # Log level, can be DEBUG, INFO (other levels not used). Default: 'INFO' +LOG_GOROUTINE_COUNT="false" # Log number of go-routines periodically. 
Default: False \ No newline at end of file