diff --git a/README.md b/README.md index 6df56a5..82c299f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ +How to run: + +1. Git clone the repo +2. cd .\cmd\app\ +3. go run . amitabh-bachchan robert-de-niro + #Degrees of Separation -With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. +With cinema going global these days, every one of the [A-Z]ollywoods are now connected. Use the wealth of data available at [Moviebuff](http://www.moviebuff.com) to see how. Write a Go program that behaves the following way: @@ -22,8 +28,13 @@ Director: Martin Scorsese Actor: Robert De Niro ``` -Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. -All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. +<<<<<<< HEAD +Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. +======= +Your solution should use the Moviebuff data available to figure out the smallest degree of separation between the two people. + +> > > > > > > 647cfff52627c33b293e43129798f20f057b00de +> > > > > > > All the inputs should be Moviebuff URLs for their respective people: For Amitabh Bachchan, his page is on http://www.moviebuff.com/amitabh-bachchan and his Moviebuff URL is `amitabh-bachchan`. Please do not attempt to scrape the Moviebuff website - All the data is available on an S3 bucket in an easy to parse JSON format here: `https://data.moviebuff.com/{moviebuff_url}` @@ -42,8 +53,14 @@ http://data.moviebuff.com/martin-scorsese http://data.moviebuff.com/taxi-driver ##Notes +<<<<<<< HEAD + +- If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. 
Luckily, Go has some very elegant idioms for rate limiting :) +- There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. + * If you receive HTTP errors when trying to fetch the data, that might be the CDN throttling you. Luckily, Go has some very elegant idioms for rate limiting :) * There may be a discrepancy in some cases where a movie appears on an actor's list but not vice versa. This usually happens when we edit data while exporting it, so feel free to either ignore these mismatches or handle them in some way. + > > > > > > > 647cfff52627c33b293e43129798f20f057b00de Write a program in any language you want (If you're here from Gophercon, use Go :D) that does this. Feel free to make your own input and output format / command line tool / GUI / Webservice / whatever you want. Feel free to hold the dataset in whatever structure you want, but try not to use external databases - as far as possible stick to your language without bringing in MySQL/Postgres/MongoDB/Redis/Etc. 
diff --git a/cmd/app/helpers.go b/cmd/app/helpers.go new file mode 100644 index 0000000..fed7529 --- /dev/null +++ b/cmd/app/helpers.go @@ -0,0 +1,28 @@ +package main + +import ( + "fmt" + "io" + "net/http" +) + +func (app *GlobalVar) doHTTPRequest(sURL string) ([]byte, error) { + sFunctionName := "doHTTPRequest" + + // Make GET request + response, err := http.Get(sURL) + if err != nil { + fmt.Println(sFunctionName, "Error making the request:", err) + return nil, err + } + defer response.Body.Close() + + // Read the response body + body, err := io.ReadAll(response.Body) + if err != nil { + fmt.Println("Error reading the response body:", err) + return nil, err + } + + return body, nil +} diff --git a/cmd/app/main.go b/cmd/app/main.go new file mode 100644 index 0000000..768fcde --- /dev/null +++ b/cmd/app/main.go @@ -0,0 +1,255 @@ +package main + +import ( + "container/list" + "encoding/json" + "errors" + "fmt" + "os" +) + +var INVALID_VAL int = 100005 +var LOGS_BATCH_SIZE = 1000 + +func main() { + + args := os.Args + + if len(args) < 3 { + fmt.Println("Invalid inputs. Input format eg. 
\"amitabh-bachchan robert-de-niro\"") + return + } + + app := GlobalVar{ + IsNodeVisitedSet: make(map[string]bool), + FirstPersonNode: InfoNodeForQueue{InfoNodeEntry: InfoNode{URL: args[1]}}, + SecondPersonNode: InfoNodeForQueue{InfoNodeEntry: InfoNode{URL: args[2]}}, + } + app.findDegreesOfSeparation() + + app.PrintAns() +} + +type GlobalVar struct { + IsNodeVisitedSet map[string]bool + FirstPersonNode InfoNodeForQueue + SecondPersonNode InfoNodeForQueue +} + +func (app *GlobalVar) findDegreesOfSeparation() error { + sFunctionName := "findDegreesOfSeparation" + + if len(app.FirstPersonNode.InfoNodeEntry.URL) == 0 || len(app.SecondPersonNode.InfoNodeEntry.URL) == 0 { + fmt.Println(sFunctionName, "Please provide valid inputs") + return errors.New("Invalid data provided") + } + + infoNodeQueue := list.New() + + app.addNodeToQueue(infoNodeQueue, InfoNodeForQueue{ + ParentNodeEntry: &InfoNodeForQueue{}, + InfoNodeEntry: InfoNode{ + URL: app.FirstPersonNode.InfoNodeEntry.URL, + }}) + + bIsCurrentPerson := true + + for false == app.checkAnsFound() && infoNodeQueue.Len() > 0 { + + nextInfoNodeQueue := list.New() + + for infoNodeQueue.Len() > 0 && false == app.checkAnsFound() { + removedNode := infoNodeQueue.Front() + infoNodeQueue.Remove(removedNode) + + if nil == removedNode.Value { + fmt.Println(sFunctionName, "Found invalid value during traversing at level") + return errors.New(fmt.Sprintf("Found invalid value during traversing at level")) + } + + castedRemovedNode := removedNode.Value.(InfoNodeForQueue) + + if len(castedRemovedNode.InfoNodeEntry.URL) == 0 { + fmt.Println(sFunctionName, "Found invalid value during traversing at level") + return errors.New(fmt.Sprintf("Found invalid value during traversing at level")) + } + + app.populateNeighbours(castedRemovedNode, nextInfoNodeQueue, bIsCurrentPerson) + + if nextInfoNodeQueue.Len()%LOGS_BATCH_SIZE == 0 { + fmt.Printf("[%d] items added to next queue\n", nextInfoNodeQueue.Len()) + } + } + + if false == 
app.checkAnsFound() { + // current BFS done, use the new infoNodeQueue for next iteration + infoNodeQueue = nextInfoNodeQueue + + // next time we will be iterating opposite + bIsCurrentPerson = !bIsCurrentPerson + } + } + + return nil +} + +func (app *GlobalVar) populateNeighbours(infoNode InfoNodeForQueue, queue *list.List, bIsCurrentPerson bool) error { + sFunctionName := "populateNeighbours" + + if len(infoNode.InfoNodeEntry.URL) == 0 { + fmt.Println(sFunctionName, "Invalid Moviebuff URL provided") + return errors.New("Invalid Moviebuff URL provided") + } + + sFormattedURL := fmt.Sprintf("http://data.moviebuff.com/%s", infoNode.InfoNodeEntry.URL) + + // Make GET request + response, err := app.doHTTPRequest(sFormattedURL) + + if err != nil { + fmt.Println(sFunctionName, "Error making the request:", err) + return err + } + + if bIsCurrentPerson { + app.unmarshalAndPopulatePerson(response, queue, infoNode) + } else { + app.unmarshalAndPopulateMovies(response, queue, infoNode) + } + + return nil +} + +func (app *GlobalVar) unmarshalAndPopulatePerson(rawData []byte, queue *list.List, parentInfoNode InfoNodeForQueue) error { + // sFunctionName := "unmarshalAndPopulatePerson" + + // Unmarshal JSON into struct + var responseData MovieBuffResponseDataForPerson + err := json.Unmarshal(rawData, &responseData) + if err != nil { + // fmt.Println(sFunctionName, "Error unmarshalling JSON:", err) + return err + } + + if parentInfoNode.InfoNodeEntry.URL == app.FirstPersonNode.InfoNodeEntry.URL { + app.FirstPersonNode.InfoNodeEntry = InfoNode{ + URL: responseData.URL, + Name: responseData.Name, + } + + parentInfoNode.InfoNodeEntry = InfoNode{ + URL: responseData.URL, + Name: responseData.Name, + } + } + + for _, node := range responseData.Movies { + toAdd := InfoNodeForQueue{ + ParentNodeEntry: &parentInfoNode, + InfoNodeEntry: node, + } + app.addNodeToQueue(queue, toAdd) + } + + return nil +} + +func (app *GlobalVar) unmarshalAndPopulateMovies(rawData []byte, queue *list.List, 
parentInfoNode InfoNodeForQueue) error { + // sFunctionName := "unmarshalAndPopulatePerson" + + // Unmarshal JSON into struct + var responseData MovieBuffResponseDataForMovies + err := json.Unmarshal(rawData, &responseData) + if err != nil { + // fmt.Println(sFunctionName, "Error unmarshalling JSON:", err) + return err + } + + for _, node := range responseData.Cast { + toAdd := InfoNodeForQueue{ + ParentNodeEntry: &parentInfoNode, + InfoNodeEntry: node, + } + app.addNodeToQueue(queue, toAdd) + } + + for _, node := range responseData.Crew { + toAdd := InfoNodeForQueue{ + ParentNodeEntry: &parentInfoNode, + InfoNodeEntry: node, + } + app.addNodeToQueue(queue, toAdd) + } + + return nil +} + +func (app *GlobalVar) addNodeToQueue(queue *list.List, node InfoNodeForQueue) bool { + + if app.IsNodeVisitedSet[node.InfoNodeEntry.URL] { + return false + } + + app.IsNodeVisitedSet[node.InfoNodeEntry.URL] = true + queue.PushBack(node) + + if node.InfoNodeEntry.URL == app.SecondPersonNode.InfoNodeEntry.URL { + app.SecondPersonNode = node + } + + if queue.Len()%LOGS_BATCH_SIZE == 0 { + fmt.Printf("[%d] items added to queue\n", queue.Len()) + } + + return true +} + +func (app *GlobalVar) checkAnsFound() bool { + return app.IsNodeVisitedSet[app.SecondPersonNode.InfoNodeEntry.URL] +} + +func (app *GlobalVar) PrintAns() { + + if app.SecondPersonNode.ParentNodeEntry == nil { + fmt.Println("Unable to find the answer") + return + } + + type AnsNodes struct { + Movie string + FirstPerson string + SecondPerson string + } + + list := list.New() + + temp := app.SecondPersonNode + + for temp.ParentNodeEntry.InfoNodeEntry.URL != "" { + curAnsNodes := AnsNodes{ + Movie: temp.ParentNodeEntry.InfoNodeEntry.Name, + SecondPerson: temp.InfoNodeEntry.Role + ": " + temp.InfoNodeEntry.Name, + FirstPerson: temp.ParentNodeEntry.InfoNodeEntry.Role + ": " + temp.ParentNodeEntry.ParentNodeEntry.InfoNodeEntry.Name, + } + + temp = *(*temp.ParentNodeEntry).ParentNodeEntry + + list.PushFront(curAnsNodes) + } + 
+ index := 1 + + fmt.Println("Degrees of Separation: ", list.Len()) + + for element := list.Front(); element != nil; element = element.Next() { + node := element.Value.(AnsNodes) + + fmt.Printf("%d. Movie: %s\n", index, node.Movie) + fmt.Println(node.FirstPerson) + fmt.Println(node.SecondPerson) + + fmt.Println() + + index++ + } +} diff --git a/cmd/app/models.go b/cmd/app/models.go new file mode 100644 index 0000000..feae39f --- /dev/null +++ b/cmd/app/models.go @@ -0,0 +1,27 @@ +package main + +type InfoNode struct { + Name string `json:"name"` + URL string `json:"url"` + Role string `json:"role"` +} + +type InfoNodeForQueue struct { + ParentNodeEntry *InfoNodeForQueue + InfoNodeEntry InfoNode +} + +type MovieBuffResponseDataForPerson struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Movies []InfoNode `json:"movies"` +} + +type MovieBuffResponseDataForMovies struct { + URL string `json:"url"` + Type string `json:"type"` + Name string `json:"name"` + Cast []InfoNode `json:"cast"` + Crew []InfoNode `json:"crew"` +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..ee58c9e --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module moviebuff + +go 1.20 diff --git a/todo.txt b/todo.txt new file mode 100644 index 0000000..eefc80d --- /dev/null +++ b/todo.txt @@ -0,0 +1,7 @@ +1. Add proper error handling, null checking +2. See if we can use cache and improve the performance +3. Refactor code into different modules +4. Add support for test cases +5. See if we can add test cases in the GitHub pipeline +6. Add logging to ensure things are running correctly in the background +7. Add some sleep, backoff to ensure we don't throttle the server and we have a mechanism in case we do \ No newline at end of file