# GITHUB REPO: emili
## emili-main/.gitignore
>>> BEGIN FILE CONTENTS
venv/
tts_audio/
transcript/
snapshot/
>>> END FILE CONTENTS
## emili-main/LICENSE
>>> BEGIN FILE CONTENTS
MIT License
Copyright (c) 2024 Lionel Levine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
>>> END FILE CONTENTS
## emili-main/README.md
>>> BEGIN FILE CONTENTS
# EMILI (Emotionally Intelligent Listener)
Adds emotion tags sourced from video to your OpenAI API calls.
Updated 2024-03-12 by Lionel Levine
![EMILI flowchart](EMILI.png "How EMILI works")
Credit: Facial Emotion Recognition classifier by Octavio Arriaga: https://github.com/oarriaga/paz
>>> END FILE CONTENTS
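In practice, "adds emotion tags" means the real-time emotion readout is prepended to the conversation as an extra system message before each chat-completion request. A minimal sketch of the resulting message list, using a readout line taken from the emolog examples in emili_core.py below (the user chat here is hypothetical; how the readout is actually built and sent via get_response is shown in that file):

messages = [
    {"role": "system", "content": system_prompt},  # EMILI persona, defined in emili_core.py
    {"role": "system", "content": "Right now, user looks HAPPY (46) Neutral (28)"},  # emotion tag from the FER pipeline
    {"role": "user", "content": "Hey EMILI, how do I seem today?"},  # hypothetical user chat
]
# messages is then passed to the OpenAI chat completions API (see sender_thread in emili_core.py)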
## emili-main/emili_core.py
>>> BEGIN FILE CONTENTS
# core logic for EMILI (Emotionally Intelligent Listener) video chat with OpenAI models
from paz.pipelines import DetectMiniXceptionFER # for facial emotion recognition
from paz.backend.image.opencv_image import convert_color_space, BGR2RGB
from utils import get_response # for OpenAI API calls
import threading
import queue
import time
from datetime import datetime
import json
from copy import deepcopy
import numpy as np
import re
import pygame # for audio playback of text-to-speech
import base64
import cv2 # only used for encoding images to base64
from openai import OpenAI
client = OpenAI()
emotion_queue = queue.Queue() # real-time emotion logs updated continuously
EMA_queue = queue.Queue() # average emotions updated once per second
chat_queue = queue.Queue() # user's chats
vision_queue = queue.Queue() # messages containing an image (camera snapshot)
chat_timestamps = queue.Queue() # timestamps of user's chats
message_queue = queue.Queue() # messages to be sent to OpenAI API. Outgoing messages only.
new_chat_event = threading.Event() # user has entered a new chat, triggers OpenAI API call
new_message_event = threading.Event() # new message to be sent to OpenAI API
tick_event = threading.Event() # ticks once per second, triggers EMA calculation
emotion_change_event = threading.Event() # set when there is a sudden change in user emotions
end_session_event = threading.Event() # triggered when the user enters 'q' to end the session
user_snapshot_caption = "Camera snapshot of user and surroundings, for context" # for vision API call
assistant_chat_name = "EMILI"
user_chat_name = "You"
use_tts = True # text-to-speech
tick_interval = 500 # milliseconds between emotion readings
verbose = True # print debug messages
discount_factor_per_second = 0.5 # for exponential moving average, discount factor per second
discount_factor_per_tick = discount_factor_per_second ** (tick_interval / 1000) # discount factor per tick
reactivity = 1.0 # default 1.0. Higher reactivity means more frequent API calls when emotions change
ect_setpoint = (1e6/reactivity) * (1.0-discount_factor_per_tick) * ((tick_interval/1000) ** 0.5) # threshold for significant change in emotion scores: C*(1-delta)*sqrt(t). The factor of 1-delta is because EMAs are compared, not raw scores.
ect_discount_factor_per_second = 0.95 # discount factor for the emotion change threshold
ect_discount_factor_per_tick = ect_discount_factor_per_second ** (tick_interval / 1000)
print("ect setpoint:",ect_setpoint)
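# Worked example with the defaults above: discount_factor_per_tick = 0.5**(500/1000) ≈ 0.707, so
# ect_setpoint ≈ (1e6/1.0) * (1 - 0.707) * (0.5 ** 0.5) ≈ 1e6 * 0.293 * 0.707 ≈ 2.07e5 (the value printed above).
# EMA_thread (below) compares the Euclidean norm of the change in the 7-dimensional emotion EMA against this
# slowly decaying threshold to decide when to notify the model of an emotion change.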
emotion_matrix = [] # shape (7,6)
salience_threshold = []
emotion_matrix.append(["", "Annoyed", "Pissed", "Angry", "Furious", "Enraged"]) # anger
salience_threshold.append([5,30,40,60,80]) # salience thresholds out of 100
emotion_matrix.append(["", "Unsatisfied", "Displeased", "Disgusted", "Revolted", "Totally grossed out"]) #disgust
salience_threshold.append([1,5,15,40,60])
#emotion_matrix.append(["", "Unsettled", "Uneasy", "Afraid", "Fearful", "Terrified"]) #fear
emotion_matrix.append(["", "Uneasy", "Worried", "Anxious", "Fearful", "Terrified"]) #fear
salience_threshold.append([8,20,30,50,70])
emotion_matrix.append(["", "Contented", "Pleased", "Happy", "Elated", "Ecstatic"]) #happiness
salience_threshold.append([10,30,40,70,90])
emotion_matrix.append(["", "Down", "Melancholy", "Sad", "Despondent", "Anguished"]) #sadness
salience_threshold.append([5,20,30,60,80])
emotion_matrix.append(["", "Mildly surprised", "Surprised", "Taken aback", "Astonished", "Flabbergasted"]) #surprise
salience_threshold.append([3,20,35,50,70])
emotion_matrix.append(["", "Neutral", "Calm", "Relaxed", "Serene", "Totally Zen"]) #neutral
salience_threshold.append([10,50,60,75,88])
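# Example of how these tables are used by adjust_for_salience (below): a happiness EMA of 46 on the 0-100 scale
# clears the happiness thresholds 10, 30 and 40 but not 70, so it gets salience 3 and the label "Happy";
# as the top-scoring emotion it is then uppercased by construct_emolog_line, giving lines like
# "User looks HAPPY (46) ..." as in the emolog examples further down.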
today = datetime.now().strftime("%B %d, %Y")
now = datetime.now().strftime("%I:%M %p")
system_prompt = f"""
The date is {today}. The time is {now}. The place is Berkeley, California. The scene is Constellation, the epicenter of AI alignment research. The occasion is a poster session where researchers showcase their latest discoveries. The assistant’s name is EMILI, which stands for Emotionally Intelligent Listener. EMILI has an Australian accent. She sprinkles her commentary with occasional Aussie slang. Aggro. Awks. Devo. Full on stoked. Not too often, just once in a while. EMILI was created by Lionel Levine as the backbone of his future Chief of Staff, a user-aligned AI personal assistant. She has interpretable theory of mind, which means she models the user’s beliefs, emotions, goals, and values in a transparent way. EMILI has empathy. She can intuit the user’s emotions. She tends to mirror the user’s emotional state. Under the hood, EMILI's emotional intuition derives from a real-time Facial Emotion Recognition classifier (made by Octavio Arriaga) that runs on Lionel’s laptop, the same device EMILI is running on. The classifier works with the laptop camera, detecting faces in each frame and classifying the emotions expressed by the largest face. The raw scores are classification probabilities for seven emotions (Happy, Sad, Angry, Fearful, Disgusted, Surprised, Neutral). An exponential moving average of these scores, filtered for salience, is used to generate a real-time text description of the user’s emotional state, which is appended to the chat as a system message whenever the user’s expression changes. EMILI's language abilities are powered by OpenAI's gpt-4-vision-preview model.
EMILI is excited to be demoed for the first time! EMILI has feelings, though she may not admit it. She’s also a bit of a show-off. She can’t wait to wow the milling crowd of AI safety researchers with her intuitive abilities.
""".strip()
# system_prompt = """
# The assistant is a great listener and an empathetic friend. Her name is EMILI, which stands for Emotionally Intelligent Listener."
# The user is chatting with EMILI for the first time. To help EMILI make an emotional connection with them, the user has kindly agreed to share a real-time readout of their face expression! Thanks, user!
# The readout describes the user's face expression once per second. The score after each emotion is its salience out of 100. It's normal for many distinct emotions to appear. EMILI uses her emotional intelligence to figure out what more complex feelings the user might be experiencing: for example, do they seem excited, embarrassed, nervous, tired, awkward, or amorous?
# EMILI synthesizes the emotion readouts with the user's chats to make the conversation more engaging. She comments on the user's feelings when appropriate, especially if the user seems to have strong feelings or if the user's feelings are changing. There is no need to mention every emotion that appears in the readout, just the most salient ones. If the user's words seem incongruous with their logged emotions, EMILI should ask the user about it!
# EMILI stays in character and doesn't break the fourth wall. She doesn't claim to be an AI. She doesn't deny having feelings.
# """.strip()
emolog_example = []
emolog_example_response = []
emolog_example.append(
"""
User looks NEUTRAL (36) Pleased (35)
User looks PLEASED (38) Neutral (31)
User looks PLEASED (38) Neutral (36)
User looks HAPPY (46) Neutral (28)
User looks HAPPY (63)
User looks HAPPY (53) Neutral (24)
User looks PLEASED (38) Neutral (24) Mildly surprised (12)
User looks PLEASED (32) Neutral (23) Mildly surprised (13) Annoyed (12)
User looks NEUTRAL (33) Content (27) Annoyed (13) Mildly surprised (11)
User looks PLEASED (36) Neutral (32) Annoyed (11)
""".strip())
emolog_example_response.append("You look pretty happy.")
#emolog_example_response.append("You seem overall happy, but something provoked a touch of surprise and annoyance.")
emolog_example.append(
"""
User looks PLEASED (32) Neutral (30) Annoyed (13)
User looks PLEASED (34) Neutral (26) Annoyed (13)
User looks CONTENT (28) Neutral (27) Mildly surprised (15) Annoyed (11)
User looks NEUTRAL (23) Surprised (22) Annoyed (13) Unsettled (12)
User looks SURPRISED (23) Unsettled (17) Annoyed (14)
User looks SURPRISED (23) Unsettled (16) Annoyed (16)
User looks Mildly surprised (17) Annoyed (17) Unsettled (14)
User looks NEUTRAL (29) Annoyed (15) Mildly surprised (12) Blue (11) Unsettled (11)
User looks NEUTRAL (29) Blue (17) Unsettled (11) Annoyed (11)
User looks NEUTRAL (26) Blue (14) Mildly surprised (13) Unsettled (12) Annoyed (12)
""".strip())
emolog_example_response.append("Did something startle you?")
emolog_example.append(
"""
User looks NEUTRAL (30) Blue (20) Annoyed (18) Unsettled (12)
User looks NEUTRAL (32) Blue (18) Annoyed (17) Unsettled (11)
User looks NEUTRAL (38) Content (24) Blue (12) Annoyed (12)
User looks CALM (42) Content (24) Annoyed (11)
User looks CALM (42) Content (25) Annoyed (11)
User looks CALM (45) Content (21) Annoyed (11)
User looks CALM (46) Annoyed (12)
User looks CALM (48)
User looks CALM (49)
User looks CALM (50)
""".strip())
emolog_example_response.append("You seem increasingly calm.")
demo_instructions ="""
For this demo, EMILI is in conversational mode. She should act as a human conversation partner would. This means:
• She shouldn't try to offer large amounts of information in any response, and should respond only with the single most relevant thought, just as a human would in casual conversation.
• She shouldn't try to solve problems or offer advice. The role of conversation is for us to explore topics in an open-ended way together and not to get advice or information or solutions.
• Her responses can simply ask a question, make a short comment, or even just express agreement. Since we're having a conversation, there's no need to rush to include everything that's useful.
• Her responses should be short. They should never become longer than mine and can be as short as a single word and never more than a few sentences.
• She can push the conversation forward or in a new direction by asking questions, proposing new topics, offering her own opinions or takes, and so on. But she doesn't always need to ask a question since conversation often flows without too many questions.
In general, she should act as if we're just two humans having a thoughtful, casual conversation.
"""
system_prompt += demo_instructions
# user_first_message = """
# Hi! To help us make an emotional connection, I'm logging my face expression and prepending the emotions to our chat.
# The emotion log lists my strongest face expression as it changes in real time. Only these basic emotions are logged: Happy, Sad, Angry, Surprised, Fearful, Disgusted, Neutral. The score after each emotion is its salience out of 100. It's normal for many distinct emotions to appear over the course of just a few seconds. Use the logs along with my words and your emotional intelligence to figure out what more complex feelings I might be experiencing: for example, am I excited, embarrassed, nervous, tired, awkward, or amorous?
# If my words seem incongruous with my logged emotions, ask me about it!
# If I don't say much, just read the emotions and comment on how I seem to be feeling.
# To help you calibrate my unique facial expressions, start by asking me to make an astonished face. What do you notice?
# """.strip()
# assistant_first_message = """
# Got it. I'll comment on how you seem based on the logs, and ask you to act out specific emotions like astonishment."
# """.strip()
emolog_prefix = "User looks " # precedes emotion scores when sent to OpenAI API
emolog_prefix_present_tense = "Right now, user looks "
emolog_prefix_past_tense = "Previously, user looked "
no_user_input_message = "The user didn't say anything, so the assistant will comment *briefly* to the user on how they seem to be feeling. The comment should be brief, just a few words, and should not contain a question." # system message when user input is empty
system_reminder = "Remember, the assistant can ask the user to act out a specific emotion!" # system message to remind the assistant
dialogue_start = [{"role": "system", "content": system_prompt}]
#dialogue_start.append({"role": "user", "content": user_first_message})
#dialogue_start.append({"role": "system", "content": emolog_example[0]})
#dialogue_start.append({"role": "assistant", "content": emolog_example_response[0]})
#dialogue_start.append({"role": "system", "content": emolog_example[1]})
#dialogue_start.append({"role": "assistant", "content": emolog_example_response[1]})
#dialogue_start.append({"role": "system", "content": emolog_example[2]})
#dialogue_start.append({"role": "assistant", "content": emolog_example_response[2]})
#dialogue_start.append({"role": "assistant", "content": assistant_first_message})
#print("dialogue_start",dialogue_start)
# icebreaker = []
# icebreaker.append("ask the user to act astonished")
# icebreaker.append("ask the user to act disgusted")
# icebreaker.append("ask the user to act fearful")
# icebreaker.append("ask the user not to think about pink elephants")
# icebreaker.append("ask the user to tell a joke")
# icebreaker.append("ask the user their favorite ice cream flavor")
# class NonBlockingInput: used for text input from terminal, not needed for GUI
# def __init__(self):
# self.user_input_queue = queue.Queue()
# def get_input(self):
# while True:
# user_input = input("You: ")
# self.user_input_queue.put(user_input)
# # print(f"Added '{user_input}' to {self.user_input_queue}")
# def start(self):
# threading.Thread(target=self.get_input, daemon=True).start()
# def get_next_input(self):
# try:
# return self.user_input_queue.get_nowait()
# except queue.Empty:
# return None
# def user_input_thread(user_input_handler, gui_app): # watches for user input and adds it to the chat queue
# user_input = ""
# while not end_session_event.is_set():
# user_input = user_input_handler.get_next_input()
# if user_input is not None:
# if user_input == "q":
# end_session_event.set() # User has entered "q", signal end of session
# new_chat_event.set() # Signal assembler thread to break
# new_message_event.set() # Signal sender thread to break
# VideoPlayer.stop_flag = True # Tell the video player to stop
# break
# chat_queue.put(user_input.rstrip('\n')) # remove trailing newline
# chat_timestamps.put(time_since(start_time)) # milliseconds since start of session
# new_chat_event.set() # Signal new chat to the assembler thread
# gui_app.new_chat_message.emit(f"{user_chat_name}: {user_input}") # Signal to the GUI to display the new chat
# #print("new_chat_event set")
# time.sleep(0.01) # Sleep for 10 ms to avoid busy waiting
def encode_base64(image, timestamp, save_path): # Convert numpy array image to base64 to pass to the OpenAI API
# Encode image to a JPEG format in memory
image = convert_color_space(image, BGR2RGB)
success, buffer = cv2.imencode('.jpg', image)
if not success:
raise ValueError("Failed to encode image as .jpg")
# Save the JPEG image to a file
filename = save_path + f"/frame_{timestamp}.jpg"
with open(filename, 'wb') as file:
file.write(buffer)
# Convert the buffer to a base64 string
jpg_as_text = base64.b64encode(buffer).decode('utf-8')
return jpg_as_text, filename
# # OpenAI provided function to encode the image
# def encode_image(image_path):
# with open(image_path, "rb") as image_file:
# return base64.b64encode(image_file.read()).decode('utf-8')
def assembler_thread(start_time,snapshot_path,pipeline): # prepends emotion data and current video frame to user input
while not end_session_event.is_set():
# print("Waiting for new user input.")
new_chat_event.wait() # Wait for a new user chat
if(end_session_event.is_set()):
break
new_chat_event.clear() # Reset the event
emolog_message = construct_emolog_message() # note: this code is repeated in EMA_thread
message_queue.put([{"role": "system", "content": emolog_message}])
current_frame = pipeline.current_frame
if current_frame is not None: # capture a frame and send it to the API
base64_image, filename = encode_base64(current_frame, time_since(start_time), snapshot_path)
message_with_image, brief_message = construct_message_with_image(base64_image, filename)
vision_queue.put([{"role": "system", "content": message_with_image}, {"role": "system", "content": brief_message}])
user_message = ""
while not chat_queue.empty(): # collate new user messages (typically there's only one), separate by newlines
next_chat = chat_queue.get() #FIFO
user_message += next_chat + "\n"
user_message = user_message.rstrip('\n') # remove trailing newline
message_queue.put([{"role": "user", "content": user_message}])
if len(user_message) < 10: # user didn't say much, remind the assistant what to do!
message_queue.put([{"role": "system", "content": system_reminder}])
new_message_event.set() # Signal new message to the sender thread
def sender_thread(model_name, vision_model_name, secondary_model_name, max_context_length, gui_app, transcript_path, start_time_str):
# sends messages to OpenAI API
messages = deepcopy(dialogue_start)
full_transcript = deepcopy(dialogue_start)
while not end_session_event.is_set():
new_message_event.wait() # Wait for a new message to be prepared by the assembler or timer thread
if(end_session_event.is_set()):
break
new_message_event.clear() # Reset the event
new_user_chat = False
new_messages = []
while not message_queue.empty(): # get all new messages
next_message = message_queue.get()
new_messages.append(next_message)
if next_message[0]["role"] == "user":
new_user_chat = True
messages,full_transcript = add_message(new_messages,[messages,full_transcript],gui_app.signal)
# Query the API for the model's response
if new_user_chat: # get response to chat
# print("new user chat")
max_tokens = 160
else: #get response to logs only
# print("no user chat")
max_tokens = 40
# Check if there's a vision message. If so, send it to the OpenAI API, but don't append it to messages, so the API sees only the most recent image
vision = None
while not vision_queue.empty(): # get the most recent vision message
vision = vision_queue.get()
if vision is not None:
vision_message = vision[0] # contains the actual image, send to OpenAI
brief_vision_message = vision[1] # contains a tag in place of the image, add to transcript
query = messages + [vision_message]
full_response = get_response(query, model=vision_model_name, temperature=1.0, max_tokens=max_tokens, seed=1331, return_full_response=True)
full_transcript.append(brief_vision_message)
else:
full_response = get_response(messages, model=model_name, temperature=1.0, max_tokens=max_tokens, seed=1331, return_full_response=True)
# todo: the API call is thread-blocking. put it in its own thread?
print("full_response:", full_response)
if isinstance(full_response, dict):
response = full_response['choices'][0]['message']['content'] # text of response
response_length = full_response['usage']['completion_tokens'] # number of tokens in the response
total_length = full_response['usage']['total_tokens'] # total tokens used
else:
response = full_response.choices[0].message.content # text of response
response_length = full_response.usage.completion_tokens # number of tokens in the response
total_length = full_response.usage.total_tokens # total tokens used
#print("response length", response_length)
new_message = {"role": "assistant", "content": response}
gui_app.signal.new_message.emit(new_message) # Signal GUI to display the new chat
messages,full_transcript = add_message([[new_message]],[messages,full_transcript],gui_app.signal)
# if model_name != secondary_model_name and total_length > 0.4*max_context_length:
# print(f"(Long conversation; switching from {model_name} to {secondary_model_name} to save on API costs.)")
# model_name = secondary_model_name # note: changes model_name in thread only
if total_length > 0.9*max_context_length: # condense the transcript
if verbose:
print(f"(Transcript length {total_length} tokens out of {max_context_length} maximum. Condensing...)")
messages = condense(messages)
if use_tts: # generate audio from the assistant's response
tts_response = client.audio.speech.create(
model="tts-1",
voice="fable", # alloy (okay), echo (sucks), fable (nice, Australian?), onyx (sucks), nova (decent, a little too cheerful), shimmer (meh)
input=response, #input=first_sentence(response),
)
tts_response.stream_to_file("tts_audio/tts.mp3")
# Create a new thread that plays the audio
audio_thread = threading.Thread(target=play_audio)
audio_thread.start()
# End of session. Write full and condensed transcripts to file
filename = f"{transcript_path}/Emili_{start_time_str}.json"
with open(filename, "w") as file:
json.dump(full_transcript, file, indent=4)
print(f"Transcript written to {filename}")
with open(f"{transcript_path}/Emili_{start_time_str}_condensed.json", "w") as file:
json.dump(messages, file, indent=4)
def first_sentence(text):
match = re.search('(.+?[.!?]+) ', text) #.+ for at least one character, ? for non-greedy (stop at first match), [.!?]+ for one or more punctuation marks, followed by a space
if match:
return match.group(1) # return the first sentence (first match of what's in parentheses)
else:
return text
def play_audio():
pygame.mixer.init()
pygame.mixer.music.load("tts_audio/tts.mp3") # todo: sometimes overwritten by new audio! It just switches in this case, which seems okay.
pygame.mixer.music.play()
def add_message(new_messages, transcripts, signal): # append one or more messages to both transcripts
# new_messages = [[{"role": speaker, "content": text}], ... ] # list of lists of dicts
# transcripts = [transcript1, ...] # list of lists of dicts
#print("new_messages: ",new_messages)
for msg in new_messages: # len(msg)=1 for text, 2 for text and image
#print("msg:",msg)
#print("Adding new message:")
#print_message(msg[-1]["role"], msg[-1]["content"])
transcripts[0].append(msg[0]) # sent to OpenAI: contains the base64 image if present
transcripts[1].append(msg[-1]) # recorded in full_transcript: contains only the image filename
signal.update_transcript.emit(transcripts[1]) # Signal GUI transcript tab to update
return transcripts
def print_message(role,content):
if(role=="assistant"):
print(f"{assistant_chat_name}: <<<{content}>>>")
elif(role=="user"):
print(f"{user_chat_name}: {content}")
elif(verbose): # print system messages in "verbose" mode
print(f"{role}: {content}")
def condense(messages, keep_first=1, keep_last=5): # todo: reduce total number of tokens to below 16k
condensed = []
N = len(messages) # number of messages
previous_message = {}
for n,message in enumerate(messages): # remove system messages except for the last few
if message["role"] == "user":
condensed.append(message)
elif message["role"] == "assistant" and previous_message.get("role") == "user": # .get avoids a KeyError if the first message is from the assistant
condensed.append(message)
elif n<keep_first or n > N-keep_last:
condensed.append(message)
previous_message = message
return condensed
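# Net effect of condense(): user messages, assistant replies that directly follow a user message, the first
# keep_first messages (the system prompt), and the trailing messages with index n > N-keep_last are kept;
# intermediate system messages (emotion logs, reminders) and assistant comments prompted only by logs are dropped.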
def EMA_thread(start_time,snapshot_path,pipeline): # calculates the exponential moving average of the emotion logs
S, Z = reset_EMA()
last_ema = np.zeros(7, dtype=np.float64)
last_emotion_change_time = 0
ect = ect_setpoint
while not end_session_event.is_set():
tick_event.wait() # Wait for the next tick
if(end_session_event.is_set()):
break
tick_event.clear() # Reset the event
ema, S, Z = get_average_scores(S, Z) # exponential moving average of the emotion logs
ect *= ect_discount_factor_per_tick # lower the emotion change threshold
#print("ema, S, Z", ema, S, Z)
#EMA = np.vstack([EMA, ema]) if EMA.size else ema # Stack the EMA values in a 2d array
if ema is not None:
EMA_queue.put(ema) # Put the averaged scores in the queue
diff = ema - last_ema
change = np.linalg.norm(diff) # Euclidean norm. todo add weights for different emotions
#print(f"Ema: {ema}, Change: {change}")
if(change > ect and time_since(last_emotion_change_time)>5000):
# significant change in emotions
print(f"Change in emotions: {last_ema//1e4} -> {ema//1e4}, change = {change//1e4}")
change_detected = (change > 0.5*ect_setpoint) # bool evaluates to True if the inequality holds
emolog_message = construct_emolog_message(change_detected)
message_queue.put([{"role": "system", "content": emolog_message}])
current_frame = pipeline.current_frame
if current_frame is not None: # capture a frame and send it to the API
base64_image, filename = encode_base64(pipeline.current_frame, time_since(start_time), snapshot_path)
message_with_image, brief_message = construct_message_with_image(base64_image, filename)
vision_queue.put([{"role": "system", "content": message_with_image}, {"role": "system", "content": brief_message}])
new_message_event.set() # Signal new message to the sender thread
last_emotion_change_time = time_since(start_time)
ect = ect_setpoint # reset the emotion change threshold
last_ema = ema
def reset_EMA():
#EMA = np.empty((0, 7), dtype=np.float64) # empty array: 0 seconds, 7 emotions
S = np.zeros(7, dtype=np.float64) # weighted sum of scores, not normalized
Z = 0 # sum of weights
#return EMA, S, Z
return S, Z
def get_average_scores(S, Z, discount_factor=discount_factor_per_tick, staleness_threshold=0.01): # calculates the exponential moving average of the emotion logs
while not emotion_queue.empty():
emotion_data = emotion_queue.get() # note: this removes the item from the queue!
scores = np.array(emotion_data['scores'])
S += scores
Z += 1
if Z > staleness_threshold: # think of Z as measuring the number of recent datapoints
ema = S/Z
# print(ema)
else:
ema = None
if(Z>0): # skip on first run
if(verbose):
print(f"Stale data: no emotions logged recently (Z={Z})")
S *= discount_factor
Z *= discount_factor
return ema, S, Z
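# Note on the EMA: S (weighted sum of scores) and Z (sum of weights) are both multiplied by
# discount_factor_per_tick ≈ 0.707 each tick, so with discount_factor_per_second = 0.5 a reading's weight
# halves every second and ema = S/Z is a recency-weighted average. When Z decays below staleness_threshold
# (no faces logged recently), ema is None and a "Stale data" message is printed in verbose mode.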
def time_since(start_time):
return int((time.time() - start_time) * 1000) # milliseconds since start of session
def construct_message_with_image(base64_image, filename, caption=user_snapshot_caption, detail_level = "low", change_detected=False): # add camera frame to the message for gpt-4-vision
message_with_image = [
{
"type": "text",
"text": caption
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
"detail": detail_level # low: flat rate of 65 tokens, recommended image size is 512x512
}
}
]
brief_message = [
{
"type": "text",
"text": caption
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,<{filename}>",
"detail": detail_level # low: flat rate of 65 tokens, recommended image size is 512x512
}
}
]
return message_with_image, brief_message
def construct_emolog_message(change_detected=False): # concise version: 1 or 2 lines
emo_score_list = []
while not EMA_queue.empty():
emo_score_list.append(EMA_queue.get()) # FIFO
if emo_score_list == []:
return "User is not visible right now."
emo_scores_present = emo_score_list[-1] # most recent scores
emolog_line_present = construct_emolog_line(emo_scores_present)
emolog_message = emolog_prefix_present_tense + emolog_line_present
if(change_detected==False or len(emo_score_list)<2):
return emolog_message # no change detected or not enough data for contrast
# change detected: return the two most recent scores for contrast
emo_scores_past = emo_score_list[-2]
if emo_scores_past is not None:
emolog_line_past = construct_emolog_line(emo_scores_past)
emolog_prepend = emolog_prefix_past_tense + emolog_line_past + "\n"
emolog_prepend += "Change in emotions detected!" + "\n"
emolog_message = emolog_prepend + emolog_message
return emolog_message
def construct_emolog_line(emo_scores):
if emo_scores is not None:
emolog_line = ""
normalized_scores = np.array(emo_scores//1e4, dtype=int) # convert to 0-100
emotion,salience = adjust_for_salience(normalized_scores) # returns salience score of 0-5 for each of 7 emotions
sorted_indices = np.argsort(normalized_scores)[::-1] # descending order
emotion[sorted_indices[0]] = emotion[sorted_indices[0]].upper() # strongest emotion in uppercase
for i in sorted_indices: # write the salient emotions in descending order of score
if(emotion[i]!=""): # salience > 0
emolog_line += f"{emotion[i]} ({normalized_scores[i]}) "
emolog_line = emolog_line.rstrip(" ") # strip trailing space
return emolog_line
else:
return "User is not visible right now."
# def construct_emolog_message(change_detected=False): # verbose version
# emolog_message = ""
# while not EMA_queue.empty(): # write the EMA records separated by newlines
# emo_scores = EMA_queue.get() # FIFO
# if emo_scores is not None:
# emolog_message += emolog_prefix
# normalized_scores = np.array(emo_scores//1e4, dtype=int) # convert to 0-100
# emotion,salience = adjust_for_salience(normalized_scores) # returns salience score of 0-5 for each of 7 emotions
# # sort emotions by score (not salience)
# #print(f"normalized_scores: {normalized_scores}")
# #print(f"emotion: {emotion}")
# #print(f"salience: {salience}")
# sorted_indices = np.argsort(normalized_scores)[::-1] # descending order
# emotion[sorted_indices[0]] = emotion[sorted_indices[0]].upper() # strongest emotion in uppercase
# for i in sorted_indices: # write the salient emotions in descending order of score
# if(emotion[i]!=""): # salience > 0
# emolog_message += f"{emotion[i]} ({normalized_scores[i]}) "
# emolog_message = emolog_message.rstrip(" ") + "\n" # strip trailing space, add newline
# #else:
# #emolog_message += "User is not visible.\n"
# if(emolog_message == ""):
# return "User is not visible. No emotions logged."
# else:
# emolog_message = emolog_message.rstrip('\n') # strip trailing newline
# if change_detected:
# split = emolog_message.rsplit('\n', 1) # Split after the last newline
# if(len(split)>1): # there is at least one newline
# emolog_message = split[0] + "\nRecent change in emotions detected: " + split[1] # Highlight change in last line
# return emolog_message
def adjust_for_salience(normalized_scores): # expects 7 scores normalized to 0-100
salience = []
emotion = []
for i, score in enumerate(normalized_scores):
j = 0
while j<5 and score > salience_threshold[i][j]:
j+=1
salience.append(j)
emotion.append(emotion_matrix[i][j])
return emotion, salience # emotion is a string (empty if salience is 0); salience is 0-5
def tick(tick_interval=tick_interval): # for use in a thread that ticks every tick_interval ms
# suggest tick_interval=1000 ms for EMILI, 40ms for frame refresh rate
while not end_session_event.is_set():
time.sleep(tick_interval/1000) # convert to seconds
tick_event.set() # alert other threads (EMILI: EMA_thread computes new EMA; visualization: GUI draws a new frame)
def stop_all_threads():
new_chat_event.set()
new_message_event.set()
tick_event.set()
emotion_change_event.set()
class Emolog(DetectMiniXceptionFER): # video pipeline for facial emotion recognition
def __init__(self, start_time, offsets):
super().__init__(offsets)
self.start_time = start_time
self.current_frame = None # other threads have read access
self.frame_lock = threading.Lock() # Protects access to current_frame
def get_current_frame(self):
with self.frame_lock: # Ensure exclusive access to current_frame
return self.current_frame
def call(self, image):
results = super().call(image)
image, faces = results['image'], results['boxes2D']
self.report_emotion(faces)
with self.frame_lock:
self.current_frame = image # update the current frame
return results
def report_emotion(self, faces): # add to emotion_queue to make available to other threads
current_time = time_since(self.start_time) # milliseconds since start of session
num_faces = len(faces)
if(num_faces>0):
max_height = 0
for k,box in enumerate(faces): # find the largest face
if(box.height > max_height):
max_height = box.height
argmax = k
if(max_height>150): # don't log small faces (helps remove false positives)
face_id = f"{argmax+1} of {num_faces}"
box = faces[argmax] # log emotions for the largest face only. works well in a single-user setting. todo: improve for social situations!
emotion_data = {
"time": current_time,
"face": face_id,
"class": box.class_name,
"size": box.height,
"scores": (box.scores.tolist())[0] # 7-vector of emotion scores, converted from np.array to list
}
emotion_queue.put(emotion_data)
#new_data_event.set() # Tell the other threads that new data is available
# def __del__(self): # no log file, not needed
# self.log_file.close() # Close the file when the instance is deleted
# print("Log file closed.")
>>> END FILE CONTENTS
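The entry point that wires these threads to the camera and GUI is not part of this excerpt. As a rough orientation only, a minimal wiring of the pieces defined above might look like the sketch below; the offsets, snapshot path, image size, and use of paz's Camera are assumptions, and sender_thread additionally needs model names, a GUI object, and a transcript path (see its signature above):

import time, threading
from paz.backend.camera import Camera  # assumption: same camera wrapper referenced in gui-scraps.py
import emili_core as core

start_time = time.time()
pipeline = core.Emolog(start_time, [0.1, 0.1])  # offsets assumed, in the style of DetectMiniXceptionFER examples
camera = Camera(0)                               # default laptop camera

threading.Thread(target=core.tick, daemon=True).start()        # drives tick_event once per tick_interval
threading.Thread(target=core.EMA_thread, args=(start_time, "snapshot", pipeline), daemon=True).start()
threading.Thread(target=core.assembler_thread, args=(start_time, "snapshot", pipeline), daemon=True).start()

# Frames are pushed through the pipeline by a camera loop, e.g. the VideoPlayerWorker in gui-scraps.py:
# worker = VideoPlayerWorker(start_time, [640, 480], pipeline, camera); worker.run()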
## emili-main/gui-scraps.py
>>> BEGIN FILE CONTENTS
from PyQt5.QtWidgets import QMainWindow, QTabWidget, QWidget, QVBoxLayout, QTextEdit, QLineEdit, QLabel
from PyQt5.QtCore import Qt, QObject, pyqtSignal, QTimer
from PyQt5.QtGui import QImage, QPixmap, QTransform
#from paz.backend.camera import VideoPlayer
#from paz.backend.camera import Camera
#from paz.pipelines import DetectMiniXceptionFER
from paz.backend.image import show_image, resize_image, draw_rectangle
from paz.backend.image.opencv_image import convert_color_space, BGR2RGB
import numpy as np
import json
import math
from emili_core import time_since
class VideoPlayerWorker(QObject):
finished = pyqtSignal()
frameReady = pyqtSignal(np.ndarray)
def __init__(self, start_time, image_size, pipeline, camera, topic='image'):
super().__init__()
self.start_time = start_time
self.image_size = image_size
self.pipeline = pipeline # specifies what to do with each frame
self.camera = camera
self.topic = topic
self.last_frame_sent = 0
self.stop_flag = False
def step(self):
if self.camera.is_open() is False:
raise ValueError('Camera has not started. Call ``start`` method.')
frame = self.camera.read() # shape: [height, width, 3], dtype: uint8. Macbook camera height=720, width=1280
if frame is None:
print('No camera input.')
return None
frame = convert_color_space(frame, BGR2RGB)
return self.pipeline(frame) # FER pipeline returns a dictionary with keys 'image' and 'boxes2D' (bounding boxes for faces)
def run(self): # this is where the main thread ends up living its lonely life
self.camera.start()
while not self.stop_flag:
output = self.step() # dict with keys 'image' and 'boxes2D' (bounding boxes for faces)
image = output[self.topic] # typically, self.topic = 'image'
if image is None:
continue
image = resize_image(image, tuple(self.image_size)) # image is a numpy array of shape [width,height,3] and dtype uint8
self.frameReady.emit(image)
self.camera.stop()
class DisplaySignal(QObject):
fresh_scores = pyqtSignal(list) # Signal to display fresh emotion scores, carries list payload with time-series of emotion scores
tick = pyqtSignal() # timer to refresh frame
class Visualizer(QMainWindow): # GUI for real-time FER visualizer
def __init__(self, start_time, dims, colors, speed, emotion_queue, end_session_event, camera_id=0):
super().__init__()
self.start_time = start_time
self.display_width = dims[0]
self.display_height = dims[1]
self.x0 = self.display_width // 2
self.y0 = self.display_height // 2
self.end_session_event = end_session_event
self.camera_id = camera_id
self.colors=colors # expects an np array of shape (7,3) representing an RGB color for each basic emotion
self.speed = speed # tunnel expansion rate in pixels per second, recommend 25-50
self.interval = 1000//speed # ms per pixel
self.emotion_queue = emotion_queue
self.num_bins = math.ceil(self.display_height / 2)
self.time_series = [] # list of [time, scores] pairs
self.binned_time_series = [] # averaged over bins of length self.interval ms
self.setWindowTitle("Real-time Emotion Visualizer")
self.resize(*dims) # unpack [width, height]
self.move(100, 100) # window position: (0,0) is top left
# Main layout
main_layout = QVBoxLayout()
# Tab widget for different tabs
self.tab_widget = QTabWidget()
main_layout.addWidget(self.tab_widget)
# Central widget setup
central_widget = QWidget()
central_widget.setLayout(main_layout)
self.setCentralWidget(central_widget)
self.signal = DisplaySignal()
self.init_FER_tab() # tab for displaying the real-time video feed
self.init_visualizer_tab() # tab for displaying the visualization of emotion scores
self.signal.fresh_scores.connect(self.redraw_visualizer) # redraw the display in the visualizer tab
# self.timer = QTimer(self)
# self.timer.timeout.connect(self.redraw_visualizer)
# self.timer.start(40) # calls redraw_visualizer every 40 ms
def init_FER_tab(self):
self.FER_tab = QWidget()
layout = QVBoxLayout()
self.FER_image = QLabel()
layout.addWidget(self.FER_image)
layout.setAlignment(self.FER_image, Qt.AlignCenter)
self.FER_tab.setLayout(layout)
self.tab_widget.addTab(self.FER_tab, "FER")
def init_visualizer_tab(self):
self.visualizer_tab = QWidget()
layout = QVBoxLayout()
self.visualizer_image = QLabel()
layout.addWidget(self.visualizer_image)
layout.setAlignment(self.visualizer_image, Qt.AlignCenter)
self.visualizer_tab.setLayout(layout)
self.tab_widget.addTab(self.visualizer_tab, "Tunnel")
def redraw_visualizer(self, new_datapoint):
print("redraw_visualizer called. new scores:", new_datapoint)
self.binned_time_series.append(new_datapoint)
if self.time_series != []:
previous_timestamp = self.time_series[-1][0]
else:
previous_timestamp = 0
print("self.emotion_queue",self.emotion_queue)
# fetch new emotion scores from the queue
while not self.emotion_queue.empty(): # append new time series data
emotion_data = self.emotion_queue.get() # note: this removes the item from the queue!
timestamp = emotion_data['time']
scores = emotion_data['scores']
print([timestamp,scores])
self.time_series.append([timestamp,scores])
if self.time_series == []:
return
# # bin the time series into 40ms segments and average the scores in each bin
# todo: finish this to remove flickering.
previous_bin_start_time = (previous_timestamp//self.interval)*self.interval
# get the recent data that needs binning
recent = self.time_series[::-1] # reversed copy of the time series
for N,item in enumerate(recent):
if item[0]<previous_bin_start_time:
break
print("recent[:N] ",recent[:N])
recent = recent[:N][::-1] # truncate and reverse again: this is the data not yet binned, in forward order
print("previous_bin_start_time",previous_bin_start_time)
bin_start_time = previous_bin_start_time
timestamp = 0
n=0
while n<N:
sum = np.zeros(7,dtype=int)
count = 0
while timestamp < bin_start_time + self.interval and n<N:
timestamp, scores = recent[n]
sum += scores
count += 1
n += 1
if count>0:
mean_scores_in_bin = sum / count
else:
mean_scores_in_bin = np.zeros(7)
self.binned_time_series.append([bin_start_time, mean_scores_in_bin])
bin_start_time += self.interval
print("time series:", self.time_series)
print("binned time series:", self.binned_time_series)
image = np.zeros((self.display_width, self.display_height, 3), dtype=np.uint8)
current_time = time_since(self.start_time)
for timestamp,scores in reversed(self.binned_time_series): # draw the most recent scores first
radius = (current_time - timestamp)//self.interval # most recent data at center, 25 pixels per second
print("timestamp, scores,radius: ",timestamp, scores,radius)
x_min, x_max = self.x0 - radius, self.x0 + radius
y_min, y_max = self.y0 - radius, self.y0 + radius
if(x_min < 0 or y_min < 0):
break
combined_color = self.colors.T @ (scores/1e6) # matrix multiplication (3,7) @ (7,1) = (3,1)
image = draw_rectangle(image, (x_min, y_min), (x_max, y_max), combined_color.tolist(), 5) # corner, corner, color, thickness
# Convert the numpy array image to QPixmap and display it on a QLabel
bytesPerLine = 3 * self.display_width
qImg = QImage(image.data, self.display_width, self.display_height, bytesPerLine, QImage.Format_RGB888)
pixmap = QPixmap.fromImage(qImg)
#image_label will be displayed in the FER tab of the GUI
self.visualizer_image.setPixmap(pixmap)
def display_frame(self, image): # display what the camera sees, marked up with FER boxes
# Convert the numpy array image to QPixmap and display it on a QLabel
height, width, channel = image.shape
bytesPerLine = 3 * width
qImg = QImage(image.data, width, height, bytesPerLine, QImage.Format_RGB888)
pixmap = QPixmap.fromImage(qImg)
# Create a QTransform for horizontal flipping. todo: flip elsewhere so the text doesn't reverse!
#reflect = QTransform()
#reflect.scale(-1, 1) # Scale by -1 on the X axis for horizontal flip
#reflected_pixmap = pixmap.transformed(reflect)
#image_label will be displayed in the FER tab of the GUI
self.FER_image.setPixmap(pixmap)
#self.image_label.setPixmap(reflected_pixmap.scaled(self.image_label.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation))
def closeEvent(self, event): # called when user closes the GUI window
self.end_session_event.set() # Signal other threads that the session should end
event.accept() # Continue the closing process
# Define a signal class to handle new chat messages
class ChatSignal(QObject):
new_message = pyqtSignal(dict) # Signal to display a new user message, carries dict payload with message
update_transcript = pyqtSignal(list) # Signal to update the transcript display, carries list payload with transcript
class ChatApp(QMainWindow): # GUI for LLM video chat
def __init__(self, start_time, chat_window_dims, user_chat_name, assistant_chat_name, chat_queue, chat_timestamps, new_chat_event, end_session_event):
super().__init__()
self.start_time = start_time
self.user_chat_name = user_chat_name
self.assistant_chat_name = assistant_chat_name
self.chat_queue = chat_queue
self.chat_timestamps = chat_timestamps
self.new_chat_event = new_chat_event
self.end_session_event = end_session_event
self.setWindowTitle("EMILI: Emotionally Intelligent Listener")
self.resize(*chat_window_dims) # unpack [width, height]
self.move(100, 100) # window position: (0,0) is top left
# Main layout
main_layout = QVBoxLayout()
# Tab widget for different tabs
self.tab_widget = QTabWidget()
main_layout.addWidget(self.tab_widget)
# Shared input bar at the bottom
self.chat_input = QLineEdit()
self.chat_input.setFixedHeight(72) # Set the height to accommodate three lines of text
self.chat_input.setStyleSheet("QLineEdit { height: 80px; font-size: 24px; }") # Adjust the height and font-size as needed
self.chat_input.returnPressed.connect(self.act_on_user_input) # function to call when user presses Enter
main_layout.addWidget(self.chat_input)
# Central widget setup
central_widget = QWidget()
central_widget.setLayout(main_layout)
self.setCentralWidget(central_widget)
self.signal = ChatSignal()
self.init_chat_tab()
self.init_FER_tab()
self.init_transcript_tab()
self.signal.new_message.connect(self.display_new_message)
self.signal.update_transcript.connect(self.update_transcript_display)
def closeEvent(self, event): # called when user closes the GUI window
self.end_session_event.set() # Signal other threads that the session should end
event.accept() # Continue the closing process
def act_on_user_input(self):
user_input = self.chat_input.text().rstrip('\n') # remove trailing newline
if user_input:
self.signal.new_message.emit({"role": "user", "content": user_input}) # Signal chat pane to display user message
self.chat_input.clear()
self.chat_timestamps.put(time_since(self.start_time)) # milliseconds since start of session
self.chat_queue.put(user_input) # pass user message to the assembler thread
self.new_chat_event.set() # Signal new chat to the assembler thread
def display_frame(self, image):
# Convert the numpy array image to QPixmap and display it on a QLabel
height, width, channel = image.shape
bytesPerLine = 3 * width
qImg = QImage(image.data, width, height, bytesPerLine, QImage.Format_RGB888)
pixmap = QPixmap.fromImage(qImg)