This repository has been archived by the owner on Dec 11, 2022. It is now read-only.

Tf2 migration #430

Open · wants to merge 153 commits into base: master
Changes from all commits
153 commits
ed96453
Added WIP warning in the README.md file header
Jul 7, 2019
0c26e7c
Added tf2 components folder. The folder contains the same files as te…
Jul 14, 2019
0a49ea4
Added __init__ files to tf2_components subfolders
Jul 16, 2019
c9429d7
Renaming tf2_components folder to tensorflow_components
Jul 16, 2019
4261f32
Nothing important in this commit. Mostly blanks were inserted in order…
Jul 16, 2019
c262ef0
Started input embedder migration to Keras
Jul 24, 2019
2d71311
Input embedder migrated to tf2
Jul 28, 2019
4a162af
Middleware migrated to tf2. Added DnnModel class, this class instanti…
Jul 29, 2019
ecc07bb
Initial version of general_network migrated to tf2. The network is im…
Jul 30, 2019
1fe2719
Forward pass is running, not tested for correctness. Training is not …
Aug 1, 2019
f1424be
Separated the loss function from the DNN head. Loss is now in a separ…
Aug 5, 2019
8c362f4
get_model() in general network creates a new model and returns it, in…
Aug 6, 2019
8199a11
Optimizers migrated to Keras optimizers. Optimizers are now separated…
Aug 6, 2019
cac4717
Learning rate scheduler moved to optimizer. Model is built before inp…
Aug 7, 2019
668d493
Added activation functions, batch normalization and dropout layers to…
Aug 7, 2019
64d6142
Added input clipping, scaling and offset reduction (translation) in t…
Aug 7, 2019
135502b
Input embedder and vector embedder migration is completed
Aug 7, 2019
2738f96
Added batchnorm, dropout and activation functions to middleware
Aug 8, 2019
8759fb8
Loss was removed from DNN head
Aug 8, 2019
31dbcbd
get_input_embedder is called with explicit vector embedder parameters…
Aug 8, 2019
69aa86b
get_middleware is called with explicit vector embedder parameters and…
Aug 11, 2019
296f8fc
Cleaning network head constructor
Aug 11, 2019
7699dd5
get_output_head is called with explicit vector embedder parameters an…
Aug 12, 2019
ff751e4
Checkpoint before mxnet enablement
Aug 13, 2019
cc628b5
Rolled back API changes to re-enable mxnet
Aug 13, 2019
d3d84e6
Single worker model is instantiated in a separate class. Need to chec…
Aug 13, 2019
1b14ff6
general_network migrated to tf2 (mostly)
Aug 14, 2019
ccbc182
Deleting redundant files
Aug 14, 2019
2cb3e12
GeneralTensorFlowNetwork extends TensorFlowArchitecture. We should co…
Aug 15, 2019
dea5335
Fixed division bug and embedder name
Aug 15, 2019
7c450b1
Full batch forward through the DNN model
Aug 15, 2019
d1cef10
Changed model inputs to one input for each embedder. Parallel predict…
Aug 19, 2019
30ca9cb
Loss is calculated on input batch
Aug 20, 2019
4387e3a
Gradient of the loss is calculated w.r.t model parameters
Aug 20, 2019
43620d8
Gradient accumulation converted to tf2
Aug 21, 2019
70b266d
Model weights are updated with optimization step
Aug 21, 2019
739b5bf
DQN is running. Not tested.
Aug 21, 2019
7a3dd29
Shallow testing on DQN shows convergence
Aug 21, 2019
2efc676
Cosmetic changes
Aug 21, 2019
1fb347e
Removed generalLoss class. Loss class is defined in the same file as …
Aug 21, 2019
c542f61
Added temporary preparation for gradient rescaling, needs testing tha…
Aug 22, 2019
4d625b0
Removed gradient rescaling wrapper class
Aug 22, 2019
d09a4c1
get_input_embedder, get_middleware and get_output_head are member fun…
Aug 22, 2019
bcd38e4
DNN Model moved to a separate module
Aug 22, 2019
ccdf110
Added losses directory with loss base class
Aug 22, 2019
4856501
Starting loss separation from head.
Aug 27, 2019
967b7f3
DNN Head is separated from loss function
Aug 27, 2019
85ce5e6
Modules renaming
Aug 27, 2019
a2ce044
Removed loss from head module
Aug 27, 2019
bbef6cc
Image embedder converted to tf1 compatible version
Aug 28, 2019
a1c2587
Added small code snippet that enables running the script from command…
Aug 29, 2019
e09e4b1
GalN code review fixes
Aug 29, 2019
a1062a3
Adding Gal Leibovitch code review comments
Sep 1, 2019
99407ae
Adding Gal Leibovitch code review comments
Sep 1, 2019
3716e75
Added gradient clipping
Sep 1, 2019
c3e98a1
Removed input layer. DNN inputs are cast to 32-bit precision. This …
Sep 2, 2019
d044c67
Atari DQN is showing signs of convergence on breakout environment. Ne…
Sep 2, 2019
a22eb3d
Saver save and restore partly migrated, only saving the weights, not …
Sep 4, 2019
6f76420
Removed concurrency support in order to avoid broken pipe error
Sep 8, 2019
0f94381
Fixed memory leak bug
Sep 9, 2019
62c0084
Added support for configurable input embedder and configurable middle…
Sep 10, 2019
7c8c8b6
Wrapped tensor dense layer for mxnet compatibility
Sep 10, 2019
dfc8205
Starting clipped PPO infrastructure
Sep 11, 2019
afeb809
Added value loss
Sep 11, 2019
6ca380b
Adding an RL loss base class on top of the Keras loss class
Sep 12, 2019
81e6bc3
Renamed output_heads to heads
Sep 12, 2019
790a148
Adding an RL loss base class on top of the Keras loss class
Sep 12, 2019
480c039
Added a loss forward function. This is where each agent implements it…
Sep 15, 2019
cf5e44d
loss_forward is implemented in parent class, each child should implem…
Sep 15, 2019
4fee6e1
Temporary fix: Changed /presets/Mujoco_ClippedPPO to import tf dense …
Sep 15, 2019
852479c
Removed heads that are not implemented in tensorflow 2
Sep 15, 2019
cd586a3
dummy_model_inputs moved to dnn_model
Sep 16, 2019
99b152c
Splitting the targets into targets per loss
Sep 18, 2019
9d9627f
Head Loss extends Keras layer and not Keras loss. This is due to call…
Sep 18, 2019
d51fa30
Added output schema to the head loss output
Sep 18, 2019
4fbe85a
Added output schema to the head loss output
Sep 18, 2019
ee0e03e
Added output schema to the head loss output
Sep 19, 2019
eb87303
Merge branch 'tf2_migration' of https://github.com/NervanaSystems/coa…
Sep 19, 2019
4e6c8c7
Bug fix in tensorflow architecture. Dimensionality check added
Sep 19, 2019
ef888f2
Added _num_outputs property to output head to be compared against num…
Sep 22, 2019
baa32f9
Changed loss output to dictionary, added a check on the loss outputs
Sep 23, 2019
85d0300
Clipped PPO is running. Gradient tape is not working as expected with…
Sep 24, 2019
4400f50
Checkpoint
Sep 24, 2019
7c330fb
Starting to add support for stochastic policy
Sep 25, 2019
c400d9a
Agent is outputting probability for each action instead of mean and ST…
Sep 25, 2019
f03e065
Changed PPO agent to output probability distribution
Sep 26, 2019
d1d8f0a
Dimensions Bug fix in PPO loss
Sep 26, 2019
b4d13ce
PPO learning is extremely slow
Sep 26, 2019
4504d8d
DQN is learning OK. Memory usage is very high
Sep 26, 2019
2a600b8
Started learning with PPO, reaching reward of 200 on the inverted pen…
Oct 2, 2019
41cdd05
Reaching evaluation reward of 250 on inverted pendulum and then perfo…
Oct 2, 2019
7d770d1
Removed loss from model.compile
Oct 3, 2019
cbdb07d
Actor is outputting log std instead of std in order to constrain po…
Oct 3, 2019
9c60056
Detaching the value of the std from the network output. Reaching rewa…
Oct 7, 2019
f603099
Input shape is derived from embedder parameters instead of from inst…
Oct 10, 2019
6aa49bd
Added network wrapper to hold network inputs and losses
Oct 13, 2019
d6fdef1
Added functional keras wrapper on top layer. mirrored_strategy does n…
Oct 13, 2019
35b5f9a
Fixed Bug in value loss calculation. Reaching reward of 1000, not stable
Oct 15, 2019
fe61c16
Policy network std is a variable and not network output. Reaching 100…
Oct 15, 2019
deb08fc
Changed PPO Head to functional form. Reaching reward of 1000
Oct 17, 2019
f77fb18
Changed model_wrapper class to create_model function
Oct 17, 2019
e779725
Changed actor critic to functional form. DQN gives errors
Oct 17, 2019
89f7834
DQN bug fixed, PPO is buggy
Oct 18, 2019
18da38a
Sanity PPO reaching 1000
Oct 18, 2019
fc4eaa4
Functional top model DQN and PPO running
Oct 18, 2019
78ec6dd
Changed head value output shape and value loss accordingly
Oct 19, 2019
604592d
Reverted unnecessary changes from initial commit. PPO reaches 1000
Oct 20, 2019
2c5eeb6
Changed back ppo agent to the original fetches form
Oct 20, 2019
5e00670
Starting single network migration to functional form
Oct 21, 2019
bba9ed2
Both DQN and PPO are running
Oct 21, 2019
0f508cc
Removed SingleDNN PPO Head models subclass implementation. PPO Reward …
Oct 22, 2019
2eef34e
Changed heads to functional form. PPO 1000
Oct 22, 2019
06569ce
Added functional losses
Oct 23, 2019
bd90a32
PPO is working, everything else is broken. Unintentionally shared par…
Nov 20, 2019
3f55ca6
Rolled back unintentional change to mxnet
Nov 21, 2019
99d7e67
Back to old accumulate_gradients, not verified
Nov 21, 2019
d149568
Should verify performance. Both DQN and PPO are training. Should imple…
Nov 26, 2019
eb88df4
Changed keras Dense to framework agnostic dense in PPO
Nov 26, 2019
be50f36
Changed keras Dense to framework agnostic dense in PPO. Should change…
Nov 26, 2019
254a9fa
Humanoid is running. Inverted pendulum is checked with clipped PPO
Nov 27, 2019
1c24b6d
Humanoid reaches benchmark. Number of timesteps is OK. Wall clock tim…
Dec 1, 2019
e32b608
1. PEP 8 fixes and removed stale comments. 2. DQN runs with convolution…
Dec 2, 2019
6de3d19
Removed loss function for head. Loss is implemented only via loss class
Dec 2, 2019
e4a5630
Inverted Pendulum verified
Dec 4, 2019
e85fbf9
Reverted coach main to original version. Renaming and comments
Dec 4, 2019
4a499de
Checkpoint, before fixing additional input
Dec 4, 2019
82cd05f
Report generation
Dec 4, 2019
72d16dd
Presentation
Dec 5, 2019
5b6ffe8
Humanoid multiple seeds experiment done on this checkpoint
Dec 15, 2019
24fc264
Removed output schema
Dec 15, 2019
e6298e6
Changed loss input schema to split inputs based on trainable and non …
Dec 15, 2019
13bfe86
Updated copyright header in TF2 files
Dec 15, 2019
4bb017e
Each head_loss is responsible for extracting its own args. Inverted p…
Dec 16, 2019
8a19944
Updated input schema and input checking
Dec 16, 2019
1e10728
Removed PolicyHead and PPO VHead
Dec 16, 2019
086e5b8
Added comments and typings
Dec 16, 2019
c1c0d9b
Removed LSTM middleware, not supported in TF2
Dec 16, 2019
373c738
Added documentation
Dec 16, 2019
e5be943
Added documentation
Dec 16, 2019
fe47023
Added wrappers on dense layers and changed casting. Should test for c…
Dec 17, 2019
d87461f
Loss type is not hard-coded, can be configured from agent params
Dec 17, 2019
7f60eb1
QHead and VHead are generated via function call, not class constructor
Dec 17, 2019
e082a77
Added GPU support
Dec 17, 2019
54fc45a
Checkpoint before running experiments
Dec 17, 2019
e545d76
Comments and typing hints were inserted
Dec 18, 2019
3a00afa
Restrict TensorFlow to only allocate 2GB of memory on the first GPU, …
Dec 22, 2019
bd87a2d
Added benchmark for humanoid_clipped_pp
Dec 25, 2019
c576eed
Added benchmark for DQN pong
Dec 29, 2019
4501581
Added benchmark for DQN breakout
Dec 29, 2019
12a40e0
Added benchmark for inverted pendulum, only have 2.5 M timesteps
Dec 29, 2019
6a5fa7b
Added benchmark for space invaders DQN
Dec 30, 2019
e6a7e52
Added benchmarks for clipped PPO double pendulum and half cheetah
Jan 8, 2020
70b7ad4
Added ant clipped ppo benchmark
Jan 12, 2020
3 changes: 2 additions & 1 deletion README.md
@@ -1,4 +1,5 @@
# Coach
# Warning:
## This branch of Coach is WIP for migration to tf2 and should not be checked out

[![CI](https://img.shields.io/circleci/project/github/NervanaSystems/coach/master.svg)](https://circleci.com/gh/NervanaSystems/workflows/coach/tree/master)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/NervanaSystems/coach/blob/master/LICENSE)
Binary file added benchmarks/clipped_ppo/ant_clipped_ppo_tf2.png
Binary file added benchmarks/clipped_ppo/ant_clipped_ppo_tf2_3.png
Binary file added benchmarks/dqn/breakout_dqn_tf2.png
Binary file added benchmarks/dqn/pong_dqn_tf2.png
Binary file added benchmarks/dqn/space_invaders_dqn_tf2.png
13 changes: 9 additions & 4 deletions rl_coach/agents/clipped_ppo_agent.py
@@ -202,10 +202,15 @@ def train_network(self, batch, epochs):
'entropy': []
}

fetches = [self.networks['main'].online_network.output_heads[1].kl_divergence,
self.networks['main'].online_network.output_heads[1].entropy,
self.networks['main'].online_network.output_heads[1].likelihood_ratio,
self.networks['main'].online_network.output_heads[1].clipped_likelihood_ratio]
# fetches = [self.networks['main'].online_network.output_heads[1].kl_divergence,
# self.networks['main'].online_network.output_heads[1].entropy,
# self.networks['main'].online_network.output_heads[1].likelihood_ratio,
# self.networks['main'].online_network.output_heads[1].clipped_likelihood_ratio]

fetches = [(1, 'kl_divergence'),
(1, 'entropy'),
(1, 'likelihood_ratio'),
(1, 'clipped_likelihood_ratio')]

# TODO-fixme if batch.size / self.ap.network_wrappers['main'].batch_size is not an integer, we do not train on
# some of the data
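The hunk above replaces direct references to TF1 graph tensors with plain (head_index, output_name) tuples, so the agent can describe which head statistics to fetch without holding on to session tensors. A minimal sketch of how such tuples might be resolved at training time, assuming each head exposes the named quantities as attributes; the helper name and structure below are illustrative, not the PR's actual code:

```python
# Illustrative only: resolve (head_index, output_name) fetch tuples against a
# list of head objects. This is an assumed helper, not the one the PR adds.
from typing import Any, List, Sequence, Tuple


def resolve_fetches(heads: Sequence[Any], fetches: List[Tuple[int, str]]) -> List[Any]:
    """Look up each requested output on the head it refers to."""
    return [getattr(heads[head_index], output_name)
            for head_index, output_name in fetches]
```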
7 changes: 3 additions & 4 deletions rl_coach/architectures/architecture.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2017 Intel Corporation
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,15 +14,14 @@
# limitations under the License.
#

from typing import Any, Dict, List, Tuple

import numpy as np

from typing import Any, Dict, List, Tuple
from rl_coach.base_parameters import AgentParameters
from rl_coach.saver import SaverCollection
from rl_coach.spaces import SpacesDefinition



class Architecture(object):
@staticmethod
def construct(variable_scope: str, devices: List[str], *args, **kwargs) -> 'Architecture':
2 changes: 1 addition & 1 deletion rl_coach/architectures/embedder_parameters.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2017 Intel Corporation
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
2 changes: 1 addition & 1 deletion rl_coach/architectures/head_parameters.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2017 Intel Corporation
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
2 changes: 1 addition & 1 deletion rl_coach/architectures/layers.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2017 Intel Corporation
# Copyright (c) 2019 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
693 changes: 693 additions & 0 deletions rl_coach/architectures/legacy_tf_components/architecture.py

Large diffs are not rendered by default.

103 changes: 103 additions & 0 deletions rl_coach/architectures/legacy_tf_components/distributed_tf_utils.py
@@ -0,0 +1,103 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Tuple

import tensorflow as tf


def create_cluster_spec(parameters_server: str, workers: str) -> tf.train.ClusterSpec:
"""
Creates a ClusterSpec object representing the cluster.
:param parameters_server: comma-separated list of hostname:port pairs to which the parameter servers are assigned
:param workers: comma-separated list of hostname:port pairs to which the workers are assigned
:return: a ClusterSpec object representing the cluster
"""
# extract the parameter servers and workers from the given strings
ps_hosts = parameters_server.split(",")
worker_hosts = workers.split(",")

# Create a cluster spec from the parameter server and worker hosts
cluster_spec = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

return cluster_spec


def create_and_start_parameters_server(cluster_spec: tf.train.ClusterSpec, config: tf.ConfigProto=None) -> None:
"""
Create and start a parameter server
:param cluster_spec: the ClusterSpec object representing the cluster
:param config: the tensorflow config to use
:return: None
"""
# create a server object for the parameter server
server = tf.train.Server(cluster_spec, job_name="ps", task_index=0, config=config)

# wait for the server to finish
server.join()


def create_worker_server_and_device(cluster_spec: tf.train.ClusterSpec, task_index: int,
use_cpu: bool=True, config: tf.ConfigProto=None) -> Tuple[str, tf.device]:
"""
Creates a worker server and a device setter used to assign the worker's operations to
:param cluster_spec: a ClusterSpec object representing the cluster
:param task_index: the index of the worker task
:param use_cpu: if use_cpu=True, all the agent operations will be assigned to a CPU instead of a GPU
:param config: the tensorflow config to use
:return: the target string for the tf.Session and the worker device setter object
"""
# Create and start a worker
server = tf.train.Server(cluster_spec, job_name="worker", task_index=task_index, config=config)

# Assign ops to the local worker
worker_device = "/job:worker/task:{}".format(task_index)
if use_cpu:
worker_device += "/cpu:0"
else:
worker_device += "/device:GPU:0"
device = tf.train.replica_device_setter(worker_device=worker_device, cluster=cluster_spec)

return server.target, device


def create_monitored_session(target: tf.train.Server, task_index: int,
checkpoint_dir: str, checkpoint_save_secs: int, config: tf.ConfigProto=None) -> tf.Session:
"""
Create a monitored session for the worker
:param target: the target string for the tf.Session
:param task_index: the task index of the worker
:param checkpoint_dir: a directory path where the checkpoints will be stored
:param checkpoint_save_secs: number of seconds between checkpoint saves
:param config: the tensorflow configuration (optional)
:return: the session to use for the run
"""
# we chose the first task to be the chief
is_chief = task_index == 0

# Create the monitored session
sess = tf.train.MonitoredTrainingSession(
master=target,
is_chief=is_chief,
hooks=[],
checkpoint_dir=checkpoint_dir,
save_checkpoint_secs=checkpoint_save_secs,
config=config,
log_step_count_steps=0 # disable logging of steps to avoid TF warning during inference
)

return sess
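
A short usage sketch for the legacy TF1 helpers above; the host addresses, task index, and checkpoint settings are placeholder values rather than anything taken from this PR, and the code requires a TF1 runtime (these APIs do not exist in TF2):

```python
# Sketch: wiring the helpers into a parameter-server/worker setup.
import tensorflow as tf

from rl_coach.architectures.legacy_tf_components.distributed_tf_utils import (
    create_cluster_spec, create_worker_server_and_device, create_monitored_session)

cluster_spec = create_cluster_spec(parameters_server="localhost:2222",
                                   workers="localhost:2223,localhost:2224")

# A parameter-server process would instead call:
#   create_and_start_parameters_server(cluster_spec)  # blocks until the job ends

# On a worker process (here the chief, task_index=0):
target, device = create_worker_server_and_device(cluster_spec, task_index=0, use_cpu=True)
with tf.device(device):
    # build the model under the replica device setter
    global_step = tf.train.get_or_create_global_step()

sess = create_monitored_session(target=target, task_index=0,
                                checkpoint_dir="/tmp/coach_checkpoints",
                                checkpoint_save_secs=600)
```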

5 changes: 5 additions & 0 deletions rl_coach/architectures/legacy_tf_components/embedders/__init__.py
@@ -0,0 +1,5 @@
from .image_embedder import ImageEmbedder
from .vector_embedder import VectorEmbedder
from .tensor_embedder import TensorEmbedder

__all__ = ['ImageEmbedder', 'VectorEmbedder', 'TensorEmbedder']
157 changes: 157 additions & 0 deletions rl_coach/architectures/legacy_tf_components/embedders/embedder.py
@@ -0,0 +1,157 @@
#
# Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import List, Union, Tuple
import copy

import numpy as np
import tensorflow as tf

from rl_coach.architectures.tensorflow_components.layers import BatchnormActivationDropout, convert_layer, Dense
from rl_coach.base_parameters import EmbedderScheme, NetworkComponentParameters

from rl_coach.core_types import InputEmbedding
from rl_coach.utils import force_list


class InputEmbedder(object):
"""
An input embedder is the first part of the network, which takes the input from the state and produces a vector
embedding by passing it through a neural network. The embedder will mostly be input type dependent, and there
can be multiple embedders in a single network
"""
def __init__(self, input_size: List[int], activation_function=tf.nn.relu,
scheme: EmbedderScheme=None, batchnorm: bool=False, dropout_rate: float=0.0,
name: str= "embedder", input_rescaling=1.0, input_offset=0.0, input_clipping=None, dense_layer=Dense,
is_training=False):
self.name = name
self.input_size = input_size
self.activation_function = activation_function
self.batchnorm = batchnorm
self.dropout_rate = dropout_rate
self.input = None
self.output = None
self.scheme = scheme
self.return_type = InputEmbedding
self.layers_params = []
self.layers = []
self.input_rescaling = input_rescaling
self.input_offset = input_offset
self.input_clipping = input_clipping
self.dense_layer = dense_layer
if self.dense_layer is None:
self.dense_layer = Dense
self.is_training = is_training

# layers order is conv -> batchnorm -> activation -> dropout
if isinstance(self.scheme, EmbedderScheme):
self.layers_params = copy.copy(self.schemes[self.scheme])
self.layers_params = [convert_layer(l) for l in self.layers_params]
else:
# if scheme is specified directly, convert to TF layer if it's not a callable object
# NOTE: if layer object is callable, it must return a TF tensor when invoked
self.layers_params = [convert_layer(l) for l in copy.copy(self.scheme)]

# we allow adding batchnorm, dropout or activation functions after each layer.
# The motivation is to simplify the transition between a network with batchnorm and a network without
# batchnorm to a single flag (the same applies to activation function and dropout)
if self.batchnorm or self.activation_function or self.dropout_rate > 0:
for layer_idx in reversed(range(len(self.layers_params))):
self.layers_params.insert(layer_idx+1,
BatchnormActivationDropout(batchnorm=self.batchnorm,
activation_function=self.activation_function,
dropout_rate=self.dropout_rate))

def __call__(self, prev_input_placeholder: tf.placeholder=None) -> Tuple[tf.Tensor, tf.Tensor]:
"""
Wrapper for building the module graph including scoping and loss creation
:param prev_input_placeholder: the input to the graph
:return: the input placeholder and the output of the last layer
"""
with tf.variable_scope(self.get_name()):
if prev_input_placeholder is None:
self.input = tf.placeholder("float", shape=[None] + self.input_size, name=self.get_name())
else:
self.input = prev_input_placeholder
self._build_module()

return self.input, self.output

def _build_module(self) -> None:
"""
Builds the graph of the module
This method is called early on from __call__. It is expected to store the graph
in self.output.
:return: None
"""
# NOTE: for image inputs, we expect the data format to be uint8 in order to be memory efficient. We chose not
# to implement the rescaling as an input filter (filters.observation.observation_filter), as this would have
# caused the input to the network to be float, which is 4x more expensive in memory,
# thus making each saved transition stored in the memory 4x more expensive as well.

input_layer = self.input / self.input_rescaling
input_layer -= self.input_offset
# clip the input using the given range
if self.input_clipping is not None:
input_layer = tf.clip_by_value(input_layer, self.input_clipping[0], self.input_clipping[1])

self.layers.append(input_layer)

for idx, layer_params in enumerate(self.layers_params):
self.layers.extend(force_list(
layer_params(input_layer=self.layers[-1], name='{}_{}'.format(layer_params.__class__.__name__, idx),
is_training=self.is_training)
))

self.output = tf.contrib.layers.flatten(self.layers[-1])

@property
def input_size(self) -> List[int]:
return self._input_size

@input_size.setter
def input_size(self, value: Union[int, List[int]]):
if isinstance(value, np.ndarray) or isinstance(value, tuple):
value = list(value)
elif isinstance(value, int):
value = [value]
if not isinstance(value, list):
raise ValueError((
'input_size expected to be a list, found {value} which has type {type}'
).format(value=value, type=type(value)))
self._input_size = value

@property
def schemes(self):
raise NotImplementedError("Inheriting embedder must define schemes matching its allowed default "
"configurations.")

def get_name(self) -> str:
"""
Get a formatted name for the module
:return: the formatted name
"""
return self.name

def __str__(self):
result = ['Input size = {}'.format(self._input_size)]
if self.input_rescaling != 1.0 or self.input_offset != 0.0:
result.append('Input Normalization (scale = {}, offset = {})'.format(self.input_rescaling, self.input_offset))
result.extend([str(l) for l in self.layers_params])
if not self.layers_params:
result.append('No layers')

return '\n'.join(result)
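
A minimal sketch of how InputEmbedder above is meant to be extended, assuming a TF1 runtime and the EmbedderScheme values and Dense wrapper imported at the top of the file; the subclass name and layer sizes are illustrative only, not the embedders defined elsewhere in this PR:

```python
# Illustrative subclass: the only thing an inheriting embedder must provide is
# the `schemes` property mapping each EmbedderScheme to a list of layer constructors.
from rl_coach.architectures.legacy_tf_components.embedders.embedder import InputEmbedder
from rl_coach.architectures.tensorflow_components.layers import Dense
from rl_coach.base_parameters import EmbedderScheme


class MinimalVectorEmbedder(InputEmbedder):
    @property
    def schemes(self):
        return {
            EmbedderScheme.Empty: [],
            EmbedderScheme.Shallow: [Dense(64)],
            EmbedderScheme.Medium: [Dense(256)],
            EmbedderScheme.Deep: [Dense(128), Dense(128), Dense(128)],
        }


# Calling the embedder builds its TF1 graph and returns the input placeholder
# together with the flattened embedding tensor.
embedder = MinimalVectorEmbedder(input_size=[10], scheme=EmbedderScheme.Medium)
input_placeholder, embedding = embedder()
```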