diff --git a/modeling_tf_adapter_bert.py b/modeling_tf_adapter_bert.py index 5ad4b96..6f9bd2b 100644 --- a/modeling_tf_adapter_bert.py +++ b/modeling_tf_adapter_bert.py @@ -20,13 +20,13 @@ def __init__(self, input_size, bottleneck_size, non_linearity, *inputs, **kwargs self.down_project = tf.keras.layers.Dense( bottleneck_size, - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3), bias_initializer="zeros", name="feedforward_downproject") self.up_project = tf.keras.layers.Dense( input_size, - kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02), + kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3), bias_initializer="zeros", name="feedforward_upproject")