Skip to content

Commit

Permalink
Merge branch 'keras-team:master' into falcon-causallm
Browse files Browse the repository at this point in the history
  • Loading branch information
SamanehSaadat committed Mar 15, 2024
2 parents c5322b7 + 4511580 commit bb82633
Show file tree
Hide file tree
Showing 27 changed files with 1,249 additions and 204 deletions.
3 changes: 3 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ updates:
python:
patterns:
- "*"
ignore:
# Ignore all updates for the JAX GPU extra due to a CUDA version incompatibility.
- dependency-name: "jax[cuda12_pip]"
21 changes: 21 additions & 0 deletions .github/workflows/auto-assignment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Workflow: assign each newly opened issue to a maintainer on a rotating basis.
# The actual rotation logic lives in .github/workflows/scripts/auto-assignment.js.
name: auto-assignment
on:
issues:
types:
- opened

# Least-privilege token scopes: read the checkout, write assignees on issues/PRs.
permissions:
contents: read
issues: write
pull-requests: write

jobs:
welcome:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# github-script exposes pre-authenticated `github` and `context` objects
# to the required script.
- uses: actions/github-script@v7
with:
script: |
const script = require('./\.github/workflows/scripts/auto-assignment.js')
script({github, context})
43 changes: 43 additions & 0 deletions .github/workflows/scripts/auto-assignment.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/** Automatically assign issues and PRs to users in the `assigneesList`
* on a rotating basis.
@param {!object}
GitHub objects can call GitHub APIs using their built-in library functions.
The context object contains issue and PR details.
*/

module.exports = async ({ github, context }) => {
let issueNumber;
let assigneesList;
// Is this an issue? If so, assign the issue number. Otherwise, assign the PR number.
if (context.payload.issue) {
//assignee List for issues.
assigneesList = ["SuryanarayanaY", "sachinprasadhs"];
issueNumber = context.payload.issue.number;
} else {
//assignee List for PRs.
assigneesList = [mattdangerw];
issueNumber = context.payload.number;
}
console.log("assignee list", assigneesList);
console.log("entered auto assignment for this issue: ", issueNumber);
if (!assigneesList.length) {
console.log("No assignees found for this repo.");
return;
}
let noOfAssignees = assigneesList.length;
let selection = issueNumber % noOfAssignees;
let assigneeForIssue = assigneesList[selection];

console.log(
"issue Number = ",
issueNumber + " , assigning to: ",
assigneeForIssue
);
return github.rest.issues.addAssignees({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
assignees: [assigneeForIssue],
});
};
10 changes: 7 additions & 3 deletions .github/workflows/scripts/labeler.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,20 @@ You may obtain a copy of the License at

module.exports = async ({ github, context }) => {
const issue_title = context.payload.issue ? context.payload.issue.title : context.payload.pull_request.title
const issue_discription = context.payload.issue ? context.payload.issue.body : context.payload.pull_request.body
let issue_description = context.payload.issue ? context.payload.issue.body : context.payload.pull_request.body
const issue_number = context.payload.issue ? context.payload.issue.number : context.payload.pull_request.number
const keyword_label = {
gemma:'Gemma'
}
const labelsToAdd = []
console.log(issue_title,issue_discription,issue_number)
console.log(issue_title,issue_description,issue_number)
if (issue_description==null)
{
issue_description = ''
}

for(const [keyword, label] of Object.entries(keyword_label)){
if(issue_title.toLowerCase().indexOf(keyword) !=-1 || issue_discription.toLowerCase().indexOf(keyword) !=-1 ){
if(issue_title.toLowerCase().indexOf(keyword) !=-1 || issue_description.toLowerCase().indexOf(keyword) !=-1 ){
console.log(`'${keyword}'keyword is present inside the title or description. Pushing label '${label}' to row.`)
labelsToAdd.push(label)
}
Expand Down
50 changes: 50 additions & 0 deletions .github/workflows/stale-issue-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Nightly housekeeping: mark inactive issues/PRs as stale and close them after
# a grace period. Two passes with different timelines:
#   1. "awaiting response" items: stale after 14 days, closed 14 days later.
#   2. "contributions welcome" items: stale after 180 days, closed after 1 year.
name: Close inactive issues
on:
schedule:
- cron: "30 1 * * *"
jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- name: Awaiting response issues
uses: actions/stale@v9
with:
days-before-issue-stale: 14
days-before-issue-close: 14
stale-issue-label: "stale"
# Close reason recorded on the issue; overrides the default of not_planned.
close-issue-reason: completed
only-labels: "stat:awaiting response from contributor"
stale-issue-message: >
This issue is stale because it has been open for 14 days with no activity.
It will be closed if no further activity occurs. Thank you.
# List of labels to remove when issues/PRs unstale.
labels-to-remove-when-unstale: "stat:awaiting response from contributor"
close-issue-message: >
This issue was closed because it has been inactive for 28 days.
Please reopen if you'd like to work on this further.
days-before-pr-stale: 14
days-before-pr-close: 14
stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you."
close-pr-message: "This PR was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further."
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Contribution issues
uses: actions/stale@v9
with:
days-before-issue-stale: 180
days-before-issue-close: 365
stale-issue-label: "stale"
# Close reason recorded on the issue; explicitly set to the default, not_planned.
close-issue-reason: not_planned
any-of-labels: "stat:contributions welcome,good first issue"
# List of labels to remove when issues/PRs unstale.
labels-to-remove-when-unstale: "stat:contributions welcome,good first issue"
stale-issue-message: >
This issue is stale because it has been open for 180 days with no activity.
It will be closed if no further activity occurs. Thank you.
close-issue-message: >
This issue was closed because it has been inactive for more than 1 year.
repo-token: ${{ secrets.GITHUB_TOKEN }}
3 changes: 0 additions & 3 deletions .kokoro/github/ubuntu/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ if [[ -z "${KAGGLE_USERNAME}" ]]; then
fi

set -x

cd "${KOKORO_ROOT}/"

sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1

PYTHON_BINARY="/usr/bin/python3.9"

"${PYTHON_BINARY}" -m venv venv
Expand Down
47 changes: 29 additions & 18 deletions keras_nlp/layers/modeling/rotary_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,30 +85,42 @@ def __init__(
self.built = True

def call(self, inputs, start_index=0):
inputs = ops.moveaxis(
inputs, (self.feature_axis, self.sequence_axis), (-1, 1)
)
cos_emb, sin_emb = self._compute_cos_sin_embedding(inputs, start_index)
return self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
output = self._apply_rotary_pos_emb(inputs, cos_emb, sin_emb)
return ops.moveaxis(
output, (-1, 1), (self.feature_axis, self.sequence_axis)
)

def _apply_rotary_pos_emb(self, tensor, cos_emb, sin_emb):
x1, x2 = ops.split(tensor, 2, axis=self.feature_axis)
half_rot_tensor = ops.concatenate((-x2, x1), axis=self.feature_axis)
x1, x2 = ops.split(tensor, 2, axis=-1)
# Avoid `ops.concatenate` for now, to avoid a obscure bug with XLA
# compilation on jax. We should be able to remove this once the
# following PR is in all jax releases we care about:
# https://github.com/openxla/xla/pull/7875
half_rot_tensor = ops.stack((-x2, x1), axis=-2)
half_rot_tensor = ops.reshape(half_rot_tensor, ops.shape(tensor))
return (tensor * cos_emb) + (half_rot_tensor * sin_emb)

def _compute_cos_sin_embedding(self, inputs, start_index=0):
def get_axis(axis):
return axis if axis > 0 else len(inputs.shape) + axis
start_index = ops.cast(start_index, dtype="float32")

feature_axis = get_axis(self.feature_axis)
sequence_axis = get_axis(self.sequence_axis)
feature_axis = len(inputs.shape) - 1
sequence_axis = 1

rotary_dim = ops.shape(inputs)[feature_axis]
inverse_freq = self._get_inverse_freq(rotary_dim)

seq_len = ops.shape(inputs)[self.sequence_axis]
tensor = ops.cast(ops.arange(seq_len), self.compute_dtype) + start_index
seq_len = ops.shape(inputs)[sequence_axis]
tensor = ops.arange(seq_len, dtype="float32") + start_index

tensor = ops.cast(tensor, dtype=inverse_freq.dtype)
freq = ops.einsum("i,j->ij", tensor, inverse_freq)
embedding = ops.concatenate((freq, freq), axis=-1)
embedding = ops.stack((freq, freq), axis=-2)
embedding = ops.reshape(
embedding, (*ops.shape(freq)[:-1], ops.shape(freq)[-1] * 2)
)

# Reshape the embedding to be broadcastable with input shape.
if feature_axis < sequence_axis:
Expand All @@ -117,17 +129,16 @@ def get_axis(axis):
if axis != sequence_axis and axis != feature_axis:
embedding = ops.expand_dims(embedding, axis)

return ops.cos(embedding), ops.sin(embedding)
cos_emb = ops.cast(ops.cos(embedding), self.compute_dtype)
sin_emb = ops.cast(ops.sin(embedding), self.compute_dtype)
return cos_emb, sin_emb

def _get_inverse_freq(self, rotary_dim):
freq_range = ops.arange(0, rotary_dim, 2)
freq_range = ops.cast(freq_range, self.compute_dtype)
freq_range = freq_range / ops.cast(
self.scaling_factor, self.compute_dtype
)
freq_range = ops.arange(0, rotary_dim, 2, dtype="float32")
freq_range = freq_range / ops.cast(self.scaling_factor, "float32")
inverse_freq = 1.0 / (
self.max_wavelength
** (freq_range / ops.cast(rotary_dim, self.compute_dtype))
** (freq_range / ops.cast(rotary_dim, "float32"))
)
return inverse_freq

Expand Down
2 changes: 2 additions & 0 deletions keras_nlp/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from keras_nlp.models.albert.albert_preprocessor import AlbertPreprocessor
from keras_nlp.models.albert.albert_tokenizer import AlbertTokenizer
from keras_nlp.models.backbone import Backbone
from keras_nlp.models.bart.bart_backbone import BartBackbone
from keras_nlp.models.bart.bart_preprocessor import BartPreprocessor
from keras_nlp.models.bart.bart_seq_2_seq_lm import BartSeq2SeqLM
Expand Down Expand Up @@ -130,6 +131,7 @@
from keras_nlp.models.roberta.roberta_tokenizer import RobertaTokenizer
from keras_nlp.models.t5.t5_backbone import T5Backbone
from keras_nlp.models.t5.t5_tokenizer import T5Tokenizer
from keras_nlp.models.task import Task
from keras_nlp.models.whisper.whisper_audio_feature_extractor import (
WhisperAudioFeatureExtractor,
)
Expand Down
3 changes: 2 additions & 1 deletion keras_nlp/models/backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import config
from keras_nlp.backend import keras
from keras_nlp.utils.preset_utils import check_preset_class
Expand All @@ -20,7 +21,7 @@
from keras_nlp.utils.python_utils import format_docstring


@keras.saving.register_keras_serializable(package="keras_nlp")
@keras_nlp_export("keras_nlp.models.Backbone")
class Backbone(keras.Model):
def __init__(self, *args, dtype=None, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
99 changes: 95 additions & 4 deletions keras_nlp/models/bloom/bloom_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,105 @@
"bloom_560m_multi": {
"metadata": {
"description": (
"24-layer Bloom model. trained on 45 natural languages and "
"12 programming languages."
"24-layer Bloom model with hidden dimension of 1024. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 816115712,
"params": 559214592,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom",
"model_card": "https://huggingface.co/bigscience/bloom-560m",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_560m_multi/3",
},
"bloom_1.1b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1536. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 1065314304,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-1b1",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_1.1b_multi/1",
},
"bloom_1.7b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 2048. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 1722408960,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-1b7",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_1.7b_multi/1",
},
"bloom_3b_multi": {
"metadata": {
"description": (
"30-layer Bloom model with hidden dimension of 2560. "
"trained on 45 natural languages and 12 programming languages."
),
"params": 3002557440,
"official_name": "BLOOM",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloom-3b",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloom_3b_multi/1",
},
"bloomz_560m_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1024. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 559214592,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-560m",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_560m_multi/1",
},
"bloomz_1.1b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 1536. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 1065314304,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-1b1",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_1.1b_multi/1",
},
"bloomz_1.7b_multi": {
"metadata": {
"description": (
"24-layer Bloom model with hidden dimension of 2048. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 1722408960,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-1b7",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_1.7b_multi/1",
},
"bloomz_3b_multi": {
"metadata": {
"description": (
"30-layer Bloom model with hidden dimension of 2560. "
"finetuned on crosslingual task mixture (xP3) dataset."
),
"params": 3002557440,
"official_name": "BLOOMZ",
"path": "bloom",
"model_card": "https://huggingface.co/bigscience/bloomz-3b",
},
"kaggle_handle": "kaggle://keras/bloom/keras/bloomz_3b_multi/1",
},
}
Loading

0 comments on commit bb82633

Please sign in to comment.