{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":681246239,"defaultBranch":"main","name":"Megatron-DeepSpeed","ownerLogin":"deep-spin","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2023-08-21T15:33:33.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/39625298?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715289138.0","currentOid":""},"activityList":{"items":[{"before":"f3c66c4943c476ed2a445dfe2e2a8701d102d5b5","after":"4f2c4fc4bccf79d492bb494d0ae11e5c7baaa1b8","ref":"refs/heads/entmax-loss","pushedAt":"2024-07-12T14:26:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"get item from sparsemax score tensor","shortMessageHtmlLink":"get item from sparsemax score tensor"}},{"before":"9e7f53d3f0b25daef9080016fb0c7b3deff1781d","after":"f3c66c4943c476ed2a445dfe2e2a8701d102d5b5","ref":"refs/heads/entmax-loss","pushedAt":"2024-07-12T13:29:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"add missing parentheses","shortMessageHtmlLink":"add missing parentheses"}},{"before":"01825324e5d22373a4dac17aa5d389a99ce37595","after":"9e7f53d3f0b25daef9080016fb0c7b3deff1781d","ref":"refs/heads/entmax-loss","pushedAt":"2024-07-12T10:49:30.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"refactor zeroshot gpt evaluation for sparsemax score","shortMessageHtmlLink":"refactor zeroshot gpt evaluation for sparsemax score"}},{"before":"4cfc1336b5ceeb0d34b7001f4408fb5011318a5f","after":"01825324e5d22373a4dac17aa5d389a99ce37595","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-28T11:39:51.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"fix entmax_bisect_loss return type","shortMessageHtmlLink":"fix entmax_bisect_loss return type"}},{"before":"c41f8b02c2365d35948f82c48591d98d7a8866e8","after":"4cfc1336b5ceeb0d34b7001f4408fb5011318a5f","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-27T21:46:57.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"write lambada to results file","shortMessageHtmlLink":"write lambada to results file"}},{"before":"77fe1089a893726097a67b389c88f43c320acfdc","after":"c41f8b02c2365d35948f82c48591d98d7a8866e8","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-27T21:42:07.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"don't load optimizer state for lambada","shortMessageHtmlLink":"don't load optimizer state for lambada"}},{"before":"24a5ed3e2533d9ac0a01cf3cd23efa8a28c11a10","after":"77fe1089a893726097a67b389c88f43c320acfdc","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-27T18:13:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"small change to try to get LAMBADA to run","shortMessageHtmlLink":"small change to try to get LAMBADA to run"}},{"before":"68a4a1bc73ec22637db36b51fa4e2d21d0cbbdbc","after":"24a5ed3e2533d9ac0a01cf3cd23efa8a28c11a10","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-27T16:42:28.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"fix guardrail for new task","shortMessageHtmlLink":"fix guardrail for new task"}},{"before":"f77cf0c87687ea651fc370f5e8b718fd5c53a236","after":"68a4a1bc73ec22637db36b51fa4e2d21d0cbbdbc","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-27T16:38:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"add accuracy at k","shortMessageHtmlLink":"add accuracy at k"}},{"before":"e24ebc6c7eed49a1d42a0f80533122347bcdcc85","after":"f77cf0c87687ea651fc370f5e8b718fd5c53a236","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T15:38:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"call item() because json cannot handle tensors","shortMessageHtmlLink":"call item() because json cannot handle tensors"}},{"before":"055f869ddf2f1a67a694ac060c500c2aa7fab788","after":"e24ebc6c7eed49a1d42a0f80533122347bcdcc85","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T15:06:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"remove print statements for eval computation","shortMessageHtmlLink":"remove print statements for eval computation"}},{"before":"e51656d3c7d8142f672ea53521da081d0f3791f8","after":"055f869ddf2f1a67a694ac060c500c2aa7fab788","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T14:31:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"avoid unpacking error when labels is None","shortMessageHtmlLink":"avoid unpacking error when labels is None"}},{"before":"a8733c4d6df5486fe6e37abf9d9e4c4aa2a80d34","after":"e51656d3c7d8142f672ea53521da081d0f3791f8","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T13:31:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"update xavier_uniform init (which will probably be unused","shortMessageHtmlLink":"update xavier_uniform init (which will probably be unused"}},{"before":"a02cf72da485b6350e95d23eadabf7069b076706","after":"a8733c4d6df5486fe6e37abf9d9e4c4aa2a80d34","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T13:28:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"update logging with teacher forced accuracy","shortMessageHtmlLink":"update logging with teacher forced accuracy"}},{"before":"c4cf60c5951d4c78642ad2ff169e968637dc4239","after":"a02cf72da485b6350e95d23eadabf7069b076706","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T13:06:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"add force_decoded_accuracy as a zero-shot evaluation metric","shortMessageHtmlLink":"add force_decoded_accuracy as a zero-shot evaluation metric"}},{"before":"19b478f92a1d114374ef50606baa3a0ce00bda98","after":"c4cf60c5951d4c78642ad2ff169e968637dc4239","ref":"refs/heads/entmax-loss","pushedAt":"2024-06-26T12:09:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"add multiple loss functions (with some print statements to make sure my assumptions of dimensions are correct)","shortMessageHtmlLink":"add multiple loss functions (with some print statements to make sure …"}},{"before":"d2fb87f06ede2530af8e868ea3c1cd67e83d1a33","after":"4e33e780f2c3efab79b9417b9289a614f5184ace","ref":"refs/heads/main","pushedAt":"2024-06-03T08:02:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"pedrohenriqueamartins","name":null,"path":"/pedrohenriqueamartins","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/25869114?s=80&v=4"},"commit":{"message":"correct preprocessing bug","shortMessageHtmlLink":"correct preprocessing bug"}},{"before":"48a1781a89cc20f7f09fa3d85c1275cec8596512","after":"19b478f92a1d114374ef50606baa3a0ce00bda98","ref":"refs/heads/entmax-loss","pushedAt":"2024-05-22T14:51:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"add more support logging statistics","shortMessageHtmlLink":"add more support logging statistics"}},{"before":"8a83cf0be33eef3dc3fc46728aa551d9a7d18dfa","after":"48a1781a89cc20f7f09fa3d85c1275cec8596512","ref":"refs/heads/entmax-loss","pushedAt":"2024-05-22T14:14:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"[wip] compute more sparsity stats","shortMessageHtmlLink":"[wip] compute more sparsity stats"}},{"before":"202caed32d9dda75cd92118ffb315eb4527d2207","after":"8a83cf0be33eef3dc3fc46728aa551d9a7d18dfa","ref":"refs/heads/entmax-loss","pushedAt":"2024-05-20T15:18:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"update return_support to return_support_size in all cases","shortMessageHtmlLink":"update return_support to return_support_size in all cases"}},{"before":"f10b652a7204155f310394b58cb6f2417ca517d0","after":"d2fb87f06ede2530af8e868ea3c1cd67e83d1a33","ref":"refs/heads/main","pushedAt":"2024-05-20T14:09:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"CoderPat","name":"Patrick Fernandes","path":"/CoderPat","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11250483?s=80&v=4"},"commit":{"message":"Fix issue with tensorboard not writing","shortMessageHtmlLink":"Fix issue with tensorboard not writing"}},{"before":"8d94512766dff5bfcb1ff5cc777d56775f76157c","after":"daefee17344bd68975dea236044a99d6446f4c5d","ref":"refs/heads/feature_weight","pushedAt":"2024-05-11T02:39:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"nightingal3","name":null,"path":"/nightingal3","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/19615708?s=80&v=4"},"commit":{"message":"save on memory in preproc","shortMessageHtmlLink":"save on memory in preproc"}},{"before":null,"after":"8d94512766dff5bfcb1ff5cc777d56775f76157c","ref":"refs/heads/feature_weight","pushedAt":"2024-05-09T21:12:18.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"nightingal3","name":null,"path":"/nightingal3","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/19615708?s=80&v=4"},"commit":{"message":"add feature weight as an option","shortMessageHtmlLink":"add feature weight as an option"}},{"before":"f05f0057092d0e6d03acf4ac9f6e5813b07a1233","after":"f10b652a7204155f310394b58cb6f2417ca517d0","ref":"refs/heads/main","pushedAt":"2024-05-07T07:48:19.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"pedrohenriqueamartins","name":null,"path":"/pedrohenriqueamartins","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/25869114?s=80&v=4"},"commit":{"message":"Merge remote-tracking branch 'origin/annealing'","shortMessageHtmlLink":"Merge remote-tracking branch 'origin/annealing'"}},{"before":null,"after":"d8e15b533ab32883b3e50932db5c7a58f47e12aa","ref":"refs/heads/annealing","pushedAt":"2024-04-09T13:33:11.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"pedrohenriqueamartins","name":null,"path":"/pedrohenriqueamartins","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/25869114?s=80&v=4"},"commit":{"message":"add annealing scheduler","shortMessageHtmlLink":"add annealing scheduler"}},{"before":"7228a8b190b051f8f7df65c440df9d73ac235900","after":"202caed32d9dda75cd92118ffb315eb4527d2207","ref":"refs/heads/entmax-loss","pushedAt":"2024-04-01T13:53:14.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"slightly more informative support logging","shortMessageHtmlLink":"slightly more informative support logging"}},{"before":"563f968a3641a8d23f9b38d709f9e90ec41424d8","after":"7228a8b190b051f8f7df65c440df9d73ac235900","ref":"refs/heads/entmax-loss","pushedAt":"2024-03-27T11:55:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"fix model training bug, accumulate support across gpus","shortMessageHtmlLink":"fix model training bug, accumulate support across gpus"}},{"before":"1c76bf8cd928e4b9711a862f3dfddd36a607961d","after":"563f968a3641a8d23f9b38d709f9e90ec41424d8","ref":"refs/heads/entmax-loss","pushedAt":"2024-03-21T21:11:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"bpopeters","name":"Ben Peters","path":"/bpopeters","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/10211311?s=80&v=4"},"commit":{"message":"compute support during training (although do not log it yet)","shortMessageHtmlLink":"compute support during training (although do not log it yet)"}},{"before":"d398204fa6b8f594ba4812951669a78a3fe18c32","after":"f05f0057092d0e6d03acf4ac9f6e5813b07a1233","ref":"refs/heads/main","pushedAt":"2024-03-18T20:28:45.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"CoderPat","name":"Patrick Fernandes","path":"/CoderPat","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11250483?s=80&v=4"},"commit":{"message":"Automatically shuffle train/valid/test inds in megatron (#6)\n\n* add option to shuffle dataset inds before slicing into train/val/test\r\n\r\n* pass through arg\r\n\r\n* add top level shuffle to rebuild index","shortMessageHtmlLink":"Automatically shuffle train/valid/test inds in megatron (#6)"}},{"before":"3fc4a242c0fa570a78b3c33eda04e54bc1ddba2f","after":"d398204fa6b8f594ba4812951669a78a3fe18c32","ref":"refs/heads/main","pushedAt":"2024-03-14T21:09:37.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"CoderPat","name":"Patrick Fernandes","path":"/CoderPat","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/11250483?s=80&v=4"},"commit":{"message":"Add a time estimate for training (#7)\n\n* add time estimate\r\n\r\n* fix s -> h conversion","shortMessageHtmlLink":"Add a time estimate for training (#7)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEffEKHAA","startCursor":null,"endCursor":null}},"title":"Activity · deep-spin/Megatron-DeepSpeed"}