test w/o skip_special_tokens
ilya-lavrenov committed May 16, 2024
1 parent d73346c commit bc6cf18
Showing 3 changed files with 27 additions and 99 deletions.
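As the title says, this commit makes the workflow compare against Hugging Face references decoded without `skip_special_tokens=True`, so the reference strings keep their special tokens and the C++ samples must reproduce them verbatim. A minimal sketch of what that flag changes, assuming a stock `transformers` install (the prompt here is ours):

```python
from transformers import AutoTokenizer

# facebook/opt-125m's tokenizer prepends the BOS token '</s>' when encoding.
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-125m')
ids = tokenizer('How are you?')['input_ids']

print(tokenizer.decode(ids))                            # '</s>How are you?'
print(tokenizer.decode(ids, skip_special_tokens=True))  # 'How are you?'
```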
104 changes: 12 additions & 92 deletions .github/workflows/causal_lm_cpp.yml
@@ -59,108 +59,28 @@ jobs:
python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
sudo apt-get install libtbb-dev
-optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model facebook/opt-125m facebook-opt-125m
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
- name: Compare
run: |
source ./ov/setupvars.sh
-timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
-python -c "
-import transformers
-with open('pred.txt', 'r') as file:
-    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
-tokenized = tokenizer('Why is the Sun yellow?', return_tensors='pt')
-for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-    idx = predictions.find(ref)
-    if -1 == idx:
-        raise RuntimeError(f'Missing "{ref=}" from predictions')
-    predictions = predictions[:idx] + predictions[idx + len(ref):]
-"
-echo "Why is the Sun yellow?" passed
-timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
-python -c "
-import transformers
-with open('pred.txt', 'r') as file:
-    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
-tokenized = tokenizer('69', return_tensors='pt')
-for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-    idx = predictions.find(ref)
-    if -1 == idx:
-        raise RuntimeError(f'Missing "{ref=}" from predictions')
-    predictions = predictions[:idx] + predictions[idx + len(ref):]
-"
-echo "69" passed
-timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
-python -c "
-import transformers
-with open('pred.txt', 'r') as file:
-    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
-tokenized = tokenizer('Hi', return_tensors='pt')
-for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-    idx = predictions.find(ref)
-    if -1 == idx:
-        raise RuntimeError(f'Missing "{ref=}" from predictions')
-    predictions = predictions[:idx] + predictions[idx + len(ref):]
-"
-echo "Hi" passed
-timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
-python -c "
-import transformers
-with open('pred.txt', 'r') as file:
-    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
-tokenized = tokenizer('return 0', return_tensors='pt')
-for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-    idx = predictions.find(ref)
-    if -1 == idx:
-        raise RuntimeError(f'Missing "{ref=}" from predictions')
-    predictions = predictions[:idx] + predictions[idx + len(ref):]
-"
-echo "return 0" passed
-./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt
+timeout 1m ./build/beam_search_causal_lm ./facebook-opt-125m "Tell me something about Canada" "What is your name?" "How are you?" > ./pred.txt
-python -c "
-import transformers
-with open('pred.txt', 'r') as file:
-    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
-tokenized = tokenizer('你好! 你好嗎?', return_tensors='pt')
-for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-    idx = predictions.find(ref)
-    if -1 == idx:
-        raise RuntimeError(f'Missing "{ref=}" from predictions')
-    predictions = predictions[:idx] + predictions[idx + len(ref):]
-"
-echo "你好! 你好嗎?" passed
-timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt
python -c "
import transformers
with open('pred.txt', 'r') as file:
    predictions = file.read()
-tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
+tokenizer = transformers.AutoTokenizer.from_pretrained('facebook/opt-125m')
prompts = [
-    'Alan Turing was a',
-    'return 0',
-    '你好! 你好嗎?'
+    'Tell me something about Canada',
+    'What is your name?',
+    'How are you?'
]
for prompt in prompts:
    tokenized = tokenizer(prompt, return_tensors='pt')
-    for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-        ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+    for beam in transformers.AutoModelForCausalLM.from_pretrained('facebook/opt-125m').generate(**tokenized, num_beam_groups=3, num_beams=6, num_return_sequences=6, diversity_penalty=1.0, max_new_tokens=30, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+        ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():]) + '\n'
        idx = predictions.find(ref)
        if -1 == idx:
            raise RuntimeError(f'Missing "{ref=}" from predictions')
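Condensed, the comparison pattern above is: generate the reference beams with transformers, decode each continuation, and require that it appears in the sample's output, consuming each match so duplicated beams are counted separately. A self-contained sketch under those assumptions (the `check_beams` helper is ours, mirroring the new facebook/opt-125m settings):

```python
import transformers

def check_beams(pred_path, model_id, prompts):
    """Require every HF reference beam continuation to occur in the sample's output."""
    with open(pred_path) as f:
        predictions = f.read()
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
    model = transformers.AutoModelForCausalLM.from_pretrained(model_id)
    for prompt in prompts:
        tokenized = tokenizer(prompt, return_tensors='pt')
        beams = model.generate(
            **tokenized, num_beam_groups=3, num_beams=6, num_return_sequences=6,
            diversity_penalty=1.0, max_new_tokens=30, do_sample=False,
            no_repeat_ngram_size=9**9)  # 9**9 effectively disables n-gram blocking
        for beam in beams:
            # Decode only the continuation; special tokens are intentionally kept.
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():]) + '\n'
            idx = predictions.find(ref)
            if idx == -1:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            # Consume the match so repeated beams must each appear in the output.
            predictions = predictions[:idx] + predictions[idx + len(ref):]

check_beams('pred.txt', 'facebook/opt-125m',
            ['Tell me something about Canada', 'What is your name?', 'How are you?'])
```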
@@ -201,7 +121,7 @@ jobs:
echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
echo for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): >> ref.py
-echo ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' >> ref.py
+echo ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():]) + '\n' >> ref.py
echo idx = predictions.find(ref) >> ref.py
echo if -1 == idx: >> ref.py
echo raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
@@ -441,14 +361,14 @@ jobs:
tokenizer = transformers.AutoTokenizer.from_pretrained('microsoft/phi-1_5')
tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
for output in transformers.AutoModelForCausalLM.from_pretrained('microsoft/phi-1_5').generate(**tokenized, max_length=100, do_sample=False):
-    ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+    ref = tokenizer.decode(output[tokenized['input_ids'].numel():]) + '\n'
    idx = predictions.find(ref)
    if -1 == idx:
        raise RuntimeError(f'Missing "{ref=}" from predictions')
    predictions = predictions[:idx] + predictions[idx + len(ref):]
"
echo Phi-1_5 passed
cpp-greedy_causal_lm-redpajama-3b-chat:
runs-on: ubuntu-20.04-4-cores
steps:
@@ -486,7 +406,7 @@ jobs:
tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
-    ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+    ref = tokenizer.decode(output[tokenized['input_ids'].numel():]) + '\n'
    idx = predictions.find(ref)
    if -1 == idx:
        raise RuntimeError(f'Missing "{ref}" from predictions')
16 changes: 12 additions & 4 deletions text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
@@ -95,9 +95,13 @@ void initialize_inputs(const ov::Tensor& input_ids, const ov::Tensor& attention_

ov::Shape input_shape = input_ids.get_shape();

-ov::Tensor position_ids = request.get_tensor("position_ids");
-position_ids.set_shape(input_shape);
-initialize_position_ids(position_ids, attention_mask);
+try {
+    ov::Tensor position_ids = request.get_tensor("position_ids");
+    position_ids.set_shape(input_shape);
+    initialize_position_ids(position_ids, attention_mask);
+} catch (...) {
+    // no position_ids input
+}

ov::Tensor beam_idx = request.get_tensor("beam_idx");
beam_idx.set_shape({input_shape.at(0)});
@@ -209,7 +213,11 @@ int main(int argc, char* argv[]) try {
lm.set_tensor("beam_idx", ov::Tensor{ov::element::i32, {batch_size}, next_beams.data()});
// Set auxiliary inputs
set_attention_mask(lm.get_tensor("attention_mask"), next_beams);
-set_position_ids(lm.get_tensor("position_ids"), lm.get_tensor("attention_mask"));
+try {
+    set_position_ids(lm.get_tensor("position_ids"), lm.get_tensor("attention_mask"));
+} catch (...) {
+    // no position_ids input
+}
}

for (const std::vector<std::vector<Beam>>& prompt_group : finalize(std::move(group_beam_searcher))) {
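The catch-all above treats any failure of `get_tensor("position_ids")` as "the model has no such input". An alternative is to probe the compiled model's input names up front. A sketch using the OpenVINO Python API, to our understanding of it (the model path is a placeholder):

```python
import openvino as ov

core = ov.Core()
compiled = core.compile_model('TinyLlama-1.1B-Chat-v1.0/openvino_model.xml', 'CPU')

# Each input port exposes the tensor names from the original model,
# so an optional input can be detected without relying on exceptions.
has_position_ids = any('position_ids' in port.get_names() for port in compiled.inputs)
print(has_position_ids)
```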
6 changes: 3 additions & 3 deletions text_generation/causal_lm/cpp/group_beam_searcher.hpp
@@ -107,9 +107,9 @@ struct Group {
beam.score /= std::pow(float(beam.tokens.size()), parameters.length_penalty);

// HF implementation counts eos_token for length penalty calculation
-if (beam.tokens.back() == parameters.eos_token) {
-    beam.tokens.pop_back();
-}
+// if (beam.tokens.back() == parameters.eos_token) {
+//     beam.tokens.pop_back();
+// }

min_heap.push_back(std::move(beam));
std::push_heap(min_heap.begin(), min_heap.end(), greater);
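For context, the division above is Hugging Face's length normalisation: a finished hypothesis's cumulative log-probability is divided by `tokens.size() ** length_penalty`, and per the comment the eos token is still counted in that length. A toy worked example with made-up numbers:

```python
import math

log_probs = [-0.3, -0.9, -0.5, -0.1]  # per-token log-probs of a finished beam, eos last
length_penalty = 1.0

score = sum(log_probs) / math.pow(len(log_probs), length_penalty)
print(score)  # -0.45; popping the eos first would instead give -1.8 / 3**1 = -0.6
```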