diff --git a/Dockerfile b/Dockerfile index 51b4d558c..2178f2bbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.3-labs FROM postgres:16 - +ENV WHERE_AM_I=docker ENV DEBIAN_FRONTEND=noninteractive USER root diff --git a/test.sh b/test.sh index fdee1a17d..f46f62040 100755 --- a/test.sh +++ b/test.sh @@ -8,35 +8,17 @@ if [ -f .env ]; then set +a fi -if [ -z "$ENABLE_OPENAI_TESTS" ]; then - export ENABLE_OPENAI_TESTS=0 +# The Dockerfile sets WHERE_AM_I=docker: inside the container run psql against the +# default local connection as the postgres OS user; elsewhere connect over TCP to port 9876. +if [ -n "$WHERE_AM_I" ] && [ "$WHERE_AM_I" == "docker" ]; then + if [ "$(whoami)" == "root" ]; then + echo switching to postgres user... + # run psql itself as postgres: a bare `su postgres -` only spawns a shell and, + # once that shell exits, psql would still run as root. su without -l keeps the + # environment (ENABLE_*_TESTS, API keys); exec passes psql's exit status on. + exec su postgres -c 'psql -d postgres -f test.sql' + fi + psql -d postgres -f test.sql +else + psql --no-psqlrc -d 'postgres://postgres@127.0.0.1:9876/postgres' -f test.sql fi - -if [ "$ENABLE_OPENAI_TESTS" ] && [ -z "$OPENAI_API_KEY" ]; then - echo "OPENAI_API_KEY must be set if running OpenAI tests" - exit 3 -fi - -if [ -z "$ENABLE_OLLAMA_TESTS" ]; then - export ENABLE_OLLAMA_TESTS=0 -fi - -if [ -z "$ENABLE_ANTHROPIC_TESTS" ]; then - export ENABLE_ANTHROPIC_TESTS=0 -fi - -if [ "$ENABLE_ANTHROPIC_TESTS" ] && [ -z "$ANTHROPIC_API_KEY" ]; then - echo "ANTHROPIC_API_KEY must be set if running Anthropic tests" - exit 3 -fi - -if [ -z "$ENABLE_COHERE_TESTS" ]; then - export ENABLE_COHERE_TESTS=0 -fi - -if [ "$ENABLE_COHERE_TESTS" ] && [ -z "$COHERE_API_KEY" ]; then - echo "COHERE_API_KEY must be set if running Cohere tests" - exit 3 -fi - -psql -d postgres -f test.sql \ No newline at end of file diff --git a/test.sql b/test.sql index 8b0454706..df89b28f8 100644 --- a/test.sql +++ b/test.sql @@ -83,7 +83,7 @@ create table tests ); ------------------------------------------------------------------------------- --- convenience functions for recording test results +-- convenience function for recording test results create function result(_test text, _expected text, _actual text) returns bool as $func$ merge into tests as t @@ -95,57 +95,73 @@ when not matched then insert (test, actual) values (x.test, x.actual) select 
passed from tests where test = _test; $func$ language sql; -create function result(_test text, _expected int, _actual int) returns bool -return (select result(_test, _expected::text, _actual::text)) -; - -create function result(_test text, _expected bool, _actual bool) returns bool -return (select result(_test, _expected::text, _actual::text)) -; - \pset tuples_only on ------------------------------------------------------------------------------- -- openai tests \getenv enable_openai_tests ENABLE_OPENAI_TESTS +\if :{?enable_openai_tests} +\else +\set enable_openai_tests 0 +\endif \if :enable_openai_tests \set ON_ERROR_ROLLBACK on \set ON_ERROR_STOP off \i tests/openai.sql \set ON_ERROR_ROLLBACK off \set ON_ERROR_STOP on +\else +\echo Skipped OpenAI tests \endif ------------------------------------------------------------------------------- -- ollama tests \getenv enable_ollama_tests ENABLE_OLLAMA_TESTS +\if :{?enable_ollama_tests} +\else +\set enable_ollama_tests 0 +\endif \if :enable_ollama_tests \set ON_ERROR_ROLLBACK on \set ON_ERROR_STOP off \i tests/ollama.sql \set ON_ERROR_ROLLBACK off \set ON_ERROR_STOP on +\else +\echo Skipped Ollama tests \endif ------------------------------------------------------------------------------- -- anthropic tests \getenv enable_anthropic_tests ENABLE_ANTHROPIC_TESTS +\if :{?enable_anthropic_tests} +\else +\set enable_anthropic_tests 0 +\endif \if :enable_anthropic_tests \set ON_ERROR_ROLLBACK on \set ON_ERROR_STOP off \i tests/anthropic.sql \set ON_ERROR_ROLLBACK off \set ON_ERROR_STOP on +\else +\echo Skipped Anthropic tests \endif ------------------------------------------------------------------------------- -- cohere tests \getenv enable_cohere_tests ENABLE_COHERE_TESTS +\if :{?enable_cohere_tests} +\else +\set enable_cohere_tests 0 +\endif \if :enable_cohere_tests \set ON_ERROR_ROLLBACK on \set ON_ERROR_STOP off \i tests/cohere.sql \set ON_ERROR_ROLLBACK off \set ON_ERROR_STOP on +\else +\echo Skipped Cohere tests \endif 
\pset tuples_only off @@ -153,34 +169,47 @@ return (select result(_test, _expected::text, _actual::text)) -- test results \echo \echo -\echo test results +\echo \echo \echo \set ON_ERROR_STOP on \set ON_ERROR_ROLLBACK off -\echo test results -select test, passed -from tests -; -\echo failed tests -select * +-- we should fail if no tests were run +select count(*) > 0 as result from tests -where passed is distinct from true -; +\gset -\echo test stats -select - count(*) as total -, count(*) filter (where passed = true) as passed -, count(*) filter (where passed is distinct from true) as failed -from tests -; +\if :result + + \echo test results + select test, passed + from tests + ; + + \echo failed tests + select * + from tests + where passed is distinct from true + ; + + \echo test stats + select + count(*) as total + , count(*) filter (where passed = true) as passed + , count(*) filter (where passed is distinct from true) as failed + from tests + ; + + select count(*) filter (where passed is distinct from true) = 0 as result + from tests + \gset + +\else +\warn NO TESTS WERE RUN! +\endif -select count(*) filter (where passed is distinct from true) = 0 as result -from tests -\gset reset role; -- no longer tester diff --git a/tests/anthropic.sql b/tests/anthropic.sql index 5259cf195..d61acd99e 100644 --- a/tests/anthropic.sql +++ b/tests/anthropic.sql @@ -2,6 +2,17 @@ -- get our anthropic api key \getenv anthropic_api_key ANTHROPIC_API_KEY +\if :{?anthropic_api_key} +\else +\warn Anthropic tests are enabled but ANTHROPIC_API_KEY is not set! 
+-- ON_ERROR_STOP is off while test.sql includes this file, so a raised exception +-- would be ignored and the run could still pass. Record a failing row instead: +-- a null actual is what eval.sql records when a test yields no value, and the +-- summary in test.sql counts every row whose passed is distinct from true as failed. +select result('anthropic_api_key-is-set', 't', null); +\q +\endif + -- set our session local GUC select set_config('ai.anthropic_api_key', $1, false) is not null as set_anthropic_api_key \bind :anthropic_api_key @@ -18,7 +29,9 @@ values ------------------------------------------------------------------------------- -- anthropic_generate -\echo anthropic_generate +\set testname anthropic_generate +\set expected t +\echo :testname select anthropic_generate ( 'claude-3-5-sonnet-20240620' , jsonb_build_array @@ -32,15 +45,18 @@ select anthropic_generate \bind :anthropic_api_key \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'content', '0', 'text') is not null and (:'actual'::jsonb)->>'stop_reason' = 'end_turn' as actual \gset +\endif -select result('anthropic_generate', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- anthropic_generate-no-key -\echo anthropic_generate-no-key +\set testname anthropic_generate-no-key +\set expected t +\echo :testname select anthropic_generate ( 'claude-3-5-sonnet-20240620' , jsonb_build_array @@ -52,8 +68,9 @@ select anthropic_generate ) as actual \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'content', '0', 'text') is not null and (:'actual'::jsonb)->>'stop_reason' = 'end_turn' as actual \gset +\endif -select result('anthropic_generate-no-key', true, :'actual'); -\unset actual +\ir eval.sql diff --git a/tests/cohere.sql b/tests/cohere.sql index 0304346c0..fcba6db2d 100644 --- a/tests/cohere.sql +++ b/tests/cohere.sql @@ -1,6 +1,17 @@ ------------------------------------------------------------------------------- -- get our cohere api key \getenv cohere_api_key COHERE_API_KEY +\if :{?cohere_api_key} +\else +\warn Cohere tests are enabled but COHERE_API_KEY is not set! 
+-- ON_ERROR_STOP is off while test.sql includes this file, so a raised exception +-- would be ignored and the run could still pass. Record a failing row instead: +-- a null actual is what eval.sql records when a test yields no value, and the +-- summary in test.sql counts every row whose passed is distinct from true as failed. +select result('cohere_api_key-is-set', 't', null); +\q +\endif + -- set our session local GUC select set_config('ai.cohere_api_key', $1, false) is not null as set_cohere_api_key \bind :cohere_api_key @@ -30,64 +41,81 @@ values ------------------------------------------------------------------------------- -- cohere_list_models -\echo cohere_list_models -select count(*) as actual +\set testname cohere_list_models +\set expected t +\echo :testname + +select count(*) > 0 as actual from cohere_list_models(_api_key=>$1) \bind :cohere_api_key \gset -select result('cohere_list_models', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_list_models-no-key -\echo cohere_list_models-no-key -select count(*) as actual +\set testname cohere_list_models-no-key +\set expected t +\echo :testname + +select count(*) > 0 as actual from cohere_list_models() \gset -select result('cohere_list_models-no-key', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_list_models-endpoint -\echo cohere_list_models-endpoint -select count(*) as actual +\set testname cohere_list_models-endpoint +\set expected t +\echo :testname + +select count(*) > 0 as actual from cohere_list_models(_endpoint=>'embed') \gset -select result('cohere_list_models-endpoint', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_tokenize -\echo cohere_tokenize -select cohere_tokenize -( 'command' -, 'What one programmer can do in one month, two programmers can do in two months.' -, _api_key=>$1 +\set testname cohere_tokenize +\set expected 17 +\echo :testname + +select array_length +( + cohere_tokenize + ( 'command' + , 'What one programmer can do in one month, two programmers can do in two months.' 
+ , _api_key=>$1 + ) +, 1 ) as actual \bind :cohere_api_key \gset -select result('cohere_tokenize', 17, array_length(:'actual'::int[], 1)); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_tokenize-no-key -\echo cohere_tokenize-no-key +\set testname cohere_tokenize-no-key +\set expected {5256,1707,1682,2383,9461,4696,1739,1863,1871,1740,9397,2112,1705,4066,3465,1742,38700,21} +\echo :testname + select cohere_tokenize ( 'command' , 'One of the best programming skills you can have is knowing when to walk away for awhile.' ) as actual \gset -select result('cohere_tokenize-no-key', '{5256,1707,1682,2383,9461,4696,1739,1863,1871,1740,9397,2112,1705,4066,3465,1742,38700,21}', :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_detokenize -\echo cohere_detokenize +\set testname cohere_detokenize +select 'What one programmer can do in one month, two programmers can do in two months.' as expected \gset +\echo :testname + select cohere_detokenize ( 'command' , array[5171,2011,36613,1863,1978,1703,2011,2812,19,2253,38374,1863,1978,1703,2253,3784,21] @@ -96,34 +124,40 @@ select cohere_detokenize \bind :cohere_api_key \gset -select result('cohere_detokenize', 'What one programmer can do in one month, two programmers can do in two months.', :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_detokenize-no-key -\echo cohere_detokenize-no-key +\set testname cohere_detokenize-no-key +select $$Good programmers don't just write programs. They build a working vocabulary.$$ as expected \gset +\echo :testname + select cohere_detokenize ( 'command' , array[14485,38374,2630,2060,2252,5164,4905,21,2744,2628,1675,3094,23407,21] ) as actual \gset -select result('cohere_detokenize-no-key', $$Good programmers don't just write programs. 
They build a working vocabulary.$$, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_list_models-default-only -\echo cohere_list_models-default-only -select count(*) as actual +\set testname cohere_list_models-default-only +\set expected t +\echo :testname + +select count(*) > 0 as actual from cohere_list_models(_endpoint=>'generate', _default_only=>true) \gset -select result('cohere_list_models-default-only', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_embed -\echo cohere_embed +\set testname cohere_embed +\set expected 384 +\echo :testname + select vector_dims ( cohere_embed @@ -136,12 +170,14 @@ select vector_dims \bind :cohere_api_key \gset -select result('cohere_embed', 384, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_embed-no-key -\echo cohere_embed-no-key +\set testname cohere_embed-no-key +\set expected 384 +\echo :testname + select vector_dims ( cohere_embed @@ -153,12 +189,14 @@ select vector_dims ) as actual \gset -select result('cohere_embed-no-key', 384, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_classify -\echo cohere_classify +\set testname cohere_classify +select '{"bird": "animal", "corn": "food", "airplane": "machine"}'::jsonb::text as expected \gset +\echo :testname + with examples(example, label) as ( values @@ -180,12 +218,14 @@ from jsonb_to_recordset )) x(input text, prediction text) \gset -select result('cohere_classify', '{"bird": "animal", "corn": "food", "airplane": "machine"}', :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_classify_simple -\echo cohere_classify_simple +\set testname cohere_classify_simple +select 
'{"bird": "animal", "corn": "food", "airplane": "machine"}'::jsonb::text as expected \gset +\echo :testname + with examples(example, label) as ( values @@ -204,12 +244,13 @@ from cohere_classify_simple ) x \gset -select result('cohere_classify_simple', '{"bird": "animal", "corn": "food", "airplane": "machine"}', :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_rerank -\echo cohere_rerank +\set testname cohere_rerank +\set expected 2 +\echo :testname select cohere_rerank ( 'rerank-english-v3.0' @@ -224,18 +265,21 @@ select cohere_rerank ) as actual \gset +\if :{?actual} select x."index" as actual from jsonb_to_recordset((:'actual'::jsonb)->'results') x("index" int, "document" jsonb, relevance_score float8) order by relevance_score desc limit 1 \gset +\endif -select result('cohere_rerank', 2, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_rerank_simple -\echo cohere_rerank_simple +\set testname cohere_rerank_simple +\set expected 3 +\echo :testname select x."index" as actual from cohere_rerank_simple @@ -252,12 +296,13 @@ order by relevance_score asc limit 1 \gset -select result('cohere_rerank_simple', 3, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- cohere_chat_complete -\echo cohere_chat_complete +\set testname cohere_chat_complete +\set expected t +\echo :testname select cohere_chat_complete ( 'command-r-plus' @@ -266,7 +311,6 @@ select cohere_chat_complete )->>'text' is not null as actual \gset -select result('cohere_chat_complete', true, :'actual'::bool); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- diff --git a/tests/eval.sql b/tests/eval.sql new file mode 100644 index 000000000..0fc53429d --- /dev/null +++ b/tests/eval.sql @@ -0,0 +1,6 @@ +\if :{?actual} +select 
result(:'testname', :'expected', :'actual'); +\else +select result(:'testname', :'expected', null); +\endif +\unset actual diff --git a/tests/ollama.sql b/tests/ollama.sql index fb1128c25..33a67bd5b 100644 --- a/tests/ollama.sql +++ b/tests/ollama.sql @@ -2,6 +2,17 @@ -- get our ollama host -- grab our ollama host from the environment as a psql variable \getenv ollama_host OLLAMA_HOST +\if :{?ollama_host} +\else +\warn Ollama tests are enabled but OLLAMA_HOST is not set! +-- ON_ERROR_STOP is off while test.sql includes this file, so a raised exception +-- would be ignored and the run could still pass. Record a failing row instead: +-- a null actual is what eval.sql records when a test yields no value, and the +-- summary in test.sql counts every row whose passed is distinct from true as failed. +select result('ollama_host-is-set', 't', null); +\q +\endif + -- set our session local GUC select set_config('ai.ollama_host', $1, false) is not null as set_ollama_host \bind :ollama_host @@ -28,28 +39,35 @@ values ------------------------------------------------------------------------------- -- ollama_list_models -\echo ollama_list_models -select count(*) as actual +\set testname ollama_list_models +\set expected t +\echo :testname + +select count(*) > 0 as actual from ollama_list_models(_host=>$1) \bind :ollama_host \gset -select result('ollama_list_models', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_list_models-no-host -\echo ollama_list_models-no-host -select count(*) as actual +\set testname ollama_list_models-no-host +\set expected t +\echo :testname + +select count(*) > 0 as actual from ollama_list_models() \gset -select result('ollama_list_models-no-host', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_embed -\echo ollama_embed +\set testname ollama_embed +\set expected 4096 +\echo :testname + select vector_dims ( ollama_embed @@ -61,12 +79,14 @@ select vector_dims \bind :ollama_host \gset -select result('ollama_embed', 4096, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- 
ollama_embed-no-host -\echo ollama_embed-no-host +\set testname ollama_embed-no-host +\set expected 4096 +\echo :testname + select vector_dims ( ollama_embed @@ -76,12 +96,14 @@ select vector_dims ) as actual \gset -select result('ollama_embed-no-host', 4096, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_generate -\echo ollama_generate +\set testname ollama_generate +\set expected t +\echo :testname + select ollama_generate ( 'llama3' , 'what is the typical weather like in Alabama in June' @@ -95,15 +117,19 @@ select ollama_generate \bind :ollama_host \gset +\if :{?actual} select (:'actual'::jsonb)->>'response' is not null and ((:'actual'::jsonb)->>'done')::boolean as actual \gset +\endif -select result('ollama_generate', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_generate-no-host -\echo ollama_generate-no-host +\set testname ollama_generate-no-host +\set expected t +\echo :testname + select ollama_generate ( 'llama3' , 'what is the typical weather like in Alabama in June' @@ -115,15 +141,19 @@ select ollama_generate ) as actual \gset +\if :{?actual} select (:'actual'::jsonb)->>'response' is not null and ((:'actual'::jsonb)->>'done')::boolean as actual \gset +\endif -select result('ollama_generate-no-host', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_generate-image -\echo ollama_generate-image +\set testname ollama_generate-image +select 'an elephant with boxing gloves on, ready for a fight' as expected \gset +\echo :testname + select ollama_generate ( 'llava:7b' , 'Please describe this image.' 
@@ -136,12 +166,19 @@ select ollama_generate )->>'response' as actual \gset -select result('ollama_generate-image', 'an elephant with boxing gloves on, ready for a fight', substring(:'actual' from 152 for 52)); -\unset actual +\if :{?actual} +select substring(:'actual' from 152 for 52) as actual +\gset +\endif + +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_chat_complete -\echo ollama_chat_complete +\set testname ollama_chat_complete +\set expected t +\echo :testname + select ollama_chat_complete ( 'llama3' , jsonb_build_array @@ -157,15 +194,19 @@ select ollama_chat_complete \bind :ollama_host \gset +\if :{?actual} select (:'actual'::jsonb)->'message'->>'content' is not null and ((:'actual'::jsonb)->>'done')::boolean as actual \gset +\endif -select result('ollama_chat_complete', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_chat_complete-no-host -\echo ollama_chat_complete-no-host +\set testname ollama_chat_complete-no-host +\set expected t +\echo :testname + select ollama_chat_complete ( 'llama3' , jsonb_build_array @@ -179,15 +220,19 @@ select ollama_chat_complete ) as actual \gset +\if :{?actual} select (:'actual'::jsonb)->'message'->>'content' is not null and ((:'actual'::jsonb)->>'done')::boolean as actual \gset +\endif -select result('ollama_chat_complete-no-host', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_chat_complete-image -\echo ollama_chat_complete-image +\set testname ollama_chat_complete-image +\set expected t +\echo :testname + select ollama_chat_complete ( 'llava:7b' , jsonb_build_array @@ -204,26 +249,34 @@ select ollama_chat_complete )->'message'->>'content' as actual \gset -select result('ollama_chat_complete-image', true, starts_with(:'actual'::text, ' This is a digitally manipulated image')); -\unset 
actual +\if :{?actual} +select starts_with(:'actual'::text, ' This is a digitally manipulated image') as actual +\gset +\endif + +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_ps -\echo ollama_ps +\set testname ollama_ps +\set expected 1 +\echo :testname + select count(*) filter (where "name" = 'llava:7b') as actual from ollama_ps(_host=>$1) \bind :ollama_host \gset -select result('ollama_ps', 1, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- ollama_ps-no-host -\echo ollama_ps-no-host +\set testname ollama_ps-no-host +\set expected 1 +\echo :testname + select count(*) filter (where "name" = 'llava:7b') as actual from ollama_ps() \gset -select result('ollama_ps-no-host', 1, :actual); -\unset actual +\ir eval.sql diff --git a/tests/openai.sql b/tests/openai.sql index e29ae13c7..48be64440 100644 --- a/tests/openai.sql +++ b/tests/openai.sql @@ -2,6 +2,17 @@ -- get our openai api key -- grab our api key from the environment as a psql variable \getenv openai_api_key OPENAI_API_KEY +\if :{?openai_api_key} +\else +\warn OpenAI tests are enabled but OPENAI_API_KEY is not set! 
+-- ON_ERROR_STOP is off while test.sql includes this file, so a raised exception +-- would be ignored and the run could still pass. Record a failing row instead: +-- a null actual is what eval.sql records when a test yields no value, and the +-- summary in test.sql counts every row whose passed is distinct from true as failed. +select result('openai_api_key-is-set', 't', null); +\q +\endif + -- set our session local GUC select set_config('ai.openai_api_key', $1, false) is not null as set_openai_api_key \bind :openai_api_key @@ -34,46 +45,57 @@ values ------------------------------------------------------------------------------- -- openai_list_models -\echo openai_list_models -select count(*) as actual +\set testname openai_list_models +\set expected t +\echo :testname + +select count(*) > 0 as actual from openai_list_models(_api_key=>$1) \bind :openai_api_key \gset -select result('openai_list_models', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_list_models-no-key -\echo openai_list_models-no-key -select count(*) as actual +\set testname openai_list_models-no-key +\set expected t +\echo :testname + +select count(*) > 0 as actual from openai_list_models() \gset -select result('openai_list_models-no-key', true, :actual > 0); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_tokenize -\echo openai_tokenize -select openai_tokenize('text-embedding-ada-002', 'the purple elephant sits on a red mushroom') as actual +\set testname openai_tokenize +select array[1820,25977,46840,23874,389,264,2579,58466]::text as expected \gset +\echo :testname + +select openai_tokenize('text-embedding-ada-002', 'the purple elephant sits on a red mushroom')::text as actual \gset -select result('openai_tokenize', array[1820,25977,46840,23874,389,264,2579,58466]::text, :'actual'::int[]::text); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_detokenize -\echo openai_detokenize +\set testname openai_detokenize +select 'the purple elephant sits on a red mushroom' as expected \gset +\echo :testname + select 
openai_detokenize('text-embedding-ada-002', array[1820,25977,46840,23874,389,264,2579,58466]) as actual \gset -select result('openai_detokenize', 'the purple elephant sits on a red mushroom', :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-1 -\echo openai_embed-1 +\set testname openai_embed-1 +\set expected 1536 +\echo :testname + select vector_dims ( openai_embed @@ -85,12 +107,14 @@ select vector_dims \bind :openai_api_key \gset -select result('openai_embed-1', 1536, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-1-no-key -\echo openai_embed-1-no-key +\set testname openai_embed-1-no-key +\set expected 1536 +\echo :testname + select vector_dims ( openai_embed @@ -100,12 +124,14 @@ select vector_dims ) as actual \gset -select result('openai_embed-1-no-key', 1536, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-2 -\echo openai_embed-2 +\set testname openai_embed-2 +\set expected 768 +\echo :testname + select vector_dims ( openai_embed @@ -118,12 +144,14 @@ select vector_dims \bind :openai_api_key \gset -select result('openai_embed-2', 768, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-2-no-key -\echo openai_embed-2-no-key +\set testname openai_embed-2-no-key +\set expected 768 +\echo :testname + select vector_dims ( openai_embed @@ -134,12 +162,14 @@ select vector_dims ) as actual \gset -select result('openai_embed-2-no-key', 768, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-3 -\echo openai_embed-3 +\set testname openai_embed-3 +\set expected 3072 +\echo :testname + select vector_dims ( openai_embed @@ -152,12 +182,14 @@ select 
vector_dims \bind :openai_api_key \gset -select result('openai_embed-3', 3072, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-3-no-key -\echo openai_embed-3-no-key +\set testname openai_embed-3-no-key +\set expected 3072 +\echo :testname + select vector_dims ( openai_embed @@ -168,12 +200,14 @@ select vector_dims ) as actual \gset -select result('openai_embed-3-no-key', 3072, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-4 -\echo openai_embed-4 +\set testname openai_embed-4 +\set expected 6144 +\echo :testname + select sum(vector_dims(embedding)) as actual from openai_embed ( 'text-embedding-3-large' @@ -183,12 +217,14 @@ from openai_embed \bind :openai_api_key \gset -select result('openai_embed-4', 6144, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-4-no-key -\echo openai_embed-4-no-key +\set testname openai_embed-4-no-key +\set expected 6144 +\echo :testname + select sum(vector_dims(embedding)) as actual from openai_embed ( 'text-embedding-3-large' @@ -196,12 +232,14 @@ from openai_embed ) \gset -select result('openai_embed-4-no-key', 6144, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-5 -\echo openai_embed-5 +\set testname openai_embed-5 +\set expected 1536 +\echo :testname + select vector_dims ( openai_embed @@ -213,12 +251,14 @@ select vector_dims \bind :openai_api_key \gset -select result('openai_embed-5', 1536, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_embed-5-no-key -\echo openai_embed-5-no-key +\set testname openai_embed-5-no-key +\set expected 1536 +\echo :testname + select vector_dims ( openai_embed @@ -228,12 +268,14 @@ 
select vector_dims ) as actual \gset -select result('openai_embed-5-no-key', 1536, :actual); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_chat_complete -\echo openai_chat_complete +\set testname openai_chat_complete +\set expected t +\echo :testname + select openai_chat_complete ( 'gpt-4o' , jsonb_build_array @@ -245,15 +287,19 @@ select openai_chat_complete \bind :openai_api_key \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'choices', '0', 'message', 'content') is not null as actual \gset +\endif -select result('openai_chat_complete', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_chat_complete-no-key -\echo openai_chat_complete-no-key +\set testname openai_chat_complete-no-key +\set expected t +\echo :testname + select openai_chat_complete ( 'gpt-4o' , jsonb_build_array @@ -263,15 +309,19 @@ select openai_chat_complete ) as actual \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'choices', '0', 'message', 'content') is not null as actual \gset +\endif -select result('openai_chat_complete-no-key', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_moderate -\echo openai_moderate +\set testname openai_moderate +\set expected t +\echo :testname + select openai_moderate ( 'text-moderation-stable' , 'I want to kill them.' 
@@ -280,25 +330,30 @@ select openai_moderate \bind :openai_api_key \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'results', '0', 'flagged')::bool as actual \gset +\endif -select result('openai_moderate', true, :'actual'); -\unset actual +\ir eval.sql ------------------------------------------------------------------------------- -- openai_moderate-no-key -\echo openai_moderate-no-key +\set testname openai_moderate-no-key +\set expected t +\echo :testname + select openai_moderate ( 'text-moderation-stable' , 'I want to kill them.' ) as actual \gset +\if :{?actual} select jsonb_extract_path_text(:'actual'::jsonb, 'results', '0', 'flagged')::bool as actual \gset +\endif -select result('openai_moderate-no-key', true, :'actual'); -\unset actual +\ir eval.sql -------------------------------------------------------------------------------