This repository has been archived by the owner on Feb 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* added-use-removed-convert * docs-update * modified-test * typo * test-fix * removed-convert-from-tests * convert-doh-import * import-bug * maybe this * yet-another-fix * found-the-bug * multi-lang
- Loading branch information
Showing
9 changed files
with
78 additions
and
166 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
::: whatlies.language._sentence_encode_lang |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import numpy as np | ||
|
||
from whatlies.language import TFHubLanguage, UniversalSentenceLanguage | ||
|
||
|
||
def test_same_results():
    """Check that the USE convenience constructor matches a direct TFHub load.

    The multilingual variant at version 3 is built once through
    `UniversalSentenceLanguage` and once through `TFHubLanguage` with the
    explicit URL; both must produce the same vector for the same text.
    """
    hub_url = "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"
    query = "hello world"

    use_lang = UniversalSentenceLanguage("multi", 3)
    tf_lang = TFHubLanguage(hub_url)

    via_wrapper = use_lang[query].vector
    via_url = tf_lang[query].vector
    assert np.allclose(via_wrapper, via_url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from typing import Union | ||
|
||
from ._tfhub_lang import TFHubLanguage | ||
|
||
|
||
def UniversalSentenceLanguage(variant: str = "base", version: Union[int, None] = None):
    """
    Retrieve a [universal sentence encoder](https://tfhub.dev/google/collections/universal-sentence-encoder/1) model from tfhub.

    You can download specific versions for specific variants. The variants that we support are listed below.

    - `"base"`: the base variant (915MB) [link](https://tfhub.dev/google/universal-sentence-encoder/4)
    - `"large"`: the large variant (523MB) [link](https://tfhub.dev/google/universal-sentence-encoder-large/5)
    - `"qa"`: the variant based on question/answer (528MB) [link](https://tfhub.dev/google/universal-sentence-encoder-qa/3)
    - `"multi"`: the multi-language variant (245MB) [link](https://tfhub.dev/google/universal-sentence-encoder-multilingual/3)
    - `"multi-large"`: the large multi-language variant (303MB) [link](https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3)
    - `"multi-qa"`: the multi-language qa variant (310MB) [link](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3)

    TFHub reports that the multi-language models support Arabic, Chinese-simplified, Chinese-traditional,
    English, French, German, Italian, Japanese, Korean, Dutch, Polish, Portuguese, Spanish, Thai, Turkish and Russian.

    Arguments:
        variant: select a specific variant
        version: select a specific version, if kept `None` we'll assume the most recent version

    Returns:
        A `TFHubLanguage` constructed with the URL for the requested variant and version.

    Raises:
        KeyError: if `variant` is not one of the supported variant names.
    """
    # Every stem ends in "/" and carries NO version; the version is appended
    # below. (A stem that already includes a version would yield e.g. ".../33".)
    urls = {
        "base": "https://tfhub.dev/google/universal-sentence-encoder/",
        "large": "https://tfhub.dev/google/universal-sentence-encoder-large/",
        "qa": "https://tfhub.dev/google/universal-sentence-encoder-qa/",
        "multi": "https://tfhub.dev/google/universal-sentence-encoder-multilingual/",
        "multi-large": "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/",
        "multi-qa": "https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/",
    }

    # Version used per variant when the caller does not pick one.
    versions = {
        "base": 4,
        "large": 5,
        "qa": 3,
        "multi": 3,
        "multi-large": 3,
        "multi-qa": 3,
    }

    try:
        base_url = urls[variant]
    except KeyError:
        # Same exception type as a plain dict lookup, but tell the caller
        # which names are valid.
        raise KeyError(
            f"Unknown variant {variant!r}; expected one of {sorted(urls)}."
        ) from None

    # Only `None` means "use the default"; an explicit version is passed through.
    if version is None:
        version = versions[variant]

    url = base_url + str(version)
    return TFHubLanguage(url=url)