From 3885779b36d7e22260017319f433fc7d8763a318 Mon Sep 17 00:00:00 2001 From: Eugene-hu <85906264+Eugene-hu@users.noreply.github.com> Date: Mon, 3 Oct 2022 13:01:15 -0700 Subject: [PATCH] Release/3.3.4 (#927) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [feature] external axon flags (#887) * add external axon changes * add defaults for new axon flags * fix args to axon * default to internal ip and port if not specified * add new args and to defaults * add axon unit tests * add description for subtensor integration test * move test to unit test * create new test file add/update copyright notices * don't default to internal ip * add tests for setting the full_address * add tests for subtensor.serve w/external axon info * allow external port config to be None * switch to mock instead of patch * fix test mocks * change mock config create * fix/add default config * change asserts add message * fix check call args * fix mock config set * only call once * fix help wording * should be True * [fix] fixes unstake with max-stake flag (#905) * add equality to None to the balance class * add tests for the None case * local train bug fix (#906) * [feature] [CUDA solver] Add multi-GPU and ask for CUDA during btcli run (#893) * added cuda solver * boost versions to fix pip error * allow choosing device id * fix solution check to use keccak * adds params for cuda and dev_id to register * list devices by name during selection * add block number logging * fix calculation of hashrate * fix update interval default * add --TPB arg to register * add update_interval flag * switch back to old looping/work structure * change typing * device count is a function * stop early if wallet registered * add update interval and num proc flag * add better number output * optimize multiproc cpu reg keeping proc until solution * fix test * change import to cubit * fix import and default * up default should have default in CLI call * add comments about params * fix config var access * add cubit as extra * handle stale pow differently check registration after failure * restrict number of processes for integration test * fix stale check * use wallet.is_registered instead * attempt to fix test issue * fix my test * oops typo * typo again ugh * remove print out * fix partly reg test * fix if solution None * fix test? * fix patch * add args for cuda to subtensor * add cuda args to reregister call * add to wallet register the cuda args * fix refs and tests * add for val test also * fix tests with rereg * fix patch for tests * add mock_register to subtensor passed instead * move register under the check for isregistered * use patch obj instead * fix patch object * fix prompt * remove unneeded if * modify POW submit to use rolling submit again * add backoff to block get from network * add test for backoff get block * suppress the dev id flag if not set * remove dest so it uses first arg * fix pow submit loop * move registration status with * fix max attempts check * remove status in subtensor.register * add submit status * change to neuron get instead * fix count * try to patch live display * fix patch * . 
* separate test cases * add POWNotStale and tests * add more test cases for block get with retry * fix return to None * fix arg order * fix indent * add test to verify solution is submitted * fix mock call * patch hex bytes instead * typo :/ * fix print out for unstake * fix indexing into mock call * call indexing * access dict not with dot * fix other indent * add CUDAException for cubit * up cubit version * [Feature] ask cuda during btcli run (#890) * add ask for cuda reg config in btcli run * suppress unset arg * [Feature] [cuda solver] multi gpu (#891) * change diff display out * remove logging * check cubit support in the check config * allow 1 or more devices in flag * cuda flag should be suppress * modify how cpu count is found * make a solver base class * add a solverbase for CUDA * use multi-process kernel launching, one per GPU * move check under dot get accessor * Feature/cuda solver multi gpu (#892) * change diff display out * remove logging * check cubit support in the check config * allow 1 or more devices in flag * cuda flag should be suppress * modify how cpu count is found * make a solver base class * add a solverbase for CUDA * use multi-process kernel launching, one per GPU * move check under dot get accessor * add All gpus specification * continue trying reg after Stale * catch for OSX * don't use qsize * add test for continue after being stale * patch get_nowait instead of qsize * [Docs] Update old docs link to new link. Change discord invite to custom link (#915) * Update old docs link to new one This change deletes the old gitbooks documentation link and replaces it with the new one. * fix discord links Co-authored-by: Mac Thrasher <95183714+quac88@users.noreply.github.com> * Fix for test_neuron.py (#917) prevents downloading from huggingface * [feature] add --seed option to regen_hotkey (#916) * add seed option to regen hotkey * make seed optional and fix docstring * add tests for both coldkey and hotkey regen w/seed * oops, make seed optional * fix old test, add config.seed * circle ci version update and fix (#920) * Add test_phrases_split unit test Asserts that randomly instantiated compact_topk encodings can be correctly decoded to recover the original topk_tensor. * Update unravel_topk_token_phrases with faster implementation Replaces .tensor_split() with block indexing to avoid extra copy operations. * Rename test_phrases_split to test_random_topk_token_phrases * Unit tests cleanup (#922) * circle ci version update and fix * Test clean up * uncomment test and remove specific test * remove loguru and fix flaky tests * fix syncing * removing tokenizer equivalence + some bug fixes * moving old dataset test * Deactivate test_random_topk_token_phrases unit test * Create topk_tensor on origin device * Normalization Update (#909) * local train bug fix * normalization update * fix tests * remove test * updated normalization * Naming changes, bug fixes * subtensor update for max clip * max weight to a million * Fixes for ordering and comments * additional tests * string fix * numerical stability and testing updates * minor update for division by zero * Naming and spacing fixes * epsilon update * small fix * Adding development workflow documentation and script for bumping the version (#918) BIT-582 Adding development workflow documentation and script for bumping the version * Updated version 3.3.4 * Revert "Normalization Update (#909)" This reverts commit 3990a2870a404adb3b7840bcb1004819b53e949a. * version update * memory fix to release (#932) . 
* use force on set_start_method * [fix] Enable the stake blacklist (#931) * uncomment stake blacklist * add test for blacklisted stake * [Fix] use with env to reset start method after (#935) * use with env to reset start method after * . Co-authored-by: Cameron Fairchild Co-authored-by: Mac Thrasher <95183714+quac88@users.noreply.github.com> Co-authored-by: opentaco Co-authored-by: opentaco <93473497+opentaco@users.noreply.github.com> Co-authored-by: Eduardo García Co-authored-by: isabella618033 <49876827+isabella618033@users.noreply.github.com> Co-authored-by: Cameron Fairchild --- .circleci/config.yml | 2 +- CONTRIBUTING.md | 29 +- DEVELOPMENT_WORKFLOW.md | 164 ++++++++++ README.md | 4 +- VERSION | 1 + bittensor/__init__.py | 2 +- bittensor/_axon/__init__.py | 20 +- bittensor/_axon/axon_impl.py | 5 + bittensor/_cli/__init__.py | 86 +++-- bittensor/_cli/cli_impl.py | 4 +- bittensor/_config/config_impl.py | 3 + bittensor/_dendrite/__init__.py | 2 +- .../_neuron/text/core_server/nucleus_impl.py | 2 +- bittensor/_neuron/text/core_server/run.py | 2 +- .../_neuron/text/core_validator/__init__.py | 2 +- bittensor/_subtensor/__init__.py | 33 +- bittensor/_subtensor/subtensor_impl.py | 31 +- bittensor/_wallet/wallet_impl.py | 29 +- bittensor/utils/__init__.py | 297 +++++++++++++----- bittensor/utils/balance.py | 3 + bittensor/utils/register_cuda.py | 25 +- bittensor/utils/tokenizer_utils.py | 33 +- scripts/update_version.sh | 49 +++ setup.py | 2 +- tests/integration_tests/test_cli.py | 1 + tests/integration_tests/test_dataset.py | 22 ++ tests/integration_tests/test_dataset_ipfs.py | 22 -- tests/integration_tests/test_dendrite.py | 22 +- tests/integration_tests/test_subtensor.py | 65 +++- .../test_dendrite_multiprocess.py | 102 ------ tests/unit_tests/bittensor_tests/test_axon.py | 153 ++++++++- .../bittensor_tests/test_balance.py | 9 + .../unit_tests/bittensor_tests/test_config.py | 1 - .../bittensor_tests/test_forward_backward.py | 1 - .../unit_tests/bittensor_tests/test_neuron.py | 112 ++++++- .../bittensor_tests/test_receptor.py | 13 +- .../bittensor_tests/test_receptor_pool.py | 12 +- .../bittensor_tests/test_subtensor.py | 108 +++++++ .../unit_tests/bittensor_tests/test_wallet.py | 30 ++ .../utils/test_tokenizer_utils.py | 80 ++++- .../bittensor_tests/utils/test_utils.py | 53 ++++ 41 files changed, 1282 insertions(+), 354 deletions(-) create mode 100644 DEVELOPMENT_WORKFLOW.md create mode 100644 VERSION create mode 100755 scripts/update_version.sh delete mode 100644 tests/integration_tests/test_dataset_ipfs.py delete mode 100644 tests/unit_tests/benchmarking/test_dendrite_multiprocess.py create mode 100644 tests/unit_tests/bittensor_tests/test_subtensor.py diff --git a/.circleci/config.yml b/.circleci/config.yml index c95b9ce1c8..0f70b0b460 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -116,7 +116,7 @@ workflows: - build-and-test: matrix: parameters: - python-version: ["3.7", "3.8", "3.9", "3.10.5"] + python-version: ["3.7.14", "3.8.14", "3.9.13", "3.10.6"] - unit-tests-all-python-versions: requires: - build-and-test diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 751f1e0b55..b9991b250c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,17 +2,16 @@ The following is a set of guidelines for contributing to Bittensor, which are hosted in the [Opentensor Organization](https://github.com/opentensor) on GitHub. These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 
-#### Table Of Contents +## Table Of Contents -[I don't want to read this whole thing, I just have a question!!!](#i-dont-want-to-read-this-whole-thing-i-just-have-a-question) - -[What should I know before I get started?](#what-should-i-know-before-i-get-started) - -[How Can I Contribute?](#how-can-i-contribute) - * [Reporting Bugs](#reporting-bugs) - * [Suggesting Enhancements](#suggesting-enhancements) - * [Your First Code Contribution](#your-first-code-contribution) - * [Pull Requests](#pull-requests) +1. [I don't want to read this whole thing, I just have a question!!!](#i-dont-want-to-read-this-whole-thing-i-just-have-a-question) +1. [What should I know before I get started?](#what-should-i-know-before-i-get-started) +1. [How Can I Contribute?](#how-can-i-contribute) + 1. [Reporting Bugs](#reporting-bugs) + 1. [Suggesting Enhancements](#suggesting-enhancements) + 1. [Your First Code Contribution](#your-first-code-contribution) + 1. [Pull Requests](#pull-requests) + 1. [Development-Workflow](#development-workflow) ## I don't want to read this whole thing I just have a question!!! @@ -122,10 +121,10 @@ The process described here has several goals: Please follow these steps to have your contribution considered by the maintainers: -1. Follow all instructions in [the template](https://github.com/opentensor/bittensor/blob/master/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md) -2. Follow the [styleguides](#styleguides) -3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing
What if the status checks are failing? If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.
+1. Before the PR. + 1. Read the [development workflow](./DEVELOPMENT_WORKFLOW.md) defined for this repository in order to agree on the ways of working. +1. While coding, please add tests relevant to the fixed bug or new feature. +1. To create the PR, follow all instructions in [the template](https://github.com/opentensor/bittensor/blob/master/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md) +1. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing
What if the status checks are failing? If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.
While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted. - - diff --git a/DEVELOPMENT_WORKFLOW.md b/DEVELOPMENT_WORKFLOW.md new file mode 100644 index 0000000000..c5917c4bc0 --- /dev/null +++ b/DEVELOPMENT_WORKFLOW.md @@ -0,0 +1,164 @@ +# Development Workflow + +## Table of contents + +1. [Main branches](#main-branches) +1. [Development model](#development-model) + 1. [Supporting branches](#supporting-branches) + 1. [Feature branches](#feature-branches) + 1. [Release branches](#release-branches) + 1. [Hotfix branches](#hotfix-branches) + 1. [Git operations](#git-operations) + 1. [Create a feature branch](#create-a-feature-branch) + 1. [Merge feature branch into nobunaga](#merge-feature-branch-into-nobunaga) + 1. [Create release branch](#create-release-branch) + 1. [Finish a release branch](#finish-a-release-branch) + 1. [Create a hotfix branch](#create-a-hotfix-branch) + 1. [Finishing a hotfix branch](#finishing-a-hotfix-branch) + +## Main branches + +The repo holds two main branches with an infinite lifetime: +- master +- nobunaga + +We consider `origin/master` to be the main branch where the source code of HEAD always reflects a **__production-ready__** state. + +We consider `origin/nobunaga` to be the main branch where the source code of HEAD always reflects a state with the **__latest delivered development__** changes for the next release. Some would call this the `"integration branch"`. This is where any automatic nightly builds would be built from. + +## Development model + +### Supporting branches + +Each of these branches has a specific purpose and is bound to strict rules as to which branches may be its originating branch and which branches must be its merge targets. We will walk through them in a minute. + +#### Feature branches + +- May branch off from: `nobunaga` +- Must merge back into: `nobunaga` +- Branch naming convention: + - Anything except master, nobunaga, nakamoto, release/* or hotfix/* + - Suggested: `feature//` + +Feature branches are used to develop new features for the upcoming or a distant future release. When starting development of a feature, the target release in which this feature will be incorporated may well be unknown at that point. + +The essence of a feature branch is that it exists as long as the feature is in development, but will eventually be merged back into `nobunaga` (to definitely add the new feature to the upcoming release) or discarded (in case of a disappointing experiment). + +#### Release branches + +- May branch off from: `nobunaga` +- Must merge back into: `nobunaga` and `master` +- Branch naming convention: + - Suggested format `release/3.4.0/optional-descriptive-message` + +Release branches support preparation of a new production release. Furthermore, they allow for minor bug fixes and preparing meta-data for a release (e.g.: version number, configuration, etc.). By doing all of this work on a release branch, the `nobunaga` branch is cleared to receive features for the next big release. + +This new branch may exist for a while, until the release is definitely rolled out. During that time, bug fixes may be applied in this branch, rather than on the `nobunaga` branch. Adding large new features here is strictly prohibited. They must be merged into `nobunaga`, and therefore, wait for the next big release. 
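+For example, cutting the `3.4.0` release off `nobunaga` follows the convention above (a quick sketch; the full, step-by-step commands are under [Git operations](#git-operations) below):
+
+- `git checkout -b release/3.4.0/optional-descriptive-message nobunaga`
+- `./scripts/update_version.sh minor`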
+ +#### Hotfix branches + +- May branch off from: `master` +- Must merge back into: `nobunaga` and `master` +- Branch naming convention: + - Suggested format: `hotfix/3.3.4/optional-descriptive-message` + +Hotfix branches are very much like release branches in that they are also meant to prepare for a new production release, albeit unplanned. They arise from the necessity to act immediately upon an undesired state of a live production version. When a critical bug in a production version must be resolved immediately, a hotfix branch may be branched off from the corresponding tag on the master branch that marks the production version. + +The essence is that work of team members, on the `nobunaga` branch, can continue, while another person is preparing a quick production fix. + +### Git operations + +#### Create a feature branch + +1. Branch from the **nobunaga** branch. + 1. Command: `git checkout -b feature/my-feature nobunaga` + +> Try to rebase frequently with the updated nobunaga branch so you do not face big conflicts before submitting your pull request. Remember, syncing your changes with other developers could also help you avoid big conflicts. + +#### Merge feature branch into nobunaga + +In other words, integrate your changes into a branch that will be tested and prepared for release. + +- Switch branch to nobunaga: `git checkout nobunaga` +- Merging feature branch into nobunaga: `git merge --no-ff feature/my-feature` +- Pushing changes to nobunaga: `git push origin nobunaga` +- Delete feature branch: `git branch -d feature/my-feature` + +This operation is done by Github when merging a PR. + +So, what you have to keep in mind is: +- Open the PR against the `nobunaga` branch. +- After merging a PR you just have to delete your feature branch. + +#### Create release branch + +- Create branch from nobunaga: `git checkout -b release/3.4.0/optional-descriptive-message nobunaga` +- Updating version with major or minor: `./scripts/update_version.sh major|minor` +- Commit file changes with new version: `git commit -a -m "Updated version to 3.4.0"` + +#### Finish a release branch + +In other words, releasing stable code and generating a new version for bittensor. + +- Switch branch to master: `git checkout master` +- Merging release branch into master: `git merge --no-ff release/3.4.0/optional-descriptive-message` +- Tag changeset: `git tag -a v3.4.0 -m "Releasing v3.4.0: some comment about it"` +- Pushing changes to master: `git push origin master` +- Pushing tags to origin: `git push origin --tags` + +To keep the changes made in the __release__ branch, we need to merge those back into `nobunaga`: + +- Switch branch to nobunaga: `git checkout nobunaga`. +- Merging release branch into nobunaga: `git merge --no-ff release/3.4.0/optional-descriptive-message` + +This step may well lead to a merge conflict (probably even, since we have changed the version number). If so, fix it and commit. 
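+For example, if the conflict is in the `VERSION` file (a sketch, assuming the release branch carries the newer version number and should win):
+
+- Keep the release branch's side: `git checkout --theirs VERSION`
+- Stage the file and conclude the merge: `git add VERSION && git commit`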
+ +After this the release branch may be removed, since we don’t need it anymore: + +- `git branch -d release/3.4.0/optional-descriptive-message` + +#### Create the hotfix branch + +- Create branch from master: `git checkout -b hotfix/3.3.4/optional-descriptive-message master` +- Update patch version: `./scripts/update_version.sh patch` +- Commit file changes with new version: `git commit -a -m "Updated version to 3.3.4"` + +Then, fix the bug and commit the fix in one or more separate commits: +- `git commit -m "Fixed critical production issue"` + +#### Finishing a hotfix branch + +When finished, the bugfix needs to be merged back into `master`, but also needs to be merged back into `nobunaga`, in order to safeguard that the bugfix is included in the next release as well. This is completely similar to how release branches are finished. + +First, update master and tag the release. + +- Switch branch to master: `git checkout master` +- Merge changes into master: `git merge --no-ff hotfix/3.3.4/optional-descriptive-message` +- Tag new version: `git tag -a v3.3.4 -m "Releasing v3.3.4: some comment about the hotfix"` +- Pushing changes to master: `git push origin master` +- Pushing tags to origin: `git push origin --tags` + +Next, include the bugfix in `nobunaga`, too: + +- Switch branch to nobunaga: `git checkout nobunaga` +- Merge changes into nobunaga: `git merge --no-ff hotfix/3.3.4/optional-descriptive-message` +- Pushing changes to origin/nobunaga: `git push origin nobunaga` + +The one exception to the rule here is that, **when a release branch currently exists, the hotfix changes need to be merged into that release branch, instead of** `nobunaga`. Back-merging the bugfix into the __release__ branch will eventually result in the bugfix being merged into `nobunaga` too, when the release branch is finished. (If work in `nobunaga` immediately requires this bugfix and cannot wait for the release branch to be finished, you may safely merge the bugfix into `nobunaga` now as well.) + +Finally, we remove the temporary branch: + +- `git branch -d hotfix/3.3.4/optional-descriptive-message` + +## TODO + +- Changing the name of the develop branch from nobunaga to `integration` + - Because sometimes nobunaga is going to have a release branch. +- Knowing if master and nobunaga are different +- Knowing what is in nobunaga that is not merged yet + - Document with not released developments + - When merged into nobunaga, generate the information exposing what's merged into nobunaga but not yet released. + - When merged into master, generate a GitHub release and release notes. +- CircleCI job + - Merge nobunaga into master and release version (needed to release code) + - Build and Test bittensor (needed to merge PRs) \ No newline at end of file diff --git a/README.md b/README.md index 57dd7e034f..e75e440efc 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@
# **Bittensor** -[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/3rUr6EcvbB) +[![Discord Chat](https://img.shields.io/discord/308323056592486420.svg)](https://discord.gg/bittensor) [![PyPI version](https://badge.fury.io/py/bittensor.svg)](https://badge.fury.io/py/bittensor) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) @@ -9,7 +9,7 @@ ### Internet-scale Neural Networks -[Discord](https://discord.gg/3rUr6EcvbB) • [Docs](https://app.gitbook.com/@opentensor/s/bittensor/) • [Network](https://www.bittensor.com/metagraph) • [Research](https://drive.google.com/file/d/1VnsobL6lIAAqcA1_Tbm8AYIQscfJV4KU) • [Code](https://github.com/opentensor/BitTensor) +[Discord](https://discord.gg/bittensor) • [Docs](https://docs.bittensor.com/) • [Network](https://www.bittensor.com/network) • [Research](https://drive.google.com/file/d/1VnsobL6lIAAqcA1_Tbm8AYIQscfJV4KU) • [Code](https://github.com/opentensor/BitTensor)
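The next two hunks bump the version to 3.3.4: a new top-level `VERSION` file and `__version__` in `bittensor/__init__.py`, which is also packed into `__version_as_int__`. A minimal sketch of that packing, reusing the exact formula from the hunk below (the `assert` is illustrative only):

```python
# Pack 'major.minor.patch' as 100*major + 10*minor + 1*patch,
# mirroring bittensor/__init__.py; '3.3.4' encodes to 334.
__version__ = '3.3.4'
version_split = __version__.split(".")
__version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2]))
assert __version_as_int__ == 334
```

Note that this packing assumes single-digit version components: 3.10.0 and 4.0.0 would both encode to 400.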
diff --git a/VERSION b/VERSION new file mode 100644 index 0000000000..2c6109e5bb --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +3.3.4 \ No newline at end of file diff --git a/bittensor/__init__.py b/bittensor/__init__.py index 091e5896d4..57e7bcddd1 100644 --- a/bittensor/__init__.py +++ b/bittensor/__init__.py @@ -18,7 +18,7 @@ from rich.console import Console # Bittensor code and protocol version. -__version__ = '3.3.3' +__version__ = '3.3.4' version_split = __version__.split(".") __version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) diff --git a/bittensor/_axon/__init__.py b/bittensor/_axon/__init__.py index afa1d0979f..cd2dce9d03 100644 --- a/bittensor/_axon/__init__.py +++ b/bittensor/_axon/__init__.py @@ -2,6 +2,7 @@ """ # The MIT License (MIT) # Copyright © 2021 Yuma Rao +# Copyright © 2022 Opentensor Foundation # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -65,6 +66,8 @@ def __new__( server: 'grpc._Server' = None, port: int = None, ip: str = None, + external_ip: str = None, + external_port: int = None, max_workers: int = None, maximum_concurrent_rpcs: int = None, blacklist: 'Callable' = None, @@ -101,6 +104,10 @@ def __new__( Binding port. ip (:type:`str`, `optional`): Binding ip. + external_ip (:type:`str`, `optional`): + The external ip of the server to broadcast to the network. + external_port (:type:`int`, `optional`): + The external port of the server to broadcast to the network. max_workers (:type:`int`, `optional`): Used to create the threadpool if not passed, specifies the number of active threads servicing requests. maximum_concurrent_rpcs (:type:`int`, `optional`): @@ -120,6 +127,8 @@ def __new__( config = copy.deepcopy(config) config.axon.port = port if port != None else config.axon.port config.axon.ip = ip if ip != None else config.axon.ip + config.axon.external_ip = external_ip if external_ip != None else config.axon.external_ip + config.axon.external_port = external_port if external_port != None else config.axon.external_port config.axon.max_workers = max_workers if max_workers != None else config.axon.max_workers config.axon.maximum_concurrent_rpcs = maximum_concurrent_rpcs if maximum_concurrent_rpcs != None else config.axon.maximum_concurrent_rpcs config.axon.forward_timeout = forward_timeout if forward_timeout != None else config.axon.forward_timeout @@ -174,6 +183,8 @@ def __new__( server = server, ip = config.axon.ip, port = config.axon.port, + external_ip=config.axon.external_ip, # don't use internal ip if it is None, we will try to find it later + external_port=config.axon.external_port or config.axon.port, # default to internal port if external port is not set forward = forward_text, backward = backward_text, synapses = synapses, @@ -214,9 +225,13 @@ def add_args( cls, parser: argparse.ArgumentParser, prefix: str = None ): prefix_str = '' if prefix == None else prefix + '.' try: parser.add_argument('--' + prefix_str + 'axon.port', type=int, - help='''The port this axon endpoint is served on. i.e. 8091''', default = bittensor.defaults.axon.port) + help='''The local port this axon endpoint is bound to. i.e. 8091''', default = bittensor.defaults.axon.port) parser.add_argument('--' + prefix_str + 'axon.ip', type=str, help='''The local ip this axon binds to. ie. 
[::]''', default = bittensor.defaults.axon.ip) + parser.add_argument('--' + prefix_str + 'axon.external_port', type=int, required=False, + help='''The public port this axon broadcasts to the network. i.e. 8091''', default = bittensor.defaults.axon.external_port) + parser.add_argument('--' + prefix_str + 'axon.external_ip', type=str, required=False, + help='''The external ip this axon broadcasts to the network. i.e. [::]''', default = bittensor.defaults.axon.external_ip) parser.add_argument('--' + prefix_str + 'axon.max_workers', type=int, help='''The maximum number connection handler threads working simultaneously on this endpoint. The grpc server distributes new worker threads to service requests up to this number.''', default = bittensor.defaults.axon.max_workers) @@ -253,6 +268,8 @@ def add_defaults(cls, defaults): defaults.axon = bittensor.Config() defaults.axon.port = os.getenv('BT_AXON_PORT') if os.getenv('BT_AXON_PORT') != None else 8091 defaults.axon.ip = os.getenv('BT_AXON_IP') if os.getenv('BT_AXON_IP') != None else '[::]' + defaults.axon.external_port = os.getenv('BT_AXON_EXTERNAL_PORT') if os.getenv('BT_AXON_EXTERNAL_PORT') != None else None + defaults.axon.external_ip = os.getenv('BT_AXON_EXTERNAL_IP') if os.getenv('BT_AXON_EXTERNAL_IP') != None else None defaults.axon.max_workers = os.getenv('BT_AXON_MAX_WORERS') if os.getenv('BT_AXON_MAX_WORERS') != None else 10 defaults.axon.maximum_concurrent_rpcs = os.getenv('BT_AXON_MAXIMUM_CONCURRENT_RPCS') if os.getenv('BT_AXON_MAXIMUM_CONCURRENT_RPCS') != None else 400 @@ -267,6 +284,7 @@ def check_config(cls, config: 'bittensor.Config' ): """ Check config for axon port and wallet """ assert config.axon.port > 1024 and config.axon.port < 65535, 'port must be in range [1024, 65535]' + assert config.axon.external_port is None or (config.axon.external_port > 1024 and config.axon.external_port < 65535), 'external port must be in range [1024, 65535]' bittensor.wallet.check_config( config ) @classmethod diff --git a/bittensor/_axon/axon_impl.py b/bittensor/_axon/axon_impl.py index 8edc91cf98..1f6cb78793 100644 --- a/bittensor/_axon/axon_impl.py +++ b/bittensor/_axon/axon_impl.py @@ -2,6 +2,7 @@ """ # The MIT License (MIT) # Copyright © 2021 Yuma Rao +# Copyright © 2022 Opentensor Foundation # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -44,6 +45,8 @@ def __init__( wallet: 'bittensor.wallet', ip: str, port: int, + external_ip: str, + external_port: int, server: 'grpc._Server', forward: 'Callable', backward: 'Callable', @@ -75,6 +78,8 @@ def __init__( """ self.ip = ip self.port = port + self.external_ip = external_ip + self.external_port = external_port self.wallet = wallet self.server = server self.forward_callback = forward if forward != None else self.default_forward_callback diff --git a/bittensor/_cli/__init__.py b/bittensor/_cli/__init__.py index 3b21794db4..a69a65b65f 100644 --- a/bittensor/_cli/__init__.py +++ b/bittensor/_cli/__init__.py @@ -26,7 +26,7 @@ import bittensor import torch -from rich.prompt import Confirm, Prompt +from rich.prompt import Confirm, Prompt, PromptBase from . 
import cli_impl @@ -123,7 +123,7 @@ def config(args: List[str]) -> 'bittensor.config': run_parser = cmd_parsers.add_parser( 'run', - add_help=False, + add_help=True, help='''Run the miner.''' ) run_parser.add_argument( @@ -394,6 +394,12 @@ def config(args: List[str]) -> 'bittensor.config': nargs="+", help='Mnemonic used to regen your key i.e. horse cart dog ...' ) + regen_hotkey_parser.add_argument( + "--seed", + required=False, + default=None, + help='Seed hex string used to regen your key i.e. 0x1234...' + ) regen_hotkey_parser.add_argument( '--use_password', dest='use_password', @@ -823,6 +829,36 @@ def check_overview_config( config: 'bittensor.Config' ): wallet_name = Prompt.ask("Enter wallet name", default = bittensor.defaults.wallet.name) config.wallet.name = str(wallet_name) + def _check_for_cuda_reg_config( config: 'bittensor.Config' ) -> None: + """Checks, when CUDA is available, if the user would like to register with their CUDA device.""" + if torch.cuda.is_available(): + if config.subtensor.register.cuda.get('use_cuda') is None: + # Ask about cuda registration only if a CUDA device is available. + cuda = Confirm.ask("Detected CUDA device, use CUDA for registration?\n") + config.subtensor.register.cuda.use_cuda = cuda + + # Only ask about which CUDA device if the user has more than one CUDA device. + if config.subtensor.register.cuda.use_cuda and config.subtensor.register.cuda.get('dev_id') is None and torch.cuda.device_count() > 0: + devices: List[str] = [str(x) for x in range(torch.cuda.device_count())] + device_names: List[str] = [torch.cuda.get_device_name(x) for x in range(torch.cuda.device_count())] + console.print("Available CUDA devices:") + choices_str: str = "" + for i, device in enumerate(devices): + choices_str += (" {}: {}\n".format(device, device_names[i])) + console.print(choices_str) + dev_id = IntListPrompt.ask("Which GPU(s) would you like to use? Please list one, or comma-separated", choices=devices, default='All') + if dev_id == 'All': + dev_id = list(range(torch.cuda.device_count())) + else: + try: + # replace the commas with spaces then split over whitespace., + # then strip the whitespace and convert to ints. + dev_id = [int(dev_id.strip()) for dev_id in dev_id.replace(',', ' ').split()] + except ValueError: + console.error(":cross_mark:[red]Invalid GPU device[/red] [bold white]{}[/bold white]\nAvailable CUDA devices:{}".format(dev_id, choices_str)) + sys.exit(1) + config.subtensor.register.cuda.dev_id = dev_id + def check_register_config( config: 'bittensor.Config' ): if config.subtensor.get('network') == bittensor.defaults.subtensor.network and not config.no_prompt: config.subtensor.network = Prompt.ask("Enter subtensor network", choices=bittensor.__networks__, default = bittensor.defaults.subtensor.network) @@ -835,27 +871,8 @@ def check_register_config( config: 'bittensor.Config' ): hotkey = Prompt.ask("Enter hotkey name", default = bittensor.defaults.wallet.hotkey) config.wallet.hotkey = str(hotkey) - if not config.no_prompt and config.subtensor.register.cuda.use_cuda == bittensor.defaults.subtensor.register.cuda.use_cuda: - # Ask about cuda registration only if a CUDA device is available. - if torch.cuda.is_available(): - cuda = Confirm.ask("Detected CUDA device, use CUDA for registration?\n") - config.subtensor.register.cuda.use_cuda = cuda - # Only ask about which CUDA device if the user has more than one CUDA device. 
- if cuda and config.subtensor.register.cuda.get('dev_id') is None and torch.cuda.device_count() > 0: - devices: List[str] = [str(x) for x in range(torch.cuda.device_count())] - device_names: List[str] = [torch.cuda.get_device_name(x) for x in range(torch.cuda.device_count())] - console.print("Available CUDA devices:") - choices_str: str = "" - for i, device in enumerate(devices): - choices_str += (" {}: {}\n".format(device, device_names[i])) - console.print(choices_str) - dev_id = Prompt.ask("Which GPU would you like to use?", choices=devices, default=str(bittensor.defaults.subtensor.register.cuda.dev_id)) - try: - dev_id = int(dev_id) - except ValueError: - console.error(":cross_mark:[red]Invalid GPU device[/red] [bold white]{}[/bold white]\nAvailable CUDA devices:{}".format(dev_id, choices_str)) - sys.exit(1) - config.subtensor.register.cuda.dev_id = dev_id + if not config.no_prompt: + cli._check_for_cuda_reg_config(config) def check_new_coldkey_config( config: 'bittensor.Config' ): if config.wallet.get('name') == bittensor.defaults.wallet.name and not config.no_prompt: @@ -880,8 +897,12 @@ def check_regen_hotkey_config( config: 'bittensor.Config' ): hotkey = Prompt.ask("Enter hotkey name", default = bittensor.defaults.wallet.hotkey) config.wallet.hotkey = str(hotkey) - if config.mnemonic == None: - config.mnemonic = Prompt.ask("Enter mnemonic") + if config.mnemonic == None and config.seed == None: + prompt_answer = Prompt.ask("Enter mnemonic or seed") + if prompt_answer.startswith("0x"): + config.seed = prompt_answer + else: + config.mnemonic = prompt_answer def check_regen_coldkey_config( config: 'bittensor.Config' ): if config.wallet.get('name') == bittensor.defaults.wallet.name and not config.no_prompt: @@ -889,7 +910,6 @@ def check_regen_coldkey_config( config: 'bittensor.Config' ): config.wallet.name = str(wallet_name) if config.mnemonic == None and config.seed == None: prompt_answer = Prompt.ask("Enter mnemonic or seed") - print(prompt_answer) if prompt_answer.startswith("0x"): config.seed = prompt_answer else: @@ -931,6 +951,10 @@ def check_run_config( config: 'bittensor.Config' ): if 'server' in config.model and not config.no_prompt: synapse = Prompt.ask('Enter synapse', choices = list(bittensor.synapse.__synapses_types__), default = 'All') config.synapse = synapse + + # Don't need to ask about registration if they don't want to reregister the wallet. + if config.wallet.get('reregister', bittensor.defaults.wallet.reregister) and not config.no_prompt: + cli._check_for_cuda_reg_config(config) def check_help_config( config: 'bittensor.Config'): if config.model == 'None': @@ -941,3 +965,13 @@ def check_update_config( config: 'bittensor.Config'): if not config.no_prompt: answer = Prompt.ask('This will update the local bittensor package', choices = ['Y','N'], default = 'Y') config.answer = answer + +class IntListPrompt(PromptBase): + """ Prompt for a list of integers. """ + + def check_choice( self, value: str ) -> bool: + assert self.choices is not None + # check if value is a valid choice or all the values in a list of ints are valid choices + return value == "All" or \ + value in self.choices or \ + all( val.strip() in self.choices for val in value.replace(',', ' ').split( )) diff --git a/bittensor/_cli/cli_impl.py b/bittensor/_cli/cli_impl.py index bdce4358dc..0425504486 100644 --- a/bittensor/_cli/cli_impl.py +++ b/bittensor/_cli/cli_impl.py @@ -114,7 +114,7 @@ def regen_hotkey ( self ): r""" Creates a new coldkey under this wallet. 
""" wallet = bittensor.wallet(config = self.config) - wallet.regenerate_hotkey( mnemonic = self.config.mnemonic, use_password = self.config.use_password, overwrite = self.config.overwrite_hotkey) + wallet.regenerate_hotkey( mnemonic = self.config.mnemonic, seed=self.config.seed, use_password = self.config.use_password, overwrite = self.config.overwrite_hotkey) def query ( self ): r""" Query an endpoint and get query time. @@ -309,7 +309,7 @@ def unstake( self ): if not self.config.no_prompt: if not Confirm.ask("Do you want to unstake from the following keys:\n" + \ "".join([ - f" [bold white]- {wallet.hotkey_str}: {amount.tao}𝜏[/bold white]\n" for wallet, amount in zip(final_wallets, final_amounts) + f" [bold white]- {wallet.hotkey_str}: {amount}𝜏[/bold white]\n" for wallet, amount in zip(final_wallets, final_amounts) ]) ): return None diff --git a/bittensor/_config/config_impl.py b/bittensor/_config/config_impl.py index fdfcb9d4b8..7da3aada06 100644 --- a/bittensor/_config/config_impl.py +++ b/bittensor/_config/config_impl.py @@ -3,6 +3,7 @@ """ # The MIT License (MIT) # Copyright © 2021 Yuma Rao +# Copyright © 2022 Opentensor Foundation # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -53,6 +54,8 @@ def to_defaults(self): if 'axon' in self.keys(): bittensor.defaults.axon.port = self.axon.port bittensor.defaults.axon.ip = self.axon.ip + bittensor.defaults.axon.external_port = self.axon.external_port + bittensor.defaults.axon.external_ip = self.axon.external_ip bittensor.defaults.axon.max_workers = self.axon.max_workers bittensor.defaults.axon.maximum_concurrent_rpcs = self.axon.maximum_concurrent_rpcs diff --git a/bittensor/_dendrite/__init__.py b/bittensor/_dendrite/__init__.py index 6a0fece88a..dc7be14545 100644 --- a/bittensor/_dendrite/__init__.py +++ b/bittensor/_dendrite/__init__.py @@ -181,7 +181,7 @@ def check_config( cls, config: 'bittensor.Config' ): assert 'timeout' in config.dendrite assert 'requires_grad' in config.dendrite assert config.dendrite.max_worker_threads > 0, 'max_worker_threads must be larger than 0' - assert config.dendrite.max_active_receptors > 0, 'max_active_receptors must be larger than 0' + assert config.dendrite.max_active_receptors >= 0, 'max_active_receptors must be larger or eq to 0' bittensor.wallet.check_config( config ) @classmethod diff --git a/bittensor/_neuron/text/core_server/nucleus_impl.py b/bittensor/_neuron/text/core_server/nucleus_impl.py index f2d7d2c19b..1351e98a75 100644 --- a/bittensor/_neuron/text/core_server/nucleus_impl.py +++ b/bittensor/_neuron/text/core_server/nucleus_impl.py @@ -234,7 +234,7 @@ def forward(self, inputs, tokenizer=None): Decoded predictions of the next token in the sentence. 
""" - message, model_output, decoded_targets = self.local_forward(inputs, tokenizer)[1] + message, model_output, decoded_targets = self.local_forward(inputs, tokenizer) shift_logits = decoded_targets[..., :-1, :].contiguous() shift_labels = inputs[..., 1:].contiguous() diff --git a/bittensor/_neuron/text/core_server/run.py b/bittensor/_neuron/text/core_server/run.py index 9ef5a65630..d756868698 100644 --- a/bittensor/_neuron/text/core_server/run.py +++ b/bittensor/_neuron/text/core_server/run.py @@ -193,7 +193,7 @@ def time_check(): time_check() - #stake_check() + stake_check() return False diff --git a/bittensor/_neuron/text/core_validator/__init__.py b/bittensor/_neuron/text/core_validator/__init__.py index 57e1f840be..36e982d4de 100644 --- a/bittensor/_neuron/text/core_validator/__init__.py +++ b/bittensor/_neuron/text/core_validator/__init__.py @@ -144,7 +144,7 @@ def __init__( self.wallet = bittensor.wallet ( config = self.config ) if wallet == None else wallet self.subtensor = bittensor.subtensor ( config = self.config ) if subtensor == None else subtensor self.metagraph = bittensor.metagraph ( config = self.config, subtensor = self.subtensor ) if metagraph == None else metagraph - self.dendrite = bittensor.dendrite ( config = self.config, wallet = self.wallet ) if dendrite == None else dendrite + self.dendrite = bittensor.dendrite ( config = self.config, wallet = self.wallet, max_active_receptors = 0 ) if dendrite == None else dendrite # Dendrite should not store receptor in validator. self.device = torch.device ( device = self.config.neuron.device ) self.nucleus = nucleus ( config = self.config, device = self.device, subtensor = self.subtensor ).to( self.device ) self.dataset = (bittensor.dataset(config=self.config, batch_size=self.subtensor.validator_batch_size, diff --git a/bittensor/_subtensor/__init__.py b/bittensor/_subtensor/__init__.py index 8c0be7c88f..3b0c870671 100644 --- a/bittensor/_subtensor/__init__.py +++ b/bittensor/_subtensor/__init__.py @@ -15,22 +15,16 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. import argparse +import copy import os -import random -import time -import psutil -import subprocess -from sys import platform - import bittensor -import copy +from loguru import logger from substrateinterface import SubstrateInterface +from torch.cuda import is_available as is_cuda_available -from . import subtensor_impl -from . import subtensor_mock +from . import subtensor_impl, subtensor_mock -from loguru import logger logger = logger.opt(colors=True) __type_registery__ = { @@ -193,8 +187,9 @@ def add_args(cls, parser: argparse.ArgumentParser, prefix: str = None ): parser.add_argument('--' + prefix_str + 'subtensor.register.num_processes', '-n', dest='subtensor.register.num_processes', help="Number of processors to use for registration", type=int, default=bittensor.defaults.subtensor.register.num_processes) parser.add_argument('--' + prefix_str + 'subtensor.register.update_interval', '--' + prefix_str + 'subtensor.register.cuda.update_interval', '--' + prefix_str + 'cuda.update_interval', '-u', help="The number of nonces to process before checking for next block during registration", type=int, default=bittensor.defaults.subtensor.register.update_interval) # registration args. Used for register and re-register and anything that calls register. 
- parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.use_cuda', '--' + prefix_str + 'cuda', '--' + prefix_str + 'cuda.use_cuda', default=bittensor.defaults.subtensor.register.cuda.use_cuda, help='''Set true to use CUDA.''', action='store_true', required=False ) - parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.dev_id', '--' + prefix_str + 'cuda.dev_id', type=int, default=argparse.SUPPRESS, help='''Set the CUDA device id. Goes by the order of speed. (i.e. 0 is the fastest).''', required=False ) + parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.use_cuda', '--' + prefix_str + 'cuda', '--' + prefix_str + 'cuda.use_cuda', default=argparse.SUPPRESS, help='''Set true to use CUDA.''', action='store_true', required=False ) + parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.dev_id', '--' + prefix_str + 'cuda.dev_id', type=int, nargs='+', default=argparse.SUPPRESS, help='''Set the CUDA device id(s). Goes by the order of speed. (i.e. 0 is the fastest).''', required=False ) + parser.add_argument( '--' + prefix_str + 'subtensor.register.cuda.TPB', '--' + prefix_str + 'cuda.TPB', type=int, default=bittensor.defaults.subtensor.register.cuda.TPB, help='''Set the number of Threads Per Block for CUDA.''', required=False ) except argparse.ArgumentError: @@ -215,7 +210,7 @@ def add_defaults(cls, defaults ): defaults.subtensor.register.update_interval = os.getenv('BT_SUBTENSOR_REGISTER_UPDATE_INTERVAL') if os.getenv('BT_SUBTENSOR_REGISTER_UPDATE_INTERVAL') != None else 50_000 defaults.subtensor.register.cuda = bittensor.Config() - defaults.subtensor.register.cuda.dev_id = 0 + defaults.subtensor.register.cuda.dev_id = [0] defaults.subtensor.register.cuda.use_cuda = False defaults.subtensor.register.cuda.TPB = 256 @@ -223,6 +218,18 @@ def add_defaults(cls, defaults ): def check_config( config: 'bittensor.Config' ): assert config.subtensor #assert config.subtensor.network != None + if config.subtensor.get('register') and config.subtensor.register.get('cuda'): + assert all((isinstance(x, int) or isinstance(x, str) and x.isnumeric() ) for x in config.subtensor.register.cuda.get('dev_id', [])) + + if config.subtensor.register.cuda.get('use_cuda', False): + try: + import cubit + except ImportError: + raise ImportError('CUDA registration is enabled but cubit is not installed. Please install cubit.') + + if not is_cuda_available(): + raise RuntimeError('CUDA registration is enabled but no CUDA devices are detected.') + @staticmethod def determine_chain_endpoint(network: str): diff --git a/bittensor/_subtensor/subtensor_impl.py b/bittensor/_subtensor/subtensor_impl.py index df648062ab..5da7dd1232 100644 --- a/bittensor/_subtensor/subtensor_impl.py +++ b/bittensor/_subtensor/subtensor_impl.py @@ -412,15 +412,18 @@ def serve_axon ( except net.UPNPCException as upnpc_exception: raise RuntimeError('Failed to hole-punch with upnpc with exception {}'.format( upnpc_exception )) from upnpc_exception else: - external_port = axon.port + external_port = axon.external_port # ---- Get external ip ---- - try: - external_ip = net.get_external_ip() - bittensor.__console__.print(":white_heavy_check_mark: [green]Found external ip: {}[/green]".format( external_ip )) - bittensor.logging.success(prefix = 'External IP', sufix = '{}'.format( external_ip )) - except Exception as E: - raise RuntimeError('Unable to attain your external ip. Check your internet connection. 
error: {}'.format(E)) from E + if axon.external_ip == None: + try: + external_ip = net.get_external_ip() + bittensor.__console__.print(":white_heavy_check_mark: [green]Found external ip: {}[/green]".format( external_ip )) + bittensor.logging.success(prefix = 'External IP', sufix = '{}'.format( external_ip )) + except Exception as E: + raise RuntimeError('Unable to attain your external ip. Check your internet connection. error: {}'.format(E)) from E + else: + external_ip = axon.external_ip # ---- Subscribe to chain ---- serve_success = self.serve( @@ -442,7 +445,7 @@ def register ( prompt: bool = False, max_allowed_attempts: int = 3, cuda: bool = False, - dev_id: int = 0, + dev_id: Union[List[int], int] = 0, TPB: int = 256, num_processes: Optional[int] = None, update_interval: Optional[int] = None, @@ -462,11 +465,11 @@ def register ( max_allowed_attempts (int): Maximum number of attempts to register the wallet. cuda (bool): - If true, the wallet should be registered on the cuda device. - dev_id (int): - The cuda device id. + If true, the wallet should be registered using CUDA device(s). + dev_id (Union[List[int], int]): + The CUDA device id to use, or a list of device ids. TPB (int): - The number of threads per block (cuda). + The number of threads per block (CUDA). num_processes (int): The number of processes to use to register. update_interval (int): @@ -559,7 +562,9 @@ def register ( else: # Exited loop because pow is no longer valid. bittensor.__console__.print( "[red]POW is stale.[/red]" ) - return False + # Try again. + continue + if attempts < max_allowed_attempts: #Failed registration, retry pow attempts += 1 diff --git a/bittensor/_wallet/wallet_impl.py b/bittensor/_wallet/wallet_impl.py index 993b09930a..5749c487ce 100644 --- a/bittensor/_wallet/wallet_impl.py +++ b/bittensor/_wallet/wallet_impl.py @@ -669,7 +669,7 @@ def regenerate_coldkeypub( self, ss58_address: Optional[str] = None, public_key: # Short name for regenerate_coldkeypub regen_coldkeypub = regenerate_coldkeypub - def regenerate_coldkey( self, mnemonic: Optional[Union[list, str]]=None, seed: Optional[str]=None, use_password: bool = True, overwrite:bool = False) -> 'Wallet': + def regenerate_coldkey( self, mnemonic: Optional[Union[list, str]] = None, seed: Optional[str] = None, use_password: bool = True, overwrite:bool = False) -> 'Wallet': """ Regenerates the coldkey from passed mnemonic, encrypts it with the user's password and save the file Args: mnemonic: (Union[list, str], optional): @@ -700,11 +700,13 @@ def regenerate_coldkey( self, mnemonic: Optional[Union[list, str]]=None, seed: O self.set_coldkeypub( keypair, overwrite = overwrite) return self - def regen_hotkey( self, mnemonic: Union[list, str], use_password: bool = True, overwrite:bool = False) -> 'Wallet': + def regen_hotkey( self, mnemonic: Optional[Union[list, str]], seed: Optional[str] = None, use_password: bool = True, overwrite:bool = False) -> 'Wallet': """ Regenerates the hotkey from passed mnemonic, encrypts it with the user's password and save the file Args: mnemonic: (Union[list, str], optional): Key mnemonic as list of words or string space separated words. + seed: (str, optional): + Seed as hex string. use_password (bool, optional): Is the created key password protected. overwrite (bool, optional): @@ -713,13 +715,15 @@ def regen_hotkey( self, mnemonic: Union[list, str], use_password: bool = True, o wallet (bittensor.Wallet): this object with newly created hotkey. 
""" - self.regenerate_hotkey(mnemonic, use_password, overwrite) + self.regenerate_hotkey(mnemonic, seed, use_password, overwrite) - def regenerate_hotkey( self, mnemonic: Union[list, str], use_password: bool = True, overwrite:bool = False) -> 'Wallet': + def regenerate_hotkey( self, mnemonic: Optional[Union[list, str]] = None, seed: Optional[str] = None, use_password: bool = True, overwrite:bool = False) -> 'Wallet': """ Regenerates the hotkey from passed mnemonic, encrypts it with the user's password and save the file Args: mnemonic: (Union[list, str], optional): Key mnemonic as list of words or string space separated words. + seed: (str, optional): + Seed as hex string. use_password (bool, optional): Is the created key password protected. overwrite (bool, optional): @@ -728,10 +732,17 @@ def regenerate_hotkey( self, mnemonic: Union[list, str], use_password: bool = Tr wallet (bittensor.Wallet): this object with newly created hotkey. """ - if isinstance( mnemonic, str): mnemonic = mnemonic.split() - if len(mnemonic) not in [12,15,18,21,24]: - raise ValueError("Mnemonic has invalid size. This should be 12,15,18,21 or 24 words") - keypair = Keypair.create_from_mnemonic(" ".join(mnemonic)) - display_mnemonic_msg( keypair, "hotkey" ) + if mnemonic is None and seed is None: + raise ValueError("Must pass either mnemonic or seed") + if mnemonic is not None: + if isinstance( mnemonic, str): mnemonic = mnemonic.split() + if len(mnemonic) not in [12,15,18,21,24]: + raise ValueError("Mnemonic has invalid size. This should be 12,15,18,21 or 24 words") + keypair = Keypair.create_from_mnemonic(" ".join(mnemonic)) + display_mnemonic_msg( keypair, "hotkey" ) + else: + # seed is not None + keypair = Keypair.create_from_seed(seed) + self.set_hotkey( keypair, encrypt=use_password, overwrite = overwrite) return self diff --git a/bittensor/utils/__init__.py b/bittensor/utils/__init__.py index 3a5b353b8d..1da54cab7e 100644 --- a/bittensor/utils/__init__.py +++ b/bittensor/utils/__init__.py @@ -1,5 +1,4 @@ import binascii -import datetime import hashlib import math import multiprocessing @@ -9,7 +8,7 @@ import time from dataclasses import dataclass from queue import Empty -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import backoff import bittensor @@ -20,7 +19,12 @@ from substrateinterface import Keypair from substrateinterface.utils import ss58 -from .register_cuda import reset_cuda, solve_cuda +from .register_cuda import solve_cuda + + +class CUDAException(Exception): + """An exception raised when an error occurs in the CUDA environment.""" + pass def indexed_values_to_dataframe ( @@ -140,7 +144,7 @@ class POWSolution: difficulty: int seal: bytes -class Solver(multiprocessing.Process): +class SolverBase(multiprocessing.Process): """ A process that solves the registration PoW problem. 
@@ -188,7 +192,7 @@ class Solver(multiprocessing.Process): proc_num: int num_proc: int update_interval: int - best_queue: multiprocessing.Queue + best_queue: Optional[multiprocessing.Queue] time_queue: multiprocessing.Queue solution_queue: multiprocessing.Queue newBlockEvent: multiprocessing.Event @@ -216,6 +220,10 @@ def __init__(self, proc_num, num_proc, update_interval, best_queue, time_queue, self.stopEvent = stopEvent self.limit = limit + def run(self): + raise NotImplementedError("SolverBase is an abstract class") + +class Solver(SolverBase): def run(self): block_number: int block_bytes: bytes @@ -250,6 +258,72 @@ def run(self): nonce_start += self.update_interval * self.num_proc nonce_end += self.update_interval * self.num_proc +class CUDASolver(SolverBase): + dev_id: int + TPB: int + + def __init__(self, proc_num, num_proc, update_interval, time_queue, solution_queue, stopEvent, curr_block, curr_block_num, curr_diff, check_block, limit, dev_id: int, TPB: int): + super().__init__(proc_num, num_proc, update_interval, None, time_queue, solution_queue, stopEvent, curr_block, curr_block_num, curr_diff, check_block, limit) + self.dev_id = dev_id + self.TPB = TPB + + def run(self): + block_number: int + block_bytes: bytes + block_difficulty: int + nonce_limit = int(math.pow(2,64)) - 1 + + # Start at random nonce + nonce_start = self.TPB * self.update_interval * self.proc_num + random.randint( 0, nonce_limit ) + nonce_end = nonce_start + self.update_interval * self.TPB + while not self.stopEvent.is_set(): + if self.newBlockEvent.is_set(): + with self.check_block: + block_number = self.curr_block_num.value + block_bytes = bytes(self.curr_block) + block_difficulty = registration_diff_unpack(self.curr_diff) + + self.newBlockEvent.clear() + # reset nonces to start from random point + nonce_start = self.update_interval * self.proc_num + random.randint( 0, nonce_limit ) + nonce_end = nonce_start + self.update_interval + + # Do a block of nonces + solution, time = solve_for_nonce_block_cuda(self, nonce_start, self.update_interval, block_bytes, block_difficulty, self.limit, block_number, self.dev_id, self.TPB) + if solution is not None: + self.solution_queue.put(solution) + + # Send time + self.time_queue.put_nowait(time) + + nonce_start += self.update_interval * self.num_proc + nonce_start = nonce_start % nonce_limit + nonce_end += self.update_interval * self.num_proc + + +def solve_for_nonce_block_cuda(solver: CUDASolver, nonce_start: int, update_interval: int, block_bytes: bytes, difficulty: int, limit: int, block_number: int, dev_id: int, TPB: int) -> Tuple[Optional[POWSolution], int]: + start = time.time() + + solution, seal = solve_cuda(nonce_start, + update_interval, + TPB, + block_bytes, + block_number, + difficulty, + limit, + dev_id) + + if (solution != -1): + # Check if solution is valid + # Attempt to reset CUDA device + #reset_cuda() + + #print(f"{solver.proc_num} on cuda:{solver.dev_id} found a solution: {solution}, {block_number}, {str(block_bytes)}, {str(seal)}, {difficulty}") + # Found a solution, save it. 
+ return POWSolution(solution, block_number, difficulty, seal), time.time() - start + + return None, time.time() - start + def solve_for_nonce_block(solver: Solver, nonce_start: int, nonce_end: int, block_bytes: bytes, difficulty: int, limit: int, block_number: int) -> Tuple[Optional[POWSolution], int]: best_local = float('inf') @@ -297,6 +371,12 @@ def update_curr_block(curr_diff: multiprocessing.Array, curr_block: multiprocess curr_block[i] = block_bytes[i] registration_diff_pack(diff, curr_diff) +def get_cpu_count(): + try: + return len(os.sched_getaffinity(0)) + except AttributeError: + # OSX does not have sched_getaffinity + return os.cpu_count() def solve_for_difficulty_fast( subtensor, wallet, num_processes: Optional[int] = None, update_interval: Optional[int] = None ) -> Optional[POWSolution]: """ @@ -317,7 +397,7 @@ def solve_for_difficulty_fast( subtensor, wallet, num_processes: Optional[int] = """ if num_processes == None: # get the number of allowed processes for this process - num_processes = len(os.sched_getaffinity(0)) + num_processes = min(1, get_cpu_count()) if update_interval is None: update_interval = 50_000 @@ -401,12 +481,11 @@ def solve_for_difficulty_fast( subtensor, wallet, num_processes: Optional[int] = # Get times for each solver time_total = 0 num_time = 0 - while time_queue.qsize() > 0: + + for _ in solvers: try: - time_ = time_queue.get_nowait() - time_total += time_ + time_total += time_queue.get_nowait() num_time += 1 - except Empty: break @@ -416,7 +495,7 @@ def solve_for_difficulty_fast( subtensor, wallet, num_processes: Optional[int] = itrs_per_sec = update_interval*num_processes / time_avg # get best solution from each solver using the best_queue - while best_queue.qsize() > 0: + for _ in solvers: try: num, seal = best_queue.get_nowait() if num < best_number: @@ -449,12 +528,12 @@ def get_human_readable(num, suffix="H"): return f"{num:.1f}Y{suffix}" def millify(n: int): - millnames = ['',' K',' M',' B',' T'] + millnames = ['',' K',' M',' B',' T', 'q', 'Q'] n = float(n) millidx = max(0,min(len(millnames)-1, int(math.floor(0 if n == 0 else math.log10(abs(n))/3)))) - return '{:.0f}{}'.format(n / 10**(3 * millidx), millnames[millidx]) + return '{:.4f}{}'.format(n / 10**(3 * millidx), millnames[millidx]) @backoff.on_exception(backoff.constant, Exception, @@ -468,7 +547,24 @@ def get_block_with_retry(subtensor: 'bittensor.Subtensor') -> Tuple[int, int, by raise Exception("Network error. 
Could not connect to substrate to get block hash") return block_number, difficulty, block_hash -def solve_for_difficulty_fast_cuda( subtensor: 'bittensor.Subtensor', wallet: 'bittensor.Wallet', update_interval: int = 50_000, TPB: int = 512, dev_id: int = 0 ) -> Optional[POWSolution]: +class UsingSpawnStartMethod(): + def __init__(self, force: bool = False): + self._old_start_method = None + self._force = force + + def __enter__(self): + self._old_start_method = multiprocessing.get_start_method(allow_none=True) + if self._old_start_method == None: + self._old_start_method = 'spawn' # default to spawn + + multiprocessing.set_start_method('spawn', force=self._force) + + def __exit__(self, *args): + # restore the old start method + multiprocessing.set_start_method(self._old_start_method, force=True) + + +def solve_for_difficulty_fast_cuda( subtensor: 'bittensor.Subtensor', wallet: 'bittensor.Wallet', update_interval: int = 50_000, TPB: int = 512, dev_id: Union[List[int], int] = 0, use_kernel_launch_optimization: bool = False ) -> Optional[POWSolution]: """ Solves the registration fast using CUDA Args: @@ -480,79 +576,138 @@ def solve_for_difficulty_fast_cuda( subtensor: 'bittensor.Subtensor', wallet: 'b The number of nonces to try before checking for more blocks TPB: int The number of threads per block. CUDA param that should match the GPU capability - dev_id: int - The CUDA device ID to execute the registration on + dev_id: Union[List[int], int] + The CUDA device IDs to execute the registration on, either a single device or a list of devices """ - if not torch.cuda.is_available(): - raise Exception("CUDA not available") + if isinstance(dev_id, int): + dev_id = [dev_id] + elif dev_id is None: + dev_id = [0] if update_interval is None: update_interval = 50_000 - - block_number, difficulty, block_hash = get_block_with_retry(subtensor) - block_bytes = block_hash.encode('utf-8')[2:] - - nonce = 0 + + if not torch.cuda.is_available(): + raise Exception("CUDA not available") + limit = int(math.pow(2,256)) - 1 - start_time = time.time() console = bittensor.__console__ status = console.status("Solving") - - solution = -1 - start_time = time.time() - interval_time = start_time - status.start() - while solution == -1 and not wallet.is_registered(subtensor): - solution, seal = solve_cuda(nonce, - update_interval, - TPB, - block_bytes, - block_number, - difficulty, - limit, - dev_id) - - if (solution != -1): - # Attempt to reset CUDA device - reset_cuda() - status.stop() - new_bn = subtensor.get_current_block() - print(f"Found solution for bn: {block_number}; Newest: {new_bn}") - return POWSolution(solution, block_number, difficulty, seal) - - nonce += (TPB * update_interval) - if (nonce >= int(math.pow(2,63))): - nonce = 0 - itrs_per_sec = (TPB * update_interval) / (time.time() - interval_time) - interval_time = time.time() + # Set mp start to use spawn so CUDA doesn't complain + # Force the set start method in-case of re-register + with UsingSpawnStartMethod(force=True): + curr_block = multiprocessing.Array('h', 64, lock=True) # byte array + curr_block_num = multiprocessing.Value('i', 0, lock=True) # int + curr_diff = multiprocessing.Array('Q', [0, 0], lock=True) # [high, low] + + def update_curr_block(block_number: int, block_bytes: bytes, diff: int, lock: multiprocessing.Lock): + with lock: + curr_block_num.value = block_number + for i in range(64): + curr_block[i] = block_bytes[i] + registration_diff_pack(diff, curr_diff) + + status.start() + + # Establish communication queues + stopEvent = 
multiprocessing.Event() + stopEvent.clear() + solution_queue = multiprocessing.Queue() + time_queue = multiprocessing.Queue() + check_block = multiprocessing.Lock() + + # Start consumers + num_processes = len(dev_id) + ## Create one consumer per GPU + solvers = [ CUDASolver(i, num_processes, update_interval, time_queue, solution_queue, stopEvent, curr_block, curr_block_num, curr_diff, check_block, limit, dev_id[i], TPB) + for i in range(num_processes) ] - block_number, difficulty, block_hash = get_block_with_retry(subtensor) + # Get first block + block_number = subtensor.get_current_block() + difficulty = subtensor.difficulty + block_hash = subtensor.substrate.get_block_hash( block_number ) + while block_hash == None: + block_hash = subtensor.substrate.get_block_hash( block_number ) block_bytes = block_hash.encode('utf-8')[2:] + old_block_number = block_number + # Set to current block + update_curr_block(block_number, block_bytes, difficulty, check_block) - message = f"""Solving - time spent: {datetime.timedelta(seconds=time.time() - start_time)} - Nonce: [bold white]{nonce}[/bold white] - Difficulty: [bold white]{millify(difficulty)}[/bold white] - Iters: [bold white]{get_human_readable(int(itrs_per_sec), "H")}/s[/bold white] - Block: [bold white]{block_number}[/bold white] - Block_hash: [bold white]{block_hash.encode('utf-8')}[/bold white]""" - status.update(message.replace(" ", "")) + # Set new block events for each solver to start + for w in solvers: + w.newBlockEvent.set() - # exited while, found_solution contains the nonce or wallet is registered - if solution == -1: # didn't find solution - reset_cuda() - status.stop() - return None - - else: - reset_cuda() - # Shouldn't get here + for w in solvers: + w.start() # start the solver processes + + start_time = time.time() + time_since = 0.0 + solution = None + itrs_per_sec = 0 + while not wallet.is_registered(subtensor): + # Wait until a solver finds a solution + try: + solution = solution_queue.get(block=True, timeout=0.15) + if solution is not None: + break + except Empty: + # No solution found, try again + pass + + # check for new block + block_number = subtensor.get_current_block() + if block_number != old_block_number: + old_block_number = block_number + # update block information + block_hash = subtensor.substrate.get_block_hash( block_number) + while block_hash == None: + block_hash = subtensor.substrate.get_block_hash( block_number) + block_bytes = block_hash.encode('utf-8')[2:] + difficulty = subtensor.difficulty + + update_curr_block(block_number, block_bytes, difficulty, check_block) + # Set new block events for each solver + for w in solvers: + w.newBlockEvent.set() + + # Get times for each solver + time_total = 0 + num_time = 0 + for _ in solvers: + try: + time_ = time_queue.get_nowait() + time_total += time_ + num_time += 1 + + except Empty: + break + + if num_time > 0: + time_avg = time_total / num_time + itrs_per_sec = TPB*update_interval*num_processes / time_avg + time_since = time.time() - start_time + + message = f"""Solving + time spent: {time_since} + Difficulty: [bold white]{millify(difficulty)}[/bold white] + Iters: [bold white]{get_human_readable(int(itrs_per_sec), 'H')}/s[/bold white] + Block: [bold white]{block_number}[/bold white] + Block_hash: [bold white]{block_hash.encode('utf-8')}[/bold white]""" + status.update(message.replace(" ", "")) + + # exited while, found_solution contains the nonce or wallet is registered + if solution is not None: + stopEvent.set() # stop all other processes + status.stop() + + 
return solution + status.stop() return None -def create_pow( subtensor, wallet, cuda: bool = False, dev_id: int = 0, tpb: int = 256, num_processes: int = None, update_interval: int = None ) -> Optional[Dict[str, Any]]: +def create_pow( subtensor, wallet, cuda: bool = False, dev_id: Union[List[int], int] = 0, tpb: int = 256, num_processes: int = None, update_interval: int = None) -> Optional[Dict[str, Any]]: if cuda: solution: POWSolution = solve_for_difficulty_fast_cuda( subtensor, wallet, dev_id=dev_id, TPB=tpb, update_interval=update_interval ) else: diff --git a/bittensor/utils/balance.py b/bittensor/utils/balance.py index 0bba07622d..a52913c37d 100644 --- a/bittensor/utils/balance.py +++ b/bittensor/utils/balance.py @@ -68,6 +68,9 @@ def __repr__(self): return self.__str__() def __eq__(self, other: Union[int, float, "Balance"]): + if other is None: + return False + if hasattr(other, "rao"): return self.rao == other.rao else: diff --git a/bittensor/utils/register_cuda.py b/bittensor/utils/register_cuda.py index f64f4777b4..086f1f3637 100644 --- a/bittensor/utils/register_cuda.py +++ b/bittensor/utils/register_cuda.py @@ -6,6 +6,9 @@ import numpy as np from Crypto.Hash import keccak +from contextlib import redirect_stdout +import io + def solve_cuda(nonce_start: np.int64, update_interval: np.int64, TPB: int, block_bytes: bytes, bn: int, difficulty: int, limit: int, dev_id: int = 0) -> Tuple[np.int64, bytes]: """ @@ -66,7 +69,6 @@ def create_seal_hash( block_bytes:bytes, nonce:int ) -> bytes: solution = cubit.solve_cuda(TPB, nonce_start, update_interval, upper_bytes, block_bytes, dev_id) # 0 is first GPU seal = None if solution != -1: - print(f"Checking solution: {solution} for bn: {bn}") seal = create_seal_hash(block_bytes, solution) if seal_meets_difficulty(seal, difficulty): return solution, seal @@ -85,3 +87,24 @@ def reset_cuda(): raise ImportError("Please install cubit") cubit.reset_cuda() + +def log_cuda_errors() -> str: + """ + Logs any CUDA errors. + """ + try: + import cubit + except ImportError: + raise ImportError("Please install cubit") + + f = io.StringIO() + with redirect_stdout(f): + cubit.log_cuda_errors() + + s = f.getvalue() + + return s + + + + diff --git a/bittensor/utils/tokenizer_utils.py b/bittensor/utils/tokenizer_utils.py index 10a82df5b0..9192556097 100644 --- a/bittensor/utils/tokenizer_utils.py +++ b/bittensor/utils/tokenizer_utils.py @@ -872,19 +872,32 @@ def unravel_topk_token_phrases(compact_topk: torch.Tensor, topk: int, ignore_ind batch_size = len(prob_idx) // (topk + 1) # (batch_size * (topk + floor)) / (topk + floor) assert batch_size * (topk + 1) == len(prob_idx), f'{batch_size} * ({topk} + 1) != {len(prob_idx)}' # decoding irregularity otherwise - # split into topk token phrases with prob prepend [prob, tok_0, tok_1, ... 
tok_n] - phrases = [s.tolist() for s in torch.tensor_split(compact_topk, prob_idx)] # tolist for faster list comprehension - phrases = phrases[1:] # ignore first (empty) split + # Obtain phrase lengths and maximum phrase length + phrase_len = prob_idx[1:] - prob_idx[:-1] # [batch_size * (topk + 1) - 1] length of each phrase + phrase_len = torch.cat((phrase_len, torch.tensor([1]))) # [batch_size * (topk + 1)] prob_floor is always len=1 + max_len = phrase_len.max() # determine width of topk_tensor as max len of all phrase lists (with prob in front) - # determine width of topk_tensor as max len of all phrase lists (with prob in front) - max_len = max([len(p) for p in phrases]) # max_{b,k}(len([prob_k, tok_0_k, tok_1_k, ...])) + # Initialize topk_tensor with ignore_index + 2, since decrement with 2 follows to remove token offset later + topk_tensor = torch.ones((batch_size * (topk + 1), max_len), device=compact_topk.device) + topk_tensor *= ignore_index + 2 # [batch_size * (topk + 1), max_len] + + # Insert phrases of each unique length as block into topk_tensor + for unique_len in phrase_len.unique(): + if unique_len <= 1: + continue # skip probability column, will be added afterward + + phrase_idx = torch.where(phrase_len == unique_len)[0] # phrase indices where phrase_len is unique_len + compact_idx = prob_idx[phrase_idx] # indices in compact_topk + + # Create indexing block, add index for each phrase position, skip first (prob) position + block_idx = [compact_idx + position for position in range(1, unique_len)] # incrementally add each position of phrase + # transpose .t() ensures correct interleaving of consecutive positions: + # [[phrase_a_1, phrase_a_2, ..., phrase_a_n], [phrase_b_1, phrase_b_2, ..., phrase_b_n], ...] + block_idx = torch.vstack(block_idx).t().reshape(-1, unique_len - 1) # [-1, unique_len - 1] for all phrases with unique_len - ignore_index_2 = ignore_index + 2 # increment with 2, as decrement with 2 follows + topk_tensor[phrase_idx, 1:unique_len] = compact_topk[block_idx] # slice selected phrases and copy into topk_tensor - # form single 2D tensor with topk token phrases with prob prepend [prob, tok_0, tok_1, ... tok_n] - topk_tensor = torch.tensor([p + [ignore_index_2] * (max_len - len(p)) - for p in phrases]).to(compact_topk.device) # [batch_size * (topk + 1), max_len] - topk_tensor -= 2 # remove token offset + topk_tensor -= 2 # remove token offset, overwrites probability column, replace probabilities below # grafting probability tensors into first column to attach gradients topk_tensor[:, 0] = compact_topk[prob_idx] # tensor([prob_k=0_b, prob_k=1_b, ..., prob_floor_b]) diff --git a/scripts/update_version.sh b/scripts/update_version.sh new file mode 100755 index 0000000000..0cd8334ff1 --- /dev/null +++ b/scripts/update_version.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +VERSION=$(cat VERSION) +CODE_WITH_VERSION='bittensor/__init__.py' + +MAJOR=$(awk -F. '{print $1}' <<< $VERSION) +MINOR=$(awk -F. '{print $2}' <<< $VERSION) +PATCH=$(awk -F. 
'{print $3}' <<< $VERSION)
+
+# RC version
+RC=$(awk -F- '{print $2}' <<< $VERSION)
+if [ -z "$RC" ]; then
+    echo "Current version: $MAJOR.$MINOR.$PATCH"
+else
+    echo "Current version: $MAJOR.$MINOR.$PATCH-$RC"
+fi
+
+OPERATION=$1
+case $OPERATION in
+    "major")
+        echo "Applying a $OPERATION update"
+        NEW_VERSION="$((MAJOR + 1)).0.0"
+        ;;
+    "minor")
+        echo "Applying a $OPERATION update"
+        NEW_VERSION="$MAJOR.$((MINOR + 1)).0"
+        ;;
+    "patch")
+        echo "Applying a $OPERATION update"
+        NEW_VERSION="$MAJOR.$MINOR.$((PATCH + 1))"
+        ;;
+    "rc")
+        SUFFIX=$2
+        if [ -z "$SUFFIX" ]; then
+            echo "Suffix is needed when updating version to an RC"
+            exit 1
+        fi
+        NEW_VERSION="$MAJOR.$MINOR.$PATCH-$SUFFIX"
+        ;;
+    *)
+        echo "This operation is not allowed. Try one of the following: {major, minor, patch, rc}"
+        exit 1
+        ;;
+esac
+
+echo "New version: $NEW_VERSION"
+
+sed -i "18,22s/$VERSION/$NEW_VERSION/g" $CODE_WITH_VERSION
+echo -n $NEW_VERSION > VERSION
\ No newline at end of file
diff --git a/setup.py b/setup.py
index d4a9723bcd..50771f802c 100644
--- a/setup.py
+++ b/setup.py
@@ -70,6 +70,6 @@
    ],
    python_requires='>=3.7',
    extras_requires={
-        'cubit': ['cubit>=1.0.5 @ git+https://github.com/opentensor/cubit.git']
+        'cubit': ['cubit>=1.1.0 @ git+https://github.com/opentensor/cubit.git']
    }
 )
diff --git a/tests/integration_tests/test_cli.py b/tests/integration_tests/test_cli.py
index 65bdd8f67a..febbcbf5e7 100644
--- a/tests/integration_tests/test_cli.py
+++ b/tests/integration_tests/test_cli.py
@@ -1185,6 +1185,7 @@ def test_regen_hotkey( self ):
        config.subtensor._mock = True
        config.model = "core_server"
        config.mnemonic = "faculty decade seven jelly gospel axis next radio grain radio remain gentle"
+        config.seed = None
        config.n_words = 12
        config.use_password = False
        config.no_prompt = True
diff --git a/tests/integration_tests/test_dataset.py b/tests/integration_tests/test_dataset.py
index 223f1d98b1..df7465f3a2 100644
--- a/tests/integration_tests/test_dataset.py
+++ b/tests/integration_tests/test_dataset.py
@@ -66,6 +66,28 @@ def test_change_data_size():
        assert next(dataset).size() == result_data_size
    dataset.close()

+
+def test_text_dataset():
+    batch_size = 20
+    block_size = 128
+    num_batches = 10
+    epoch_length = 10
+
+    dataset = bittensor.dataset (
+        _mock = True,
+        batch_size = batch_size,
+        block_size = block_size,
+        num_batches = num_batches
+    )
+
+    dataloader = dataset.dataloader(epoch_length)
+
+    assert len(dataloader) == epoch_length
+    assert len(dataloader) != len(dataset)
+    assert len(dataset[0]) == block_size
+    assert len(dataloader.dataset) == batch_size * epoch_length
+    dataset.close()
+
 if __name__ == "__main__":
    test_change_data_size()
\ No newline at end of file
diff --git a/tests/integration_tests/test_dataset_ipfs.py b/tests/integration_tests/test_dataset_ipfs.py
deleted file mode 100644
index d10401b267..0000000000
--- a/tests/integration_tests/test_dataset_ipfs.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import bittensor
-
-def test_text_dataset():
-    batch_size = 20
-    block_size = 128
-    num_batches = 10
-    epoch_length = 10
-
-    dataset = bittensor.dataset (
-        batch_size = batch_size,
-        block_size = block_size,
-        num_batches = num_batches
-    )
-
-    dataloader = dataset.dataloader(epoch_length)
-
-    assert len(dataloader) == epoch_length
-    assert len(dataloader) != len(dataset)
-    assert len(dataset[0]) == block_size
-    assert len(dataloader.dataset) == batch_size * epoch_length
-
-    dataset.close()
\ No newline at end of file
diff --git a/tests/integration_tests/test_dendrite.py 
b/tests/integration_tests/test_dendrite.py index b2a243eb14..cb62d540b1 100644 --- a/tests/integration_tests/test_dendrite.py +++ b/tests/integration_tests/test_dendrite.py @@ -20,6 +20,7 @@ import pytest import bittensor from bittensor._proto.bittensor_pb2 import UnknownException +from bittensor.utils.test_utils import get_random_unused_port from . import constant wallet = bittensor.wallet.mock() @@ -275,8 +276,9 @@ def forward_casual_lm(inputs_x, synapse, model_output = None): def forward_casual_lm_next(inputs_x, synapse, model_output=None): return None, None, synapse.nill_forward_response_tensor(inputs_x) + port = get_random_unused_port() axon = bittensor.axon ( - port = 8096, + port = port, ip = '0.0.0.0', wallet = wallet, ) @@ -293,7 +295,7 @@ def forward_casual_lm_next(inputs_x, synapse, model_output=None): hotkey = wallet.hotkey.ss58_address, ip = '0.0.0.0', ip_type = 4, - port = 8096, + port = port, modality = 0, coldkey = wallet.coldkeypub.ss58_address ) @@ -323,8 +325,9 @@ def forward_casual_lm(inputs_x, synapse, model_output = None): def forward_casual_lm_next(inputs_x, synapse, model_output=None): return None, None, synapse.nill_forward_response_tensor(inputs_x) + port = get_random_unused_port() axon = bittensor.axon ( - port = 8097, + port = port, ip = '0.0.0.0', wallet = wallet, ) @@ -341,7 +344,7 @@ def forward_casual_lm_next(inputs_x, synapse, model_output=None): hotkey = wallet.hotkey.ss58_address, ip = '0.0.0.0', ip_type = 4, - port = 8097, + port = port, modality = 0, coldkey = wallet.coldkeypub.ss58_address ) @@ -382,8 +385,9 @@ def forward_casual_lm(inputs_x, synapse, model_output = None): def forward_casual_lm_next(inputs_x, synapse, model_output=None): return None, None, synapse.nill_forward_response_tensor(inputs_x) + port = get_random_unused_port() axon = bittensor.axon ( - port = 8098, + port = port, ip = '0.0.0.0', wallet = wallet, ) @@ -396,7 +400,7 @@ def forward_casual_lm_next(inputs_x, synapse, model_output=None): hotkey = wallet.hotkey.ss58_address, ip = '0.0.0.0', ip_type = 4, - port = 8098, + port = port, modality = 0, coldkey = wallet.coldkeypub.ss58_address ) @@ -440,8 +444,9 @@ def forward_casual_lm_next(inputs_x, synapse, model_output=None): time.sleep(3) return None, None, synapse.nill_forward_response_tensor(inputs_x) + port = get_random_unused_port() axon = bittensor.axon ( - port = 8098, + port = port, ip = '0.0.0.0', wallet = wallet, ) @@ -454,7 +459,7 @@ def forward_casual_lm_next(inputs_x, synapse, model_output=None): hotkey = wallet.hotkey.ss58_address, ip = '0.0.0.0', ip_type = 4, - port = 8098, + port = port, modality = 0, coldkey = wallet.coldkeypub.ss58_address ) @@ -481,5 +486,4 @@ def test_clear(): dataset.close() if __name__ == "__main__": - bittensor.logging(debug = True) test_dendrite_timeout() \ No newline at end of file diff --git a/tests/integration_tests/test_subtensor.py b/tests/integration_tests/test_subtensor.py index 8c1ae1967e..fca7cb08f9 100644 --- a/tests/integration_tests/test_subtensor.py +++ b/tests/integration_tests/test_subtensor.py @@ -16,18 +16,19 @@ # DEALINGS IN THE SOFTWARE. 
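+# Note: these tests patch Queue.get_nowait to raise queue.Empty instead of
+# patching Queue.qsize, because multiprocessing.Queue.qsize() raises
+# NotImplementedError on macOS (sem_getvalue is not implemented there);
+# the solver draining loops in bittensor.utils avoid qsize for the same reason.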
-import multiprocessing
-from unittest.mock import patch
+import random
+import time
+import unittest
+from queue import Empty as QueueEmpty
+from unittest.mock import MagicMock, patch
+
 import bittensor
 import pytest
-import unittest
-import time
-import random
-from unittest.mock import MagicMock
+from bittensor._subtensor.subtensor_mock import mock_subtensor
 from bittensor.utils.balance import Balance
-from bittensor.utils import Solver, update_curr_block
 from substrateinterface import Keypair
-from bittensor._subtensor.subtensor_mock import mock_subtensor
+
+
 class TestSubtensor(unittest.TestCase):
    def setUp(self):
        self.subtensor = bittensor.subtensor( network = 'nobunaga' )
@@ -58,7 +59,7 @@ def setUp(self):
            "is_null":False
        })
    )
-        self.neurons = self.subtensor.neurons()
+        self.neurons = [self.subtensor.neuron_for_uid(0), self.subtensor.neuron_for_uid(1) ]
        self.balance = Balance.from_tao(1000)
        assert True
@@ -404,8 +405,8 @@ def process_events(self):
        with patch('bittensor.Subtensor.difficulty'):
            # patch solution queue to return None
            with patch('multiprocessing.queues.Queue.get', return_value=None) as mock_queue_get:
-                # patch time queue size check
-                with patch('multiprocessing.queues.Queue.qsize', return_value=0):
+                # patch time queue get to raise Empty exception
+                with patch('multiprocessing.queues.Queue.get_nowait', side_effect=QueueEmpty) as mock_queue_get_nowait:
                    wallet = bittensor.wallet(_mock=True)
                    wallet.is_registered = MagicMock( side_effect=is_registered_return_values )
@@ -491,6 +492,46 @@ def process_events(self):
            assert self.subtensor.register(wallet=wallet,) == False
            assert bittensor.utils.create_pow.call_count == 3

+    def test_registration_stale_then_continue( self ):
+        # verify that after a stale solution, the solver will continue without exiting
+
+        class ExitEarly(Exception):
+            pass
+
+        mock_not_stale = MagicMock(
+            side_effect = [False, True]
+        )
+
+        mock_substrate_enter = MagicMock(
+            side_effect=ExitEarly()
+        )
+
+        mock_subtensor_self = MagicMock(
+            neuron_for_pubkey = MagicMock( return_value = MagicMock(is_null = True) ), # not registered
+            substrate=MagicMock(
+                __enter__ = mock_substrate_enter
+            )
+        )
+
+        mock_wallet = MagicMock()
+
+        mock_create_pow = MagicMock(
+            return_value = MagicMock()
+        )
+
+
+        with patch('bittensor.utils.create_pow', mock_create_pow):
+            with patch('bittensor.utils.POWNotStale', mock_not_stale):
+                # should create a pow and check if it is stale
+                # then should create a new pow and check if it is stale
+                # then should enter substrate and exit early because of test
+                with pytest.raises(ExitEarly):
+                    bittensor.Subtensor.register(mock_subtensor_self, mock_wallet)
+                assert mock_create_pow.call_count == 2 # must try another pow after stale
+                assert mock_not_stale.call_count == 2
+                assert mock_substrate_enter.call_count == 1 # only tries to submit once, then exits
+
+
 def test_subtensor_mock():
    mock_subtensor.kill_global_mock_process()
    sub = bittensor.subtensor(_mock=True)
@@ -575,4 +616,4 @@ def test_subtensor_mock_functions():
 if __name__ == "__main__":
    sub = TestSubtensor()
    sub.setUp()
-    sub.test_registration_partly_failed()
\ No newline at end of file
+    sub.test_registration_partly_failed()
diff --git a/tests/unit_tests/benchmarking/test_dendrite_multiprocess.py b/tests/unit_tests/benchmarking/test_dendrite_multiprocess.py
deleted file mode 100644
index 9b7fdcc2b6..0000000000
--- a/tests/unit_tests/benchmarking/test_dendrite_multiprocess.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import bittensor
-import torch
-import time
-from multiprocessing import 
Pool -from qqdm import qqdm - -from bittensor.utils.test_utils import get_random_unused_port - -wallet = bittensor.wallet ( - path = f"/tmp/pytest{time.time()}", - name = 'pytest', - hotkey = 'pytest', -) - -wallet.create_new_coldkey( use_password=False, overwrite = True) -wallet.create_new_hotkey( use_password=False, overwrite = True) -logging =bittensor.logging(debug=True) -ports = [get_random_unused_port() for _ in range(5)] - -inputs="""in my palm is a clear stone , and inside it is a - small ivory statuette . a guardian angel . - figured if you 're going to be out at night""" - - -def forward( inputs_x): - return torch.zeros([1, 42, bittensor.__network_dim__]) - -def create_axon(port): - axon = bittensor.axon ( - port = port, - wallet = wallet, - ) - axon.attach_forward_callback( forward, modality = bittensor.proto.Modality.TEXT ) - axon.start() - - -def dendrite_delay(i): - dend = bittensor.dendrite(wallet=wallet,max_active_receptors=10,multiprocess=True) - for idx in range(100): - responses, return_ops, query_times = dend.forward_text( endpoints=endpoints,inputs = inputs) - assert all(return_ops) == 1 - time.sleep(0.1) - return - -def main(): - global endpoints - endpoints = [] - for i in ports: - create_axon(i) - wallet.create_new_hotkey( use_password=False, overwrite = True) - endpoint = bittensor.endpoint( - version = bittensor.__version_as_int__, - uid = 1, - hotkey = wallet.hotkey.ss58_address, - ip = '0.0.0.0', - ip_type = 4, - port = i, - modality = 0, - coldkey = wallet.coldkey.ss58_address - ) - endpoints += [endpoint] - - logging =bittensor.logging(debug=True) - dend = bittensor.dendrite(wallet=wallet,max_active_receptors=10,multiprocess=True) - responses, return_ops, query_times = dend.forward_text( endpoints=endpoints,inputs = inputs) - assert all(return_ops) == 1 - - N_processes = [1,2,3,4,5] - N = len(N_processes) - Num_experiments = 5 - collections = torch.zeros((Num_experiments,N)) - bittensor.logging(debug=False) - experiments = [i for i in range(Num_experiments)] - for j in qqdm(experiments): - for i in range(N): - start = time.time() - process = N_processes[i] - with Pool(process) as p: - reps = p.map(dendrite_delay,list(range(i+1))) - - end = time.time() - collections[j,i] = end-start - time.sleep(1) - - means = torch.mean(collections,axis=0) - error = torch.std(collections,axis=0) - - scaled_collections = torch.zeros((Num_experiments,N)) - for i in range(N): - scaled_collections[:,i] = collections[:,i]/((i+1)*(100*len(ports))) - - means_scaled = torch.mean(scaled_collections,axis=0) - error_scaled = torch.std(scaled_collections,axis=0) - - print ("{:<8} {:<15} {:<10} {:<10}".format('# of Processes','Avg Time Elapsed','Standard Error','Time Per Payload')) - for i in range(N): - print ("{:^13} | {:^14.3f} | {:^14.3f} | {:^10.3f}".format(N_processes[i], means[i], error[i], means_scaled[i])) - - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/tests/unit_tests/bittensor_tests/test_axon.py b/tests/unit_tests/bittensor_tests/test_axon.py index 8f2bfd22fa..5a7ede8ee6 100644 --- a/tests/unit_tests/bittensor_tests/test_axon.py +++ b/tests/unit_tests/bittensor_tests/test_axon.py @@ -1,5 +1,6 @@ # The MIT License (MIT) # Copyright © 2021 Yuma Rao +# Copyright © 2022 Opentensor Foundation # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the “Software”), to deal in the Software without restriction, including without limitation @@ -16,6 +17,7 @@ # DEALINGS IN 
THE SOFTWARE. import time +import unittest import unittest.mock as mock import uuid @@ -31,13 +33,7 @@ wallet = bittensor.wallet.mock() axon = bittensor.axon(wallet = wallet) -bittensor.logging(debug = True) -""" -TODO: Tests that need to be added - - Different synapses in combination - - Different errors for different synapses - - Correct Messages when only a single synapse fails -""" + def sign(wallet): @@ -1107,6 +1103,149 @@ def test_axon_is_destroyed(): axonB.__del__() assert is_port_in_use( port ) == False +# test external axon args +class TestExternalAxon(unittest.TestCase): + """ + Tests the external axon config flags + `--axon.external_port` and `--axon.external_ip` + Need to verify the external config is used when broadcasting to the network + and the internal config is used when creating the grpc server + + Also test the default behaviour when no external axon config is provided + (should use the internal axon config, like usual) + """ + + def test_external_ip_not_set_dont_use_internal_ip(self): + # Verify that not setting the external ip arg will NOT default to the internal axon ip + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + axon = bittensor.axon ( ip = 'fake_ip', server=mock_server, config=mock_config ) + assert axon.external_ip != axon.ip # should be different + assert axon.external_ip is None # should be None + + def test_external_port_not_set_use_internal_port(self): + # Verify that not setting the external port arg will default to the internal axon port + mock_config = bittensor.axon.config() + + axon = bittensor.axon ( port = 1234, config=mock_config ) + assert axon.external_port == axon.port + + def test_external_port_set_full_address_internal(self): + internal_port = 1234 + external_port = 5678 + + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + _ = bittensor.axon( port=internal_port, external_port=external_port, server=mock_server, config=mock_config ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address0 = args[0] + + assert f'{internal_port}' in full_address0 and f':{external_port}' not in full_address0 + + mock_add_insecure_port.reset_mock() + + # Test using config + mock_config = bittensor.axon.config() + + mock_config.axon.port = internal_port + mock_config.axon.external_port = external_port + + _ = bittensor.axon( config=mock_config, server=mock_server ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address0 = args[0] + + assert f'{internal_port}' in full_address0, f'{internal_port} was not found in {full_address0}' + assert f':{external_port}' not in full_address0, f':{external_port} was found in {full_address0}' + + def test_external_ip_set_full_address_internal(self): + internal_ip = 'fake_ip_internal' + external_ip = 'fake_ip_external' + + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + _ = bittensor.axon( ip=internal_ip, external_ip=external_ip, server=mock_server, config=mock_config ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address0 = args[0] + + assert f'{internal_ip}' in 
full_address0 and f'{external_ip}' not in full_address0 + + mock_add_insecure_port.reset_mock() + + # Test using config + mock_config = bittensor.axon.config() + mock_config.axon.external_ip = external_ip + mock_config.axon.ip = internal_ip + + _ = bittensor.axon( config=mock_config, server=mock_server ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address0 = args[0] + + assert f'{internal_ip}' in full_address0, f'{internal_ip} was not found in {full_address0}' + assert f'{external_ip}' not in full_address0, f'{external_ip} was found in {full_address0}' + + def test_external_ip_port_set_full_address_internal(self): + internal_ip = 'fake_ip_internal' + external_ip = 'fake_ip_external' + internal_port = 1234 + external_port = 5678 + + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + _ = bittensor.axon( ip=internal_ip, external_ip=external_ip, port=internal_port, external_port=external_port, server=mock_server, config=mock_config ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address0 = args[0] + + assert f'{internal_ip}:{internal_port}' == full_address0 and f'{external_ip}:{external_port}' != full_address0 + + mock_add_insecure_port.reset_mock() + + # Test using config + mock_config = bittensor.axon.config() + + mock_config.axon.ip = internal_ip + mock_config.axon.external_ip = external_ip + mock_config.axon.port = internal_port + mock_config.axon.external_port = external_port + + _ = bittensor.axon( config=mock_config, server=mock_server ) + + mock_add_insecure_port.assert_called_once() + args, _ = mock_add_insecure_port.call_args + full_address1 = args[0] + + assert f'{internal_ip}:{internal_port}' == full_address1, f'{internal_ip}:{internal_port} is not eq to {full_address1}' + assert f'{external_ip}:{external_port}' != full_address1, f'{external_ip}:{external_port} is eq to {full_address1}' + if __name__ == "__main__": # test_forward_joint_success() diff --git a/tests/unit_tests/bittensor_tests/test_balance.py b/tests/unit_tests/bittensor_tests/test_balance.py index 60f61fac67..8a52d117ab 100644 --- a/tests/unit_tests/bittensor_tests/test_balance.py +++ b/tests/unit_tests/bittensor_tests/test_balance.py @@ -327,3 +327,12 @@ def test_balance_rfloordiv_other_not_balance(self, balance: Union[int, float], b assert isinstance(quot_, Balance) assert CLOSE_IN_VALUE(quot_.rao, 5) == rao2_ // rao_ + @given(balance=valid_tao_numbers_strategy) + def test_balance_not_eq_none(self, balance: Union[int, float]): + balance_ = Balance(balance) + assert not balance_ == None + + @given(balance=valid_tao_numbers_strategy) + def test_balance_neq_none(self, balance: Union[int, float]): + balance_ = Balance(balance) + assert balance_ != None diff --git a/tests/unit_tests/bittensor_tests/test_config.py b/tests/unit_tests/bittensor_tests/test_config.py index 981bc3beab..55d59ff1df 100644 --- a/tests/unit_tests/bittensor_tests/test_config.py +++ b/tests/unit_tests/bittensor_tests/test_config.py @@ -21,7 +21,6 @@ import argparse import pytest -bittensor.logging(debug = True) def test_loaded_config(): with pytest.raises(NotImplementedError): diff --git a/tests/unit_tests/bittensor_tests/test_forward_backward.py b/tests/unit_tests/bittensor_tests/test_forward_backward.py index ee5fcb47ea..527b792ced 100644 --- a/tests/unit_tests/bittensor_tests/test_forward_backward.py +++ 
b/tests/unit_tests/bittensor_tests/test_forward_backward.py @@ -27,7 +27,6 @@ from bittensor.utils.test_utils import get_random_unused_port wallet = bittensor.wallet.mock() -bittensor.logging(debug = True) dendrite = bittensor.dendrite(requires_grad=True) dendrite_no_grad = bittensor.dendrite(requires_grad=False) dendrite_mock = bittensor.dendrite(requires_grad=True) diff --git a/tests/unit_tests/bittensor_tests/test_neuron.py b/tests/unit_tests/bittensor_tests/test_neuron.py index 4d5195cb1f..00639a4b56 100644 --- a/tests/unit_tests/bittensor_tests/test_neuron.py +++ b/tests/unit_tests/bittensor_tests/test_neuron.py @@ -1,5 +1,6 @@ from atexit import register from types import SimpleNamespace +import unittest from unittest.mock import MagicMock, patch from more_itertools import side_effect @@ -23,9 +24,9 @@ def __init__(self): self.encoder2 = TransformerEncoder( self.encoder_layers, nlayers_2 ) self.decoder = torch.nn.Linear( network_dim, vocab_size , bias=False) - core_server = bittensor._neuron.text.core_server.server() + core_server = bittensor._neuron.text.core_server.server(pretrained=False) # test for the basic default gpt2 case - assert core_server.set_fine_tuning_params() == (True, 'transformer.h.11') + assert core_server.set_fine_tuning_params() == (True, 'h.11') # test for the case when there are 2 modulelists core_server.pre_model = Model() @@ -77,9 +78,6 @@ def test_coreserver_reregister_flag_false_exit(): mock_wallet = bittensor.wallet.mock() mock_wallet.config = config - class MockException(Exception): - pass - def exit_early(*args, **kwargs): raise MockException('exit_early') @@ -129,9 +127,6 @@ def test_coreserver_reregister_flag_true(): mock_wallet = bittensor.wallet.mock() mock_wallet.config = config - class MockException(Exception): - pass - def exit_early(*args, **kwargs): raise MockException('exit_early') @@ -178,9 +173,6 @@ def test_corevalidator_reregister_flag_false_exit(): mock_wallet = bittensor.wallet.mock() mock_wallet.config = config - class MockException(Exception): - pass - def exit_early(*args, **kwargs): raise MockException('exit_early') @@ -227,9 +219,6 @@ def test_corevalidator_reregister_flag_true(): mock_wallet = bittensor.wallet.mock() mock_wallet.config = config - class MockException(Exception): - pass - def exit_early(*args, **kwargs): raise MockException('exit_early') @@ -260,5 +249,100 @@ def exit_early(*args, **kwargs): # Should try to register the neuron mock_register.assert_called_once() +class MockException(Exception): + pass + +class TestBlacklist(unittest.TestCase): + + @staticmethod + def construct_config(): + defaults = bittensor.Config() + bittensor.subtensor.add_defaults( defaults ) + bittensor.dendrite.add_defaults( defaults ) + bittensor.axon.add_defaults( defaults ) + bittensor.wallet.add_defaults( defaults ) + bittensor.dataset.add_defaults( defaults ) + bittensor.logging.add_defaults( defaults ) + bittensor.wandb.add_defaults( defaults ) + defaults.wandb.api_key = 'test' + defaults.neuron = bittensor.neurons.core_server.neuron.config() + defaults.neuron.learning_rate = 0.0001 + defaults.neuron.momentum = 0.9 + + return defaults + + def exit_early(self, *args, **kwargs): + raise MockException('exit_early') + + def test_stake_blacklist(self): + import sys + sys.setrecursionlimit(100) + + mock_hotkey = "0x0000000000000000000000000000000000000000" + mock_hotkey_1 = "0x0000000000000000000000000000000000000001" + + mock_subtensor = MagicMock( + is_hotkey_registered=MagicMock(return_value=True), + ) + + mock_wallet = MagicMock( + 
reregister=MagicMock(), + is_registered=MagicMock(return_value=True), + ) + + mock_metagraph = MagicMock( + hotkeys=[ + mock_hotkey, + mock_hotkey_1, + ], + S=[ + torch.tensor(100), # stake for mock_hotkey, uid 0 + torch.tensor(1001), # stake for mock_hotkey_1, uid 1 + ] + ) + + mock_config = self.construct_config() + + mock_config.neuron.blacklist = bittensor.Config() + mock_config.neuron.blacklist.stake = 1000 # blacklist if stake is less than 1000 + + mock_model_config = bittensor.neurons.core_server.server.config() + mock_model_config.neuron = MagicMock( + disable_blacklist = False + ) + + mock_model = MagicMock( + spec=bittensor.neurons.core_server.server, + config=mock_model_config, + ) + + with patch('bittensor.axon.__new__', side_effect=self.exit_early) as mock_new_axon: + with patch('bittensor.neurons.core_server.neuron.check_config', return_value=True): + with pytest.raises(MockException): + bittensor.neurons.core_server.serve( + config=mock_config, + model=MagicMock( + spec=bittensor.neurons.core_server.server, + device="cpu", + to=MagicMock(return_value=mock_model), + config=mock_model_config, + ), + subtensor=mock_subtensor, + wallet=mock_wallet, + axon=None, + metagraph=mock_metagraph + ) + + # args, kwargs + _, kwargs = mock_new_axon.call_args + blacklist = kwargs['blacklist'] + + # Check that the blacklist rejects below min stake + assert blacklist(mock_hotkey, bittensor.proto.RequestType.FORWARD) == True + + # Check that the blacklist accepts above min stake + assert blacklist(mock_hotkey_1, bittensor.proto.RequestType.FORWARD) == False + + if __name__ == '__main__': pass diff --git a/tests/unit_tests/bittensor_tests/test_receptor.py b/tests/unit_tests/bittensor_tests/test_receptor.py index c1b2dc82c9..031e75d1e1 100644 --- a/tests/unit_tests/bittensor_tests/test_receptor.py +++ b/tests/unit_tests/bittensor_tests/test_receptor.py @@ -24,8 +24,7 @@ import asyncio from types import SimpleNamespace import time as clock - -logging = bittensor.logging(debug = True) +from bittensor.utils.test_utils import get_random_unused_port wallet = bittensor.wallet.mock() @@ -429,8 +428,9 @@ def forward_casual_lm( input, synapse, model_output = None): def forward_casual_lm_next(input, synapse, model_output=None): return None, None, torch.zeros([3, (synapse.topk + 1), 1 + 1]) + port = get_random_unused_port() axon = bittensor.axon ( - port = 8081, + port = port, ip = '127.0.0.1', wallet = wallet, ) @@ -445,7 +445,7 @@ def forward_casual_lm_next(input, synapse, model_output=None): uid = 0, ip = '127.0.0.1', ip_type = 4, - port = 8081, + port = port, hotkey = wallet.hotkey.ss58_address, coldkey = wallet.coldkey.ss58_address, modality = 2 @@ -612,8 +612,9 @@ def forward_casual_lm_next(input, synapse): ## --unimplemented error def test_axon_receptor_connection_forward_unimplemented(): + port = get_random_unused_port() axon = bittensor.axon ( - port = 8091, + port = port, ip = '127.0.0.1', wallet = wallet, ) @@ -624,7 +625,7 @@ def test_axon_receptor_connection_forward_unimplemented(): uid = 0, ip = '127.0.0.1', ip_type = 4, - port = 8091, + port = port, hotkey = wallet.hotkey.ss58_address, coldkey = wallet.coldkey.ss58_address, modality = 2 diff --git a/tests/unit_tests/bittensor_tests/test_receptor_pool.py b/tests/unit_tests/bittensor_tests/test_receptor_pool.py index 165e787198..55ae719fbe 100644 --- a/tests/unit_tests/bittensor_tests/test_receptor_pool.py +++ b/tests/unit_tests/bittensor_tests/test_receptor_pool.py @@ -25,8 +25,6 @@ import unittest.mock as mock import asyncio -logging = 
bittensor.logging(debug = True) - # --- Receptor Pool --- wallet = bittensor.wallet.mock() wallet2 = bittensor.wallet.mock() @@ -145,7 +143,7 @@ def test_receptor_pool_forward_timeout(): tensors=[y_hidden_serialized, y_causallm_serialized, y_causallmnext_serialized, y_seq_2_seq_serialized] ) - + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Forward = MagicMock( return_value = mock_return_val ) resp1, codes, _ = receptor_pool.forward( endpoints, synapses, x, timeout=1) @@ -178,6 +176,7 @@ def test_receptor_pool_forward_num_synapse_mismatch(): tensors = [y_hidden_serialized, y_causallm_serialized, y_causallmnext_serialized] ) + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Forward = MagicMock( return_value = mock_return_val ) resp1, codes, _ = receptor_pool.forward( endpoints, synapses, x, timeout=1) @@ -207,6 +206,7 @@ def test_receptor_pool_forward_response_partial_shape_error(): tensors = [y_hidden_serialized, y_causallm_serialized, y_causallmnext_serialized, y_seq_2_seq_serialized] ) + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Forward = MagicMock( return_value = mock_return_val ) resp1, codes, _ = receptor_pool.forward( endpoints, synapses, x, timeout=1) @@ -237,6 +237,7 @@ def test_receptor_pool_partial_remote_success_return_code(): tensors = [y_hidden_serialized, y_causallm_serialized, y_causallmnext_serialized, y_seq_2_seq_serialized] ) + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Forward = MagicMock( return_value = mock_return_val ) resp1, codes, _ = receptor_pool.forward( endpoints, synapses, x, timeout=1) @@ -266,6 +267,7 @@ def test_receptor_pool_missing_synapse(): tensors = [y_hidden_serialized, y_causallm_serialized, y_causallmnext_serialized, y_seq_2_seq_serialized] ) + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Forward = MagicMock( return_value = mock_return_val ) resp1, codes, _ = receptor_pool.forward( endpoints, synapses, x, timeout=1) @@ -286,11 +288,13 @@ def test_receptor_pool_backward_hang(): causallmnext_grads = torch.ones((x.size(0), (bittensor.synapse.TextCausalLMNext().topk + 1), 1 + 1)) seq_2_seq_grads = torch.tensor([]) + receptor_pool = bittensor.receptor_pool(wallet=wallet,max_active_receptors=1) receptor_pool._get_or_create_receptor_for_endpoint(neuron_obj) receptor_pool.receptors[neuron_obj.hotkey].stub.Backward = MagicMock( return_value = mock_return_val ) receptor_pool.backward(endpoints, synapses, x, [[hidden_grads, causal_grads, causallmnext_grads, seq_2_seq_grads], [hidden_grads, causal_grads, causallmnext_grads, seq_2_seq_grads]], timeout=1) if __name__ == "__main__": - test_receptor_pool_missing_synapse() + test_receptor_pool_forward_success() + test_receptor_pool_forward_timeout() pass \ No newline at end of file diff --git a/tests/unit_tests/bittensor_tests/test_subtensor.py b/tests/unit_tests/bittensor_tests/test_subtensor.py new file mode 
100644 index 0000000000..5bb8631181 --- /dev/null +++ b/tests/unit_tests/bittensor_tests/test_subtensor.py @@ -0,0 +1,108 @@ + +# The MIT License (MIT) +# Copyright © 2022 Opentensor Foundation + +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the “Software”), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of +# the Software. + +# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import unittest.mock as mock +from unittest.mock import MagicMock + +import bittensor +import unittest + +class TestSubtensorWithExternalAxon(unittest.TestCase): + """ + Test the subtensor with external axon in the config + """ + + def test_serve_axon_with_external_ip_set(self): + internal_ip: str = 'this is an internal ip' + external_ip: str = 'this is an external ip' + + mock_serve = MagicMock( + return_value=True + ) + + mock_subtensor = MagicMock( + spec=bittensor.Subtensor, + serve=mock_serve + ) + + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_grpc_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + mock_axon_with_external_ip_set = bittensor.axon( + ip=internal_ip, + external_ip=external_ip, + server=mock_grpc_server, + config=mock_config + ) + + bittensor.Subtensor.serve_axon( + mock_subtensor, + axon=mock_axon_with_external_ip_set, + use_upnpc=False, + ) + + mock_serve.assert_called_once() + # verify that the axon is served to the network with the external ip + _, kwargs = mock_serve.call_args + self.assertEqual(kwargs['ip'], external_ip) + + def test_serve_axon_with_external_port_set(self): + external_ip: str = 'this is an external ip' + + internal_port: int = 1234 + external_port: int = 5678 + + mock_serve = MagicMock( + return_value=True + ) + + mock_subtensor = MagicMock( + spec=bittensor.Subtensor, + serve=mock_serve + ) + + mock_add_insecure_port = mock.MagicMock(return_value=None) + mock_grpc_server = mock.MagicMock( + add_insecure_port=mock_add_insecure_port + ) + + mock_config = bittensor.axon.config() + + mock_axon_with_external_port_set = bittensor.axon( + port=internal_port, + external_port=external_port, + server=mock_grpc_server, + config=mock_config + ) + + with mock.patch('bittensor.utils.networking.get_external_ip', return_value=external_ip): + # mock the get_external_ip function to return the external ip + bittensor.Subtensor.serve_axon( + mock_subtensor, + axon=mock_axon_with_external_port_set, + use_upnpc=False, + ) + + mock_serve.assert_called_once() + # verify that the axon is served to the network with the external port + _, kwargs = mock_serve.call_args + self.assertEqual(kwargs['port'], external_port) diff --git 
a/tests/unit_tests/bittensor_tests/test_wallet.py b/tests/unit_tests/bittensor_tests/test_wallet.py index 6415966a5b..660eb5bf99 100644 --- a/tests/unit_tests/bittensor_tests/test_wallet.py +++ b/tests/unit_tests/bittensor_tests/test_wallet.py @@ -64,3 +64,33 @@ def test_regen_coldkeypub_no_pubkey(self): with pytest.raises(ValueError): # Must provide either public_key or ss58_address self.mock_wallet.regenerate_coldkeypub(ss58_address=None, public_key=None) + + def test_regen_coldkey_from_hex_seed_str(self): + ss58_addr = "5D5cwd8DX6ij7nouVcoxDuWtJfiR1BnzCkiBVTt7DU8ft5Ta" + seed_str = "0x659c024d5be809000d0d93fe378cfde020846150b01c49a201fc2a02041f7636" + with patch.object(self.mock_wallet, 'set_coldkey') as mock_set_coldkey: + self.mock_wallet.regenerate_coldkey(seed=seed_str) + + mock_set_coldkey.assert_called_once() + keypair: bittensor.Keypair = mock_set_coldkey.call_args_list[0][0][0] + self.assertEqual(keypair.seed_hex, seed_str) + self.assertEqual(keypair.ss58_address, ss58_addr) # Check that the ss58 address is correct + + seed_str_bad = "0x659c024d5be809000d0d93fe378cfde020846150b01c49a201fc2a02041f763" # 1 character short + with pytest.raises(ValueError): + self.mock_wallet.regenerate_coldkey(seed=seed_str_bad) + + def test_regen_hotkey_from_hex_seed_str(self): + ss58_addr = "5D5cwd8DX6ij7nouVcoxDuWtJfiR1BnzCkiBVTt7DU8ft5Ta" + seed_str = "0x659c024d5be809000d0d93fe378cfde020846150b01c49a201fc2a02041f7636" + with patch.object(self.mock_wallet, 'set_hotkey') as mock_set_hotkey: + self.mock_wallet.regenerate_hotkey(seed=seed_str) + + mock_set_hotkey.assert_called_once() + keypair: bittensor.Keypair = mock_set_hotkey.call_args_list[0][0][0] + self.assertEqual(keypair.seed_hex, seed_str) + self.assertEqual(keypair.ss58_address, ss58_addr) # Check that the ss58 address is correct + + seed_str_bad = "0x659c024d5be809000d0d93fe378cfde020846150b01c49a201fc2a02041f763" # 1 character short + with pytest.raises(ValueError): + self.mock_wallet.regenerate_hotkey(seed=seed_str_bad) diff --git a/tests/unit_tests/bittensor_tests/utils/test_tokenizer_utils.py b/tests/unit_tests/bittensor_tests/utils/test_tokenizer_utils.py index 110b274132..d19f91879b 100644 --- a/tests/unit_tests/bittensor_tests/utils/test_tokenizer_utils.py +++ b/tests/unit_tests/bittensor_tests/utils/test_tokenizer_utils.py @@ -44,7 +44,7 @@ ]} -def test_tokenizer_equivalence(): +def _test_tokenizer_equivalence(): r""" Checks if two tokenizers are equivalent w.r.t. their vocabularies. Equivalent tokenizers should always produce the same tokenization for the same text. @@ -249,7 +249,7 @@ def tokenizer_translation(text_batch: List[str], model_name: str, max_length: in return original_loss, encoded_loss, translated_loss, enc_pre_logits -def test_tokenizer_translation(): +def _test_tokenizer_translation(): r""" Unit test for tokenizer translation. 
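+# (The leading underscore on these renamed functions takes them out of
+# pytest's default `test_*` collection pattern, deactivating the tests
+# without deleting their code.)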
@@ -433,6 +433,77 @@ def test_topk_token_phrases(): tokenizer_topk_phrases(sample_text[text_name], model_name, max_length, _enc_pre_logits, topk=128) +def _test_random_topk_token_phrases(single_token_ratios: Tuple = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), + max_len_final: int = 10, batch_size: int = 32, topk: int = 4096, + ignore_index: int = -100, vocab_len: int = 50256): + r""" + Asserts that randomly instantiated compact_topk encodings can be correctly decoded + to recover the original topk_tensor, where: + topk_tensor: + [batch_size, (topk + 1), max_len] tensor includes topk token probabilities (prob_k) + floor_prob + in first column with gradients attached, with std_tokens in remaining columns with ignore_index padding. + Content structure: + [[[prob_k=0_b=0, tok_0_k=0_b=0, tok_1_k=0_b=0, ..., ignore_index?], + [prob_k=1_b=0, tok_0_k=1_b=0, tok_1_k=1_b=0, ..., ignore_index?], + [...], + [prob_floor_b=0, ignore_index, ..., ignore_index]], + [[prob_k=0_b=1, tok_0_k=0_b=1, tok_1_k=0_b=1, ..., ignore_index?], + [prob_k=1_b=1, tok_0_k=1_b=1, tok_1_k=1_b=1, ..., ignore_index?], + [...], + [prob_floor_b=1, ignore_index, ..., ignore_index]], + [...]] + compact_topk: + [sum_b(sum_k(len(phrase_k) + 1)_b)] Compacted 1-D tensor >= batch_size * (2 * topk + 1), + since 2 * topk + 1: topk x [probability, token sequence (at least one token)] + + floor probability (rest). + Content structure: + [prob_k=0_b=0, tok_0_k=0_b=0, tok_1_k=0_b=0, ..., prob_k=1_b=0, tok_0_k=1_b=0, ..., prob_floor_b=0, + prob_k=0_b=1, tok_0_k=0_b=1, tok_1_k=0_b=1, ..., prob_k=1_b=1, tok_0_k=1_b=1, ..., prob_floor_b=1, + ...] + + Args: + single_token_ratios (:obj:`Tuple`, `optional`): + Series of ratios of single-token phrases to total phrases, to test individually. + max_len_final (:obj:`int`, `optional`): + The maximum phrase length to test. + batch_size (:obj:`int`, `optional`): + The batch_size of the test input. + topk (:obj:`int`, `optional`): + The topk of the test input, the amount of logits retained. + ignore_index (:obj:`int`, `optional`): + The padding value after the end of each phrase. + vocab_len (:obj:`int`, `optional`): + The tokenizer vocabulary length. + + Returns: + """ + for single_token_ratio in single_token_ratios: # for each single token occurrence ratio + for _max_len in torch.arange(3, max_len_final): # for each max_len in range 3 to max_len_final + longer_phrases = int(topk * (1 - single_token_ratio) / (_max_len - 2)) # number of multi-token phrases per length + max_len = _max_len if longer_phrases > 0 else 2 # change max_len if only single_phrases + single_phrases = topk - (max_len - 2) * longer_phrases # number of [prob, token, ignore_index, ...] phrases + + topk_tensor = ignore_index * torch.ones((batch_size, topk + 1, max_len)) # [batch_size, (topk + 1), max_len] + + for batch in range(batch_size): # construct each batch separately + permuted = torch.randperm(topk) + + # add single token phrases: [prob, token, ignore_index, ..., ignore_index] + topk_tensor[batch, permuted[:single_phrases], 1:2] = 1. * torch.randint(vocab_len, (single_phrases, 1)) + + # add longer token phrases: [prob, token, token, ..., ignore_index?, ..., ignore_index] + for length in range(2, max_len): + start = single_phrases + (length - 2) * longer_phrases + phrase_idx = permuted[start:start + longer_phrases] + topk_tensor[batch, phrase_idx, 1:length+1] = 1. 
* torch.randint(vocab_len, (longer_phrases, length)) + + topk_tensor[:, :, 0] = torch.rand((batch_size, topk + 1)) # assign random probabilities to first column + + compact_topk = compact_topk_token_phrases(topk_tensor) # [>= batch_size * (2 * topk + 1)] + _topk_tensor = unravel_topk_token_phrases(compact_topk, topk=topk) # [batch_size, (topk + 1), max_len] + assert torch.all(torch.eq(_topk_tensor, topk_tensor)) + + def topk_phrases_crossentropy(text_batch: List[str], model_name: str, max_length: int, last_indices: List[int], enc_pre_logits: torch.FloatTensor = None, @@ -590,7 +661,4 @@ def test_topk_phrases_crossentropy(): if __name__ == '__main__': - test_tokenizer_equivalence() - test_tokenizer_translation() - test_topk_token_phrases() - test_topk_phrases_crossentropy() + pass diff --git a/tests/unit_tests/bittensor_tests/utils/test_utils.py b/tests/unit_tests/bittensor_tests/utils/test_utils.py index 906d0d88ba..feb1807250 100644 --- a/tests/unit_tests/bittensor_tests/utils/test_utils.py +++ b/tests/unit_tests/bittensor_tests/utils/test_utils.py @@ -10,6 +10,7 @@ import random import torch import multiprocessing +from types import SimpleNamespace from sys import platform from substrateinterface.base import Keypair @@ -348,6 +349,58 @@ def test_pow_not_stale_diff_block_number_too_old(self): assert not bittensor.utils.POWNotStale(mock_subtensor, mock_solution) +def test_pow_called_for_cuda(): + class MockException(Exception): + pass + mock_compose_call = MagicMock(side_effect=MockException) + + mock_subtensor = bittensor.subtensor(_mock=True) + mock_subtensor.neuron_for_pubkey=MagicMock(is_null=True) + mock_subtensor.substrate = MagicMock( + __enter__= MagicMock(return_value=MagicMock( + compose_call=mock_compose_call + )), + __exit__ = MagicMock(return_value=None), + ) + + mock_wallet = SimpleNamespace( + hotkey=SimpleNamespace( + ss58_address='' + ), + coldkeypub=SimpleNamespace( + ss58_address='' + ) + ) + + mock_result = { + "block_number": 1, + 'nonce': random.randint(0, pow(2, 32)), + 'work': b'\x00' * 64, + } + + with patch('bittensor.utils.POWNotStale', return_value=True) as mock_pow_not_stale: + with patch('torch.cuda.is_available', return_value=True) as mock_cuda_available: + with patch('bittensor.utils.create_pow', return_value=mock_result) as mock_create_pow: + with patch('bittensor.utils.hex_bytes_to_u8_list', return_value=b''): + + # Should exit early + with pytest.raises(MockException): + mock_subtensor.register(mock_wallet, cuda=True, prompt=False) + + mock_pow_not_stale.assert_called_once() + mock_create_pow.assert_called_once() + mock_cuda_available.assert_called_once() + + call0 = mock_pow_not_stale.call_args + assert call0[0][0] == mock_subtensor + assert call0[0][1] == mock_result + + mock_compose_call.assert_called_once() + call1 = mock_compose_call.call_args + assert call1[1]['call_function'] == 'register' + call_params = call1[1]['call_params'] + assert call_params['nonce'] == mock_result['nonce'] + def test_pow_called_for_cuda(): class MockException(Exception):