discojs-core/models: add gpt #644

Merged: 7 commits, Mar 18, 2024
82 changes: 49 additions & 33 deletions .github/workflows/lint-test-build.yml
@@ -8,16 +8,18 @@ env:
node_version: 16

jobs:
download-training-data:
download-datasets:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
- run: ./get_training_data.sh
working-directory: ./
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- run: ./datasets/populate

lint-lib-core:
needs: [build-lib-core, build-lib-node]
@@ -213,19 +215,17 @@ jobs:

test-lib-core:
needs:
[
build-lib-core,
build-lib-node,
build-server-docker,
download-training-data,
]
[build-lib-core, build-lib-node, build-server-docker, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -235,14 +235,17 @@
- run: ./with_server npm --workspace=./discojs/discojs-core test

test-lib-node:
needs: [build-lib-core, build-server-docker, download-training-data]
needs: [build-lib-core, build-server-docker, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -252,14 +255,17 @@
- run: ./with_server npm --workspace=./discojs/discojs-node test

test-lib-web:
needs: [build-lib-core, build-server-docker, download-training-data]
needs: [build-lib-core, build-server-docker, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -269,14 +275,17 @@
- run: ./with_server npm --workspace=./discojs/discojs-web test

test-server:
needs: [build-lib-core, build-lib-node, download-training-data]
needs: [build-lib-core, build-lib-node, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -286,14 +295,17 @@
- run: npm --workspace=./server test

test-web-client:
needs: [build-lib-core, build-lib-web, download-training-data]
needs: [build-lib-core, build-lib-web, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -310,15 +322,17 @@
config: baseUrl=http://localhost:8081/#/

test-cli:
needs:
[build-lib-core, build-lib-node, build-server, download-training-data]
needs: [build-lib-core, build-lib-node, build-server, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
@@ -328,15 +342,17 @@
- run: npm --workspace=./cli start -- -t cifar10 -u 1 -e 1

test-docs-examples:
needs:
[build-lib-core, build-lib-node, build-server, download-training-data]
needs: [build-lib-core, build-lib-node, build-server, download-datasets]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
lfs: true
submodules: true
- uses: actions/cache@v3
with:
path: example_training_data
key: training_data
path: datasets
key: datasets-${{ hashFiles('datasets/**') }}
- uses: actions/setup-node@v3
with:
node-version: ${{ env.node_version }}
149 changes: 5 additions & 144 deletions .gitignore
@@ -1,150 +1,11 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# dependencies
/node_modules/

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
# built
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/


UI/public/.DS_Store
UI/.DS_Store

*.DS_Store

node_modules/

# model files on server
weights.bin
model.json

# example training data
example_training_data/
example_training_data.tar.gz

# IDE files
# system specifics files
.metals/
.idea/
.vscode/
*.DS_Store
2 changes: 1 addition & 1 deletion DEV.md
@@ -101,7 +101,7 @@ npm -ws run build
**6.** Download and extract the sample training datasets. These datasets are used in the automated tests.

```
./get_training_data.sh
./datasets/populate
```

**7.** Launch DISCO
23 changes: 9 additions & 14 deletions cli/src/cli.ts
@@ -8,16 +8,8 @@ import { saveLog } from './utils'
import { getTaskData } from './data'
import { args } from './args'

const NUMBER_OF_USERS = args.numberOfUsers
const TASK = args.task

const infoText = `\nStarted federated training of ${TASK.id}`
console.log(infoText)

console.log({ args })

async function runUser (task: Task, url: URL, data: data.DataSplit): Promise<TrainerLog> {
const client = new clients.federated.FederatedClient(url, task, new aggregators.MeanAggregator(TASK))
const client = new clients.federated.FederatedClient(url, task, new aggregators.MeanAggregator())

// force the federated scheme
const scheme = TrainingSchemes.FEDERATED
@@ -28,17 +20,20 @@ async function runUser (task: Task, url: URL, data: data.DataSplit): Promise<Tra
return await disco.logs()
}

async function main (): Promise<void> {
async function main (task: Task, numberOfUsers: number): Promise<void> {
console.log(`Started federated training of ${task.id}`)
console.log({ args })

const [server, url] = await startServer()

const data = await getTaskData(TASK)
const data = await getTaskData(task)

const logs = await Promise.all(
Range(0, NUMBER_OF_USERS).map(async (_) => await runUser(TASK, url, data)).toArray()
Range(0, numberOfUsers).map(async (_) => await runUser(task, url, data)).toArray()
)

if (args.save) {
const fileName = `${TASK.id}_${NUMBER_OF_USERS}users.csv`
const fileName = `${task.id}_${numberOfUsers}users.csv`
saveLog(logs, fileName)
}
console.log('Shutting down the server...')
@@ -48,4 +43,4 @@
})
}

main().catch(console.error)
main(args.task, args.numberOfUsers).catch(console.error)
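Passing `task` and `numberOfUsers` into `main`, instead of reading module-level constants, also makes the entry point reusable outside this one-shot script. A rough TypeScript sketch of what that enables, assuming `main` were exported from `cli.ts` and the module-level `main(...)` call guarded so importing the file has no side effects (neither assumption is part of this diff):

```ts
// sweep.ts — hypothetical helper, not part of this PR.
// Re-runs the same federated simulation with increasing user counts;
// each call to main() starts and shuts down its own server, as in the diff.
import { main } from './cli'   // assumes cli.ts exports main
import { args } from './args'  // the same argument parsing cli.ts already uses

async function sweep (): Promise<void> {
  for (const users of [1, 2, 4]) {
    console.log(`--- ${args.task.id} with ${users} user(s) ---`)
    await main(args.task, users)
  }
}

sweep().catch(console.error)
```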