From c7a663879dd1542ba1406e6938e77334cd74ff73 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 001/150] Trigger tests on push to devel or main branch --- .github/workflows/anaconda_linux.yml | 2 +- .github/workflows/anaconda_windows.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macosx.yml | 2 +- .github/workflows/pickle.yml | 2 +- .github/workflows/pickle_wheel.yml | 2 +- .github/workflows/windows.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || 
github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || 
github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: From 821a1c5ea3fd29387848c137f53b9ca34194b59c Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:46:33 +0100 Subject: [PATCH 002/150] Add cuda workflow to test cuda developments on CI --- .github/actions/coverage_install/action.yml | 2 +- .github/actions/linux_install/action.yml | 10 +-- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 17 +++++ .github/actions/python_install/action.yml | 17 +++++ .github/workflows/cuda.yml | 83 +++++++++++++++++++++ ci_tools/bot_messages/show_tests.txt | 1 + ci_tools/bot_tools/bot_funcs.py | 12 +-- ci_tools/devel_branch_tests.py | 1 + ci_tools/json_pytest_output.py | 2 +- 10 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 .github/actions/pytest_run_cuda/action.yml create mode 100644 .github/actions/python_install/action.yml create mode 100644 .github/workflows/cuda.yml diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + 
SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind shell: bash diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..b0bdc31f16 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} 
working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..52092a6e02 --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,17 @@ +name: 'Pyccel pytest commands generating Ccuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ 
b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} + PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + 
retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. - **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False 
otherwise. """ print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: From 092b557cf0ead7c949731adf40f0acd6678dbe66 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 003/150] Trigger tests on push to devel or main branch --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9111b47d52..cf52b1c624 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: 
github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 From 02a2360e41a3f3d09b31e271609dbe642c13ac01 Mon Sep 17 00:00:00 2001 From: bauom <40796259+bauom@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:50 +0100 Subject: [PATCH 004/150] [init] Adding CUDA language/compiler and CodePrinter (#32) This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter. Changes to stdlib: Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler --------- Co-authored-by: Mouad Elalj, EmilyBourne --- .dict_custom.txt | 1 + .github/actions/pytest_parallel/action.yml | 4 +- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 11 +- CHANGELOG.md | 6 + pyccel/codegen/codegen.py | 8 +- pyccel/codegen/compiling/compilers.py | 5 +- pyccel/codegen/pipeline.py | 5 +- pyccel/codegen/printing/cucode.py | 74 +++++++++++ pyccel/commands/console.py | 2 +- pyccel/compilers/default_compilers.py | 13 +- pyccel/naming/__init__.py | 4 +- pyccel/naming/cudanameclashchecker.py | 92 ++++++++++++++ pyccel/stdlib/numpy/numpy_c.c | 2 + pyccel/stdlib/numpy/numpy_c.h | 2 + pytest.ini | 1 + tests/conftest.py | 11 ++ tests/epyccel/test_base.py | 136 ++++++++++----------- 18 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 pyccel/codegen/printing/cucode.py create mode 100644 pyccel/naming/cudanameclashchecker.py diff --git a/.dict_custom.txt b/.dict_custom.txt index 82a6b10d31..ae99f31ed4 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,7 @@ Valgrind variadic subclasses oneAPI +Cuda getter setter bitwise diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml 
@@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index b0bdc31f16..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 52092a6e02..46f90552ed 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -1,4 +1,4 @@ -name: 'Pyccel pytest 
commands generating Ccuda' +name: 'Pyccel pytest commands generating Cuda' inputs: shell_cmd: description: 'Specifies the shell command (different for anaconda)' @@ -11,7 +11,14 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4807a17474..ce9212abc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. 
+ ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py index daf4559df4..8d4abb6bdb 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index fca93c5624..ef11579e49 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -441,7 +441,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index c0f8634e03..1e9d0e327d 100644 --- a/pyccel/codegen/pipeline.py +++ 
b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..86146b065b --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,74 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers + +from pyccel.ast.core import Import, Module + +from pyccel.errors.errors import Errors + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. + + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. 
+ """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + c_headers_imports = '' + local_imports = '' + + for imp in imports: + if imp.source in c_library_headers: + c_headers_imports += self._print(imp) + else: + local_imports += self._print(imp) + + imports = f'{c_headers_imports}\ + extern "C"{{\n\ + {local_imports}\ + }}' + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = 
{'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..971204e912 --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,92 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Handles name clash problems in Cuda +""" +from .languagenameclashchecker import LanguageNameClashChecker + +class CudaNameClashChecker(LanguageNameClashChecker): + """ + Class containing functions to help avoid problematic names in Cuda. + + A class which provides functionalities to check or propose variable names and + verify that they do not cause name clashes. Name clashes may be due to + new variables, or due to the use of reserved keywords. 
+ """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword + keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', + 'continue', 'default', 'do', 'double', 'else', 'enum', + 'extern', 'float', 'for', 'goto', 'if', 'inline', 'int', + 'long', 'register', 'restrict', 'return', 'short', 'signed', + 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', + 'unsigned', 'void', 'volatile', 'whie', '_Alignas', + '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128', + '_Decimal32', '_Decimal64', '_Generic', '_Imaginary', + '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray', + 'array_create', 'new_slice', 'array_slicing', 'alias_assign', + 'transpose_alias_assign', 'array_fill', 't_slice', + 'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2', + 'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5', + 'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8', + 'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11', + 'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14', + 'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS', + 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', + 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', + 'get_index', 'numpy_to_ndarray_strides', + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + + def has_clash(self, name, symbols): + """ + Indicate whether the proposed name causes any clashes. + + Checks if a suggested name conflicts with predefined + keywords or specified symbols,returning true for a clash. + This method is crucial for maintaining namespace integrity and + preventing naming conflicts in code generation processes. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + bool + True if the name is a collision. + False if the name is collision free. 
+ """ + return any(name == k for k in self.keywords) or \ + any(name == s for s in symbols) + + def get_collisionless_name(self, name, symbols): + """ + Get a valid name which doesn't collision with symbols or Cuda keywords. + + Find a new name based on the suggested name which will not cause + conflicts with Cuda keywords, does not appear in the provided symbols, + and is a valid name in Cuda code. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + str + A new name which is collision free. + """ + if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)): + # Ignore magic methods + return name + if name[0] == '_': + name = 'private'+name + return self._get_collisionless_name(name, symbols) diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c index 7c9ecbbf6b..bc56214772 100644 --- a/pyccel/stdlib/numpy/numpy_c.c +++ b/pyccel/stdlib/numpy/numpy_c.c @@ -17,8 +17,10 @@ double fsign(double x) return SIGN(x); } +#ifndef __NVCC__ /* numpy.sign for complex */ double complex csign(double complex x) { return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? 
-1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..a5082ef6e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) 
test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def 
test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) 
test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) 
test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = 
epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() From bd7351493e3ae2c0947b1d2fb92605360db4de08 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 15 May 2024 12:58:50 +0100 Subject: [PATCH 005/150] Fix import handling (#49) This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'. **Commit Summary** - Implemented new header printer for CUDA. - Added CUDA wrapper assignment - Instead of wrapping all local headers, wrap only C functions with extern 'C' --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- AUTHORS | 1 + CHANGELOG.md | 3 +- pyccel/codegen/printing/cucode.py | 45 ++++++++---- pyccel/codegen/python_wrapper.py | 4 ++ pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++ tests/epyccel/modules/cuda_module.py | 13 ++++ tests/epyccel/test_epyccel_modules.py | 13 ++++ 7 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py create mode 100644 tests/epyccel/modules/cuda_module.py diff --git a/AUTHORS b/AUTHORS index 6c30ce5830..3dbaa2f249 100644 --- a/AUTHORS +++ b/AUTHORS @@ -31,3 +31,4 @@ Contributors * Farouk Ech-Charef * Mustapha Belbiad * Varadarajan Rengaraj +* Said Mazouz diff --git a/CHANGELOG.md b/CHANGELOG.md index ce9212abc6..1d99c60127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ All notable changes to this project will be documented in this 
file. ### Added -- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. +- #48 : Fix incorrect handling of imports in `cuda`. ## \[UNRELEASED\] diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 86146b065b..277d2a3a6a 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -52,19 +52,7 @@ def _print_Module(self, expr): # Print imports last to be sure that all additional_imports have been collected imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] - c_headers_imports = '' - local_imports = '' - - for imp in imports: - if imp.source in c_library_headers: - c_headers_imports += self._print(imp) - else: - local_imports += self._print(imp) - - imports = f'{c_headers_imports}\ - extern "C"{{\n\ - {local_imports}\ - }}' + imports = ''.join(self._print(i) for i in imports) code = f'{imports}\n\ {global_variables}\n\ @@ -72,3 +60,34 @@ def _print_Module(self, expr): self.exit_scope() return code + + def _print_ModuleHeader(self, expr): + self.set_scope(expr.module.scope) + self._in_header = True + name = expr.module.name + + funcs = "" + cuda_headers = "" + for f in expr.module.funcs: + if not f.is_inline: + if 'kernel' in f.decorators: # Checking for 'kernel' decorator + cuda_headers += self.function_signature(f) + ';\n' + else: + funcs += self.function_signature(f) + ';\n' + global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private) + # Print imports last to be sure that all additional_imports have been collected + imports = [*expr.module.imports, *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + self._in_header = False + self.exit_scope() + function_declaration = f'{cuda_headers}\n\ + extern "C"{{\n\ + {funcs}\ + }}\n' + return '\n'.join((f"#ifndef 
{name.upper()}_H", + f"#define {name.upper()}_H", + global_variables, + function_declaration, + "#endif // {name.upper()}_H\n")) + diff --git a/pyccel/codegen/python_wrapper.py b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. 
+""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. + this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. + """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable. which must be printed in + the wrapping module to expose the variable. 
+ """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) From 261c152638e54caae3966e54985725a7fca505ba Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Thu, 27 Jun 2024 20:31:46 +0100 Subject: [PATCH 006/150] Add support for kernels (#42) This pull request addresses issue #28 by implementing a new feature in Pyccel that allows users to define custom GPU kernels. The syntax for creating these kernels is inspired by Numba. 
and I also need to fix issue #45 for testing purposes **Commit Summary** - Introduced KernelCall class - Added cuda printer methods _print_KernelCall and _print_FunctionDef to generate the corresponding CUDA representation for both kernel calls and definitions - Added IndexedFunctionCall represents an indexed function call - Added CUDA module and cuda.synchronize() - Fixing a bug that I found in the header: it does not import the necessary header for the used function --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> Co-authored-by: Emily Bourne --- .dict_custom.txt | 1 + CHANGELOG.md | 2 + docs/cuda.md | 23 +++ pyccel/ast/core.py | 37 ++++ pyccel/ast/cuda.py | 65 +++++++ pyccel/ast/cudaext.py | 42 +++++ pyccel/ast/utilities.py | 4 +- pyccel/codegen/printing/cucode.py | 46 ++++- pyccel/cuda/__init__.py | 10 + pyccel/cuda/cuda_sync_primitives.py | 16 ++ pyccel/decorators.py | 32 ++++ pyccel/errors/messages.py | 8 + pyccel/parser/semantic.py | 84 ++++++++- pyccel/parser/syntactic.py | 4 + tests/conftest.py | 9 + tests/cuda/test_kernel_semantic.py | 176 ++++++++++++++++++ tests/pyccel/scripts/kernel/hello_kernel.py | 19 ++ .../scripts/kernel/kernel_name_collision.py | 8 + tests/pyccel/test_pyccel.py | 22 ++- 19 files changed, 599 insertions(+), 9 deletions(-) create mode 100644 docs/cuda.md create mode 100644 pyccel/ast/cuda.py create mode 100644 pyccel/ast/cudaext.py create mode 100644 pyccel/cuda/__init__.py create mode 100644 pyccel/cuda/cuda_sync_primitives.py create mode 100644 tests/cuda/test_kernel_semantic.py create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py diff --git a/.dict_custom.txt b/.dict_custom.txt index ae99f31ed4..5d99e21194 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -118,3 +118,4 @@ datatyping datatypes indexable traceback +GPUs diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d99c60127..7c1dcffc55 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #42 : Add support for custom kernel in`cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..de30d52b80 --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,23 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. + +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` \ No newline at end of file diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 013f206dd6..f0e5cc67f1 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. 
These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..b540f20993 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. +""" +from .internals import PyccelFunction + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef + +__all__ = ( + 'CudaSynchronize', +) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 277d2a3a6a..cd26843017 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -9,11 +9,12 @@ enabling the direct translation of high-level Pyccel expressions into CUDA code. """ -from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers +from pyccel.codegen.printing.ccode import CCodePrinter -from pyccel.ast.core import Import, Module +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil -from pyccel.errors.errors import Errors +from pyccel.errors.errors import Errors errors = Errors() @@ -61,6 +62,44 @@ def _print_Module(self, expr): self.exit_scope() return code + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. 
+ + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. + """ + cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorater} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr): }}\n' return '\n'.join((f"#ifndef {name.upper()}_H", f"#define {name.upper()}_H", + imports, global_variables, function_declaration, "#endif // {name.upper()}_H\n")) diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..e8542ad5d5 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. 
+""" +from .cuda_sync_primitives import synchronize + +__all__ = ['synchronize'] diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..77717a991f 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -19,6 +19,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +110,34 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. 
+ """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..09966d810c 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' + + + + diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index e94b9c8413..fde10d6317 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,9 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) from pyccel.parser.base import BasicParser from 
pyccel.parser.syntactic import SyntaxParser @@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. + """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + 
errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. @@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 2967f4999b..3af7f0728a 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/tests/conftest.py b/tests/conftest.py index 
a5082ef6e8..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + 
threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert 
errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + 
its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- /dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..b4757a3c31 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,19 @@ def 
test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From 80f905bed09e70bcbded0c27e0632b50fc1e1a06 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 2 Jul 2024 15:37:10 +0100 Subject: [PATCH 007/150] Begin implementation of CUDA arrays: adding cudaempty and cudafull functions, and refining CUDA type handling --- pyccel/ast/cudaext.py | 100 ++++++++++++++++++++++++++++++++++++++ pyccel/ast/cudatypes.py | 34 +++++++++++++ pyccel/ast/numpyext.py | 9 +++- pyccel/ast/test.cu | 46 ++++++++++++++++++ pyccel/ast/test.py | 4 ++ pyccel/parser/semantic.py | 5 +- 6 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 pyccel/ast/cudatypes.py create mode 100644 pyccel/ast/test.cu create mode 100644 pyccel/ast/test.py diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index b540f20993..07ffb4d5e0 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -12,11 +12,109 @@ from .datatypes import VoidType from .core import Module, PyccelFunctionDef +from .internals import PyccelFunction +from .internals import LiteralInteger +from .numpyext import process_dtype, process_shape , DtypePrecisionToCastFunction +from .numpytypes import NumpyNDArrayType + + __all__ = ( 'CudaSynchronize', + 'CudaNewarray' ) +class CudaNewarray(PyccelFunction): + 
""" + superclass for nodes representing Cuda array allocation functions. + + Class from which all nodes representing a Cuda function which implies a call + to `Allocate` should inherit. + + Parameters + + class_type : NumpyNDArrayType + The type of the new array. + + init_dtype : PythonType, PyccelFunctionDef, LiteralString, str + The actual dtype passed to the Cuda function. + + memory_location : str + The memory location of the new array ('host' or 'device'). + """ + __slots__ = ('class_type', 'init_dtype', 'memory_location') + + def __init__(self, class_type, init_dtype, memory_location): + self.class_type = class_type + self.init_dtype = init_dtype + self.memory_location = memory_location + + super().__init__() + @staticmethod + def _process_order(rank, order): + + if rank < 2: + return None + order = str(order).strip('\'"') + assert order in ('C', 'F') + return order + +class CudaFull(CudaNewarray): + + __slots__ = ('_fill_value','_shape') + + def __init__(self, shape, fill_value, dtype='float', order='C'): + shape = process_shape(False, shape) + init_dtype = dtype + if(dtype is None): + dtype = fill_value.dtype + + dtype = process_dtype(dtype) + + # if fill_value and fill_value.dtype != dtype: + # cast_func = DtypePrecisionToCastFunction[dtype] + # fill_value = cast_func(fill_value) + self.shape = shape + rank = len(shape) + order = CudaNewarray._process_order(rank, order) + class_type = NumpyNDArrayType(dtype, shape, order) + + super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype) + + +class CudaAutoFill(CudaFull): + """ Abstract class for all classes which inherit from NumpyFull but + the fill_value is implicitly specified + """ + __slots__ = () + def __init__(self, shape, dtype='float', order='C'): + if not dtype: + raise TypeError("Data type must be provided") + super().__init__(shape, None, dtype, order) + +class CudaEmpty(CudaNewarray): + """ + Represents a call to Cuda.host_empty for code generation. 
+ + A class representing a call to the Cuda `host_empty` function. + + Parameters + ---------- + shape : tuple of int , int + The shape of the new array. + + dtype : PythonType, LiteralString, str + The actual dtype passed to the NumPy function. + + order : str , LiteralString + The order passed to the function defoulting to 'C'. + """ + __slots__ = () + + def __init__(self, shape, dtype='float', order='C'): + super().__init__(shape, dtype, order) + + class CudaSynchronize(PyccelFunction): """ Represents a call to Cuda.synchronize for code generation. @@ -32,6 +130,8 @@ def __init__(self): cuda_funcs = { 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), + 'full' : PyccelFunctionDef('full' , CudaFull), + 'empty' : PyccelFunctionDef('empty' , CudaEmpty), } cuda_mod = Module('cuda', diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py new file mode 100644 index 0000000000..51ed80299a --- /dev/null +++ b/pyccel/ast/cudatypes.py @@ -0,0 +1,34 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/devel/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" Module containing types from the numpy module understood by pyccel +""" + +class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): + """ + Class representing the Cuda array type. + + Class representing the Cuda array type + + dtype : NumpyNumericType | PythonNativeBool | GenericType + The internal datatype of the object (GenericType is allowed for external + libraries, e.g. MPI). + rank : int + The rank of the new NumPy array. + order : str + The order of the memory layout for the new NumPy array. + memory_location : str + The memory location of the new cuda array. 
+ """ + __slots__ = ('_dtype', '_rank', '_order', '_memory_location') + + def __new__(cls, dtype, rank, order): + if rank == 0: + return dtype + else: + return super().__new__(cls, dtype, rank, order) + + diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py index 861b640282..766c581e20 100644 --- a/pyccel/ast/numpyext.py +++ b/pyccel/ast/numpyext.py @@ -620,6 +620,7 @@ class NumpyNewArray(PyccelFunction): init_dtype : PythonType, PyccelFunctionDef, LiteralString, str The actual dtype passed to the NumPy function. """ + print(123) __slots__ = ('_init_dtype','_class_type') def __init__(self, *args, class_type, init_dtype = None): @@ -671,6 +672,7 @@ def _process_order(rank, order): #============================================================================== class NumpyArray(NumpyNewArray): + print(1234) """ Represents a call to `numpy.array` for code generation. @@ -1312,15 +1314,20 @@ class NumpyFull(NumpyNewArray): def __init__(self, shape, fill_value, dtype=None, order='C'): # Convert shape to PythonTuple + print(shape) + print(type(shape)) shape = process_shape(False, shape) + print(shape) + print(type(shape)) init_dtype = dtype # If there is no dtype, extract it from fill_value # TODO: must get dtype from an annotated node if dtype is None: dtype = fill_value.dtype + dtype = process_dtype(dtype) - + # Cast fill_value to correct type if fill_value: if fill_value.dtype != dtype: diff --git a/pyccel/ast/test.cu b/pyccel/ast/test.cu new file mode 100644 index 0000000000..5938aa2d6d --- /dev/null +++ b/pyccel/ast/test.cu @@ -0,0 +1,46 @@ +#include +#include + +__global__ void add(int *a, int *b, int *c, int n) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + if (index < n) { + c[index] = a[index] + b[index]; + } +} + +int main() { + int n = 512; + int size = n * sizeof(int); + int *a, *b, *c; + + // Allocate unified memory - accessible from CPU or GPU + cudaMallocManaged(&a, size); + cudaMallocManaged(&b, size); + cudaMallocManaged(&c, size); + + 
// Initialize arrays on the host (CPU) + for (int i = 0; i < n; i++) { + a[i] = i; + b[i] = i * 2; + } + + // Launch kernel with n threads + int blockSize = 256; + int numBlocks = (n + blockSize - 1) / blockSize; + add<<>>(a, b, c, n); + + // Wait for GPU to finish before accessing on host + cudaDeviceSynchronize(); + + // Verify the result + for (int i = 0; i < n; i++) { + if (c[i] != a[i] + b[i]) { + std::cerr << "Error at index " << i << ": " << c[i] << " != " << a[i] + b[i] << std::endl + << std::endl; + return 1; + } + } + + std::cout << "Success!" << std::endl; + return 0; +} diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py new file mode 100644 index 0000000000..c12492de06 --- /dev/null +++ b/pyccel/ast/test.py @@ -0,0 +1,4 @@ +import numpy as np + +a = np.full((2, 3), 1, device = 'cpu') +print(a) \ No newline at end of file diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index fde10d6317..f8cc4035d9 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -3127,7 +3127,7 @@ def _visit_Assign(self, expr): elif isinstance(rhs, CodeBlock) and len(rhs.body)>1 and isinstance(rhs.body[1], FunctionalFor): return rhs - + elif isinstance(rhs, FunctionCall): func = rhs.funcdef results = func.results @@ -3160,7 +3160,7 @@ def _visit_Assign(self, expr): d_var['memory_handling'] = arg.memory_handling d_var['class_type' ] = arg.class_type d_var['cls_base' ] = arg.cls_base - + elif isinstance(rhs, NumpyTranspose): d_var = self._infer_type(rhs) if d_var['memory_handling'] == 'alias' and not isinstance(lhs, IndexedElement): @@ -3169,6 +3169,7 @@ def _visit_Assign(self, expr): if expr.lhs.is_temp: return rhs else: + raise NotImplementedError("Cannot assign result of a function without a return") else: From 7e8cf9e0879ddd61b45943f2afb265b693b341d2 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 3 Jul 2024 09:51:26 +0100 Subject: [PATCH 008/150] work in progress --- pyccel/ast/cudatypes.py | 13 ++++++++++++- pyccel/ast/numpyext.py | 2 
-- pyccel/errors/errors.py | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 51ed80299a..822c9921b2 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -30,5 +30,16 @@ def __new__(cls, dtype, rank, order): return dtype else: return super().__new__(cls, dtype, rank, order) + def __init__(self, dtype, rank, order, memory_location): + assert isinstance(rank, int) + assert order in (None, 'C', 'F') + + self._dtype = dtype + self._rank = rank + self._order = order + self._memory_location = memory_location + super().__init__() - + @lru_cache + def __add__(self, other) + \ No newline at end of file diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py index 766c581e20..5c6067bb39 100644 --- a/pyccel/ast/numpyext.py +++ b/pyccel/ast/numpyext.py @@ -620,7 +620,6 @@ class NumpyNewArray(PyccelFunction): init_dtype : PythonType, PyccelFunctionDef, LiteralString, str The actual dtype passed to the NumPy function. """ - print(123) __slots__ = ('_init_dtype','_class_type') def __init__(self, *args, class_type, init_dtype = None): @@ -672,7 +671,6 @@ def _process_order(rank, order): #============================================================================== class NumpyArray(NumpyNewArray): - print(1234) """ Represents a call to `numpy.array` for code generation. 
diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py index b261a81830..f4172820db 100644 --- a/pyccel/errors/errors.py +++ b/pyccel/errors/errors.py @@ -345,7 +345,7 @@ def report(self, traceback = ''.join(tb.format_stack(limit=5)) else: traceback = None - + print(pyccel_stage.current_stage) info = ErrorInfo(stage=pyccel_stage.current_stage, filename=filename, message=message, From 2dbcfaeead521e24f8bca0b0e71c42e1afa3a9df Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 3 Jul 2024 15:47:18 +0100 Subject: [PATCH 009/150] work in progress --- pyccel/ast/cudaext.py | 47 +++++++++++---------- pyccel/ast/cudatypes.py | 89 +++++++++++++++++++++++++++++++++++----- pyccel/ast/numpyext.py | 11 ++--- pyccel/ast/numpytypes.py | 2 + pyccel/ast/test.py | 24 +++++++++-- 5 files changed, 130 insertions(+), 43 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 07ffb4d5e0..4a534f35b1 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -9,13 +9,14 @@ Provides CUDA functionality for code generation. """ from .internals import PyccelFunction +from .literals import Nil from .datatypes import VoidType from .core import Module, PyccelFunctionDef from .internals import PyccelFunction from .internals import LiteralInteger from .numpyext import process_dtype, process_shape , DtypePrecisionToCastFunction -from .numpytypes import NumpyNDArrayType +from .cudatypes import CudaArrayType @@ -42,14 +43,14 @@ class CudaNewarray(PyccelFunction): memory_location : str The memory location of the new array ('host' or 'device'). 
""" - __slots__ = ('class_type', 'init_dtype', 'memory_location') + __slots__ = ('_class_type', '_init_dtype', '_memory_location') - def __init__(self, class_type, init_dtype, memory_location): - self.class_type = class_type - self.init_dtype = init_dtype - self.memory_location = memory_location + def __init__(self, *arg,class_type, init_dtype, memory_location): + self._class_type = class_type + self._init_dtype = init_dtype + self._memory_location = memory_location - super().__init__() + super().__init__(*arg) @staticmethod def _process_order(rank, order): @@ -62,6 +63,7 @@ def _process_order(rank, order): class CudaFull(CudaNewarray): __slots__ = ('_fill_value','_shape') + name = 'full' def __init__(self, shape, fill_value, dtype='float', order='C'): shape = process_shape(False, shape) @@ -71,15 +73,11 @@ def __init__(self, shape, fill_value, dtype='float', order='C'): dtype = process_dtype(dtype) - # if fill_value and fill_value.dtype != dtype: - # cast_func = DtypePrecisionToCastFunction[dtype] - # fill_value = cast_func(fill_value) - self.shape = shape - rank = len(shape) + self._shape = shape + rank = len(self._shape) order = CudaNewarray._process_order(rank, order) - class_type = NumpyNDArrayType(dtype, shape, order) - - super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype) + class_type = CudaArrayType(dtype, rank, order, 'device') + super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device') class CudaAutoFill(CudaFull): @@ -88,11 +86,9 @@ class CudaAutoFill(CudaFull): """ __slots__ = () def __init__(self, shape, dtype='float', order='C'): - if not dtype: - raise TypeError("Data type must be provided") - super().__init__(shape, None, dtype, order) + super().__init__(shape, Nil(), dtype, order) -class CudaEmpty(CudaNewarray): +class CudaEmpty(CudaAutoFill): """ Represents a call to Cuda.host_empty for code generation. 
@@ -109,10 +105,19 @@ class CudaEmpty(CudaNewarray): order : str , LiteralString The order passed to the function defoulting to 'C'. """ - __slots__ = () - + __slots__ = ('_shape', '_dtype', '_order') + name = 'empty' def __init__(self, shape, dtype='float', order='C'): super().__init__(shape, dtype, order) + + @property + def fill_value(self): + """ + The value with which the array will be filled on initialisation. + + The value with which the array will be filled on initialisation. + """ + return None class CudaSynchronize(PyccelFunction): diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 822c9921b2..07a23ded1b 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -6,13 +6,20 @@ #------------------------------------------------------------------------------------------# """ Module containing types from the numpy module understood by pyccel """ +from functools import lru_cache +import numpy as np + +from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool +from pyccel.utilities.metaclasses import ArgumentSingleton +from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type + class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): """ Class representing the Cuda array type. - + Class representing the Cuda array type - + dtype : NumpyNumericType | PythonNativeBool | GenericType The internal datatype of the object (GenericType is allowed for external libraries, e.g. MPI). @@ -24,12 +31,12 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): The memory location of the new cuda array. 
""" __slots__ = ('_dtype', '_rank', '_order', '_memory_location') - - def __new__(cls, dtype, rank, order): - if rank == 0: - return dtype - else: - return super().__new__(cls, dtype, rank, order) + + # def __new__(cls, dtype, rank, order, memory_location): + # if rank == 0: + # return dtype + # else: + # return super().__new__(cls, dtype, rank, order) def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) assert order in (None, 'C', 'F') @@ -39,7 +46,67 @@ def __init__(self, dtype, rank, order, memory_location): self._order = order self._memory_location = memory_location super().__init__() - + + @lru_cache + def __add__(self, other): + test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) + if isinstance(other, FixedSizeNumericType): + comparison_type = pyccel_type_to_original_type[other]() + elif isinstance(other, CudaArrayType): + comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type]) + else: + return NotImplemented + # Todo need to check for memory location as well + result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type] + rank = max(other.rank, self.rank) + if rank < 2: + order = None + else: + other_f_contiguous = other.order in (None, 'F') + self_f_contiguous = self.order in (None, 'F') + order = 'F' if other_f_contiguous and self_f_contiguous else 'C' + return CudaArrayType(result_type, rank, order, self.memory_location) + @lru_cache - def __add__(self, other) - \ No newline at end of file + def __radd__(self, other): + return self.__add__(other) + + @lru_cache + def __and__(self, other): + elem_type = self.element_type + if isinstance(other, FixedSizeNumericType): + return CudaArrayType(elem_type and other) + elif isinstance(other, CudaArrayType): + return CudaArrayType(elem_type+other.element_type) + else: + return NotImplemented + + @lru_cache + def __rand__(self, other): + return self.__and__(other) + + @property + def 
rank(self): + """ + Number of dimensions of the object. + + Number of dimensions of the object. If the object is a scalar then + this is equal to 0. + """ + return self._container_rank + + @property + def order(self): + """ + The data layout ordering in memory. + + Indicates whether the data is stored in row-major ('C') or column-major + ('F') format. This is only relevant if rank > 1. When it is not relevant + this function returns None. + """ + return self._order + + def __repr__(self): + dims = ','.join(':'*self._container_rank) + order_str = f'(order={self._order})' if self._order else '' + return f'{self.element_type}[{dims}]{order_str}' diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py index 5c6067bb39..41273f75f7 100644 --- a/pyccel/ast/numpyext.py +++ b/pyccel/ast/numpyext.py @@ -626,7 +626,7 @@ def __init__(self, *args, class_type, init_dtype = None): assert isinstance(class_type, NumpyNDArrayType) self._init_dtype = init_dtype self._class_type = class_type # pylint: disable=no-member - + print(*args) super().__init__(*args) @property @@ -1312,20 +1312,15 @@ class NumpyFull(NumpyNewArray): def __init__(self, shape, fill_value, dtype=None, order='C'): # Convert shape to PythonTuple - print(shape) - print(type(shape)) shape = process_shape(False, shape) - print(shape) - print(type(shape)) - init_dtype = dtype # If there is no dtype, extract it from fill_value # TODO: must get dtype from an annotated node if dtype is None: dtype = fill_value.dtype - + dtype = process_dtype(dtype) - + # Cast fill_value to correct type if fill_value: if fill_value.dtype != dtype: diff --git a/pyccel/ast/numpytypes.py b/pyccel/ast/numpytypes.py index 8bc1df828e..1d56ce14e9 100644 --- a/pyccel/ast/numpytypes.py +++ b/pyccel/ast/numpytypes.py @@ -282,6 +282,7 @@ def __new__(cls, dtype, rank, order): return super().__new__(cls) def __init__(self, dtype, rank, order): + # print("reank", rank) assert isinstance(rank, int) assert order in (None, 'C', 'F') assert rank < 2 or 
order is not None @@ -319,6 +320,7 @@ def __radd__(self, other): @lru_cache def __and__(self, other): + print("jouj draham diyali aba jalal") elem_type = self.element_type if isinstance(other, FixedSizeNumericType): return NumpyNDArrayType(elem_type and other) diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py index c12492de06..96b228ba64 100644 --- a/pyccel/ast/test.py +++ b/pyccel/ast/test.py @@ -1,4 +1,22 @@ -import numpy as np +from pyccel.decorators import device , kernel +from pyccel import cuda -a = np.full((2, 3), 1, device = 'cpu') -print(a) \ No newline at end of file +@device +def device_call_2(): + + +@device +def device_call(): + device_call_2() + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() \ No newline at end of file From f3911d53dc0cbc98a709a136f8ac153f3a2ff76e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 3 Jul 2024 22:23:22 +0100 Subject: [PATCH 010/150] work in progress --- pyccel/ast/class_defs.py | 12 +++++++++++- pyccel/ast/cudaext.py | 19 ++++++++++++++++--- pyccel/ast/cudatypes.py | 6 +++--- pyccel/codegen/compiling/compilers.py | 6 +++--- pyccel/codegen/pipeline.py | 2 +- pyccel/codegen/printing/ccode.py | 4 ++++ pyccel/codegen/printing/cucode.py | 3 +++ pyccel/errors/errors.py | 1 - 8 files changed, 41 insertions(+), 12 deletions(-) diff --git a/pyccel/ast/class_defs.py b/pyccel/ast/class_defs.py index a0c414ae38..9d8065329d 100644 --- a/pyccel/ast/class_defs.py +++ b/pyccel/ast/class_defs.py @@ -20,9 +20,11 @@ NumpyImag, NumpyReal, NumpyTranspose, NumpyConjugate, NumpySize, NumpyResultType, NumpyArray) from .numpytypes import NumpyNumericType, NumpyNDArrayType +from .cudatypes import CudaArrayType __all__ = ( 'BooleanClass', + 'CudaArrayClass', 'IntegerClass', 'FloatClass', 'ComplexClass', @@ -34,6 +36,7 @@ 'literal_classes', 'get_cls_base', ) 
+#======================================================================================= #======================================================================================= @@ -171,7 +174,9 @@ #index #count ]) - +CudaArrayClass = ClassDef('cuda.array', + methods=[] + ) #======================================================================================= NumpyArrayClass = ClassDef('numpy.ndarray', @@ -246,8 +251,13 @@ def get_cls_base(class_type): return None elif class_type in literal_classes: return literal_classes[class_type] + elif isinstance(class_type, CudaArrayType): + return CudaArrayClass elif isinstance(class_type, (NumpyNumericType, NumpyNDArrayType)): return NumpyArrayClass + + + # elif isinstance(class_type, StackArrayType): elif isinstance(class_type, TupleType): return TupleClass elif isinstance(class_type, HomogeneousListType): diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 4a534f35b1..e107b6fe6f 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -23,6 +23,8 @@ __all__ = ( 'CudaSynchronize', 'CudaNewarray' + 'CudaFull' + 'CudaEmpty' ) class CudaNewarray(PyccelFunction): @@ -45,6 +47,16 @@ class CudaNewarray(PyccelFunction): """ __slots__ = ('_class_type', '_init_dtype', '_memory_location') + property + def init_dtype(self): + """ + The dtype provided to the function when it was initialised in Python. + + The dtype provided to the function when it was initialised in Python. + If no dtype was provided then this should equal `None`. 
+ """ + return self._init_dtype + def __init__(self, *arg,class_type, init_dtype, memory_location): self._class_type = class_type self._init_dtype = init_dtype @@ -78,7 +90,9 @@ def __init__(self, shape, fill_value, dtype='float', order='C'): order = CudaNewarray._process_order(rank, order) class_type = CudaArrayType(dtype, rank, order, 'device') super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device') - + @property + def fill_value(self): + return self._args[0] class CudaAutoFill(CudaFull): """ Abstract class for all classes which inherit from NumpyFull but @@ -105,7 +119,7 @@ class CudaEmpty(CudaAutoFill): order : str , LiteralString The order passed to the function defoulting to 'C'. """ - __slots__ = ('_shape', '_dtype', '_order') + __slots__ = () name = 'empty' def __init__(self, shape, dtype='float', order='C'): super().__init__(shape, dtype, order) @@ -119,7 +133,6 @@ def fill_value(self): """ return None - class CudaSynchronize(PyccelFunction): """ Represents a call to Cuda.synchronize for code generation. diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 07a23ded1b..a513f7664e 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -30,7 +30,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): memory_location : str The memory location of the new cuda array. 
""" - __slots__ = ('_dtype', '_rank', '_order', '_memory_location') + __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') # def __new__(cls, dtype, rank, order, memory_location): # if rank == 0: @@ -41,8 +41,8 @@ def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) assert order in (None, 'C', 'F') - self._dtype = dtype - self._rank = rank + self._element_type = dtype + self._container_rank = rank self._order = order self._memory_location = memory_location super().__init__() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index ef11579e49..9a50b42066 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -136,9 +136,9 @@ def _get_exec(self, accelerators): # Reset PATH variable os.environ['PATH'] = current_path - if exec_loc is None: - errors.report(f"Could not find compiler ({exec_cmd})", - severity='fatal') + + errors.report(f"Could not find compiler ({exec_cmd})", + severity='fatal') return exec_loc diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 1e9d0e327d..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -389,7 +389,7 @@ def get_module_dependencies(parser, deps): output_folder=pyccel_dirpath, verbose=verbose) except Exception: - handle_error('Fortran compilation') + handle_error('compilation') raise diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index a39a442a83..07e592745c 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -45,6 +45,7 @@ from pyccel.ast.numpytypes import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map +from pyccel.ast.cudatypes import CudaArrayType from pyccel.ast.utilities 
import expand_to_loops @@ -1311,6 +1312,9 @@ def get_declare_type(self, expr): errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' + elif isinstance(expr.class_type, CudaArrayType): + dtype = 't_cuda' + else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') elif not isinstance(class_type, CustomDataType): diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..168c89c6d4 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -100,6 +100,9 @@ def _print_KernelCall(self, expr): def _print_CudaSynchronize(self, expr): return 'cudaDeviceSynchronize();\n' + def _print_CudaEmpty(self, expr): + print(expr) + return 'cudaDeviceSynchronize();\n' def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py index f4172820db..96910b3dfa 100644 --- a/pyccel/errors/errors.py +++ b/pyccel/errors/errors.py @@ -345,7 +345,6 @@ def report(self, traceback = ''.join(tb.format_stack(limit=5)) else: traceback = None - print(pyccel_stage.current_stage) info = ErrorInfo(stage=pyccel_stage.current_stage, filename=filename, message=message, From 37289f9a87cdafd6f52753c3d9821124c798f828 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 8 Jul 2024 14:16:40 +0100 Subject: [PATCH 011/150] work in progress --- pyccel/codegen/compiling/compilers.py | 4 ++-- pyccel/codegen/printing/cucode.py | 22 ++++++++++++++++++++ pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 22 ++++++++++++++++++++ pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 10 +++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu create mode 100644 pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index 
9a50b42066..d99ad02bbd 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -137,8 +137,8 @@ def _get_exec(self, accelerators): os.environ['PATH'] = current_path - errors.report(f"Could not find compiler ({exec_cmd})", - severity='fatal') + # errors.report(f"Could not find compiler ({exec_cmd})", + # severity='fatal') return exec_loc diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 168c89c6d4..a5a4027937 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -15,12 +15,17 @@ from pyccel.ast.literals import Nil from pyccel.errors.errors import Errors +from pyccel.ast.core import Allocate, Deallocate + errors = Errors() __all__ = ["CudaCodePrinter"] +c_imports = {n : Import(n, Module(n, (), ())) for n in + ['cuda_ndarrays',]} + class CudaCodePrinter(CCodePrinter): """ Print code in CUDA format. @@ -133,4 +138,21 @@ def _print_ModuleHeader(self, expr): global_variables, function_declaration, "#endif // {name.upper()}_H\n")) + def _print_Allocate(self, expr): + self.add_import('cuda_ndarrays') + free_code = '' + + + #free the array if its already allocated and checking if its not null if the status is unknown + # if (expr.status == 'unknown'): + # free_code = 'if (%s.shape != NULL)\n' % self._print(expr.variable.name) + # free_code += "{{\n{}}}\n".format(self._print(Deallocate(expr.variable))) + # elif (expr.status == 'allocated'): + # free_code += self._print(Deallocate(expr.variable)) + + alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n" + return f'{alloc_code}' + # print(shape) + + # return "hjsjkahsjkajskasjkasj" diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu new file mode 100644 index 0000000000..cb97ef759f --- /dev/null +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -0,0 +1,22 @@ +#include "cuda_ndarrays.h" + +void *cuda_array_create(int shape[]) +{ + size_t i = 
0; + size_t alloc_size = 1; + + while (shape[i] != 0) + { + alloc_size *= shape[i]; + i++; + } + + void *array_ptr = malloc(alloc_size); + if (array_ptr == NULL) + { + cout << "Error allocating memory" << endl; + return NULL; + } + + return array_ptr; +} \ No newline at end of file diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h new file mode 100644 index 0000000000..5b176390d6 --- /dev/null +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -0,0 +1,10 @@ +#ifndef CUDA_NDARRAYS_H +# define CUDA_NDARRAYS_H + +# include +# include + +using namespace std; + + +#endif \ No newline at end of file From ba66b4834e72705b394f31ad994557ec771d1fca Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 8 Jul 2024 14:33:33 +0100 Subject: [PATCH 012/150] work in progress --- pyccel/codegen/printing/cucode.py | 13 ++----------- pyccel/codegen/utilities.py | 1 + 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index a5a4027937..1d35ddbca3 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -139,17 +139,8 @@ def _print_ModuleHeader(self, expr): function_declaration, "#endif // {name.upper()}_H\n")) def _print_Allocate(self, expr): - self.add_import('cuda_ndarrays') - free_code = '' - - - #free the array if its already allocated and checking if its not null if the status is unknown - # if (expr.status == 'unknown'): - # free_code = 'if (%s.shape != NULL)\n' % self._print(expr.variable.name) - # free_code += "{{\n{}}}\n".format(self._print(Deallocate(expr.variable))) - # elif (expr.status == 'allocated'): - # free_code += self._print(Deallocate(expr.variable)) - + + self.add_import(c_imports['cuda_ndarrays']) alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n" return f'{alloc_code}' # print(shape) diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 
ceffc483e3..7d81744fe9 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -36,6 +36,7 @@ # The compile object folder will be in the pyccel dirpath internal_libs = { "ndarrays" : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")), + "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="ndarrays")), "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")), "pyc_math_c" : ("math", CompileObj("pyc_math_c.c",folder="math")), "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))), From 406a88b965dd17f42e4886f5f61cfc5814246c7d Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 8 Jul 2024 15:11:12 +0100 Subject: [PATCH 013/150] work in progress --- pyccel/codegen/printing/ccode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 07e592745c..6ab084a989 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1313,7 +1313,7 @@ def get_declare_type(self, expr): self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' elif isinstance(expr.class_type, CudaArrayType): - dtype = 't_cuda' + dtype = 'int *' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') From 3afad1b06c0974453adc1d3aa9a5eccb3f62aa9f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 9 Jul 2024 15:39:25 +0100 Subject: [PATCH 014/150] work in progress --- pyccel/ast/variable.py | 42 +++++++- pyccel/codegen/printing/ccode.py | 3 +- pyccel/codegen/printing/cucode.py | 43 ++++++-- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 101 ++++++++++++++++--- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 1 + pyccel/stdlib/ndarrays/ndarrays.h | 7 ++ 6 files changed, 176 insertions(+), 21 deletions(-) diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index 051cf631b7..93f61f3698 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ 
-56,6 +56,11 @@ class Variable(TypedAstNode): 'stack' if memory should be allocated on the stack, represents stack arrays and scalars. 'alias' if object allows access to memory stored in another variable. + memory_location: str, default: 'host' + 'host' the variable can only be accessed by the CPU. + 'device' the variable can only be accessed by the GPU. + 'managed' the variable can be accessed by CPU and GPU and is being managed by the Cuda API (memory transfer is being done implicitly). + is_const : bool, default: False Indicates if object is a const argument of a function. @@ -98,7 +103,7 @@ class Variable(TypedAstNode): >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows')) matrix.n_rows """ - __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target', + __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target', '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp', '_shape','_is_private','_class_type') _attribute_nodes = () @@ -109,6 +114,7 @@ def __init__( name, *, memory_handling='stack', + memory_location='host', is_const=False, is_target=False, is_optional=False, @@ -141,6 +147,10 @@ def __init__( raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'") self._memory_handling = memory_handling + if memory_location not in ('host', 'device', 'managed'): + raise ValueError("memory_location must be 'host', 'device' or 'managed'") + self._memory_location = memory_location + if not isinstance(is_const, bool): raise TypeError('is_const must be a boolean.') self._is_const = is_const @@ -323,6 +333,36 @@ def cls_base(self): """ return self._cls_base + @property + def memory_location(self): + """ Indicates whether a Variable has a dynamic size + """ + return self._memory_location + + @memory_location.setter + def memory_location(self, memory_location): + if memory_location not in ('host', 'device', 'managed'): + raise ValueError("memory_location 
must be 'host', 'device' or 'managed'") + self._memory_location = memory_location + + @property + def on_host(self): + """ Indicates if memory is only accessible by the CPU + """ + return self.memory_location == 'host' + + @property + def on_device(self): + """ Indicates if memory is only accessible by the GPU + """ + return self.memory_location == 'device' + + @property + def is_managed(self): + """ Indicates if memory is being managed by CUDA API + """ + return self.memory_location == 'managed' + @property def is_const(self): """ diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 6ab084a989..12fd5dbdeb 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1313,7 +1313,8 @@ def get_declare_type(self, expr): self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' elif isinstance(expr.class_type, CudaArrayType): - dtype = 'int *' + self.add_import(c_imports['ndarrays']) + dtype = 't_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 1d35ddbca3..1c01f1d45d 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -16,6 +16,14 @@ from pyccel.errors.errors import Errors from pyccel.ast.core import Allocate, Deallocate +from pyccel.ast.numpytypes import NumpyInt64Type +from pyccel.ast.cudatypes import CudaArrayType +from pyccel.ast.datatypes import HomogeneousContainerType +from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map + + + + @@ -24,7 +32,9 @@ __all__ = ["CudaCodePrinter"] c_imports = {n : Import(n, Module(n, (), ())) for n in - ['cuda_ndarrays',]} + ['cuda_ndarrays', + 'ndarrays', + ]} class CudaCodePrinter(CCodePrinter): """ @@ -139,11 +149,32 @@ def _print_ModuleHeader(self, expr): function_declaration, "#endif // {name.upper()}_H\n")) def _print_Allocate(self, expr): - + variable = expr.variable + shape 
= ", ".join(self._print(i) for i in expr.shape) + if isinstance(variable.class_type, CudaArrayType): + dtype = self.find_in_ndarray_type_registry(variable.dtype) + elif isinstance(variable.class_type, HomogeneousContainerType): + dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)]) + else: + raise NotImplementedError(f"Don't know how to index {variable.class_type} type") + shape_dtype = self.get_c_type(NumpyInt64Type()) + shape_Assign = "("+ shape_dtype +"[]){" + shape + "}" + is_view = 'false' if variable.on_heap else 'true' + memory_location = expr.variable.memory_location + if memory_location in ('device', 'host'): + memory_location = 'allocateMemoryOn' + str(memory_location).capitalize() + else: + memory_location = 'managedMemory' self.add_import(c_imports['cuda_ndarrays']) - alloc_code = f"{self._print(expr.variable)} = cuda_array_create();\n" + self.add_import(c_imports['ndarrays']) + alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, {shape_Assign}, {dtype}, {is_view},{memory_location});\n" return f'{alloc_code}' - # print(shape) - - # return "hjsjkahsjkajskasjkasj" + + def _print_Deallocate(self, expr): + var_code = self._print(expr.variable) + + if expr.variable.memory_location == 'host': + return f"cuda_free_host({var_code});\n" + else: + return f"cuda_free({var_code});\n" diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index cb97ef759f..f74e8630f3 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -1,22 +1,97 @@ #include "cuda_ndarrays.h" -void *cuda_array_create(int shape[]) +void device_memory(void** devPtr, size_t size) { - size_t i = 0; - size_t alloc_size = 1; + cudaMalloc(devPtr, size); +} - while (shape[i] != 0) +void managed_memory(void** devPtr, size_t size) +{ + cudaMallocManaged(devPtr, size); +} + +void host_memory(void** 
devPtr, size_t size) +{ + cudaMallocHost(devPtr, size); +} +t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, + enum e_types type, bool is_view) +{ + t_ndarray arr; + void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory}; + + arr.nd = nd; + arr.type = type; + switch (type) { - alloc_size *= shape[i]; - i++; + case nd_int8: + arr.type_size = sizeof(int8_t); + break; + case nd_int16: + arr.type_size = sizeof(int16_t); + break; + case nd_int32: + arr.type_size = sizeof(int32_t); + break; + case nd_int64: + arr.type_size = sizeof(int64_t); + break; + case nd_float: + arr.type_size = sizeof(float); + break; + case nd_double: + arr.type_size = sizeof(double); + break; + case nd_bool: + arr.type_size = sizeof(bool); + break; } - - void *array_ptr = malloc(alloc_size); - if (array_ptr == NULL) + arr.is_view = is_view; + arr.length = 1; + arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t)); + for (int32_t i = 0; i < arr.nd; i++) { - cout << "Error allocating memory" << endl; - return NULL; + arr.length *= shape[i]; + arr.shape[i] = shape[i]; } + arr.buffer_size = arr.length * arr.type_size; - return array_ptr; -} \ No newline at end of file + if (!is_view) + (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size); + return (arr); +} + +int32_t cuda_free_host(t_ndarray arr) +{ + if (arr.shape == NULL) + return (0); + cudaFreeHost(arr.raw_data); + arr.raw_data = NULL; + cudaFree(arr.shape); + arr.shape = NULL; + cudaFree(arr.strides); + arr.strides = NULL; + return (1); +} + +__host__ __device__ +int32_t cuda_free(t_ndarray arr) +{ + if (arr.shape == NULL) + return (0); + cudaFree(arr.raw_data); + arr.raw_data = NULL; + cudaFree(arr.shape); + arr.shape = NULL; + return (0); +} + +__host__ __device__ +int32_t cuda_free_pointer(t_ndarray arr) +{ + if (arr.is_view == false || arr.shape == NULL) + return (0); + cudaFree(arr.shape); + arr.shape = NULL; + return (0); +} diff --git 
a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index 5b176390d6..9b665cc96a 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -3,6 +3,7 @@ # include # include +#include "../ndarrays/ndarrays.h" using namespace std; diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h index 11bbfbf455..082146d639 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.h +++ b/pyccel/stdlib/ndarrays/ndarrays.h @@ -80,6 +80,13 @@ typedef enum e_order order_c, } t_order; +enum e_memory_locations +{ + managedMemory, + allocateMemoryOnHost, + allocateMemoryOnDevice +}; + typedef struct s_ndarray { /* raw data buffer*/ From 190c5a29d5fbc075316d80d00b662036cc031e2e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 09:16:19 +0100 Subject: [PATCH 015/150] work in progress --- pyccel/codegen/printing/ccode.py | 5 +++++ pyccel/codegen/printing/cucode.py | 9 ++++----- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 5 +++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 12fd5dbdeb..26868d5522 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -46,6 +46,7 @@ from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map from pyccel.ast.cudatypes import CudaArrayType +from pyccel.ast.cudaext import CudaFull from pyccel.ast.utilities import expand_to_loops @@ -59,6 +60,7 @@ from pyccel.codegen.printing.codeprinter import CodePrinter + from pyccel.errors.errors import Errors from pyccel.errors.messages import (PYCCEL_RESTRICTION_TODO, INCOMPATIBLE_TYPEVAR_TO_FUNC, PYCCEL_RESTRICTION_IS_ISNOT, UNSUPPORTED_ARRAY_RANK) @@ -2181,6 +2183,9 @@ def _print_Assign(self, expr): # Inhomogenous tuples are unravelled and therefore do not 
exist in the c printer if isinstance(rhs, (NumpyArray, PythonTuple)): return prefix_code+self.copy_NumpyArray_Data(expr) + if(isinstance(rhs, (CudaFull))): + # TODO add support for CudaFull + return " \n" if isinstance(rhs, (NumpyFull)): return prefix_code+self.arrayFill(expr) lhs = self._print(expr.lhs) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 1c01f1d45d..a90b4513c3 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -116,8 +116,7 @@ def _print_CudaSynchronize(self, expr): return 'cudaDeviceSynchronize();\n' def _print_CudaEmpty(self, expr): - print(expr) - return 'cudaDeviceSynchronize();\n' + return 'cuda_array_create(1, (int64_t[]){INT64_C(10)}, nd_double, false,allocateMemoryOnHost);\n' def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -158,7 +157,7 @@ def _print_Allocate(self, expr): else: raise NotImplementedError(f"Don't know how to index {variable.class_type} type") shape_dtype = self.get_c_type(NumpyInt64Type()) - shape_Assign = "("+ shape_dtype +"[]){" + shape + "}" + shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n" is_view = 'false' if variable.on_heap else 'true' memory_location = expr.variable.memory_location if memory_location in ('device', 'host'): @@ -167,8 +166,8 @@ def _print_Allocate(self, expr): memory_location = 'managedMemory' self.add_import(c_imports['cuda_ndarrays']) self.add_import(c_imports['ndarrays']) - alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, {shape_Assign}, {dtype}, {is_view},{memory_location});\n" - return f'{alloc_code}' + alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign, {dtype}, {is_view},{memory_location});\n" + return f'{shape_Assign} {alloc_code}' def _print_Deallocate(self, expr): var_code = self._print(expr.variable) diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h 
b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index 9b665cc96a..fc571685f1 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -5,6 +5,11 @@ # include #include "../ndarrays/ndarrays.h" +t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +enum e_memory_locations location); +int32_t cuda_free_host(t_ndarray arr); + + using namespace std; From eeeb2492b498e42a5b131c13c932211ed114940d Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 09:25:24 +0100 Subject: [PATCH 016/150] cleaning up my PR --- pyccel/ast/test.cu | 46 ---------------------------------------------- pyccel/ast/test.py | 22 ---------------------- 2 files changed, 68 deletions(-) delete mode 100644 pyccel/ast/test.cu delete mode 100644 pyccel/ast/test.py diff --git a/pyccel/ast/test.cu b/pyccel/ast/test.cu deleted file mode 100644 index 5938aa2d6d..0000000000 --- a/pyccel/ast/test.cu +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include - -__global__ void add(int *a, int *b, int *c, int n) { - int index = threadIdx.x + blockIdx.x * blockDim.x; - if (index < n) { - c[index] = a[index] + b[index]; - } -} - -int main() { - int n = 512; - int size = n * sizeof(int); - int *a, *b, *c; - - // Allocate unified memory - accessible from CPU or GPU - cudaMallocManaged(&a, size); - cudaMallocManaged(&b, size); - cudaMallocManaged(&c, size); - - // Initialize arrays on the host (CPU) - for (int i = 0; i < n; i++) { - a[i] = i; - b[i] = i * 2; - } - - // Launch kernel with n threads - int blockSize = 256; - int numBlocks = (n + blockSize - 1) / blockSize; - add<<>>(a, b, c, n); - - // Wait for GPU to finish before accessing on host - cudaDeviceSynchronize(); - - // Verify the result - for (int i = 0; i < n; i++) { - if (c[i] != a[i] + b[i]) { - std::cerr << "Error at index " << i << ": " << c[i] << " != " << a[i] + b[i] << std::endl - << std::endl; - return 1; - } - } - - std::cout << "Success!" 
<< std::endl; - return 0; -} diff --git a/pyccel/ast/test.py b/pyccel/ast/test.py deleted file mode 100644 index 96b228ba64..0000000000 --- a/pyccel/ast/test.py +++ /dev/null @@ -1,22 +0,0 @@ -from pyccel.decorators import device , kernel -from pyccel import cuda - -@device -def device_call_2(): - - -@device -def device_call(): - device_call_2() - print("Hello from device") - -@kernel -def kernel_call(): - device_call() - -def f(): - kernel_call[1,1]() - cuda.synchronize() - -if __name__ == '__main__': - f() \ No newline at end of file From de0f5abdcfad9af3c94a3f4297930cad77a665e4 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 09:30:36 +0100 Subject: [PATCH 017/150] cleaning up my PR --- pyccel/codegen/compiling/compilers.py | 6 +++--- pyccel/codegen/pipeline.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index d99ad02bbd..ef11579e49 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -136,9 +136,9 @@ def _get_exec(self, accelerators): # Reset PATH variable os.environ['PATH'] = current_path - - # errors.report(f"Could not find compiler ({exec_cmd})", - # severity='fatal') + if exec_loc is None: + errors.report(f"Could not find compiler ({exec_cmd})", + severity='fatal') return exec_loc diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index eb357fab74..1e9d0e327d 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -389,7 +389,7 @@ def get_module_dependencies(parser, deps): output_folder=pyccel_dirpath, verbose=verbose) except Exception: - handle_error('compilation') + handle_error('Fortran compilation') raise From d6ba6ad77c071c21e588cbd4e686ce8135d21e9a Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 09:53:25 +0100 Subject: [PATCH 018/150] cleaning up my PR --- pyccel/codegen/printing/cucode.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index a90b4513c3..d911f7b727 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -115,8 +115,6 @@ def _print_KernelCall(self, expr): def _print_CudaSynchronize(self, expr): return 'cudaDeviceSynchronize();\n' - def _print_CudaEmpty(self, expr): - return 'cuda_array_create(1, (int64_t[]){INT64_C(10)}, nd_double, false,allocateMemoryOnHost);\n' def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True From 8286a8933b45e039b9b5aaa11e8777f0ed569d55 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 16:01:34 +0100 Subject: [PATCH 019/150] work in progress --- pyccel/ast/cudatypes.py | 5 -- pyccel/codegen/printing/ccode.py | 3 +- pyccel/codegen/printing/cucode.py | 1 - pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 10 ++-- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 57 ++++++++++++++++++-- 5 files changed, 60 insertions(+), 16 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index a513f7664e..5731aa6957 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -32,11 +32,6 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): """ __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') - # def __new__(cls, dtype, rank, order, memory_location): - # if rank == 0: - # return dtype - # else: - # return super().__new__(cls, dtype, rank, order) def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) assert order in (None, 'C', 'F') diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 26868d5522..d0620ccb84 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1315,8 +1315,7 @@ def get_declare_type(self, expr): self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' elif isinstance(expr.class_type, CudaArrayType): - 
self.add_import(c_imports['ndarrays']) - dtype = 't_ndarray' + dtype = 't_cuda_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index d911f7b727..6b7b47f1b6 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -163,7 +163,6 @@ def _print_Allocate(self, expr): else: memory_location = 'managedMemory' self.add_import(c_imports['cuda_ndarrays']) - self.add_import(c_imports['ndarrays']) alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign, {dtype}, {is_view},{memory_location});\n" return f'{shape_Assign} {alloc_code}' diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index f74e8630f3..d813540707 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -14,10 +14,10 @@ void host_memory(void** devPtr, size_t size) { cudaMallocHost(devPtr, size); } -t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, +t_cuda_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, enum e_types type, bool is_view) { - t_ndarray arr; + t_cuda_ndarray arr; void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory}; arr.nd = nd; @@ -61,7 +61,7 @@ t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int6 return (arr); } -int32_t cuda_free_host(t_ndarray arr) +int32_t cuda_free_host(t_cuda_ndarray arr) { if (arr.shape == NULL) return (0); @@ -75,7 +75,7 @@ int32_t cuda_free_host(t_ndarray arr) } __host__ __device__ -int32_t cuda_free(t_ndarray arr) +int32_t cuda_free(t_cuda_ndarray arr) { if (arr.shape == NULL) return (0); @@ -87,7 +87,7 @@ int32_t cuda_free(t_ndarray arr) } __host__ __device__ -int32_t cuda_free_pointer(t_ndarray arr) +int32_t 
cuda_free_pointer(t_cuda_ndarray arr) { if (arr.is_view == false || arr.shape == NULL) return (0); diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index fc571685f1..af586b7ac8 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -3,11 +3,62 @@ # include # include -#include "../ndarrays/ndarrays.h" -t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +typedef enum e_types +{ + nd_bool = 0, + nd_int8 = 1, + nd_int16 = 3, + nd_int32 = 5, + nd_int64 = 7, + nd_float = 11, + nd_double = 12, + nd_cfloat = 14, + nd_cdouble = 15 +} t_types; + + +enum e_memory_locations +{ + managedMemory, + allocateMemoryOnHost, + allocateMemoryOnDevice +}; + +typedef enum e_order +{ + order_f, + order_c, +} t_order; + +typedef struct s_cuda_ndarray +{ + void *raw_data; + /* number of dimensions */ + int32_t nd; + /* shape 'size of each dimension' */ + int64_t *shape; + /* strides 'number of elements to skip to get the next element' */ + int64_t *strides; + /* type of the array elements */ + t_types type; + /* type size of the array elements */ + int32_t type_size; + /* number of element in the array */ + int32_t length; + /* size of the array */ + int32_t buffer_size; + /* True if the array does not own the data */ + bool is_view; + /* stores the order of the array: order_f or order_c */ + t_order order; +} t_cuda_ndarray; + + +t_cuda_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , enum e_memory_locations location); -int32_t cuda_free_host(t_ndarray arr); +int32_t cuda_free_host(t_cuda_ndarray arr); + using namespace std; From 96c3f292f8532f0f396bf8016af9f0f9cc6e8ea3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 16:28:43 +0100 Subject: [PATCH 020/150] work in progress --- pyccel/ast/numpyext.py | 1 - pyccel/codegen/printing/ccode.py | 3 --- pyccel/codegen/printing/cucode.py | 19 
+++++++++++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py index 41273f75f7..eb1ee92e26 100644 --- a/pyccel/ast/numpyext.py +++ b/pyccel/ast/numpyext.py @@ -626,7 +626,6 @@ def __init__(self, *args, class_type, init_dtype = None): assert isinstance(class_type, NumpyNDArrayType) self._init_dtype = init_dtype self._class_type = class_type # pylint: disable=no-member - print(*args) super().__init__(*args) @property diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index d0620ccb84..ec37735dff 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -2182,9 +2182,6 @@ def _print_Assign(self, expr): # Inhomogenous tuples are unravelled and therefore do not exist in the c printer if isinstance(rhs, (NumpyArray, PythonTuple)): return prefix_code+self.copy_NumpyArray_Data(expr) - if(isinstance(rhs, (CudaFull))): - # TODO add support for CudaFull - return " \n" if isinstance(rhs, (NumpyFull)): return prefix_code+self.arrayFill(expr) lhs = self._print(expr.lhs) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 6b7b47f1b6..e0b1b2d1c7 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -15,11 +15,10 @@ from pyccel.ast.literals import Nil from pyccel.errors.errors import Errors -from pyccel.ast.core import Allocate, Deallocate -from pyccel.ast.numpytypes import NumpyInt64Type from pyccel.ast.cudatypes import CudaArrayType from pyccel.ast.datatypes import HomogeneousContainerType -from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map +from pyccel.ast.numpytypes import numpy_precision_map +from pyccel.ast.cudaext import CudaFull @@ -147,6 +146,8 @@ def _print_ModuleHeader(self, expr): "#endif // {name.upper()}_H\n")) def _print_Allocate(self, expr): variable = expr.variable + if not isinstance(variable.class_type, CudaArrayType): + return 
super()._print_Allocate(expr) shape = ", ".join(self._print(i) for i in expr.shape) if isinstance(variable.class_type, CudaArrayType): dtype = self.find_in_ndarray_type_registry(variable.dtype) @@ -154,7 +155,6 @@ def _print_Allocate(self, expr): dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)]) else: raise NotImplementedError(f"Don't know how to index {variable.class_type} type") - shape_dtype = self.get_c_type(NumpyInt64Type()) shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n" is_view = 'false' if variable.on_heap else 'true' memory_location = expr.variable.memory_location @@ -169,8 +169,19 @@ def _print_Allocate(self, expr): def _print_Deallocate(self, expr): var_code = self._print(expr.variable) + if not isinstance(expr.variable.class_type, CudaArrayType): + return super()._print_Deallocate(expr) + if expr.variable.memory_location == 'host': return f"cuda_free_host({var_code});\n" else: return f"cuda_free({var_code});\n" + def _print_Assign(self, expr): + rhs = expr.rhs + if not isinstance(rhs.class_type, CudaArrayType): + return super()._print_Assign(expr) + if(isinstance(rhs, (CudaFull))): + # TODO add support for CudaFull + return " \n" + From b414d6209d01ae381f87507b8fbc07f6c7a23bb3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 10 Jul 2024 16:42:24 +0100 Subject: [PATCH 021/150] work in progress --- pyccel/ast/cudatypes.py | 4 +++ pyccel/ast/variable.py | 39 --------------------- pyccel/codegen/printing/cucode.py | 2 +- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 3 -- 4 files changed, 5 insertions(+), 43 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 5731aa6957..3e9a8df1cf 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -42,6 +42,10 @@ def __init__(self, dtype, rank, order, memory_location): self._memory_location = memory_location super().__init__() + @property + def memory_location(self): + return 
self._memory_location + @lru_cache def __add__(self, other): test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index 93f61f3698..b38dd100f5 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ -56,11 +56,6 @@ class Variable(TypedAstNode): 'stack' if memory should be allocated on the stack, represents stack arrays and scalars. 'alias' if object allows access to memory stored in another variable. - memory_location: str, default: 'host' - 'host' the variable can only be accessed by the CPU. - 'device' the variable can only be accessed by the GPU. - 'managed' the variable can be accessed by CPU and GPU and is being managed by the Cuda API (memory transfer is being done implicitly). - is_const : bool, default: False Indicates if object is a const argument of a function. @@ -147,10 +142,6 @@ def __init__( raise ValueError("memory_handling must be 'heap', 'stack' or 'alias'") self._memory_handling = memory_handling - if memory_location not in ('host', 'device', 'managed'): - raise ValueError("memory_location must be 'host', 'device' or 'managed'") - self._memory_location = memory_location - if not isinstance(is_const, bool): raise TypeError('is_const must be a boolean.') self._is_const = is_const @@ -333,36 +324,6 @@ def cls_base(self): """ return self._cls_base - @property - def memory_location(self): - """ Indicates whether a Variable has a dynamic size - """ - return self._memory_location - - @memory_location.setter - def memory_location(self, memory_location): - if memory_location not in ('host', 'device', 'managed'): - raise ValueError("memory_location must be 'host', 'device' or 'managed'") - self._memory_location = memory_location - - @property - def on_host(self): - """ Indicates if memory is only accessible by the CPU - """ - return self.memory_location == 'host' - - @property - def on_device(self): - """ Indicates if memory is only accessible by the GPU - 
""" - return self.memory_location == 'device' - - @property - def is_managed(self): - """ Indicates if memory is being managed by CUDA API - """ - return self.memory_location == 'managed' - @property def is_const(self): """ diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index e0b1b2d1c7..d343272979 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -157,7 +157,7 @@ def _print_Allocate(self, expr): raise NotImplementedError(f"Don't know how to index {variable.class_type} type") shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n" is_view = 'false' if variable.on_heap else 'true' - memory_location = expr.variable.memory_location + memory_location = variable.class_type.memory_location if memory_location in ('device', 'host'): memory_location = 'allocateMemoryOn' + str(memory_location).capitalize() else: diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index af586b7ac8..13e8419594 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -20,7 +20,6 @@ typedef enum e_types enum e_memory_locations { - managedMemory, allocateMemoryOnHost, allocateMemoryOnDevice }; @@ -39,8 +38,6 @@ typedef struct s_cuda_ndarray /* shape 'size of each dimension' */ int64_t *shape; /* strides 'number of elements to skip to get the next element' */ - int64_t *strides; - /* type of the array elements */ t_types type; /* type size of the array elements */ int32_t type_size; From 7c93416b2c25d5bf065b33041d0d8501f4a9c417 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 022/150] Trigger tests on push to devel or main branch --- .github/workflows/anaconda_linux.yml | 2 +- .github/workflows/anaconda_windows.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macosx.yml | 2 +- .github/workflows/pickle.yml | 2 +- 
.github/workflows/pickle_wheel.yml | 2 +- .github/workflows/windows.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: 
github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 
'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: From f8ec72265db6b1d482913d3c849edfea75df96f9 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:46:33 +0100 Subject: [PATCH 023/150] Add cuda workflow to test cuda developments on CI --- .github/actions/coverage_install/action.yml | 2 +- .github/actions/linux_install/action.yml | 10 +-- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 17 +++++ .github/actions/python_install/action.yml | 17 +++++ .github/workflows/cuda.yml | 83 +++++++++++++++++++++ ci_tools/bot_messages/show_tests.txt | 1 + ci_tools/bot_tools/bot_funcs.py | 12 +-- ci_tools/devel_branch_tests.py | 1 + ci_tools/json_pytest_output.py | 2 +- 10 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 .github/actions/pytest_run_cuda/action.yml create mode 100644 .github/actions/python_install/action.yml create mode 100644 .github/workflows/cuda.yml diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 
100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind shell: bash diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..b0bdc31f16 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} 
working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..52092a6e02 --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,17 @@ +name: 'Pyccel pytest commands generating Ccuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} 
+ PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. 
Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. - **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False otherwise. 
""" print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: From cc3a93ee1ea3df11d914519b8279eecbf853cb9f Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 024/150] Trigger tests on push to devel or main branch --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9111b47d52..cf52b1c624 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: 
github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 From 3be623da6d5658f13fece4c47e734e1fe40ff6b9 Mon Sep 17 00:00:00 2001 From: bauom <40796259+bauom@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:50 +0100 Subject: [PATCH 025/150] [init] Adding CUDA language/compiler and CodePrinter (#32) This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter. Changes to stdlib: Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler --------- Co-authored-by: Mouad Elalj, EmilyBourne --- .dict_custom.txt | 1 + .github/actions/pytest_parallel/action.yml | 4 +- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 11 +- CHANGELOG.md | 6 + pyccel/codegen/codegen.py | 8 +- pyccel/codegen/compiling/compilers.py | 5 +- pyccel/codegen/pipeline.py | 5 +- pyccel/codegen/printing/cucode.py | 74 +++++++++++ pyccel/commands/console.py | 2 +- pyccel/compilers/default_compilers.py | 13 +- pyccel/naming/__init__.py | 4 +- pyccel/naming/cudanameclashchecker.py | 92 ++++++++++++++ pyccel/stdlib/numpy/numpy_c.c | 2 + pyccel/stdlib/numpy/numpy_c.h | 2 + pytest.ini | 1 + tests/conftest.py | 11 ++ tests/epyccel/test_base.py | 136 ++++++++++----------- 18 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 pyccel/codegen/printing/cucode.py create mode 100644 pyccel/naming/cudanameclashchecker.py diff --git a/.dict_custom.txt b/.dict_custom.txt index 82a6b10d31..ae99f31ed4 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,7 @@ Valgrind variadic subclasses oneAPI +Cuda getter setter bitwise diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml 
@@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index b0bdc31f16..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 52092a6e02..46f90552ed 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -1,4 +1,4 @@ -name: 'Pyccel pytest 
commands generating Ccuda' +name: 'Pyccel pytest commands generating Cuda' inputs: shell_cmd: description: 'Specifies the shell command (different for anaconda)' @@ -11,7 +11,14 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 695dc72cf7..d6928b0eca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. 
+ ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py index daf4559df4..8d4abb6bdb 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index c866ee5b1a..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 14087fb567..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ 
b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..86146b065b --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,74 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers + +from pyccel.ast.core import Import, Module + +from pyccel.errors.errors import Errors + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. + + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. 
+ """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + c_headers_imports = '' + local_imports = '' + + for imp in imports: + if imp.source in c_library_headers: + c_headers_imports += self._print(imp) + else: + local_imports += self._print(imp) + + imports = f'{c_headers_imports}\ + extern "C"{{\n\ + {local_imports}\ + }}' + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = 
{'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..971204e912 --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,92 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Handles name clash problems in Cuda +""" +from .languagenameclashchecker import LanguageNameClashChecker + +class CudaNameClashChecker(LanguageNameClashChecker): + """ + Class containing functions to help avoid problematic names in Cuda. + + A class which provides functionalities to check or propose variable names and + verify that they do not cause name clashes. Name clashes may be due to + new variables, or due to the use of reserved keywords. 
+    """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
+    keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const',
+        'continue', 'default', 'do', 'double', 'else', 'enum',
+        'extern', 'float', 'for', 'goto', 'if', 'inline', 'int',
+        'long', 'register', 'restrict', 'return', 'short', 'signed',
+        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
+        'unsigned', 'void', 'volatile', 'while', '_Alignas',
+        '_Alignof', '_Atomic', '_Bool', '_Complex', '_Decimal128',
+        '_Decimal32', '_Decimal64', '_Generic', '_Imaginary',
+        '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray',
+        'array_create', 'new_slice', 'array_slicing', 'alias_assign',
+        'transpose_alias_assign', 'array_fill', 't_slice',
+        'GET_INDEX_EXP1', 'GET_INDEX_EXP2',
+        'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5',
+        'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8',
+        'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11',
+        'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14',
+        'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS',
+        'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX',
+        'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer',
+        'get_index', 'numpy_to_ndarray_strides',
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+
+    def has_clash(self, name, symbols):
+        """
+        Indicate whether the proposed name causes any clashes.
+
+        Checks if a suggested name conflicts with predefined
+        keywords or specified symbols, returning true for a clash.
+        This method is crucial for maintaining namespace integrity and
+        preventing naming conflicts in code generation processes.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        bool
+            True if the name is a collision.
+            False if the name is collision free.
+        """
+        return any(name == k for k in self.keywords) or \
+                any(name == s for s in symbols)
+
+    def get_collisionless_name(self, name, symbols):
+        """
+        Get a valid name which doesn't collide with symbols or Cuda keywords.
+
+        Find a new name based on the suggested name which will not cause
+        conflicts with Cuda keywords, does not appear in the provided symbols,
+        and is a valid name in Cuda code.
+
+        Parameters
+        ----------
+        name : str
+            The suggested name.
+        symbols : set
+            Symbols which should be considered as collisions.
+
+        Returns
+        -------
+        str
+            A new name which is collision free.
+        """
+        if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)):
+            # Ignore magic methods
+            return name
+        if name[0] == '_':
+            name = 'private'+name
+        return self._get_collisionless_name(name, symbols)
diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c
index 7c9ecbbf6b..bc56214772 100644
--- a/pyccel/stdlib/numpy/numpy_c.c
+++ b/pyccel/stdlib/numpy/numpy_c.c
@@ -17,8 +17,10 @@ double fsign(double x)
 	return SIGN(x);
 }
 
+#ifndef __NVCC__
 /* numpy.sign for complex  */
 double complex csign(double complex x)
 {
 	return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ?
-1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..a5082ef6e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) 
test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def 
test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) 
test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) 
test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = 
epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() From b6d1549c8cb1999f76396d4919e98998d3367c55 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 15 May 2024 12:58:50 +0100 Subject: [PATCH 026/150] Fix import handling (#49) This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'. **Commit Summary** - Implemented new header printer for CUDA. - Added CUDA wrapper assignment - Instead of wrapping all local headers, wrap only C functions with extern 'C' --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 3 +- pyccel/codegen/printing/cucode.py | 45 ++++++++---- pyccel/codegen/python_wrapper.py | 4 ++ pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++ tests/epyccel/modules/cuda_module.py | 13 ++++ tests/epyccel/test_epyccel_modules.py | 13 ++++ 6 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py create mode 100644 tests/epyccel/modules/cuda_module.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d6928b0eca..b897e14385 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file. ### Added -- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. 
+- #48 : Fix incorrect handling of imports in `cuda`.
 
 ## \[UNRELEASED\]
 
diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py
index 86146b065b..277d2a3a6a 100644
--- a/pyccel/codegen/printing/cucode.py
+++ b/pyccel/codegen/printing/cucode.py
@@ -52,19 +52,7 @@ def _print_Module(self, expr):
 
         # Print imports last to be sure that all additional_imports have been collected
         imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()]
-        c_headers_imports = ''
-        local_imports = ''
-
-        for imp in imports:
-            if imp.source in c_library_headers:
-                c_headers_imports += self._print(imp)
-            else:
-                local_imports += self._print(imp)
-
-        imports = f'{c_headers_imports}\
-                extern "C"{{\n\
-                {local_imports}\
-                }}'
+        imports = ''.join(self._print(i) for i in imports)
 
         code = f'{imports}\n\
                  {global_variables}\n\
@@ -72,3 +60,34 @@ def _print_Module(self, expr):
 
         self.exit_scope()
         return code
+
+    def _print_ModuleHeader(self, expr):
+        self.set_scope(expr.module.scope)
+        self._in_header = True
+        name = expr.module.name
+
+        funcs = ""
+        cuda_headers = ""
+        for f in expr.module.funcs:
+            if not f.is_inline:
+                if 'kernel' in f.decorators:  # Checking for 'kernel' decorator
+                    cuda_headers += self.function_signature(f) + ';\n'
+                else:
+                    funcs += self.function_signature(f) + ';\n'
+        global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private)
+        # Print imports last to be sure that all additional_imports have been collected
+        imports = [*expr.module.imports, *self._additional_imports.values()]
+        imports  = ''.join(self._print(i) for i in imports)
+
+        self._in_header = False
+        self.exit_scope()
+        function_declaration = f'{cuda_headers}\n\
+                extern "C"{{\n\
+                {funcs}\
+                }}\n'
+        return '\n'.join((f"#ifndef {name.upper()}_H",
+                          f"#define {name.upper()}_H",
+                          global_variables,
+                          function_declaration,
+                          f"#endif // {name.upper()}_H\n"))
+
diff --git a/pyccel/codegen/python_wrapper.py 
b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. +""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. 
+ this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. + """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable. which must be printed in + the wrapping module to expose the variable. 
+ """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) From 7da772a5096082d6268c6baf50cf2fc56c5d6152 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Thu, 27 Jun 2024 20:31:46 +0100 Subject: [PATCH 027/150] Add support for kernels (#42) This pull request addresses issue #28 by implementing a new feature in Pyccel that allows users to define custom GPU kernels. The syntax for creating these kernels is inspired by Numba. 
and I also need to fix issue #45 for testing purposes **Commit Summary** - Introduced KernelCall class - Added cuda printer methods _print_KernelCall and _print_FunctionDef to generate the corresponding CUDA representation for both kernel calls and definitions - Added IndexedFunctionCall represents an indexed function call - Added CUDA module and cuda.synchronize() - Fixing a bug that I found in the header: it does not import the necessary header for the used function --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> Co-authored-by: Emily Bourne --- .dict_custom.txt | 1 + CHANGELOG.md | 2 + docs/cuda.md | 23 +++ pyccel/ast/core.py | 37 ++++ pyccel/ast/cuda.py | 65 +++++++ pyccel/ast/cudaext.py | 42 +++++ pyccel/ast/utilities.py | 4 +- pyccel/codegen/printing/cucode.py | 46 ++++- pyccel/cuda/__init__.py | 10 + pyccel/cuda/cuda_sync_primitives.py | 16 ++ pyccel/decorators.py | 32 ++++ pyccel/errors/messages.py | 8 + pyccel/parser/semantic.py | 84 ++++++++- pyccel/parser/syntactic.py | 4 + tests/conftest.py | 9 + tests/cuda/test_kernel_semantic.py | 176 ++++++++++++++++++ tests/pyccel/scripts/kernel/hello_kernel.py | 19 ++ .../scripts/kernel/kernel_name_collision.py | 8 + tests/pyccel/test_pyccel.py | 22 ++- 19 files changed, 599 insertions(+), 9 deletions(-) create mode 100644 docs/cuda.md create mode 100644 pyccel/ast/cuda.py create mode 100644 pyccel/ast/cudaext.py create mode 100644 pyccel/cuda/__init__.py create mode 100644 pyccel/cuda/cuda_sync_primitives.py create mode 100644 tests/cuda/test_kernel_semantic.py create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py diff --git a/.dict_custom.txt b/.dict_custom.txt index ae99f31ed4..5d99e21194 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -118,3 +118,4 @@ datatyping datatypes indexable traceback +GPUs diff --git a/CHANGELOG.md b/CHANGELOG.md index b897e14385..717f638bf3 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #42 : Add support for custom kernel in`cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..de30d52b80 --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,23 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. + +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` \ No newline at end of file diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 013f206dd6..f0e5cc67f1 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. 
These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..b540f20993 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. +""" +from .internals import PyccelFunction + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef + +__all__ = ( + 'CudaSynchronize', +) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 277d2a3a6a..cd26843017 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -9,11 +9,12 @@ enabling the direct translation of high-level Pyccel expressions into CUDA code. """ -from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers +from pyccel.codegen.printing.ccode import CCodePrinter -from pyccel.ast.core import Import, Module +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil -from pyccel.errors.errors import Errors +from pyccel.errors.errors import Errors errors = Errors() @@ -61,6 +62,44 @@ def _print_Module(self, expr): self.exit_scope() return code + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. 
+ + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. + """ + cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorater} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr): }}\n' return '\n'.join((f"#ifndef {name.upper()}_H", f"#define {name.upper()}_H", + imports, global_variables, function_declaration, "#endif // {name.upper()}_H\n")) diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..e8542ad5d5 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. 
+""" +from .cuda_sync_primitives import synchronize + +__all__ = ['synchronize'] diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..77717a991f 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -19,6 +19,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +110,34 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. 
+ """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..09966d810c 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' + + + + diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index e94b9c8413..fde10d6317 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,9 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) from pyccel.parser.base import BasicParser from 
pyccel.parser.syntactic import SyntaxParser @@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. + """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + 
errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. @@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 2967f4999b..3af7f0728a 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/tests/conftest.py b/tests/conftest.py index 
a5082ef6e8..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + 
threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert 
errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + 
its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- /dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..b4757a3c31 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,19 @@ def 
test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From ca701d93be9db239b75084e1c5ddd2e0b28e2ab5 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:37:02 +0100 Subject: [PATCH 028/150] Updated CUDA Name Clash Checker By Added CUDA-specific keywords (#60) This pull request addresses issue #59 by adding more CUDA-specific keywords to enhance the checking of variable/function names and prevent name clashes --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 1 + pyccel/naming/cudanameclashchecker.py | 36 ++++++++++++++++++++++- pyccel/naming/languagenameclashchecker.py | 5 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 717f638bf3..afdabc3ab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py index 971204e912..c7aaa4952f 100644 --- a/pyccel/naming/cudanameclashchecker.py +++ b/pyccel/naming/cudanameclashchecker.py @@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker): verify that they do not cause name clashes. Name clashes may be due to new variables, or due to the use of reserved keywords. """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', @@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker): 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', 'get_index', 'numpy_to_ndarray_strides', - 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data' + '__global__', '__device__', '__host__','__constant__', '__shared__', + '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim', + 'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset', + 'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch', + 'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc', + 'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer', + 'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset', + 'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice', + 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize', + 'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord', + 'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet', + 'cuDeviceGetCount', 'cuDeviceGetName', + 'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy', + 'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload', + 'cuModuleGetFunction', 
'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) def has_clash(self, name, symbols): """ diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. 
From 828d16646dd52174e2dda9742f30e45df87e07f2 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:04:22 +0100 Subject: [PATCH 029/150] add handle for custom device (#61) This pull request addresses issue https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new feature in Pyccel that allows users to define a custom device **Commit Summary** - Adding handler for custom device and its code generation. - Adding test --------- Co-authored-by: EmilyBourne --- CHANGELOG.md | 1 + docs/cuda.md | 25 ++++++++++++++++- pyccel/codegen/printing/cucode.py | 7 ++--- pyccel/decorators.py | 19 +++++++++++++ pyccel/errors/messages.py | 2 +- pyccel/parser/semantic.py | 7 ++++- tests/cuda/test_device_semantic.py | 31 ++++++++++++++++++++++ tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++ tests/pyccel/test_pyccel.py | 8 ++++++ 9 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 tests/cuda/test_device_semantic.py create mode 100644 tests/pyccel/scripts/kernel/device_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index afdabc3ab7..d5523ac5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in`cuda`. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md index de30d52b80..7643a4ac02 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -20,4 +20,27 @@ threadsperblock = 1 # Call your kernel function my_kernel[blockspergrid, threadsperblock]() -``` \ No newline at end of file +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. 
They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..7c01d93c47 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True): str Signature of the function. """ - cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' c_function_signature = super().function_signature(expr, print_arg_names) - return f'{cuda_decorater} {c_function_signature}' + return f'{cuda_decorator} {c_function_signature}' def _print_KernelCall(self, expr): func = expr.funcdef @@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr): cuda_headers = "" for f in expr.module.funcs: if not f.is_inline: - if 'kernel' in f.decorators: # Checking for 'kernel' decorator + if 'kernel' in f.decorators or 'device' in f.decorators: cuda_headers += self.function_signature(f) + ';\n' else: funcs += self.function_signature(f) + ';\n' diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 77717a991f..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -141,3 +142,21 @@ def __getitem__(self, args): return self._f return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. 
+ + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. + """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 09966d810c..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -166,7 +166,7 @@ INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' - +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index fde10d6317..7e8dd11bb4 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -136,9 +136,10 @@ UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, - MISSING_KERNEL_CONFIGURATION, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. 
""" + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b4757a3c31..2d55c6e1cb 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available): pyccel_test("scripts/kernel/kernel_name_collision.py", language="cuda", execute_code=gpu_available) +#------------------------------------------------------------------------------ + +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From a28c7247e765743def5294b825a864b7bfd120fe Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 11 Jul 2024 14:45:50 +0100 Subject: [PATCH 030/150] work in progress --- pyccel/ast/cudatypes.py | 7 +++-- pyccel/ast/numpytypes.py | 2 -- pyccel/ast/variable.py | 2 +- pyccel/codegen/printing/ccode.py | 2 +- pyccel/codegen/printing/cucode.py | 30 ++++++++++++++------ pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 18 ++++++------ pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 30 ++++++++++---------- 7 files changed, 52 insertions(+), 39 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 3e9a8df1cf..df45abdb6e 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -12,6 +12,7 @@ from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool from pyccel.utilities.metaclasses import ArgumentSingleton from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type +from .numpytypes import NumpyNDArrayType class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): @@ -51,11 +52,13 @@ def __add__(self, other): test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) if isinstance(other, FixedSizeNumericType): comparison_type = pyccel_type_to_original_type[other]() - elif isinstance(other, CudaArrayType): + elif isinstance(other, 
CudaArrayType) or isinstance(other, NumpyNDArrayType): comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type]) else: return NotImplemented - # Todo need to check for memory location as well + if(isinstance(other, CudaArrayType)): + assert self.memory_location == other.memory_location + result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type] rank = max(other.rank, self.rank) if rank < 2: diff --git a/pyccel/ast/numpytypes.py b/pyccel/ast/numpytypes.py index 1d56ce14e9..8bc1df828e 100644 --- a/pyccel/ast/numpytypes.py +++ b/pyccel/ast/numpytypes.py @@ -282,7 +282,6 @@ def __new__(cls, dtype, rank, order): return super().__new__(cls) def __init__(self, dtype, rank, order): - # print("reank", rank) assert isinstance(rank, int) assert order in (None, 'C', 'F') assert rank < 2 or order is not None @@ -320,7 +319,6 @@ def __radd__(self, other): @lru_cache def __and__(self, other): - print("jouj draham diyali aba jalal") elem_type = self.element_type if isinstance(other, FixedSizeNumericType): return NumpyNDArrayType(elem_type and other) diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index b38dd100f5..c8b9fd95ef 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ -98,7 +98,7 @@ class Variable(TypedAstNode): >>> Variable(PythonNativeInt(), DottedName('matrix', 'n_rows')) matrix.n_rows """ - __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const', '_is_target', + __slots__ = ('_name', '_alloc_shape', '_memory_handling', '_is_const', '_is_target', '_is_optional', '_allows_negative_indexes', '_cls_base', '_is_argument', '_is_temp', '_shape','_is_private','_class_type') _attribute_nodes = () diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index ec37735dff..de1ad669c3 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1260,7 +1260,7 @@ def find_in_ndarray_type_registry(self, 
dtype): The code which declares the datatype in C. """ try : - return self.ndarray_type_registry[dtype] + return self.cuda_ndarray_type_registry[dtype] except KeyError: raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from symbol = dtype, diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index d343272979..8d405f15a4 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -16,15 +16,11 @@ from pyccel.errors.errors import Errors from pyccel.ast.cudatypes import CudaArrayType -from pyccel.ast.datatypes import HomogeneousContainerType +from pyccel.ast.datatypes import HomogeneousContainerType, PythonNativeBool from pyccel.ast.numpytypes import numpy_precision_map from pyccel.ast.cudaext import CudaFull - - - - - - +from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type +from pyccel.ast.numpytypes import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, NumpyInt64Type errors = Errors() @@ -52,6 +48,16 @@ class CudaCodePrinter(CCodePrinter): """ language = "cuda" + cuda_ndarray_type_registry = { + NumpyFloat64Type() : 'cu_double', + NumpyFloat32Type() : 'cu_float', + NumpyComplex128Type() : 'cu_cdouble', + NumpyComplex64Type() : 'cu_cfloat', + NumpyInt64Type() : 'cu_int64', + NumpyInt32Type() : 'cu_int32', + NumpyInt16Type() : 'cu_int16', + NumpyInt8Type() : 'cu_int8', + PythonNativeBool() : 'cu_bool'} def __init__(self, filename, prefix_module = None): errors.set_target(filename) @@ -172,10 +178,18 @@ def _print_Deallocate(self, expr): if not isinstance(expr.variable.class_type, CudaArrayType): return super()._print_Deallocate(expr) - if expr.variable.memory_location == 'host': + if expr.variable.class_type.memory_location == 'host': return f"cuda_free_host({var_code});\n" else: return f"cuda_free({var_code});\n" + def get_declare_type(self, expr): + class_type = expr.class_type + rank = expr.rank + if not 
isinstance(class_type, CudaArrayType ) or rank <= 0: + return super().get_declare_type(expr) + + dtype = 't_cuda_ndarray' + return dtype def _print_Assign(self, expr): rhs = expr.rhs diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index d813540707..0dae780e54 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -15,7 +15,7 @@ void host_memory(void** devPtr, size_t size) cudaMallocHost(devPtr, size); } t_cuda_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, - enum e_types type, bool is_view) + enum cu_types type, bool is_view) { t_cuda_ndarray arr; void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory}; @@ -24,25 +24,25 @@ t_cuda_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, arr.type = type; switch (type) { - case nd_int8: + case cu_int8: arr.type_size = sizeof(int8_t); break; - case nd_int16: + case cu_int16: arr.type_size = sizeof(int16_t); break; - case nd_int32: + case cu_int32: arr.type_size = sizeof(int32_t); break; - case nd_int64: + case cu_int64: arr.type_size = sizeof(int64_t); break; - case nd_float: + case cu_float: arr.type_size = sizeof(float); break; - case nd_double: + case cu_double: arr.type_size = sizeof(double); break; - case nd_bool: + case cu_bool: arr.type_size = sizeof(bool); break; } @@ -69,8 +69,6 @@ int32_t cuda_free_host(t_cuda_ndarray arr) arr.raw_data = NULL; cudaFree(arr.shape); arr.shape = NULL; - cudaFree(arr.strides); - arr.strides = NULL; return (1); } diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index 13e8419594..8e8851e0a1 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -4,18 +4,18 @@ # include # include -typedef enum e_types +typedef enum cu_types { - nd_bool = 0, - nd_int8 = 1, - nd_int16 = 3, - 
nd_int32 = 5, - nd_int64 = 7, - nd_float = 11, - nd_double = 12, - nd_cfloat = 14, - nd_cdouble = 15 -} t_types; + cu_bool = 0, + cu_int8 = 1, + cu_int16 = 3, + cu_int32 = 5, + cu_int64 = 7, + cu_float = 11, + cu_double = 12, + cu_cfloat = 14, + cu_cdouble = 15 +} t_cu_types; enum e_memory_locations @@ -38,7 +38,7 @@ typedef struct s_cuda_ndarray /* shape 'size of each dimension' */ int64_t *shape; /* strides 'number of elements to skip to get the next element' */ - t_types type; + cu_types type; /* type size of the array elements */ int32_t type_size; /* number of element in the array */ @@ -52,13 +52,13 @@ typedef struct s_cuda_ndarray } t_cuda_ndarray; -t_cuda_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +t_cuda_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum cu_types type, bool is_view , enum e_memory_locations location); int32_t cuda_free_host(t_cuda_ndarray arr); - +__host__ __device__ +int32_t cuda_free(t_cuda_ndarray arr); using namespace std; - #endif \ No newline at end of file From 22686d77428e47daa99672f9701a189f5382accf Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Sun, 14 Jul 2024 16:50:20 +0100 Subject: [PATCH 031/150] work in progress --- pyccel/ast/cudatypes.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index df45abdb6e..29952c3efc 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -52,7 +52,7 @@ def __add__(self, other): test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) if isinstance(other, FixedSizeNumericType): comparison_type = pyccel_type_to_original_type[other]() - elif isinstance(other, CudaArrayType) or isinstance(other, NumpyNDArrayType): + elif isinstance(other, CudaArrayType) or (isinstance(other, NumpyNDArrayType) and self.memory_location == "host"): comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type]) 
else: return NotImplemented @@ -69,24 +69,6 @@ def __add__(self, other): order = 'F' if other_f_contiguous and self_f_contiguous else 'C' return CudaArrayType(result_type, rank, order, self.memory_location) - @lru_cache - def __radd__(self, other): - return self.__add__(other) - - @lru_cache - def __and__(self, other): - elem_type = self.element_type - if isinstance(other, FixedSizeNumericType): - return CudaArrayType(elem_type and other) - elif isinstance(other, CudaArrayType): - return CudaArrayType(elem_type+other.element_type) - else: - return NotImplemented - - @lru_cache - def __rand__(self, other): - return self.__and__(other) - @property def rank(self): """ From ab68eb44b1cfa8dab43abe86b5fe726e97298515 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Sun, 14 Jul 2024 22:18:24 +0100 Subject: [PATCH 032/150] work in progress --- pyccel/codegen/printing/ccode.py | 37 +++++++++++++++++++++ pyccel/codegen/printing/cucode.py | 11 +----- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 29 +++++++++++++++- 3 files changed, 66 insertions(+), 11 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 39e0c435f5..8b038fe6e0 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -287,6 +287,16 @@ class CCodePrinter(CodePrinter): NumpyInt16Type() : 'nd_int16', NumpyInt8Type() : 'nd_int8', PythonNativeBool() : 'nd_bool'} + cuda_ndarray_type_registry = { + NumpyFloat64Type() : 'cu_double', + NumpyFloat32Type() : 'cu_float', + NumpyComplex128Type() : 'cu_cdouble', + NumpyComplex64Type() : 'cu_cfloat', + NumpyInt64Type() : 'cu_int64', + NumpyInt32Type() : 'cu_int32', + NumpyInt16Type() : 'cu_int16', + NumpyInt8Type() : 'cu_int8', + PythonNativeBool() : 'cu_bool'} type_to_format = {(PrimitiveFloatingPointType(),8) : '%.15lf', (PrimitiveFloatingPointType(),4) : '%.6f', @@ -1257,6 +1267,30 @@ def find_in_ndarray_type_registry(self, dtype): type within a ndarray. Raise PYCCEL_RESTRICTION_TODO if not found. 
+ Parameters + ---------- + dtype : DataType + The data type of the expression. + + Returns + ------- + str + The code which declares the datatype in C. + """ + try : + return self.ndarray_type_registry[dtype] + except KeyError: + raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from + symbol = dtype, + severity='fatal') + def find_in_cuarray_type_registry(self, dtype): + """ + Find the descriptor for the datatype in the ndarray_type_registry. + + Find the tag which allows the user to access data of the specified + type within a ndarray. + Raise PYCCEL_RESTRICTION_TODO if not found. + Parameters ---------- dtype : DataType @@ -1450,9 +1484,12 @@ def _print_IndexedElement(self, expr): inds = list(expr.indices) base_shape = base.shape allow_negative_indexes = expr.allows_negative_indexes + if isinstance(base.class_type, NumpyNDArrayType): #set dtype to the C struct types dtype = self.find_in_ndarray_type_registry(expr.dtype) + if isinstance(base.class_type, CudaArrayType): + dtype = self.find_in_cuarray_type_registry(expr.dtype) elif isinstance(base.class_type, HomogeneousContainerType): dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(expr.dtype.primitive_type, expr.dtype.precision)]) else: diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index c3cb7b8d71..d3ebfb2ef2 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -48,16 +48,7 @@ class CudaCodePrinter(CCodePrinter): """ language = "cuda" - cuda_ndarray_type_registry = { - NumpyFloat64Type() : 'cu_double', - NumpyFloat32Type() : 'cu_float', - NumpyComplex128Type() : 'cu_cdouble', - NumpyComplex64Type() : 'cu_cfloat', - NumpyInt64Type() : 'cu_int64', - NumpyInt32Type() : 'cu_int32', - NumpyInt16Type() : 'cu_int16', - NumpyInt8Type() : 'cu_int8', - PythonNativeBool() : 'cu_bool'} + def __init__(self, filename, prefix_module = None): errors.set_target(filename) diff --git 
a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index 8e8851e0a1..46eddb6eb1 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -4,6 +4,31 @@ # include # include +#define GET_INDEX_EXP1(t, arr, a) t(arr, 0, a) +#define GET_INDEX_EXP2(t, arr, a, b) GET_INDEX_EXP1(t, arr, a) + t(arr, 1, b) +#define GET_INDEX_EXP3(t, arr, a, b, c) GET_INDEX_EXP2(t, arr, a, b) + t(arr, 2, c) +#define GET_INDEX_EXP4(t, arr, a, b, c, d) GET_INDEX_EXP3(t, arr, a, b, c) + t(arr, 3, d) +#define GET_INDEX_EXP5(t, arr, a, b, c, d, e) GET_INDEX_EXP4(t, arr, a, b, c, d) + t(arr, 4, e) +#define GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) GET_INDEX_EXP5(t, arr, a, b, c, d, e) + t(arr, 5, f) +#define GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) + t(arr, 6, g) +#define GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) + t(arr, 7, h) +#define GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) + t(arr, 8, i) +#define GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) + t(arr, 9, j) +#define GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) + t(arr, 10, k) +#define GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) + t(arr, 11, l) +#define GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) + t(arr, 12, m) +#define GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) + t(arr, 13, n) +#define GET_INDEX_EXP15(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) + t(arr, 14, o) + +#define 
NUM_ARGS_H1(dummy, x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0, ...) x0 +#define NUM_ARGS(...) NUM_ARGS_H1(dummy, __VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define GET_INDEX_FUNC_H2(t, arr, ndim, ...) GET_INDEX_EXP##ndim(t, arr, __VA_ARGS__) +#define GET_INDEX_FUNC(t, arr, ndim, ...) GET_INDEX_FUNC_H2(t, arr, ndim, __VA_ARGS__) + +#define GET_INDEX(arr, ...) GET_INDEX_FUNC(INDEX, arr, NUM_ARGS(__VA_ARGS__), __VA_ARGS__) +#define INDEX(arr, dim, a) (arr.strides[dim] * (a)) +#define GET_ELEMENT(arr, type, ...) arr.type[GET_INDEX(arr, __VA_ARGS__)] + typedef enum cu_types { cu_bool = 0, @@ -38,7 +63,9 @@ typedef struct s_cuda_ndarray /* shape 'size of each dimension' */ int64_t *shape; /* strides 'number of elements to skip to get the next element' */ - cu_types type; + int64_t *strides; + /* data type of the array elements */ + t_cu_types type; /* type size of the array elements */ int32_t type_size; /* number of element in the array */ From 73c4c81624708d8e056a7b126dcaca131431d075 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Sun, 14 Jul 2024 22:48:03 +0100 Subject: [PATCH 033/150] work in progress --- pyccel/codegen/printing/ccode.py | 39 ----------- pyccel/codegen/printing/cucode.py | 3 +- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 26 +++---- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 72 ++------------------ pyccel/stdlib/ndarrays/ndarrays.h | 15 ++-- 5 files changed, 28 insertions(+), 127 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 8b038fe6e0..d81ed35293 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -287,16 +287,6 @@ class CCodePrinter(CodePrinter): NumpyInt16Type() : 'nd_int16', NumpyInt8Type() : 'nd_int8', PythonNativeBool() : 'nd_bool'} - cuda_ndarray_type_registry = { - NumpyFloat64Type() : 'cu_double', - NumpyFloat32Type() : 'cu_float', - NumpyComplex128Type() : 'cu_cdouble', - NumpyComplex64Type() : 
'cu_cfloat', - NumpyInt64Type() : 'cu_int64', - NumpyInt32Type() : 'cu_int32', - NumpyInt16Type() : 'cu_int16', - NumpyInt8Type() : 'cu_int8', - PythonNativeBool() : 'cu_bool'} type_to_format = {(PrimitiveFloatingPointType(),8) : '%.15lf', (PrimitiveFloatingPointType(),4) : '%.6f', @@ -1283,30 +1273,6 @@ def find_in_ndarray_type_registry(self, dtype): raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from symbol = dtype, severity='fatal') - def find_in_cuarray_type_registry(self, dtype): - """ - Find the descriptor for the datatype in the ndarray_type_registry. - - Find the tag which allows the user to access data of the specified - type within a ndarray. - Raise PYCCEL_RESTRICTION_TODO if not found. - - Parameters - ---------- - dtype : DataType - The data type of the expression. - - Returns - ------- - str - The code which declares the datatype in C. - """ - try : - return self.cuda_ndarray_type_registry[dtype] - except KeyError: - raise errors.report(PYCCEL_RESTRICTION_TODO, #pylint: disable=raise-missing-from - symbol = dtype, - severity='fatal') def get_declare_type(self, expr): """ @@ -1356,9 +1322,6 @@ def get_declare_type(self, expr): errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' - elif isinstance(expr.class_type, CudaArrayType): - dtype = 't_cuda_ndarray' - else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') elif not isinstance(class_type, CustomDataType): @@ -1488,8 +1451,6 @@ def _print_IndexedElement(self, expr): if isinstance(base.class_type, NumpyNDArrayType): #set dtype to the C struct types dtype = self.find_in_ndarray_type_registry(expr.dtype) - if isinstance(base.class_type, CudaArrayType): - dtype = self.find_in_cuarray_type_registry(expr.dtype) elif isinstance(base.class_type, HomogeneousContainerType): dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(expr.dtype.primitive_type, 
expr.dtype.precision)]) else: diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index d3ebfb2ef2..6d8a6bc305 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -179,8 +179,9 @@ def get_declare_type(self, expr): rank = expr.rank if not isinstance(class_type, CudaArrayType ) or rank <= 0: return super().get_declare_type(expr) + self.add_import(c_imports['ndarrays']) - dtype = 't_cuda_ndarray' + dtype = 't_ndarray ' return dtype def _print_Assign(self, expr): diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index 0dae780e54..34890002f3 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -14,35 +14,35 @@ void host_memory(void** devPtr, size_t size) { cudaMallocHost(devPtr, size); } -t_cuda_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, - enum cu_types type, bool is_view) +t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, + enum e_types type, bool is_view) { - t_cuda_ndarray arr; + t_ndarray arr; void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory}; arr.nd = nd; arr.type = type; switch (type) { - case cu_int8: + case nd_int8: arr.type_size = sizeof(int8_t); break; - case cu_int16: + case nd_int16: arr.type_size = sizeof(int16_t); break; - case cu_int32: + case nd_int32: arr.type_size = sizeof(int32_t); break; - case cu_int64: + case nd_int64: arr.type_size = sizeof(int64_t); break; - case cu_float: + case nd_float: arr.type_size = sizeof(float); break; - case cu_double: + case nd_double: arr.type_size = sizeof(double); break; - case cu_bool: + case nd_bool: arr.type_size = sizeof(bool); break; } @@ -61,7 +61,7 @@ t_cuda_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, return (arr); } -int32_t cuda_free_host(t_cuda_ndarray arr) +int32_t 
cuda_free_host(t_ndarray arr) { if (arr.shape == NULL) return (0); @@ -73,7 +73,7 @@ int32_t cuda_free_host(t_cuda_ndarray arr) } __host__ __device__ -int32_t cuda_free(t_cuda_ndarray arr) +int32_t cuda_free(t_ndarray arr) { if (arr.shape == NULL) return (0); @@ -85,7 +85,7 @@ int32_t cuda_free(t_cuda_ndarray arr) } __host__ __device__ -int32_t cuda_free_pointer(t_cuda_ndarray arr) +int32_t cuda_free_pointer(t_ndarray arr) { if (arr.is_view == false || arr.shape == NULL) return (0); diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index 46eddb6eb1..e074443662 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -3,44 +3,8 @@ # include # include +#include "../ndarrays/ndarrays.h" -#define GET_INDEX_EXP1(t, arr, a) t(arr, 0, a) -#define GET_INDEX_EXP2(t, arr, a, b) GET_INDEX_EXP1(t, arr, a) + t(arr, 1, b) -#define GET_INDEX_EXP3(t, arr, a, b, c) GET_INDEX_EXP2(t, arr, a, b) + t(arr, 2, c) -#define GET_INDEX_EXP4(t, arr, a, b, c, d) GET_INDEX_EXP3(t, arr, a, b, c) + t(arr, 3, d) -#define GET_INDEX_EXP5(t, arr, a, b, c, d, e) GET_INDEX_EXP4(t, arr, a, b, c, d) + t(arr, 4, e) -#define GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) GET_INDEX_EXP5(t, arr, a, b, c, d, e) + t(arr, 5, f) -#define GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) GET_INDEX_EXP6(t, arr, a, b, c, d, e, f) + t(arr, 6, g) -#define GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) GET_INDEX_EXP7(t, arr, a, b, c, d, e, f, g) + t(arr, 7, h) -#define GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) GET_INDEX_EXP8(t, arr, a, b, c, d, e, f, g, h) + t(arr, 8, i) -#define GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) GET_INDEX_EXP9(t, arr, a, b, c, d, e, f, g, h, i) + t(arr, 9, j) -#define GET_INDEX_EXP11(t, arr, a, b, c, d, e, f, g, h, i, j, k) GET_INDEX_EXP10(t, arr, a, b, c, d, e, f, g, h, i, j) + t(arr, 10, k) -#define GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) GET_INDEX_EXP11(t, 
arr, a, b, c, d, e, f, g, h, i, j, k) + t(arr, 11, l) -#define GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) GET_INDEX_EXP12(t, arr, a, b, c, d, e, f, g, h, i, j, k, l) + t(arr, 12, m) -#define GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) GET_INDEX_EXP13(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m) + t(arr, 13, n) -#define GET_INDEX_EXP15(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) GET_INDEX_EXP14(t, arr, a, b, c, d, e, f, g, h, i, j, k, l, m, n) + t(arr, 14, o) - -#define NUM_ARGS_H1(dummy, x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, x0, ...) x0 -#define NUM_ARGS(...) NUM_ARGS_H1(dummy, __VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define GET_INDEX_FUNC_H2(t, arr, ndim, ...) GET_INDEX_EXP##ndim(t, arr, __VA_ARGS__) -#define GET_INDEX_FUNC(t, arr, ndim, ...) GET_INDEX_FUNC_H2(t, arr, ndim, __VA_ARGS__) - -#define GET_INDEX(arr, ...) GET_INDEX_FUNC(INDEX, arr, NUM_ARGS(__VA_ARGS__), __VA_ARGS__) -#define INDEX(arr, dim, a) (arr.strides[dim] * (a)) -#define GET_ELEMENT(arr, type, ...) 
arr.type[GET_INDEX(arr, __VA_ARGS__)] - -typedef enum cu_types -{ - cu_bool = 0, - cu_int8 = 1, - cu_int16 = 3, - cu_int32 = 5, - cu_int64 = 7, - cu_float = 11, - cu_double = 12, - cu_cfloat = 14, - cu_cdouble = 15 -} t_cu_types; enum e_memory_locations @@ -49,41 +13,13 @@ enum e_memory_locations allocateMemoryOnDevice }; -typedef enum e_order -{ - order_f, - order_c, -} t_order; - -typedef struct s_cuda_ndarray -{ - void *raw_data; - /* number of dimensions */ - int32_t nd; - /* shape 'size of each dimension' */ - int64_t *shape; - /* strides 'number of elements to skip to get the next element' */ - int64_t *strides; - /* data type of the array elements */ - t_cu_types type; - /* type size of the array elements */ - int32_t type_size; - /* number of element in the array */ - int32_t length; - /* size of the array */ - int32_t buffer_size; - /* True if the array does not own the data */ - bool is_view; - /* stores the order of the array: order_f or order_c */ - t_order order; -} t_cuda_ndarray; -t_cuda_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum cu_types type, bool is_view , +t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , enum e_memory_locations location); -int32_t cuda_free_host(t_cuda_ndarray arr); +int32_t cuda_free_host(t_ndarray arr); __host__ __device__ -int32_t cuda_free(t_cuda_ndarray arr); +int32_t cuda_free(t_ndarray arr); using namespace std; diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h index 082146d639..9764113705 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.h +++ b/pyccel/stdlib/ndarrays/ndarrays.h @@ -80,12 +80,6 @@ typedef enum e_order order_c, } t_order; -enum e_memory_locations -{ - managedMemory, - allocateMemoryOnHost, - allocateMemoryOnDevice -}; typedef struct s_ndarray { @@ -135,8 +129,10 @@ void _array_fill_int64(int64_t c, t_ndarray arr); void _array_fill_float(float c, t_ndarray arr); void _array_fill_double(double c, t_ndarray arr); void 
_array_fill_bool(bool c, t_ndarray arr); +#ifndef __NVCC__ void _array_fill_cfloat(float complex c, t_ndarray arr); void _array_fill_cdouble(double complex c, t_ndarray arr); +#endif /* slicing */ /* creating a Slice object */ @@ -156,6 +152,7 @@ int32_t free_pointer(t_ndarray* dump); int64_t get_index(t_ndarray arr, ...); /* data converting between numpy and ndarray */ + int64_t *numpy_to_ndarray_strides(int64_t *np_strides, int type_size, int nd); int64_t *numpy_to_ndarray_shape(int64_t *np_shape, int nd); void print_ndarray_memory(t_ndarray nd); @@ -171,8 +168,10 @@ int64_t numpy_sum_int32(t_ndarray arr); int64_t numpy_sum_int64(t_ndarray arr); float numpy_sum_float32(t_ndarray arr); double numpy_sum_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_sum_complex64(t_ndarray arr); double complex numpy_sum_complex128(t_ndarray arr); +#endif /*numpy max/amax */ @@ -183,8 +182,10 @@ int64_t numpy_amax_int32(t_ndarray arr); int64_t numpy_amax_int64(t_ndarray arr); float numpy_amax_float32(t_ndarray arr); double numpy_amax_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_amax_complex64(t_ndarray arr); double complex numpy_amax_complex128(t_ndarray arr); +#endif /* numpy min/amin */ @@ -195,7 +196,9 @@ int64_t numpy_amin_int32(t_ndarray arr); int64_t numpy_amin_int64(t_ndarray arr); float numpy_amin_float32(t_ndarray arr); double numpy_amin_float64(t_ndarray arr); +#ifndef __NVCC__ float complex numpy_amin_complex64(t_ndarray arr); double complex numpy_amin_complex128(t_ndarray arr); +#endif #endif From af4d097d481a8f81112b0c9e993bd13d8b8a3f2b Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 034/150] Trigger tests on push to devel or main branch --- .github/workflows/anaconda_linux.yml | 2 +- .github/workflows/anaconda_windows.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macosx.yml | 2 +- .github/workflows/pickle.yml | 2 +- 
.github/workflows/pickle_wheel.yml | 2 +- .github/workflows/windows.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: 
github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 
'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: From 061996e13c8b8d7e0723a5a9f7fd12c50f1efd63 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:46:33 +0100 Subject: [PATCH 035/150] Add cuda workflow to test cuda developments on CI --- .github/actions/coverage_install/action.yml | 2 +- .github/actions/linux_install/action.yml | 10 +-- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 17 +++++ .github/actions/python_install/action.yml | 17 +++++ .github/workflows/cuda.yml | 83 +++++++++++++++++++++ ci_tools/bot_messages/show_tests.txt | 1 + ci_tools/bot_tools/bot_funcs.py | 12 +-- ci_tools/devel_branch_tests.py | 1 + ci_tools/json_pytest_output.py | 2 +- 10 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 .github/actions/pytest_run_cuda/action.yml create mode 100644 .github/actions/python_install/action.yml create mode 100644 .github/workflows/cuda.yml diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 
100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind shell: bash diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..b0bdc31f16 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} 
working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..52092a6e02 --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,17 @@ +name: 'Pyccel pytest commands generating Ccuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} 
+ PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. 
Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. - **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False otherwise. 
""" print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: From 32457ccf514fd77d537a5a56d84ffaf58ef89b43 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 036/150] Trigger tests on push to devel or main branch --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9111b47d52..cf52b1c624 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: 
github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 From 9f0388997a8341657e141fbabced9bb8d895100c Mon Sep 17 00:00:00 2001 From: bauom <40796259+bauom@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:50 +0100 Subject: [PATCH 037/150] [init] Adding CUDA language/compiler and CodePrinter (#32) This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter. Changes to stdlib: Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler --------- Co-authored-by: Mouad Elalj, EmilyBourne --- .dict_custom.txt | 1 + .github/actions/pytest_parallel/action.yml | 4 +- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 11 +- CHANGELOG.md | 6 + pyccel/codegen/codegen.py | 8 +- pyccel/codegen/compiling/compilers.py | 5 +- pyccel/codegen/pipeline.py | 5 +- pyccel/codegen/printing/cucode.py | 74 +++++++++++ pyccel/commands/console.py | 2 +- pyccel/compilers/default_compilers.py | 13 +- pyccel/naming/__init__.py | 4 +- pyccel/naming/cudanameclashchecker.py | 92 ++++++++++++++ pyccel/stdlib/numpy/numpy_c.c | 2 + pyccel/stdlib/numpy/numpy_c.h | 2 + pytest.ini | 1 + tests/conftest.py | 11 ++ tests/epyccel/test_base.py | 136 ++++++++++----------- 18 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 pyccel/codegen/printing/cucode.py create mode 100644 pyccel/naming/cudanameclashchecker.py diff --git a/.dict_custom.txt b/.dict_custom.txt index 82a6b10d31..ae99f31ed4 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,7 @@ Valgrind variadic subclasses oneAPI +Cuda getter setter bitwise diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml 
@@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index b0bdc31f16..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 52092a6e02..46f90552ed 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -1,4 +1,4 @@ -name: 'Pyccel pytest 
commands generating Ccuda' +name: 'Pyccel pytest commands generating Cuda' inputs: shell_cmd: description: 'Specifies the shell command (different for anaconda)' @@ -11,7 +11,14 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 695dc72cf7..d6928b0eca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. 
+ ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py index daf4559df4..8d4abb6bdb 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index c866ee5b1a..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 14087fb567..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ 
b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..86146b065b --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,74 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers + +from pyccel.ast.core import Import, Module + +from pyccel.errors.errors import Errors + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. + + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. 
+ """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + c_headers_imports = '' + local_imports = '' + + for imp in imports: + if imp.source in c_library_headers: + c_headers_imports += self._print(imp) + else: + local_imports += self._print(imp) + + imports = f'{c_headers_imports}\ + extern "C"{{\n\ + {local_imports}\ + }}' + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = 
{'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..971204e912 --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,92 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Handles name clash problems in Cuda +""" +from .languagenameclashchecker import LanguageNameClashChecker + +class CudaNameClashChecker(LanguageNameClashChecker): + """ + Class containing functions to help avoid problematic names in Cuda. + + A class which provides functionalities to check or propose variable names and + verify that they do not cause name clashes. Name clashes may be due to + new variables, or due to the use of reserved keywords. 
+ """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword + keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', + 'continue', 'default', 'do', 'double', 'else', 'enum', + 'extern', 'float', 'for', 'goto', 'if', 'inline', 'int', + 'long', 'register', 'restrict', 'return', 'short', 'signed', + 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', + 'unsigned', 'void', 'volatile', 'whie', '_Alignas', + '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128', + '_Decimal32', '_Decimal64', '_Generic', '_Imaginary', + '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray', + 'array_create', 'new_slice', 'array_slicing', 'alias_assign', + 'transpose_alias_assign', 'array_fill', 't_slice', + 'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2', + 'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5', + 'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8', + 'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11', + 'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14', + 'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS', + 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', + 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', + 'get_index', 'numpy_to_ndarray_strides', + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + + def has_clash(self, name, symbols): + """ + Indicate whether the proposed name causes any clashes. + + Checks if a suggested name conflicts with predefined + keywords or specified symbols,returning true for a clash. + This method is crucial for maintaining namespace integrity and + preventing naming conflicts in code generation processes. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + bool + True if the name is a collision. + False if the name is collision free. 
+ """ + return any(name == k for k in self.keywords) or \ + any(name == s for s in symbols) + + def get_collisionless_name(self, name, symbols): + """ + Get a valid name which doesn't collision with symbols or Cuda keywords. + + Find a new name based on the suggested name which will not cause + conflicts with Cuda keywords, does not appear in the provided symbols, + and is a valid name in Cuda code. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + str + A new name which is collision free. + """ + if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)): + # Ignore magic methods + return name + if name[0] == '_': + name = 'private'+name + return self._get_collisionless_name(name, symbols) diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c index 7c9ecbbf6b..bc56214772 100644 --- a/pyccel/stdlib/numpy/numpy_c.c +++ b/pyccel/stdlib/numpy/numpy_c.c @@ -17,8 +17,10 @@ double fsign(double x) return SIGN(x); } +#ifndef __NVCC__ /* numpy.sign for complex */ double complex csign(double complex x) { return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? 
-1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..a5082ef6e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) 
test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def 
test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) 
test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) 
test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = 
epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() From 57b643e79dbbbccd582fe69967c9d95748db81a8 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 15 May 2024 12:58:50 +0100 Subject: [PATCH 038/150] Fix import handling (#49) This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'. **Commit Summary** - Implemented new header printer for CUDA. - Added CUDA wrapper assignment - Instead of wrapping all local headers, wrap only C functions with extern 'C' --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 3 +- pyccel/codegen/printing/cucode.py | 45 ++++++++---- pyccel/codegen/python_wrapper.py | 4 ++ pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++ tests/epyccel/modules/cuda_module.py | 13 ++++ tests/epyccel/test_epyccel_modules.py | 13 ++++ 6 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py create mode 100644 tests/epyccel/modules/cuda_module.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d6928b0eca..b897e14385 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file. ### Added -- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. 
+- #48 : Fix incorrect handling of imports in `cuda`. ## \[UNRELEASED\] diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 86146b065b..277d2a3a6a 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -52,19 +52,7 @@ def _print_Module(self, expr): # Print imports last to be sure that all additional_imports have been collected imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] - c_headers_imports = '' - local_imports = '' - - for imp in imports: - if imp.source in c_library_headers: - c_headers_imports += self._print(imp) - else: - local_imports += self._print(imp) - - imports = f'{c_headers_imports}\ - extern "C"{{\n\ - {local_imports}\ - }}' + imports = ''.join(self._print(i) for i in imports) code = f'{imports}\n\ {global_variables}\n\ @@ -72,3 +60,34 @@ def _print_Module(self, expr): self.exit_scope() return code + + def _print_ModuleHeader(self, expr): + self.set_scope(expr.module.scope) + self._in_header = True + name = expr.module.name + + funcs = "" + cuda_headers = "" + for f in expr.module.funcs: + if not f.is_inline: + if 'kernel' in f.decorators: # Checking for 'kernel' decorator + cuda_headers += self.function_signature(f) + ';\n' + else: + funcs += self.function_signature(f) + ';\n' + global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private) + # Print imports last to be sure that all additional_imports have been collected + imports = [*expr.module.imports, *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + self._in_header = False + self.exit_scope() + function_declaration = f'{cuda_headers}\n\ + extern "C"{{\n\ + {funcs}\ + }}\n' + return '\n'.join((f"#ifndef {name.upper()}_H", + f"#define {name.upper()}_H", + global_variables, + function_declaration, + "#endif // {name.upper()}_H\n")) + diff --git a/pyccel/codegen/python_wrapper.py 
b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. +""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. 
+ this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. + """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable. which must be printed in + the wrapping module to expose the variable. 
+ """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) From af589a10e38815e4c0cce9b30e0f351818f419f4 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Thu, 27 Jun 2024 20:31:46 +0100 Subject: [PATCH 039/150] Add support for kernels (#42) This pull request addresses issue #28 by implementing a new feature in Pyccel that allows users to define custom GPU kernels. The syntax for creating these kernels is inspired by Numba. 
and I also need to fix issue #45 for testing purposes **Commit Summary** - Introduced KernelCall class - Added cuda printer methods _print_KernelCall and _print_FunctionDef to generate the corresponding CUDA representation for both kernel calls and definitions - Added IndexedFunctionCall represents an indexed function call - Added CUDA module and cuda.synchronize() - Fixing a bug that I found in the header: it does not import the necessary header for the used function --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> Co-authored-by: Emily Bourne --- .dict_custom.txt | 1 + CHANGELOG.md | 2 + docs/cuda.md | 23 +++ pyccel/ast/core.py | 37 ++++ pyccel/ast/cuda.py | 65 +++++++ pyccel/ast/cudaext.py | 42 +++++ pyccel/ast/utilities.py | 4 +- pyccel/codegen/printing/cucode.py | 46 ++++- pyccel/cuda/__init__.py | 10 + pyccel/cuda/cuda_sync_primitives.py | 16 ++ pyccel/decorators.py | 32 ++++ pyccel/errors/messages.py | 8 + pyccel/parser/semantic.py | 84 ++++++++- pyccel/parser/syntactic.py | 4 + tests/conftest.py | 9 + tests/cuda/test_kernel_semantic.py | 176 ++++++++++++++++++ tests/pyccel/scripts/kernel/hello_kernel.py | 19 ++ .../scripts/kernel/kernel_name_collision.py | 8 + tests/pyccel/test_pyccel.py | 22 ++- 19 files changed, 599 insertions(+), 9 deletions(-) create mode 100644 docs/cuda.md create mode 100644 pyccel/ast/cuda.py create mode 100644 pyccel/ast/cudaext.py create mode 100644 pyccel/cuda/__init__.py create mode 100644 pyccel/cuda/cuda_sync_primitives.py create mode 100644 tests/cuda/test_kernel_semantic.py create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py diff --git a/.dict_custom.txt b/.dict_custom.txt index ae99f31ed4..5d99e21194 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -118,3 +118,4 @@ datatyping datatypes indexable traceback +GPUs diff --git a/CHANGELOG.md b/CHANGELOG.md index b897e14385..717f638bf3 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #42 : Add support for custom kernel in`cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..de30d52b80 --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,23 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. + +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` \ No newline at end of file diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 013f206dd6..f0e5cc67f1 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. 
These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..b540f20993 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. +""" +from .internals import PyccelFunction + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef + +__all__ = ( + 'CudaSynchronize', +) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 277d2a3a6a..cd26843017 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -9,11 +9,12 @@ enabling the direct translation of high-level Pyccel expressions into CUDA code. """ -from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers +from pyccel.codegen.printing.ccode import CCodePrinter -from pyccel.ast.core import Import, Module +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil -from pyccel.errors.errors import Errors +from pyccel.errors.errors import Errors errors = Errors() @@ -61,6 +62,44 @@ def _print_Module(self, expr): self.exit_scope() return code + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. 
+ + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. + """ + cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorater} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr): }}\n' return '\n'.join((f"#ifndef {name.upper()}_H", f"#define {name.upper()}_H", + imports, global_variables, function_declaration, "#endif // {name.upper()}_H\n")) diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..e8542ad5d5 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. 
+""" +from .cuda_sync_primitives import synchronize + +__all__ = ['synchronize'] diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..77717a991f 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -19,6 +19,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +110,34 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. 
+ """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..09966d810c 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' + + + + diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index e94b9c8413..fde10d6317 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,9 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) from pyccel.parser.base import BasicParser from 
pyccel.parser.syntactic import SyntaxParser @@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. + """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + 
errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. @@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 2967f4999b..3af7f0728a 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/tests/conftest.py b/tests/conftest.py index 
a5082ef6e8..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + 
threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert 
errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + 
its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- /dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..b4757a3c31 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,19 @@ def 
test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From 91d610129ea8a4e3f695281cf6891c2691f3b79a Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:37:02 +0100 Subject: [PATCH 040/150] Updated CUDA Name Clash Checker By Added CUDA-specific keywords (#60) This pull request addresses issue #59 by adding more CUDA-specific keywords to enhance the checking of variable/function names and prevent name clashes --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 1 + pyccel/naming/cudanameclashchecker.py | 36 ++++++++++++++++++++++- pyccel/naming/languagenameclashchecker.py | 5 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 717f638bf3..afdabc3ab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py index 971204e912..c7aaa4952f 100644 --- a/pyccel/naming/cudanameclashchecker.py +++ b/pyccel/naming/cudanameclashchecker.py @@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker): verify that they do not cause name clashes. Name clashes may be due to new variables, or due to the use of reserved keywords. """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', @@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker): 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', 'get_index', 'numpy_to_ndarray_strides', - 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data' + '__global__', '__device__', '__host__','__constant__', '__shared__', + '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim', + 'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset', + 'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch', + 'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc', + 'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer', + 'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset', + 'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice', + 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize', + 'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord', + 'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet', + 'cuDeviceGetCount', 'cuDeviceGetName', + 'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy', + 'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload', + 'cuModuleGetFunction', 
'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) def has_clash(self, name, symbols): """ diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. 
From 9234e99958bd2b53b74f26670907465aee521302 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:04:22 +0100 Subject: [PATCH 041/150] add handle for custom device (#61) This pull request addresses issue https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new feature in Pyccel that allows users to define a custom device **Commit Summary** - Adding handler for custom device and its code generation. - Adding test --------- Co-authored-by: EmilyBourne --- CHANGELOG.md | 1 + docs/cuda.md | 25 ++++++++++++++++- pyccel/codegen/printing/cucode.py | 7 ++--- pyccel/decorators.py | 19 +++++++++++++ pyccel/errors/messages.py | 2 +- pyccel/parser/semantic.py | 7 ++++- tests/cuda/test_device_semantic.py | 31 ++++++++++++++++++++++ tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++ tests/pyccel/test_pyccel.py | 8 ++++++ 9 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 tests/cuda/test_device_semantic.py create mode 100644 tests/pyccel/scripts/kernel/device_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index afdabc3ab7..d5523ac5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in`cuda`. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md index de30d52b80..7643a4ac02 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -20,4 +20,27 @@ threadsperblock = 1 # Call your kernel function my_kernel[blockspergrid, threadsperblock]() -``` \ No newline at end of file +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. 
They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..7c01d93c47 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True): str Signature of the function. """ - cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' c_function_signature = super().function_signature(expr, print_arg_names) - return f'{cuda_decorater} {c_function_signature}' + return f'{cuda_decorator} {c_function_signature}' def _print_KernelCall(self, expr): func = expr.funcdef @@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr): cuda_headers = "" for f in expr.module.funcs: if not f.is_inline: - if 'kernel' in f.decorators: # Checking for 'kernel' decorator + if 'kernel' in f.decorators or 'device' in f.decorators: cuda_headers += self.function_signature(f) + ';\n' else: funcs += self.function_signature(f) + ';\n' diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 77717a991f..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -141,3 +142,21 @@ def __getitem__(self, args): return self._f return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. 
+ + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. + """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 09966d810c..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -166,7 +166,7 @@ INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' - +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index fde10d6317..7e8dd11bb4 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -136,9 +136,10 @@ UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, - MISSING_KERNEL_CONFIGURATION, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. 
""" + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b4757a3c31..2d55c6e1cb 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available): pyccel_test("scripts/kernel/kernel_name_collision.py", language="cuda", execute_code=gpu_available) +#------------------------------------------------------------------------------ + +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From c79b56d33e8f5a5239d79996fceb069cab71a163 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 15 Jul 2024 14:50:01 +0100 Subject: [PATCH 042/150] work in progress --- pyccel/ast/cudaext.py | 6 ++-- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 33 +++++++------------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index e107b6fe6f..4d3be8cb3f 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -88,7 +88,7 @@ def __init__(self, shape, fill_value, dtype='float', order='C'): self._shape = shape rank = len(self._shape) order = CudaNewarray._process_order(rank, order) - class_type = CudaArrayType(dtype, rank, order, 'device') + class_type = CudaArrayType(dtype, rank, order, 'host') super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device') @property def fill_value(self): @@ -102,7 +102,7 @@ class CudaAutoFill(CudaFull): def __init__(self, shape, dtype='float', order='C'): super().__init__(shape, Nil(), dtype, order) -class CudaEmpty(CudaAutoFill): +class CudaHostEmpty(CudaAutoFill): """ Represents a call to Cuda.host_empty for code generation. 
@@ -149,7 +149,7 @@ def __init__(self): cuda_funcs = { 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), 'full' : PyccelFunctionDef('full' , CudaFull), - 'empty' : PyccelFunctionDef('empty' , CudaEmpty), + 'host_empty' : PyccelFunctionDef('host_empty' , CudaHostEmpty), } cuda_mod = Module('cuda', diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index 34890002f3..47b0e5d420 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -5,20 +5,15 @@ void device_memory(void** devPtr, size_t size) cudaMalloc(devPtr, size); } -void managed_memory(void** devPtr, size_t size) -{ - cudaMallocManaged(devPtr, size); -} - void host_memory(void** devPtr, size_t size) { cudaMallocHost(devPtr, size); } -t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int64_t *shape, - enum e_types type, bool is_view) +t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , +enum e_memory_locations location) { t_ndarray arr; - void (*fun_ptr_arr[])(void**, size_t) = {managed_memory, host_memory, device_memory}; + void (*fun_ptr_arr[])(void**, size_t) = {host_memory, device_memory}; arr.nd = nd; arr.type = type; @@ -48,14 +43,20 @@ t_ndarray cuda_array_create(enum e_memory_locations location, int32_t nd, int } arr.is_view = is_view; arr.length = 1; - arr.shape = (int64_t *)malloc(arr.nd * sizeof(int64_t)); + cudaMallocManaged(&(arr.shape), arr.nd * sizeof(int64_t)); for (int32_t i = 0; i < arr.nd; i++) { arr.length *= shape[i]; arr.shape[i] = shape[i]; } arr.buffer_size = arr.length * arr.type_size; - + cudaMallocManaged(&(arr.strides), nd * sizeof(int64_t)); + for (int32_t i = 0; i < arr.nd; i++) + { + arr.strides[i] = 1; + for (int32_t j = i + 1; j < arr.nd; j++) + arr.strides[i] *= arr.shape[j]; + } if (!is_view) (*fun_ptr_arr[location])(&(arr.raw_data), arr.buffer_size); return (arr); @@ -82,14 +83,4 @@ int32_t 
cuda_free(t_ndarray arr) cudaFree(arr.shape); arr.shape = NULL; return (0); -} - -__host__ __device__ -int32_t cuda_free_pointer(t_ndarray arr) -{ - if (arr.is_view == false || arr.shape == NULL) - return (0); - cudaFree(arr.shape); - arr.shape = NULL; - return (0); -} +} \ No newline at end of file From 1c7ec43aa68e273e16572da7680bf9d6c6b909c3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 15 Jul 2024 22:28:12 +0100 Subject: [PATCH 043/150] work in progress --- pyccel/codegen/pipeline.py | 2 +- pyccel/codegen/printing/ccode.py | 1 + pyccel/codegen/printing/codeprinter.py | 2 +- pyccel/codegen/printing/cucode.py | 1 - 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index eb357fab74..f3e3be1602 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -404,7 +404,7 @@ def get_module_dependencies(parser, deps): verbose=verbose) timers["Compilation without wrapper"] = time.time() - start_compile_target_language - + print(100*'-') # Create shared library generated_filepath, shared_lib_timers = create_shared_library(codegen, mod_obj, diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index d81ed35293..dd16523284 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1354,6 +1354,7 @@ def _print_FuncAddressDeclare(self, expr): return f'{ret_type} (*{name})({arg_code});\n' def _print_Declare(self, expr): + print("1") if isinstance(expr.variable, InhomogeneousTupleVariable): return ''.join(self._print_Declare(Declare(v,intent=expr.intent, static=expr.static)) for v in expr.variable) diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py index 2dbcde1069..566cb4af7c 100644 --- a/pyccel/codegen/printing/codeprinter.py +++ b/pyccel/codegen/printing/codeprinter.py @@ -50,7 +50,6 @@ def doprint(self, expr): # Do the actual printing lines = self._print(expr).splitlines(True) - # Format 
the output return ''.join(self._format_code(lines)) @@ -83,6 +82,7 @@ def _print(self, expr): classes = type(expr).__mro__ for cls in classes: + print('_print_' + cls.__name__) print_method = '_print_' + cls.__name__ if hasattr(self, print_method): obj = getattr(self, print_method)(expr) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 6d8a6bc305..613665d587 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -180,7 +180,6 @@ def get_declare_type(self, expr): if not isinstance(class_type, CudaArrayType ) or rank <= 0: return super().get_declare_type(expr) self.add_import(c_imports['ndarrays']) - dtype = 't_ndarray ' return dtype From ba103eeb84982134a3cfc27b766cbd1b2ed8841e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 16 Jul 2024 15:36:25 +0100 Subject: [PATCH 044/150] work in progress --- pyccel/codegen/printing/ccode.py | 3 +-- pyccel/codegen/printing/codeprinter.py | 1 - pyccel/stdlib/ndarrays/ndarrays.c | 25 +++++++++++++++++++------ pyccel/stdlib/ndarrays/ndarrays.h | 7 +++++++ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index dd16523284..85b5071b18 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1317,7 +1317,7 @@ def get_declare_type(self, expr): if isinstance(expr.class_type, (HomogeneousSetType, HomogeneousListType)): dtype = self.get_c_type(expr.class_type) return dtype - if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType)): + if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType, CudaArrayType)): if expr.rank > 15: errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) @@ -1354,7 +1354,6 @@ def _print_FuncAddressDeclare(self, expr): return f'{ret_type} (*{name})({arg_code});\n' def _print_Declare(self, expr): - print("1") if isinstance(expr.variable, 
InhomogeneousTupleVariable): return ''.join(self._print_Declare(Declare(v,intent=expr.intent, static=expr.static)) for v in expr.variable) diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py index 566cb4af7c..c9d2b1a9e8 100644 --- a/pyccel/codegen/printing/codeprinter.py +++ b/pyccel/codegen/printing/codeprinter.py @@ -82,7 +82,6 @@ def _print(self, expr): classes = type(expr).__mro__ for cls in classes: - print('_print_' + cls.__name__) print_method = '_print_' + cls.__name__ if hasattr(self, print_method): obj = getattr(self, print_method)(expr) diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c index bceaeea429..784b222cbb 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.c +++ b/pyccel/stdlib/ndarrays/ndarrays.c @@ -46,6 +46,7 @@ void print_ndarray_memory(t_ndarray nd) case nd_bool: printf("[%d]", nd.nd_bool[i]); break; + #ifndef __NVCC__ case nd_cfloat: { double real = creal(nd.nd_cfloat[i]); @@ -60,6 +61,8 @@ void print_ndarray_memory(t_ndarray nd) printf("[%lf%+lfj]", real, imag); break; } + #endif + } ++i; } @@ -248,7 +251,7 @@ void _array_fill_double(double c, t_ndarray arr) for (int32_t i = 0; i < arr.length; i++) arr.nd_double[i] = c; } - +#ifndef __NVCC__ void _array_fill_cfloat(float complex c, t_ndarray arr) { if (c == 0) @@ -267,6 +270,7 @@ void _array_fill_cdouble(double complex c, t_ndarray arr) for (int32_t i = 0; i < arr.length; i++) arr.nd_cdouble[i] = c; } +#endif /* ** deallocation @@ -584,24 +588,24 @@ bool is_same_shape(t_ndarray a, t_ndarray b) if(elem_wise_cp == false)\ { \ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cfloat[i + offset] = (float complex)src.nd_##SRC_TYPE[i]; \ + dest->nd_cfloat[i + offset] = src.nd_cfloat[i]; \ }\ else \ {\ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = (float complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \ + dest->nd_cfloat[element_index(*dest, i, dest->nd) + 
offset] = src.nd_cfloat[element_index(src, i, src.nd)]; \ }\ break; \ case nd_cdouble: \ if(elem_wise_cp == false)\ { \ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cdouble[i + offset] = (double complex)src.nd_##SRC_TYPE[i]; \ + dest->nd_cdouble[i + offset] = src.nd_cdouble[i]; \ }\ else \ {\ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = (double complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \ + dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = src.nd_cdouble[element_index(src, i, src.nd)]; \ }\ break; \ } \ @@ -614,8 +618,10 @@ COPY_DATA_FROM_(int32) COPY_DATA_FROM_(int64) COPY_DATA_FROM_(float) COPY_DATA_FROM_(double) +#ifndef __NVCC__ COPY_DATA_FROM_(cfloat) COPY_DATA_FROM_(cdouble) +#endif void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp) { @@ -648,7 +654,7 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp case nd_double: copy_data_from_double(ds, src, offset, elem_wise_cp); break; - + #ifndef __NVCC__ case nd_cfloat: copy_data_from_cfloat(ds, src, offset, elem_wise_cp); break; @@ -656,6 +662,7 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp case nd_cdouble: copy_data_from_cdouble(ds, src, offset, elem_wise_cp); break; + #endif } } @@ -747,8 +754,10 @@ NUMPY_SUM_(int32, int64_t, int32) NUMPY_SUM_(int64, int64_t, int64) NUMPY_SUM_(float32, float, float) NUMPY_SUM_(float64, double, double) +#ifndef __NVCC__ NUMPY_SUM_(complex64, float complex, cfloat) NUMPY_SUM_(complex128, double complex, cdouble) +#endif #define NUMPY_AMAX_(NAME, TYPE, CTYPE) \ TYPE numpy_amax_##NAME(t_ndarray arr) \ @@ -782,8 +791,10 @@ NUMPY_AMAX_(int32, int64_t, int32) NUMPY_AMAX_(int64, int64_t, int64) NUMPY_AMAX_(float32, float, float) NUMPY_AMAX_(float64, double, double) +#ifndef __NVCC__ NUMPY_AMAX_(complex64, float complex, cfloat) NUMPY_AMAX_(complex128, double complex, cdouble) +#endif #define 
NUMPY_AMIN_(NAME, TYPE, CTYPE) \ TYPE numpy_amin_##NAME(t_ndarray arr) \ @@ -817,6 +828,8 @@ NUMPY_AMIN_(int32, int64_t, int32) NUMPY_AMIN_(int64, int64_t, int64) NUMPY_AMIN_(float32, float, float) NUMPY_AMIN_(float64, double, double) +#ifndef __NVCC__ NUMPY_AMIN_(complex64, float complex, cfloat) NUMPY_AMIN_(complex128, double complex, cdouble) +#endif diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h index 9764113705..2e1b8e793d 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.h +++ b/pyccel/stdlib/ndarrays/ndarrays.h @@ -10,6 +10,7 @@ # include # include # include +#include /* mapping the function array_fill to the correct type */ # define array_fill(c, arr) _Generic((c), int64_t : _array_fill_int64,\ @@ -93,8 +94,14 @@ typedef struct s_ndarray float *nd_float; double *nd_double; bool *nd_bool; + #ifndef __NVCC__ double complex *nd_cdouble; float complex *nd_cfloat; + #endif + #ifdef __NVCC__ + cuDoubleComplex *nd_cdouble; + cuFloatComplex *nd_cfloat; + #endif }; /* number of dimensions */ int32_t nd; From 947ce32bdff7eee993510a3f315e408d1dab9e48 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 16 Jul 2024 23:08:47 +0100 Subject: [PATCH 045/150] work in progress --- pyccel/ast/cudaext.py | 13 +++++++------ pyccel/codegen/pipeline.py | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 4d3be8cb3f..67648aebd6 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -77,7 +77,7 @@ class CudaFull(CudaNewarray): __slots__ = ('_fill_value','_shape') name = 'full' - def __init__(self, shape, fill_value, dtype='float', order='C'): + def __init__(self, shape, fill_value, dtype, order, memory_location): shape = process_shape(False, shape) init_dtype = dtype if(dtype is None): @@ -88,8 +88,8 @@ def __init__(self, shape, fill_value, dtype='float', order='C'): self._shape = shape rank = len(self._shape) order = CudaNewarray._process_order(rank, order) - class_type = 
CudaArrayType(dtype, rank, order, 'host') - super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = 'device') + class_type = CudaArrayType(dtype, rank, order, memory_location) + super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) @property def fill_value(self): return self._args[0] @@ -99,8 +99,8 @@ class CudaAutoFill(CudaFull): the fill_value is implicitly specified """ __slots__ = () - def __init__(self, shape, dtype='float', order='C'): - super().__init__(shape, Nil(), dtype, order) + def __init__(self, shape, dtype, order, memory_location): + super().__init__(shape, Nil(), dtype, order, memory_location = memory_location) class CudaHostEmpty(CudaAutoFill): """ @@ -122,7 +122,8 @@ class CudaHostEmpty(CudaAutoFill): __slots__ = () name = 'empty' def __init__(self, shape, dtype='float', order='C'): - super().__init__(shape, dtype, order) + memory_location = 'host' + super().__init__(shape, dtype, order , memory_location) @property def fill_value(self): diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index f3e3be1602..ff8d657704 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -404,7 +404,6 @@ def get_module_dependencies(parser, deps): verbose=verbose) timers["Compilation without wrapper"] = time.time() - start_compile_target_language - print(100*'-') # Create shared library generated_filepath, shared_lib_timers = create_shared_library(codegen, mod_obj, From 1eeed75b63ba91fbfa04e65d819f3009de0a2daa Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 10:00:53 +0100 Subject: [PATCH 046/150] adding test for host array --- tests/pyccel/scripts/kernel/host_array.py | 10 ++++++++++ tests/pyccel/test_pyccel.py | 5 +++++ 2 files changed, 15 insertions(+) create mode 100644 tests/pyccel/scripts/kernel/host_array.py diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py 
new file mode 100644 index 0000000000..b682e0cbbf --- /dev/null +++ b/tests/pyccel/scripts/kernel/host_array.py @@ -0,0 +1,10 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel import cuda + +a = cuda.host_empty(10, 'int') + +for i in range(10): + a[i] = 1 + +if __name__ == '__main__': + print(a) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 2d55c6e1cb..e3d98ef3ef 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -743,6 +743,11 @@ def test_kernel_collision(gpu_available): language="cuda", execute_code=gpu_available) #------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_device_array(gpu_available): + types = str + pyccel_test("scripts/kernel/host_array.py", + language="cuda", output_dtype=types, execute_code=gpu_available) @pytest.mark.cuda def test_device_call(gpu_available): From a8dbc18f5d1698675ea01079bc30c2c0ed9d7545 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 10:32:27 +0100 Subject: [PATCH 047/150] add documentation for CUDA arrays --- docs/cuda.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/cuda.md b/docs/cuda.md index 7643a4ac02..4c7a1e8370 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -44,3 +44,28 @@ my_kernel[1, 1]() ``` +## Cuda Arrays + +Pyccel provides support for CUDA arrays, enabling efficient data transfer between the host and the GPU device. Here are some of the key functions you can use: + +### cuda.host_empty + +The cuda.host_empty function allocates an empty array on the host. 
+ +```python +from pyccel import cuda + +a = cuda.host_empty(10, 'int') + +for i in range(10): + a[i] = 1 + +if __name__ == '__main__': + print(a) +``` + + + + + + From 5d91031b05e8ccb350b08e3bf42d46857df0e7c1 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 10:33:42 +0100 Subject: [PATCH 048/150] fix: remove unnecessary spaces --- docs/cuda.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/cuda.md b/docs/cuda.md index 4c7a1e8370..ff68b5c69a 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -63,9 +63,3 @@ for i in range(10): if __name__ == '__main__': print(a) ``` - - - - - - From 7c74bbd9beb9277b3035801116bdea1d9c21600e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 10:56:55 +0100 Subject: [PATCH 049/150] refactoring the code --- pyccel/ast/cudaext.py | 41 ++++++++++++++++++++++--------- pyccel/ast/cudatypes.py | 2 +- pyccel/codegen/printing/cucode.py | 4 +-- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 67648aebd6..c557e27f00 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -14,8 +14,7 @@ from .datatypes import VoidType from .core import Module, PyccelFunctionDef from .internals import PyccelFunction -from .internals import LiteralInteger -from .numpyext import process_dtype, process_shape , DtypePrecisionToCastFunction +from .numpyext import process_dtype, process_shape from .cudatypes import CudaArrayType @@ -46,6 +45,7 @@ class CudaNewarray(PyccelFunction): The memory location of the new array ('host' or 'device'). 
""" __slots__ = ('_class_type', '_init_dtype', '_memory_location') + name = 'newarray' property def init_dtype(self): @@ -63,17 +63,34 @@ def __init__(self, *arg,class_type, init_dtype, memory_location): self._memory_location = memory_location super().__init__(*arg) - @staticmethod - def _process_order(rank, order): - - if rank < 2: - return None - order = str(order).strip('\'"') - assert order in ('C', 'F') - return order class CudaFull(CudaNewarray): - + """ + Represents a call to `cuda.full` for code generation. + + Represents a call to the Cuda function `full` which creates an array + of a specified size and shape filled with a specified value. + Parameters + ---------- + shape : TypedAstNode + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + For a 1D array this is either a `LiteralInteger` or an expression. + For a ND array this is a `TypedAstNode` with the class type HomogeneousTupleType. + + fill_value : TypedAstNode + Fill value. + + dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional + Datatype for the constructed array. + If `None` the dtype of the fill value is used. + + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. + + memory_location : str + The memory location of the new array ('host' or 'device'). 
+ """ __slots__ = ('_fill_value','_shape') name = 'full' @@ -87,7 +104,6 @@ def __init__(self, shape, fill_value, dtype, order, memory_location): self._shape = shape rank = len(self._shape) - order = CudaNewarray._process_order(rank, order) class_type = CudaArrayType(dtype, rank, order, memory_location) super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) @property @@ -99,6 +115,7 @@ class CudaAutoFill(CudaFull): the fill_value is implicitly specified """ __slots__ = () + name = 'auto_fill' def __init__(self, shape, dtype, order, memory_location): super().__init__(shape, Nil(), dtype, order, memory_location = memory_location) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 29952c3efc..5f7ca5554f 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -9,7 +9,7 @@ from functools import lru_cache import numpy as np -from .datatypes import FixedSizeNumericType, HomogeneousContainerType, PythonNativeBool +from .datatypes import FixedSizeNumericType, HomogeneousContainerType from pyccel.utilities.metaclasses import ArgumentSingleton from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type from .numpytypes import NumpyNDArrayType diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 613665d587..5646c15dde 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -16,11 +16,9 @@ from pyccel.errors.errors import Errors from pyccel.ast.cudatypes import CudaArrayType -from pyccel.ast.datatypes import HomogeneousContainerType, PythonNativeBool +from pyccel.ast.datatypes import HomogeneousContainerType from pyccel.ast.numpytypes import numpy_precision_map from pyccel.ast.cudaext import CudaFull -from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type -from pyccel.ast.numpytypes import NumpyInt8Type, NumpyInt16Type, NumpyInt32Type, 
NumpyInt64Type errors = Errors() From 95cf8214e828891815b5f6729f0e62fc7a22f24f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 11:21:00 +0100 Subject: [PATCH 050/150] refactoring the code --- pyccel/ast/cudaext.py | 8 +++----- pyccel/ast/cudatypes.py | 7 ++++++- pyccel/codegen/printing/cucode.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index c557e27f00..5687e4f179 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -13,7 +13,6 @@ from .datatypes import VoidType from .core import Module, PyccelFunctionDef -from .internals import PyccelFunction from .numpyext import process_dtype, process_shape from .cudatypes import CudaArrayType @@ -21,9 +20,9 @@ __all__ = ( 'CudaSynchronize', - 'CudaNewarray' - 'CudaFull' - 'CudaEmpty' + 'CudaNewarray', + 'CudaFull', + 'CudaEmpty', ) class CudaNewarray(PyccelFunction): @@ -141,7 +140,6 @@ class CudaHostEmpty(CudaAutoFill): def __init__(self, shape, dtype='float', order='C'): memory_location = 'host' super().__init__(shape, dtype, order , memory_location) - @property def fill_value(self): """ diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 5f7ca5554f..872a87f284 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -29,7 +29,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): order : str The order of the memory layout for the new NumPy array. memory_location : str - The memory location of the new cuda array. + The memory location of the new cuda array ('host' or 'device'). """ __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') @@ -45,6 +45,11 @@ def __init__(self, dtype, rank, order, memory_location): @property def memory_location(self): + """ + The memory location of the new array ('host' or 'device'). + + The memory location of the new array ('host' or 'device'). 
+ """ return self._memory_location @lru_cache diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 5646c15dde..13e0eec0f7 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -46,7 +46,7 @@ class CudaCodePrinter(CCodePrinter): """ language = "cuda" - + def __init__(self, filename, prefix_module = None): errors.set_target(filename) From 0fc4a1bca6d881a6afc1c2247f1d15de3ad6dfb8 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 11:46:47 +0100 Subject: [PATCH 051/150] refactoring the code --- pyccel/ast/cudaext.py | 3 --- pyccel/codegen/printing/cucode.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 5687e4f179..96b205ceee 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -105,9 +105,6 @@ def __init__(self, shape, fill_value, dtype, order, memory_location): rank = len(self._shape) class_type = CudaArrayType(dtype, rank, order, memory_location) super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) - @property - def fill_value(self): - return self._args[0] class CudaAutoFill(CudaFull): """ Abstract class for all classes which inherit from NumpyFull but diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 13e0eec0f7..005b2200d5 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -173,6 +173,23 @@ def _print_Deallocate(self, expr): else: return f"cuda_free({var_code});\n" def get_declare_type(self, expr): + """ + Get the string which describes the type in a declaration. + + This function returns the code which describes the type + of the `expr` object such that the declaration can be written as: + `f"{self.get_declare_type(expr)} {expr.name}"` + + Parameters + ---------- + expr : Variable + The variable whose type should be described. 
+ + Returns + ------- + str + The code describing the type. + """ class_type = expr.class_type rank = expr.rank if not isinstance(class_type, CudaArrayType ) or rank <= 0: From f43fba86148360e482b1d01139cf2702fe59d1fc Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 11:50:17 +0100 Subject: [PATCH 052/150] refactoring the code --- pyccel/ast/cudaext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 96b205ceee..49e3670cf7 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -22,7 +22,7 @@ 'CudaSynchronize', 'CudaNewarray', 'CudaFull', - 'CudaEmpty', + 'CudaHostEmpty' ) class CudaNewarray(PyccelFunction): @@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction): __slots__ = ('_class_type', '_init_dtype', '_memory_location') name = 'newarray' - property + @property def init_dtype(self): """ The dtype provided to the function when it was initialised in Python. From f4546e14aceec9f36062bbba78ee4c851f837fb3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 12:13:47 +0100 Subject: [PATCH 053/150] fix a doc problem --- pyccel/ast/cudaext.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 49e3670cf7..f9fbb8d42a 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -33,7 +33,7 @@ class CudaNewarray(PyccelFunction): to `Allocate` should inherit. Parameters - + ---------- class_type : NumpyNDArrayType The type of the new array. @@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction): __slots__ = ('_class_type', '_init_dtype', '_memory_location') name = 'newarray' - @property + property def init_dtype(self): """ The dtype provided to the function when it was initialised in Python. @@ -69,6 +69,7 @@ class CudaFull(CudaNewarray): Represents a call to the Cuda function `full` which creates an array of a specified size and shape filled with a specified value. 
+ Parameters ---------- shape : TypedAstNode @@ -107,8 +108,10 @@ def __init__(self, shape, fill_value, dtype, order, memory_location): super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) class CudaAutoFill(CudaFull): - """ Abstract class for all classes which inherit from NumpyFull but - the fill_value is implicitly specified + """ + Abstract class for all classes which inherit from CudaFull. + + Abstract class for all classes which inherit from CudaFull. """ __slots__ = () name = 'auto_fill' From ec0132b322624525ddd27fcd29e53522a28d7e99 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 14:20:53 +0100 Subject: [PATCH 054/150] fix a doc problem --- pyccel/ast/cudaext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index f9fbb8d42a..fa63357876 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -46,7 +46,7 @@ class CudaNewarray(PyccelFunction): __slots__ = ('_class_type', '_init_dtype', '_memory_location') name = 'newarray' - property + @property def init_dtype(self): """ The dtype provided to the function when it was initialised in Python. From 9030dfe2af87f8a42f5355ca0a331696045f34a7 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 14:34:20 +0100 Subject: [PATCH 055/150] fix a doc problem --- pyccel/ast/cudaext.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index fa63357876..90ffc5341e 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -34,6 +34,9 @@ class CudaNewarray(PyccelFunction): Parameters ---------- + *args : tuple of TypedAstNode + The arguments of the superclass PyccelFunction. + class_type : NumpyNDArrayType The type of the new array. 
@@ -56,7 +59,7 @@ def init_dtype(self): """ return self._init_dtype - def __init__(self, *arg,class_type, init_dtype, memory_location): + def __init__(self, *arg ,class_type, init_dtype, memory_location): self._class_type = class_type self._init_dtype = init_dtype self._memory_location = memory_location @@ -107,18 +110,8 @@ def __init__(self, shape, fill_value, dtype, order, memory_location): class_type = CudaArrayType(dtype, rank, order, memory_location) super().__init__(fill_value, class_type = class_type, init_dtype = init_dtype, memory_location = memory_location) -class CudaAutoFill(CudaFull): - """ - Abstract class for all classes which inherit from CudaFull. - - Abstract class for all classes which inherit from CudaFull. - """ - __slots__ = () - name = 'auto_fill' - def __init__(self, shape, dtype, order, memory_location): - super().__init__(shape, Nil(), dtype, order, memory_location = memory_location) -class CudaHostEmpty(CudaAutoFill): +class CudaHostEmpty(CudaFull): """ Represents a call to Cuda.host_empty for code generation. @@ -139,7 +132,7 @@ class CudaHostEmpty(CudaAutoFill): name = 'empty' def __init__(self, shape, dtype='float', order='C'): memory_location = 'host' - super().__init__(shape, dtype, order , memory_location) + super().__init__(shape, Nil(), dtype, order , memory_location) @property def fill_value(self): """ From e47373cfa021b24cbf9f98457d004b10c707539c Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 14:50:22 +0100 Subject: [PATCH 056/150] fix a doc problem --- pyccel/ast/cudaext.py | 8 ++++---- pyccel/ast/cudatypes.py | 2 ++ pyccel/ast/variable.py | 1 - 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 90ffc5341e..8bf790b968 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -54,17 +54,17 @@ def init_dtype(self): """ The dtype provided to the function when it was initialised in Python. 
- The dtype provided to the function when it was initialised in Python. - If no dtype was provided then this should equal `None`. + The dtype provided to the function when it was initialised in Python + if no dtype was provided then this should equal `None`. """ return self._init_dtype - def __init__(self, *arg ,class_type, init_dtype, memory_location): + def __init__(self, *args ,class_type, init_dtype, memory_location): self._class_type = class_type self._init_dtype = init_dtype self._memory_location = memory_location - super().__init__(*arg) + super().__init__(*args) class CudaFull(CudaNewarray): """ diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 872a87f284..b8b063a116 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -21,6 +21,8 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): Class representing the Cuda array type + Parameters + ---------- dtype : NumpyNumericType | PythonNativeBool | GenericType The internal datatype of the object (GenericType is allowed for external libraries, e.g. MPI). diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index c8b9fd95ef..051cf631b7 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ -109,7 +109,6 @@ def __init__( name, *, memory_handling='stack', - memory_location='host', is_const=False, is_target=False, is_optional=False, From eee620207740cbc87561ba37baff42d4105b0ba2 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 15:03:40 +0100 Subject: [PATCH 057/150] fix a doc problem --- pyccel/ast/cudaext.py | 4 ++-- pyccel/ast/cudatypes.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 8bf790b968..99efd2c4ed 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -54,8 +54,8 @@ def init_dtype(self): """ The dtype provided to the function when it was initialised in Python. 
- The dtype provided to the function when it was initialised in Python - if no dtype was provided then this should equal `None`. + The dtype provided to the function when it was initialised in Python. + If no dtype was provided then this should equal `None`. """ return self._init_dtype diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index b8b063a116..70c1fc06c8 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -9,10 +9,12 @@ from functools import lru_cache import numpy as np -from .datatypes import FixedSizeNumericType, HomogeneousContainerType from pyccel.utilities.metaclasses import ArgumentSingleton -from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type -from .numpytypes import NumpyNDArrayType + +from .datatypes import FixedSizeNumericType, HomogeneousContainerType +from .datatypes import pyccel_type_to_original_type, original_type_to_pyccel_type + +from .numpytypes import NumpyNDArrayType class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): From afd05c1b329687d7e8e454256417baae2c98d8c3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 16:00:08 +0100 Subject: [PATCH 058/150] make sure tests are running successfully --- pyccel/cuda/__init__.py | 3 ++- pyccel/cuda/cuda_arrays.py | 29 +++++++++++++++++++++++ tests/pyccel/scripts/kernel/host_array.py | 2 +- tests/pyccel/test_pyccel.py | 6 +++-- 4 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 pyccel/cuda/cuda_arrays.py diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py index e8542ad5d5..ae4be32387 100644 --- a/pyccel/cuda/__init__.py +++ b/pyccel/cuda/__init__.py @@ -6,5 +6,6 @@ This module is for exposing the CudaSubmodule functions. 
""" from .cuda_sync_primitives import synchronize +from .cuda_arrays import host_empty -__all__ = ['synchronize'] +__all__ = ['synchronize', 'host_empty'] diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py new file mode 100644 index 0000000000..19be596a54 --- /dev/null +++ b/pyccel/cuda/cuda_arrays.py @@ -0,0 +1,29 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains cuda_arrays methods for Pyccel. +""" + +def host_empty(shape): + """ + Create an empty array on the host. + + Create an empty array on the host. + + Parameters + ---------- + shape : tuple of int or int + The shape of the array. + + Returns + ------- + a : array + The empty array on the host. 
+ """ + import numpy as np + a = np.empty(shape) + return a + + diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py index b682e0cbbf..cacbcc1da5 100644 --- a/tests/pyccel/scripts/kernel/host_array.py +++ b/tests/pyccel/scripts/kernel/host_array.py @@ -1,7 +1,7 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel import cuda -a = cuda.host_empty(10, 'int') +a = cuda.host_empty(10) for i in range(10): a[i] = 1 diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index e3d98ef3ef..9b0f0d443e 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -744,11 +744,13 @@ def test_kernel_collision(gpu_available): #------------------------------------------------------------------------------ @pytest.mark.cuda -def test_device_array(gpu_available): - types = str +def test_host_array(gpu_available): + types = float pyccel_test("scripts/kernel/host_array.py", language="cuda", output_dtype=types, execute_code=gpu_available) +#------------------------------------------------------------------------------ + @pytest.mark.cuda def test_device_call(gpu_available): types = str From ff950290b25ab60f2ba36e378bfe4d583e0d9a54 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 17 Jul 2024 16:20:48 +0100 Subject: [PATCH 059/150] fix a doc problem --- .dict_custom.txt | 1 + pyccel/cuda/cuda_arrays.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.dict_custom.txt b/.dict_custom.txt index 5d99e21194..8981c8bbef 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -111,6 +111,7 @@ variadic subclasses oneAPI Cuda +cuda getter setter bitwise diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py index 19be596a54..cbdf938c0c 100644 --- a/pyccel/cuda/cuda_arrays.py +++ b/pyccel/cuda/cuda_arrays.py @@ -19,7 +19,7 @@ def host_empty(shape): Returns ------- - a : array + array The empty array on the host. 
""" import numpy as np From 21f70c042aed9f0dde6643d889784f711c6aa0de Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 060/150] Trigger tests on push to devel or main branch --- .github/workflows/anaconda_linux.yml | 2 +- .github/workflows/anaconda_windows.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macosx.yml | 2 +- .github/workflows/pickle.yml | 2 +- .github/workflows/pickle_wheel.yml | 2 +- .github/workflows/windows.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 
'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' 
|| github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: From 13efc4e19c17cb0a3ee213ffdedd7290110aca65 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:46:33 +0100 Subject: [PATCH 061/150] Add cuda workflow to test cuda developments on CI --- .github/actions/coverage_install/action.yml | 2 +- .github/actions/linux_install/action.yml | 10 +-- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 17 +++++ .github/actions/python_install/action.yml | 17 +++++ .github/workflows/cuda.yml | 83 +++++++++++++++++++++ ci_tools/bot_messages/show_tests.txt | 1 + ci_tools/bot_tools/bot_funcs.py | 12 +-- ci_tools/devel_branch_tests.py | 1 + ci_tools/json_pytest_output.py | 2 +- 10 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 .github/actions/pytest_run_cuda/action.yml create mode 100644 .github/actions/python_install/action.yml create mode 100644 .github/workflows/cuda.yml diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + 
SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind shell: bash diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..b0bdc31f16 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} 
working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..52092a6e02 --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,17 @@ +name: 'Pyccel pytest commands generating Ccuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ 
b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} + PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + 
retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. - **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False 
otherwise. """ print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: From e07587d8cec95efcb0bcb53bda468a549c4d941c Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 062/150] Trigger tests on push to devel or main branch --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9111b47d52..cf52b1c624 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: 
github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 From 94b13575c2baad7d8c2d6bcce0e4c443716c0b47 Mon Sep 17 00:00:00 2001 From: bauom <40796259+bauom@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:50 +0100 Subject: [PATCH 063/150] [init] Adding CUDA language/compiler and CodePrinter (#32) This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter. Changes to stdlib: Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler --------- Co-authored-by: Mouad Elalj, EmilyBourne --- .dict_custom.txt | 1 + .github/actions/pytest_parallel/action.yml | 4 +- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 11 +- CHANGELOG.md | 6 + pyccel/codegen/codegen.py | 8 +- pyccel/codegen/compiling/compilers.py | 5 +- pyccel/codegen/pipeline.py | 5 +- pyccel/codegen/printing/cucode.py | 74 +++++++++++ pyccel/commands/console.py | 2 +- pyccel/compilers/default_compilers.py | 13 +- pyccel/naming/__init__.py | 4 +- pyccel/naming/cudanameclashchecker.py | 92 ++++++++++++++ pyccel/stdlib/numpy/numpy_c.c | 2 + pyccel/stdlib/numpy/numpy_c.h | 2 + pytest.ini | 1 + tests/conftest.py | 11 ++ tests/epyccel/test_base.py | 136 ++++++++++----------- 18 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 pyccel/codegen/printing/cucode.py create mode 100644 pyccel/naming/cudanameclashchecker.py diff --git a/.dict_custom.txt b/.dict_custom.txt index b9240f6215..161337d33b 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,7 @@ Valgrind variadic subclasses oneAPI +Cuda getter setter bitwise diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml 
@@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index b0bdc31f16..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 52092a6e02..46f90552ed 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -1,4 +1,4 @@ -name: 'Pyccel pytest 
commands generating Ccuda' +name: 'Pyccel pytest commands generating Cuda' inputs: shell_cmd: description: 'Specifies the shell command (different for anaconda)' @@ -11,7 +11,14 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 60e982aa70..a7048916d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. 
+ ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py index a7a02d7804..33721a48e8 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index c866ee5b1a..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 14087fb567..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ 
b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..86146b065b --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,74 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers + +from pyccel.ast.core import Import, Module + +from pyccel.errors.errors import Errors + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. + + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. 
+ """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + c_headers_imports = '' + local_imports = '' + + for imp in imports: + if imp.source in c_library_headers: + c_headers_imports += self._print(imp) + else: + local_imports += self._print(imp) + + imports = f'{c_headers_imports}\ + extern "C"{{\n\ + {local_imports}\ + }}' + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = 
{'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..971204e912 --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,92 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Handles name clash problems in Cuda +""" +from .languagenameclashchecker import LanguageNameClashChecker + +class CudaNameClashChecker(LanguageNameClashChecker): + """ + Class containing functions to help avoid problematic names in Cuda. + + A class which provides functionalities to check or propose variable names and + verify that they do not cause name clashes. Name clashes may be due to + new variables, or due to the use of reserved keywords. 
+ """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword + keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', + 'continue', 'default', 'do', 'double', 'else', 'enum', + 'extern', 'float', 'for', 'goto', 'if', 'inline', 'int', + 'long', 'register', 'restrict', 'return', 'short', 'signed', + 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', + 'unsigned', 'void', 'volatile', 'whie', '_Alignas', + '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128', + '_Decimal32', '_Decimal64', '_Generic', '_Imaginary', + '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray', + 'array_create', 'new_slice', 'array_slicing', 'alias_assign', + 'transpose_alias_assign', 'array_fill', 't_slice', + 'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2', + 'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5', + 'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8', + 'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11', + 'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14', + 'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS', + 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', + 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', + 'get_index', 'numpy_to_ndarray_strides', + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + + def has_clash(self, name, symbols): + """ + Indicate whether the proposed name causes any clashes. + + Checks if a suggested name conflicts with predefined + keywords or specified symbols,returning true for a clash. + This method is crucial for maintaining namespace integrity and + preventing naming conflicts in code generation processes. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + bool + True if the name is a collision. + False if the name is collision free. 
+ """ + return any(name == k for k in self.keywords) or \ + any(name == s for s in symbols) + + def get_collisionless_name(self, name, symbols): + """ + Get a valid name which doesn't collision with symbols or Cuda keywords. + + Find a new name based on the suggested name which will not cause + conflicts with Cuda keywords, does not appear in the provided symbols, + and is a valid name in Cuda code. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + str + A new name which is collision free. + """ + if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)): + # Ignore magic methods + return name + if name[0] == '_': + name = 'private'+name + return self._get_collisionless_name(name, symbols) diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c index 7c9ecbbf6b..bc56214772 100644 --- a/pyccel/stdlib/numpy/numpy_c.c +++ b/pyccel/stdlib/numpy/numpy_c.c @@ -17,8 +17,10 @@ double fsign(double x) return SIGN(x); } +#ifndef __NVCC__ /* numpy.sign for complex */ double complex csign(double complex x) { return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? 
-1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..a5082ef6e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) 
test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def 
test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) 
test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) 
test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = 
epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() From c0006dd94302e9e4781ca960e67832a91b0868ca Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 15 May 2024 12:58:50 +0100 Subject: [PATCH 064/150] Fix import handling (#49) This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'. **Commit Summary** - Implemented new header printer for CUDA. - Added CUDA wrapper assignment - Instead of wrapping all local headers, wrap only C functions with extern 'C' --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 3 +- pyccel/codegen/printing/cucode.py | 45 ++++++++---- pyccel/codegen/python_wrapper.py | 4 ++ pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++ tests/epyccel/modules/cuda_module.py | 13 ++++ tests/epyccel/test_epyccel_modules.py | 13 ++++ 6 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py create mode 100644 tests/epyccel/modules/cuda_module.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a7048916d6..8885e66107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file. ### Added -- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. 
+- #48 : Fix incorrect handling of imports in `cuda`. ## \[UNRELEASED\] diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 86146b065b..277d2a3a6a 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -52,19 +52,7 @@ def _print_Module(self, expr): # Print imports last to be sure that all additional_imports have been collected imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] - c_headers_imports = '' - local_imports = '' - - for imp in imports: - if imp.source in c_library_headers: - c_headers_imports += self._print(imp) - else: - local_imports += self._print(imp) - - imports = f'{c_headers_imports}\ - extern "C"{{\n\ - {local_imports}\ - }}' + imports = ''.join(self._print(i) for i in imports) code = f'{imports}\n\ {global_variables}\n\ @@ -72,3 +60,34 @@ def _print_Module(self, expr): self.exit_scope() return code + + def _print_ModuleHeader(self, expr): + self.set_scope(expr.module.scope) + self._in_header = True + name = expr.module.name + + funcs = "" + cuda_headers = "" + for f in expr.module.funcs: + if not f.is_inline: + if 'kernel' in f.decorators: # Checking for 'kernel' decorator + cuda_headers += self.function_signature(f) + ';\n' + else: + funcs += self.function_signature(f) + ';\n' + global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private) + # Print imports last to be sure that all additional_imports have been collected + imports = [*expr.module.imports, *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + self._in_header = False + self.exit_scope() + function_declaration = f'{cuda_headers}\n\ + extern "C"{{\n\ + {funcs}\ + }}\n' + return '\n'.join((f"#ifndef {name.upper()}_H", + f"#define {name.upper()}_H", + global_variables, + function_declaration, + "#endif // {name.upper()}_H\n")) + diff --git a/pyccel/codegen/python_wrapper.py 
b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. +""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. 
+ this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. + """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable. which must be printed in + the wrapping module to expose the variable. 
+ """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) From 1145d9151318787947ae30e2775ddfd243ddaeb3 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Thu, 27 Jun 2024 20:31:46 +0100 Subject: [PATCH 065/150] Add support for kernels (#42) This pull request addresses issue #28 by implementing a new feature in Pyccel that allows users to define custom GPU kernels. The syntax for creating these kernels is inspired by Numba. 
and I also need to fix issue #45 for testing purposes **Commit Summary** - Introduced KernelCall class - Added cuda printer methods _print_KernelCall and _print_FunctionDef to generate the corresponding CUDA representation for both kernel calls and definitions - Added IndexedFunctionCall represents an indexed function call - Added CUDA module and cuda.synchronize() - Fixing a bug that I found in the header: it does not import the necessary header for the used function --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> Co-authored-by: Emily Bourne --- .dict_custom.txt | 1 + CHANGELOG.md | 2 + docs/cuda.md | 23 +++ pyccel/ast/core.py | 37 ++++ pyccel/ast/cuda.py | 65 +++++++ pyccel/ast/cudaext.py | 42 +++++ pyccel/ast/utilities.py | 4 +- pyccel/codegen/printing/cucode.py | 46 ++++- pyccel/cuda/__init__.py | 10 + pyccel/cuda/cuda_sync_primitives.py | 16 ++ pyccel/decorators.py | 32 ++++ pyccel/errors/messages.py | 8 + pyccel/parser/semantic.py | 84 ++++++++- pyccel/parser/syntactic.py | 4 + tests/conftest.py | 9 + tests/cuda/test_kernel_semantic.py | 176 ++++++++++++++++++ tests/pyccel/scripts/kernel/hello_kernel.py | 19 ++ .../scripts/kernel/kernel_name_collision.py | 8 + tests/pyccel/test_pyccel.py | 22 ++- 19 files changed, 599 insertions(+), 9 deletions(-) create mode 100644 docs/cuda.md create mode 100644 pyccel/ast/cuda.py create mode 100644 pyccel/ast/cudaext.py create mode 100644 pyccel/cuda/__init__.py create mode 100644 pyccel/cuda/cuda_sync_primitives.py create mode 100644 tests/cuda/test_kernel_semantic.py create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py diff --git a/.dict_custom.txt b/.dict_custom.txt index 161337d33b..6ddf80b1ff 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -120,3 +120,4 @@ indexable traceback STC gFTL +GPUs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8885e66107..10bec59084 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #42 : Add support for custom kernel in`cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..de30d52b80 --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,23 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. + +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` \ No newline at end of file diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 8981ddc160..2758b75be2 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. 
These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..b540f20993 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. +""" +from .internals import PyccelFunction + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef + +__all__ = ( + 'CudaSynchronize', +) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 277d2a3a6a..cd26843017 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -9,11 +9,12 @@ enabling the direct translation of high-level Pyccel expressions into CUDA code. """ -from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers +from pyccel.codegen.printing.ccode import CCodePrinter -from pyccel.ast.core import Import, Module +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil -from pyccel.errors.errors import Errors +from pyccel.errors.errors import Errors errors = Errors() @@ -61,6 +62,44 @@ def _print_Module(self, expr): self.exit_scope() return code + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. 
+ + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. + """ + cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorater} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr): }}\n' return '\n'.join((f"#ifndef {name.upper()}_H", f"#define {name.upper()}_H", + imports, global_variables, function_declaration, "#endif // {name.upper()}_H\n")) diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..e8542ad5d5 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. 
+""" +from .cuda_sync_primitives import synchronize + +__all__ = ['synchronize'] diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..77717a991f 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -19,6 +19,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +110,34 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. 
+ """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..09966d810c 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' + + + + diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index c7ce8d9d7e..9519bc8a63 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,9 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) from pyccel.parser.base import BasicParser from 
pyccel.parser.syntactic import SyntaxParser @@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. + """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + 
errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. @@ -2815,6 +2880,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 2967f4999b..3af7f0728a 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1102,6 +1104,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/tests/conftest.py b/tests/conftest.py index 
a5082ef6e8..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + 
threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert 
errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + 
its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- /dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..b4757a3c31 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,19 @@ def 
test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From 31aae9d43845e6a3ea037d2b53428681af13946e Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:37:02 +0100 Subject: [PATCH 066/150] Updated CUDA Name Clash Checker By Added CUDA-specific keywords (#60) This pull request addresses issue #59 by adding more CUDA-specific keywords to enhance the checking of variable/function names and prevent name clashes --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 1 + pyccel/naming/cudanameclashchecker.py | 36 ++++++++++++++++++++++- pyccel/naming/languagenameclashchecker.py | 5 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10bec59084..0539783922 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py index 971204e912..c7aaa4952f 100644 --- a/pyccel/naming/cudanameclashchecker.py +++ b/pyccel/naming/cudanameclashchecker.py @@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker): verify that they do not cause name clashes. Name clashes may be due to new variables, or due to the use of reserved keywords. """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', @@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker): 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', 'get_index', 'numpy_to_ndarray_strides', - 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data' + '__global__', '__device__', '__host__','__constant__', '__shared__', + '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim', + 'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset', + 'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch', + 'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc', + 'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer', + 'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset', + 'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice', + 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize', + 'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord', + 'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet', + 'cuDeviceGetCount', 'cuDeviceGetName', + 'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy', + 'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload', + 'cuModuleGetFunction', 
'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) def has_clash(self, name, symbols): """ diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. 
From 21c93b5bfb93ea9af843436ec4c17fc0b17898e7 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:04:22 +0100 Subject: [PATCH 067/150] add handle for custom device (#61) This pull request addresses issue https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new feature in Pyccel that allows users to define a custom device **Commit Summary** - Adding handler for custom device and its code generation. - Adding test --------- Co-authored-by: EmilyBourne --- CHANGELOG.md | 1 + docs/cuda.md | 25 ++++++++++++++++- pyccel/codegen/printing/cucode.py | 7 ++--- pyccel/decorators.py | 19 +++++++++++++ pyccel/errors/messages.py | 2 +- pyccel/parser/semantic.py | 7 ++++- tests/cuda/test_device_semantic.py | 31 ++++++++++++++++++++++ tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++ tests/pyccel/test_pyccel.py | 8 ++++++ 9 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 tests/cuda/test_device_semantic.py create mode 100644 tests/pyccel/scripts/kernel/device_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0539783922..1b9fa3e635 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in`cuda`. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md index de30d52b80..7643a4ac02 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -20,4 +20,27 @@ threadsperblock = 1 # Call your kernel function my_kernel[blockspergrid, threadsperblock]() -``` \ No newline at end of file +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. 
They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..7c01d93c47 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True): str Signature of the function. """ - cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' c_function_signature = super().function_signature(expr, print_arg_names) - return f'{cuda_decorater} {c_function_signature}' + return f'{cuda_decorator} {c_function_signature}' def _print_KernelCall(self, expr): func = expr.funcdef @@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr): cuda_headers = "" for f in expr.module.funcs: if not f.is_inline: - if 'kernel' in f.decorators: # Checking for 'kernel' decorator + if 'kernel' in f.decorators or 'device' in f.decorators: cuda_headers += self.function_signature(f) + ';\n' else: funcs += self.function_signature(f) + ';\n' diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 77717a991f..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -141,3 +142,21 @@ def __getitem__(self, args): return self._f return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. 
+ + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. + """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 09966d810c..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -166,7 +166,7 @@ INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' - +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index 9519bc8a63..b3a7ecd6b1 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -136,9 +136,10 @@ UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, - MISSING_KERNEL_CONFIGURATION, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. 
""" + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b4757a3c31..2d55c6e1cb 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available): pyccel_test("scripts/kernel/kernel_name_collision.py", language="cuda", execute_code=gpu_available) +#------------------------------------------------------------------------------ + +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From d66442d4a09c479d92b17c8a262634d3f4995888 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 10:41:06 +0100 Subject: [PATCH 068/150] fix a doc problem --- pyccel/ast/cudaext.py | 1 - pyccel/codegen/printing/ccode.py | 2 +- pyccel/codegen/printing/cucode.py | 9 +++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 99efd2c4ed..c823ee1181 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -55,7 +55,6 @@ def init_dtype(self): The dtype provided to the function when it was initialised in Python. The dtype provided to the function when it was initialised in Python. - If no dtype was provided then this should equal `None`. 
""" return self._init_dtype diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 41517f2420..3ca1833d4a 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1462,7 +1462,7 @@ def _print_IndexedElement(self, expr): inds = list(expr.indices) base_shape = base.shape allow_negative_indexes = expr.allows_negative_indexes - + if isinstance(base.class_type, NumpyNDArrayType): #set dtype to the C struct types dtype = self.find_in_ndarray_type_registry(expr.dtype) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 61254cf367..1d6d02e7e4 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -199,9 +199,10 @@ def get_declare_type(self, expr): def _print_Assign(self, expr): rhs = expr.rhs - if not isinstance(rhs.class_type, CudaArrayType): - return super()._print_Assign(expr) - if(isinstance(rhs, (CudaFull))): + if isinstance(rhs.class_type, CudaArrayType): + if(isinstance(rhs, (CudaFull))): # TODO add support for CudaFull - return " \n" + return " \n" + + return super()._print_Assign(expr) From 976e72904fecf9816f1901647c7ec449317be3b0 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 11:01:45 +0100 Subject: [PATCH 069/150] fix a doc problem --- pyccel/ast/cudaext.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index c823ee1181..f890fc7999 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -27,7 +27,7 @@ class CudaNewarray(PyccelFunction): """ - superclass for nodes representing Cuda array allocation functions. + Superclass for nodes representing Cuda array allocation functions. Class from which all nodes representing a Cuda function which implies a call to `Allocate` should inherit. @@ -55,6 +55,7 @@ def init_dtype(self): The dtype provided to the function when it was initialised in Python. 
The dtype provided to the function when it was initialised in Python. + If no dtype was provided then this should equal `None`. """ return self._init_dtype @@ -68,7 +69,7 @@ def __init__(self, *args ,class_type, init_dtype, memory_location): class CudaFull(CudaNewarray): """ Represents a call to `cuda.full` for code generation. - + Represents a call to the Cuda function `full` which creates an array of a specified size and shape filled with a specified value. From 0289a778a0e18d43e4898c81b1c45d15c2fca443 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 16:41:07 +0100 Subject: [PATCH 070/150] debuging perpose --- tests/pyccel/test_pyccel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 9b0f0d443e..fbf6adc972 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,6 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() + print(cmd) + exit(0) p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) p.wait() assert p.returncode==0 From ca49b1b704aa8642bd445b7550b9c000e5316673 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 16:44:57 +0100 Subject: [PATCH 071/150] debuging perpose --- tests/pyccel/test_pyccel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index fbf6adc972..cd33c5d17e 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,6 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() + print(path_dir) + print("-------------------------------------------") print(cmd) exit(0) p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) From ce1ddbcd344c040212fc0e8882b787c2d5250577 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 
2024 16:51:47 +0100 Subject: [PATCH 072/150] debuging perpose --- tests/pyccel/test_pyccel.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index cd33c5d17e..c7edf5a6e5 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,13 +62,20 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - print(path_dir) - print("-------------------------------------------") - print(cmd) - exit(0) - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) - p.wait() - assert p.returncode==0 + try: + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + if out: + print("Output:\n", out) + if err: + print("Error:\n", err) + + assert p.returncode == 0, f"Command failed with return code {p.returncode}" + + except AssertionError as e: + print(e) + except Exception as e: + print("An error occurred:", e) #------------------------------------------------------------------------------ def compile_c(path_dir, test_file, dependencies, is_mod=False): From 717c4a4fbed7a0f46e73eb86cc764a5c30cd23ba Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 16:58:06 +0100 Subject: [PATCH 073/150] debuging perpose --- tests/pyccel/test_pyccel.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index c7edf5a6e5..9b0f0d443e 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,20 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - try: - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, 
err = p.communicate() - if out: - print("Output:\n", out) - if err: - print("Error:\n", err) - - assert p.returncode == 0, f"Command failed with return code {p.returncode}" - - except AssertionError as e: - print(e) - except Exception as e: - print("An error occurred:", e) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) + p.wait() + assert p.returncode==0 #------------------------------------------------------------------------------ def compile_c(path_dir, test_file, dependencies, is_mod=False): From a19c559f6bd04436f95366dbe1b2be5ce5965c1a Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:04:28 +0100 Subject: [PATCH 074/150] debuging perpose --- tests/pyccel/test_pyccel.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 9b0f0d443e..c42d718f3a 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,8 +62,13 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir , stdout=subprocess.PIPE, stderr=subprocess.PIPE) p.wait() + err , msg = p.communicate() + if p.returncode != 0: + print(err) + else: + print(msg) assert p.returncode==0 #------------------------------------------------------------------------------ From 33842e970201f4b80465b07b9d74df51f15d12e2 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:14:08 +0100 Subject: [PATCH 075/150] debuging perpose --- tests/pyccel/test_pyccel.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index c42d718f3a..6001207f67 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,14 +62,9 @@ def compile_pyccel(path_dir, test_file, 
options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir , stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) p.wait() - err , msg = p.communicate() - if p.returncode != 0: - print(err) - else: - print(msg) - assert p.returncode==0 + # assert p.returncode==0 #------------------------------------------------------------------------------ def compile_c(path_dir, test_file, dependencies, is_mod=False): From a52f4a22a5f8eaeff0c9e63ffd44e89db06cb1ce Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:23:49 +0100 Subject: [PATCH 076/150] debuging perpose --- tests/pyccel/test_pyccel.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 6001207f67..33b41ae9d2 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,8 +62,13 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p.wait() + stdout, stderr = p.communicate() + if p.returncode != 0: + print(f"Command failed with return code {p.returncode}") + print(f"Standard Output:\n{stdout}") + print(f"Standard Error:\n{stderr}") # assert p.returncode==0 #------------------------------------------------------------------------------ From 40d7c0d994ae1246e00f0f0e8b6adbf801eae5aa Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:30:45 +0100 Subject: [PATCH 077/150] debuging perpose --- tests/pyccel/test_pyccel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 
33b41ae9d2..404ca2adb9 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -69,7 +69,7 @@ def compile_pyccel(path_dir, test_file, options = ""): print(f"Command failed with return code {p.returncode}") print(f"Standard Output:\n{stdout}") print(f"Standard Error:\n{stderr}") - # assert p.returncode==0 + assert p.returncode==0 #------------------------------------------------------------------------------ def compile_c(path_dir, test_file, dependencies, is_mod=False): From 532cf03b50edd0fcde76d1fffabd4f0a0d016956 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:44:23 +0100 Subject: [PATCH 078/150] debuging perpose --- tests/pyccel/test_pyccel.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 404ca2adb9..6e6f1ba519 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -56,19 +56,29 @@ def get_python_output(abs_path, cwd = None): return out #------------------------------------------------------------------------------ +def cat_file(file_path): + try: + with open(file_path, 'r') as file: + content = file.read() + print(content) + except FileNotFoundError: + print(f"Error: The file {file_path} does not exist.") + except Exception as e: + print(f"An error occurred: {e}") + +# Path to the file you want to display + def compile_pyccel(path_dir, test_file, options = ""): if "python" in options and "--output" not in options: options += " --output=__pyccel__" cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + file_path = "/__w/pyccel-cuda/pyccel-cuda/tests/pyccel/scripts/kernel/__pyccel__/cuda_ndarrays/cuda_ndarrays.cu" + cat_file(file_path) + exit(0) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) p.wait() - stdout, stderr = 
p.communicate() - if p.returncode != 0: - print(f"Command failed with return code {p.returncode}") - print(f"Standard Output:\n{stdout}") - print(f"Standard Error:\n{stderr}") assert p.returncode==0 #------------------------------------------------------------------------------ From 3424b83616964c9d400392ab161f2b634f53cb40 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:55:22 +0100 Subject: [PATCH 079/150] debuging perpose --- .../pyccel/scripts/hope_benchmarks/test.json | 67 +++++++++++++++++++ tests/pyccel/scripts/kernel/host_array.py | 12 ++-- tests/pyccel/scripts/test2.json | 67 +++++++++++++++++++ tests/pyccel/test_pyccel.py | 15 ----- 4 files changed, 140 insertions(+), 21 deletions(-) create mode 100644 tests/pyccel/scripts/hope_benchmarks/test.json create mode 100644 tests/pyccel/scripts/test2.json diff --git a/tests/pyccel/scripts/hope_benchmarks/test.json b/tests/pyccel/scripts/hope_benchmarks/test.json new file mode 100644 index 0000000000..bd74b25867 --- /dev/null +++ b/tests/pyccel/scripts/hope_benchmarks/test.json @@ -0,0 +1,67 @@ +{ + "exec": "gfortran", + "mpi_exec": "mpif90", + "language": "fortran", + "module_output_flag": "-J", + "debug_flags": [ + "-fcheck=bounds", + "-g", + "-O0" + ], + "release_flags": [ + "-O3", + "-funroll-loops" + ], + "general_flags": [ + "-fPIC" + ], + "standard_flags": [ + "-std=f2003" + ], + "mpi": {}, + "openmp": { + "flags": [ + "-fopenmp" + ], + "libs": [ + "gomp" + ] + }, + "openacc": { + "flags": [ + "-ta=multicore", + "-Minfo=accel" + ] + }, + "family": "GNU", + "libs": [ + "-lm" + ], + "python": { + "flags": [ + "-Wsign-compare", + "-DNDEBUG", + "-g", + "-fwrapv", + "-O2", + "-Wall", + "-g", + "-fstack-protector-strong", + "-fstack-clash-protection", + "-Wformat", + "-Werror=format-security", + "-fcf-protection", + "-g", + "-fwrapv", + "-O2" + ], + "includes": [ + "/usr/include/python3.11", + "/usr/local/lib/python3.11/dist-packages/numpy/core/include" + ], + "shared_suffix": 
".cpython-311-x86_64-linux-gnu.so", + "dependencies": [ + "/usr/lib/x86_64-linux-gnu/libpython3.11.so" + ] + } +} diff --git a/tests/pyccel/scripts/kernel/host_array.py b/tests/pyccel/scripts/kernel/host_array.py index cacbcc1da5..e686521e43 100644 --- a/tests/pyccel/scripts/kernel/host_array.py +++ b/tests/pyccel/scripts/kernel/host_array.py @@ -1,10 +1,10 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel import cuda +def f(): + a = cuda.host_empty(10) -a = cuda.host_empty(10) - -for i in range(10): - a[i] = 1 - -if __name__ == '__main__': + for i in range(10): + a[i] = 1 print(a) +if __name__ == '__main__': + f() diff --git a/tests/pyccel/scripts/test2.json b/tests/pyccel/scripts/test2.json new file mode 100644 index 0000000000..bd74b25867 --- /dev/null +++ b/tests/pyccel/scripts/test2.json @@ -0,0 +1,67 @@ +{ + "exec": "gfortran", + "mpi_exec": "mpif90", + "language": "fortran", + "module_output_flag": "-J", + "debug_flags": [ + "-fcheck=bounds", + "-g", + "-O0" + ], + "release_flags": [ + "-O3", + "-funroll-loops" + ], + "general_flags": [ + "-fPIC" + ], + "standard_flags": [ + "-std=f2003" + ], + "mpi": {}, + "openmp": { + "flags": [ + "-fopenmp" + ], + "libs": [ + "gomp" + ] + }, + "openacc": { + "flags": [ + "-ta=multicore", + "-Minfo=accel" + ] + }, + "family": "GNU", + "libs": [ + "-lm" + ], + "python": { + "flags": [ + "-Wsign-compare", + "-DNDEBUG", + "-g", + "-fwrapv", + "-O2", + "-Wall", + "-g", + "-fstack-protector-strong", + "-fstack-clash-protection", + "-Wformat", + "-Werror=format-security", + "-fcf-protection", + "-g", + "-fwrapv", + "-O2" + ], + "includes": [ + "/usr/include/python3.11", + "/usr/local/lib/python3.11/dist-packages/numpy/core/include" + ], + "shared_suffix": ".cpython-311-x86_64-linux-gnu.so", + "dependencies": [ + "/usr/lib/x86_64-linux-gnu/libpython3.11.so" + ] + } +} diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 6e6f1ba519..9b0f0d443e 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -56,27 +56,12 @@ def get_python_output(abs_path, cwd = None): return out #------------------------------------------------------------------------------ -def cat_file(file_path): - try: - with open(file_path, 'r') as file: - content = file.read() - print(content) - except FileNotFoundError: - print(f"Error: The file {file_path} does not exist.") - except Exception as e: - print(f"An error occurred: {e}") - -# Path to the file you want to display - def compile_pyccel(path_dir, test_file, options = ""): if "python" in options and "--output" not in options: options += " --output=__pyccel__" cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - file_path = "/__w/pyccel-cuda/pyccel-cuda/tests/pyccel/scripts/kernel/__pyccel__/cuda_ndarrays/cuda_ndarrays.cu" - cat_file(file_path) - exit(0) p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) p.wait() assert p.returncode==0 From 3c1387b424d99eddc72def20873d20ab9ffc2bc3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 18 Jul 2024 17:57:49 +0100 Subject: [PATCH 080/150] debuging perpose --- .../pyccel/scripts/hope_benchmarks/test.json | 67 ------------------- tests/pyccel/scripts/test2.json | 67 ------------------- 2 files changed, 134 deletions(-) delete mode 100644 tests/pyccel/scripts/hope_benchmarks/test.json delete mode 100644 tests/pyccel/scripts/test2.json diff --git a/tests/pyccel/scripts/hope_benchmarks/test.json b/tests/pyccel/scripts/hope_benchmarks/test.json deleted file mode 100644 index bd74b25867..0000000000 --- a/tests/pyccel/scripts/hope_benchmarks/test.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "exec": "gfortran", - "mpi_exec": "mpif90", - "language": "fortran", - "module_output_flag": "-J", - "debug_flags": [ - "-fcheck=bounds", - "-g", - "-O0" - ], - "release_flags": [ - "-O3", - "-funroll-loops" - ], - "general_flags": [ - "-fPIC" - ], - "standard_flags": [ - "-std=f2003" - ], - "mpi": 
{}, - "openmp": { - "flags": [ - "-fopenmp" - ], - "libs": [ - "gomp" - ] - }, - "openacc": { - "flags": [ - "-ta=multicore", - "-Minfo=accel" - ] - }, - "family": "GNU", - "libs": [ - "-lm" - ], - "python": { - "flags": [ - "-Wsign-compare", - "-DNDEBUG", - "-g", - "-fwrapv", - "-O2", - "-Wall", - "-g", - "-fstack-protector-strong", - "-fstack-clash-protection", - "-Wformat", - "-Werror=format-security", - "-fcf-protection", - "-g", - "-fwrapv", - "-O2" - ], - "includes": [ - "/usr/include/python3.11", - "/usr/local/lib/python3.11/dist-packages/numpy/core/include" - ], - "shared_suffix": ".cpython-311-x86_64-linux-gnu.so", - "dependencies": [ - "/usr/lib/x86_64-linux-gnu/libpython3.11.so" - ] - } -} diff --git a/tests/pyccel/scripts/test2.json b/tests/pyccel/scripts/test2.json deleted file mode 100644 index bd74b25867..0000000000 --- a/tests/pyccel/scripts/test2.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "exec": "gfortran", - "mpi_exec": "mpif90", - "language": "fortran", - "module_output_flag": "-J", - "debug_flags": [ - "-fcheck=bounds", - "-g", - "-O0" - ], - "release_flags": [ - "-O3", - "-funroll-loops" - ], - "general_flags": [ - "-fPIC" - ], - "standard_flags": [ - "-std=f2003" - ], - "mpi": {}, - "openmp": { - "flags": [ - "-fopenmp" - ], - "libs": [ - "gomp" - ] - }, - "openacc": { - "flags": [ - "-ta=multicore", - "-Minfo=accel" - ] - }, - "family": "GNU", - "libs": [ - "-lm" - ], - "python": { - "flags": [ - "-Wsign-compare", - "-DNDEBUG", - "-g", - "-fwrapv", - "-O2", - "-Wall", - "-g", - "-fstack-protector-strong", - "-fstack-clash-protection", - "-Wformat", - "-Werror=format-security", - "-fcf-protection", - "-g", - "-fwrapv", - "-O2" - ], - "includes": [ - "/usr/include/python3.11", - "/usr/local/lib/python3.11/dist-packages/numpy/core/include" - ], - "shared_suffix": ".cpython-311-x86_64-linux-gnu.so", - "dependencies": [ - "/usr/lib/x86_64-linux-gnu/libpython3.11.so" - ] - } -} From 6d003c95f9289516bf9b92c0daeef825d0f9ac1b Mon Sep 17 00:00:00 2001 
From: smazouz42 Date: Fri, 19 Jul 2024 02:00:38 +0100 Subject: [PATCH 081/150] fix: add support for cuda.device_empty and thread indexing as well --- docs/cuda.md | 24 ++++++ pyccel/ast/cudaext.py | 82 ++++++++++++++++++++- pyccel/codegen/printing/cucode.py | 16 ++++ pyccel/cuda/__init__.py | 7 +- pyccel/cuda/cuda_arrays.py | 25 +++++++ tests/pyccel/scripts/kernel/device_array.py | 16 ++++ tests/pyccel/test_pyccel.py | 6 ++ 7 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 tests/pyccel/scripts/kernel/device_array.py diff --git a/docs/cuda.md b/docs/cuda.md index ff68b5c69a..ad19b01f78 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -63,3 +63,27 @@ for i in range(10): if __name__ == '__main__': print(a) ``` + +### cuda+device_empty + +The cuda+device_empty function allocates an empty array on the device. + +```python +from pyccel import cuda +from pyccel.decorators import kernel + +@kernel +def kernel_call(a : 'int[:]', size : 'int'): + i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0) + if(i < size): + a[i] = 1 + +def f(): + x = cuda.device_empty(10) + kernel_call[1,10](x, 10) + +if __name__ == "__main__": + f() + +``` + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index f890fc7999..181e120d8f 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -15,6 +15,7 @@ from .core import Module, PyccelFunctionDef from .numpyext import process_dtype, process_shape from .cudatypes import CudaArrayType +from .numpytypes import NumpyInt32Type @@ -141,6 +142,81 @@ def fill_value(self): The value with which the array will be filled on initialisation. """ return None +class CudaDeviceEmpty(CudaFull): + """ + Represents a call to Cuda.host_empty for code generation. + + A class representing a call to the Cuda `host_empty` function. + + Parameters + ---------- + shape : tuple of int , int + The shape of the new array. + + dtype : PythonType, LiteralString, str + The actual dtype passed to the NumPy function. 
+ + order : str , LiteralString + The order passed to the function defoulting to 'C'. + """ + __slots__ = () + name = 'empty' + def __init__(self, shape, dtype='float', order='C'): + memory_location = 'device' + super().__init__(shape, Nil(), dtype, order , memory_location) + @property + def fill_value(self): + """ + The value with which the array will be filled on initialisation. + + The value with which the array will be filled on initialisation. + """ + return None +class CudaDimFunction(PyccelFunction): + """ + Represents a call to a CUDA dimension-related function for code generation. + + This class serves as a representation of a CUDA dimension-related function call. + """ + __slots__ = ('_dim',) + _attribute_nodes = ('_dim',) + _shape = None + _class_type = NumpyInt32Type() + + def __init__(self, dim=0): + self._dim = dim + super().__init__() + + @property + def dim(self): + return self._dim + +class threadIdx(CudaDimFunction): + """ + Represents a call to Cuda.threadIdx for code generation. + + This class serves as a representation of a thread call to the CUDA. + """ + def __init__(self, dim=0): + super().__init__(dim) + +class blockIdx(CudaDimFunction): + """ + Represents a call to Cuda.blockIdx for code generation. + + This class serves as a representation of a block call to the CUDA. + """ + def __init__(self, dim=0): + super().__init__(dim) + +class blockDim(CudaDimFunction): + """ + Represents a call to Cuda.blockDim for code generation. + + This class serves as a representation of a block dimension call to the CUDA. 
+ """ + def __init__(self, dim=0): + super().__init__(dim) class CudaSynchronize(PyccelFunction): """ @@ -158,7 +234,11 @@ def __init__(self): cuda_funcs = { 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), 'full' : PyccelFunctionDef('full' , CudaFull), - 'host_empty' : PyccelFunctionDef('host_empty' , CudaHostEmpty), + 'host_empty' : PyccelFunctionDef('host_empty' , CudaHostEmpty), + 'device_empty' : PyccelFunctionDef('device_empty' , CudaDeviceEmpty), + 'threadIdx' : PyccelFunctionDef('threadIdx' , threadIdx), + 'blockIdx' : PyccelFunctionDef('blockIdx' , blockIdx), + 'blockDim' : PyccelFunctionDef('blockDim' , blockDim) } cuda_mod = Module('cuda', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 1d6d02e7e4..9fc7bf57e4 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -205,4 +205,20 @@ def _print_Assign(self, expr): return " \n" return super()._print_Assign(expr) + def _get_cuda_dim(self, dim, prefix): + if dim == 0: + return f'{prefix}.x' + elif dim == 1: + return f'{prefix}.y' + elif dim == 2: + return f'{prefix}.z' + + def _print_threadIdx(self, expr): + return self._get_cuda_dim(expr.dim, 'threadIdx') + + def _print_blockIdx(self, expr): + return self._get_cuda_dim(expr.dim, 'blockIdx') + + def _print_blockDim(self, expr): + return self._get_cuda_dim(expr.dim, 'blockDim') diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py index ae4be32387..269725600b 100644 --- a/pyccel/cuda/__init__.py +++ b/pyccel/cuda/__init__.py @@ -7,5 +7,10 @@ """ from .cuda_sync_primitives import synchronize from .cuda_arrays import host_empty +from .cuda_arrays import device_empty -__all__ = ['synchronize', 'host_empty'] +from .cuda_arrays import threadIdx +from .cuda_arrays import blockIdx +from .cuda_arrays import blockDim + +__all__ = ['synchronize', 'host_empty', 'device_empty', 'threadIdx', 'blockIdx', 'blockDim'] diff --git a/pyccel/cuda/cuda_arrays.py 
b/pyccel/cuda/cuda_arrays.py index cbdf938c0c..4e025d4009 100644 --- a/pyccel/cuda/cuda_arrays.py +++ b/pyccel/cuda/cuda_arrays.py @@ -25,5 +25,30 @@ def host_empty(shape): import numpy as np a = np.empty(shape) return a +def device_empty(shape): + """ + Create an empty array on the device. + + Create an empty array on the device. + + Parameters + ---------- + shape : tuple of int or int + The shape of the array. + + Returns + ------- + array + The empty array on the device. + """ + import numpy as np + a = np.empty(shape) + return a +def threadIdx(dim): + return 1 +def blockIdx(dim): + return 0 +def blockDim(dim): + return 0 diff --git a/tests/pyccel/scripts/kernel/device_array.py b/tests/pyccel/scripts/kernel/device_array.py new file mode 100644 index 0000000000..2282b37682 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_array.py @@ -0,0 +1,16 @@ +from pyccel import cuda +from pyccel.decorators import kernel + +@kernel +def kernel_call(a : 'int[:]'): + i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0) + if(i == 1): + a[i] = 1 + print(a[i]) + +def f(): + x = cuda.device_empty(10) + kernel_call[1,10](x) + +if __name__ == "__main__": + f() \ No newline at end of file diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 9b0f0d443e..1531de1eba 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -750,7 +750,13 @@ def test_host_array(gpu_available): language="cuda", output_dtype=types, execute_code=gpu_available) #------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_device_array(gpu_available): + types = float + pyccel_test("scripts/kernel/device_array.py", + language="cuda", output_dtype=types, execute_code=gpu_available) +#------------------------------------------------------------------------------ @pytest.mark.cuda def test_device_call(gpu_available): types = str From 26337f20c6f9d1c54314799d4053d4cb55d1f5c9 Mon Sep 17 00:00:00 2001 From: 
smazouz42 Date: Fri, 19 Jul 2024 03:22:08 +0100 Subject: [PATCH 082/150] move all thread indexing functions to separate file --- pyccel/ast/cudaext.py | 4 ++-- pyccel/codegen/printing/cucode.py | 1 + pyccel/cuda/__init__.py | 6 +++--- pyccel/cuda/cuda_arrays.py | 7 ------- pyccel/cuda/cuda_thread_indexing.py | 15 +++++++++++++++ 5 files changed, 21 insertions(+), 12 deletions(-) create mode 100644 pyccel/cuda/cuda_thread_indexing.py diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 181e120d8f..e362746493 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -144,9 +144,9 @@ def fill_value(self): return None class CudaDeviceEmpty(CudaFull): """ - Represents a call to Cuda.host_empty for code generation. + Represents a call to Cuda.device_empty for code generation. - A class representing a call to the Cuda `host_empty` function. + A class representing a call to the Cuda `device_empty` function. Parameters ---------- diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 9fc7bf57e4..2fc98eafe7 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -205,6 +205,7 @@ def _print_Assign(self, expr): return " \n" return super()._print_Assign(expr) + def _get_cuda_dim(self, dim, prefix): if dim == 0: return f'{prefix}.x' diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py index 269725600b..bc222f96a0 100644 --- a/pyccel/cuda/__init__.py +++ b/pyccel/cuda/__init__.py @@ -9,8 +9,8 @@ from .cuda_arrays import host_empty from .cuda_arrays import device_empty -from .cuda_arrays import threadIdx -from .cuda_arrays import blockIdx -from .cuda_arrays import blockDim +from .cuda_thread_indexing import threadIdx +from .cuda_thread_indexing import blockIdx +from .cuda_thread_indexing import blockDim __all__ = ['synchronize', 'host_empty', 'device_empty', 'threadIdx', 'blockIdx', 'blockDim'] diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py index 
4e025d4009..99562ddf9e 100644 --- a/pyccel/cuda/cuda_arrays.py +++ b/pyccel/cuda/cuda_arrays.py @@ -44,11 +44,4 @@ def device_empty(shape): import numpy as np a = np.empty(shape) return a -def threadIdx(dim): - return 1 -def blockIdx(dim): - return 0 -def blockDim(dim): - return 0 - diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py new file mode 100644 index 0000000000..beab4ad613 --- /dev/null +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -0,0 +1,15 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains cuda_thread_indexing methods for Pyccel. +""" +def threadIdx(dim): + return 1 +def blockIdx(dim): + return 0 +def blockDim(dim): + return 0 + + From c2044813b9be07863f5496a3597229dca098ef1a Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 03:43:58 +0100 Subject: [PATCH 083/150] debuging perpose --- pyccel/codegen/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 056bfaddbf..9ba3dee163 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -37,7 +37,7 @@ # The compile object folder will be in the pyccel dirpath internal_libs = { "ndarrays" : ("ndarrays", CompileObj("ndarrays.c",folder="ndarrays")), - "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="ndarrays")), + "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="cuda_ndarrays")), "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")), "pyc_math_c" : ("math", CompileObj("pyc_math_c.c",folder="math")), "cwrapper" : ("cwrapper", 
CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))), From 2517c433d3c9ecac1be83b84ee5e578891d3f592 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 03:55:01 +0100 Subject: [PATCH 084/150] debuging perpose --- pyccel/codegen/printing/cucode.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 1d6d02e7e4..7265b9059b 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -193,6 +193,7 @@ def get_declare_type(self, expr): rank = expr.rank if not isinstance(class_type, CudaArrayType ) or rank <= 0: return super().get_declare_type(expr) + self.add_import(c_imports['cuda_ndarrays']) self.add_import(c_imports['ndarrays']) dtype = 't_ndarray ' return dtype From 9778a86d609cf3ee0436640537561241ec9d2922 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 04:09:02 +0100 Subject: [PATCH 085/150] debuging perpose --- pyccel/codegen/printing/ccode.py | 2 ++ pyccel/codegen/printing/cucode.py | 6 ++---- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 4 ---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 3ca1833d4a..cf571c7eaa 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -235,6 +235,7 @@ 'math', 'string', 'ndarrays', + 'cuda_ndarrays', 'complex', 'stdint', 'pyc_math_c', @@ -1336,6 +1337,7 @@ def get_declare_type(self, expr): if expr.rank > 15: errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) + self.add_import(c_imports['cuda_ndarrays']) dtype = 't_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 7265b9059b..0b599d2716 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ 
-19,15 +19,13 @@ from pyccel.ast.datatypes import HomogeneousContainerType from pyccel.ast.numpytypes import numpy_precision_map from pyccel.ast.cudaext import CudaFull +from pyccel.codegen.printing.ccode import c_imports + errors = Errors() __all__ = ["CudaCodePrinter"] -c_imports = {n : Import(n, Module(n, (), ())) for n in - ['cuda_ndarrays', - 'ndarrays', - ]} class CudaCodePrinter(CCodePrinter): """ diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index e074443662..e7cbb4581f 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -2,7 +2,6 @@ # define CUDA_NDARRAYS_H # include -# include #include "../ndarrays/ndarrays.h" @@ -21,7 +20,4 @@ int32_t cuda_free_host(t_ndarray arr); __host__ __device__ int32_t cuda_free(t_ndarray arr); - -using namespace std; - #endif \ No newline at end of file From bc8b5b6415fe3c83615b69dc3506f5e558161079 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 04:21:50 +0100 Subject: [PATCH 086/150] debuging perpose --- pyccel/codegen/printing/ccode.py | 1 + pyccel/codegen/utilities.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index cf571c7eaa..10cc961966 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -571,6 +571,7 @@ def _init_stack_array(self, expr): shape_init, strides_init, len(var.shape), 'false') array_init += 'stack_array_init(&{})'.format(self._print(var)) self.add_import(c_imports['ndarrays']) + self.add_import(c_imports['cuda_ndarrays']) return buffer_array, array_init def _handle_inline_func_call(self, expr): diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 9ba3dee163..4454399b58 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -48,6 +48,7 @@ internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", 
CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays", accelerators = ('python',), dependencies = (internal_libs["ndarrays"][1], + internal_libs["cuda_ndarrays"][1], internal_libs["cwrapper"][1]))) #============================================================================== From f2909f4d0f40dd38cc062a5c798449f57e1a9dba Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 04:33:08 +0100 Subject: [PATCH 087/150] debuging perpose --- pyccel/codegen/printing/ccode.py | 1 + pyccel/codegen/printing/cucode.py | 2 +- pyccel/codegen/utilities.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 10cc961966..7c428e77a7 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1634,6 +1634,7 @@ def _print_Allocate(self, expr): elif (expr.status == 'allocated'): free_code += self._print(Deallocate(variable)) self.add_import(c_imports['ndarrays']) + self.add_import(c_imports['cuda_ndarrays']) shape = ", ".join(self._print(i) for i in expr.shape) if isinstance(variable.class_type, NumpyNDArrayType): #set dtype to the C struct types diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 0b599d2716..827d32109d 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -191,8 +191,8 @@ def get_declare_type(self, expr): rank = expr.rank if not isinstance(class_type, CudaArrayType ) or rank <= 0: return super().get_declare_type(expr) - self.add_import(c_imports['cuda_ndarrays']) self.add_import(c_imports['ndarrays']) + self.add_import(c_imports['cuda_ndarrays']) dtype = 't_ndarray ' return dtype diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 4454399b58..9ba3dee163 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -48,7 +48,6 @@ internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", 
CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays", accelerators = ('python',), dependencies = (internal_libs["ndarrays"][1], - internal_libs["cuda_ndarrays"][1], internal_libs["cwrapper"][1]))) #============================================================================== From 8f61d134b3c21726504041635b1792f219c6e89f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 12:14:43 +0100 Subject: [PATCH 088/150] debuging perpose --- pyccel/codegen/compiling/compilers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index d909a5036e..b5bc519dc3 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -496,6 +496,7 @@ def run_command(cmd, verbose): with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as p: + p.wait() out, err = p.communicate() if verbose and out: From 7d027205f4cec4f8688a4f6dca78fc175f79be0b Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 12:32:57 +0100 Subject: [PATCH 089/150] cleaning up my PR --- pyccel/codegen/compiling/compilers.py | 2 -- pyccel/codegen/printing/ccode.py | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index b5bc519dc3..0d496b9e8d 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -493,10 +493,8 @@ def run_command(cmd, verbose): cmd = [os.path.expandvars(c) for c in cmd] if verbose: print(' '.join(cmd)) - with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as p: - p.wait() out, err = p.communicate() if verbose and out: diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 7c428e77a7..7307e47416 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -231,11 +231,11 @@ 
import_dict = {'omp_lib' : 'omp' } c_imports = {n : Import(n, Module(n, (), ())) for n in - ['stdlib', + ['cuda_ndarrays', + 'stdlib', 'math', 'string', 'ndarrays', - 'cuda_ndarrays', 'complex', 'stdint', 'pyc_math_c', @@ -571,7 +571,6 @@ def _init_stack_array(self, expr): shape_init, strides_init, len(var.shape), 'false') array_init += 'stack_array_init(&{})'.format(self._print(var)) self.add_import(c_imports['ndarrays']) - self.add_import(c_imports['cuda_ndarrays']) return buffer_array, array_init def _handle_inline_func_call(self, expr): @@ -1634,7 +1633,6 @@ def _print_Allocate(self, expr): elif (expr.status == 'allocated'): free_code += self._print(Deallocate(variable)) self.add_import(c_imports['ndarrays']) - self.add_import(c_imports['cuda_ndarrays']) shape = ", ".join(self._print(i) for i in expr.shape) if isinstance(variable.class_type, NumpyNDArrayType): #set dtype to the C struct types From a98a6c2930ffb1634440d01d96002bcba6c75cf9 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 13:44:57 +0200 Subject: [PATCH 090/150] Target failing test --- .github/actions/pytest_run_cuda/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 46f90552ed..a6f77dec05 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -11,7 +11,7 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" -sxv -k test_host_array --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests From e4fcff4d76117dd31840a5f79cf4636abda8298d Mon 
Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 13:46:48 +0200 Subject: [PATCH 091/150] Examine compilation output --- tests/pyccel/test_pyccel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 9b0f0d443e..b1a358078d 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,7 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) + p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir) + print(p.stdout) + print(p.stderr) p.wait() assert p.returncode==0 From 73085bda481ead105268640fd55a34263314236a Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 13:47:23 +0200 Subject: [PATCH 092/150] Run pyccel in verbose mode --- tests/pyccel/test_pyccel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b1a358078d..4ad4e05e17 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -748,7 +748,7 @@ def test_kernel_collision(gpu_available): @pytest.mark.cuda def test_host_array(gpu_available): types = float - pyccel_test("scripts/kernel/host_array.py", + pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '-v', language="cuda", output_dtype=types, execute_code=gpu_available) #------------------------------------------------------------------------------ From 8215d77205d01b1eb32b230b94c7f36b62ec06e4 Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 13:56:28 +0200 Subject: [PATCH 093/150] Correct flag --- tests/pyccel/test_pyccel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 4ad4e05e17..e37286c401 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -65,7 +65,6 @@ def compile_pyccel(path_dir, test_file, options = ""): p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir) print(p.stdout) print(p.stderr) - p.wait() assert p.returncode==0 #------------------------------------------------------------------------------ @@ -748,7 +747,7 @@ def test_kernel_collision(gpu_available): @pytest.mark.cuda def test_host_array(gpu_available): types = float - pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '-v', + pyccel_test("scripts/kernel/host_array.py", pyccel_commands = '--verbose', language="cuda", output_dtype=types, execute_code=gpu_available) #------------------------------------------------------------------------------ From 9e75cba250f4b3fbdcc6a9924a924fcc01529e3c Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 13:17:15 +0100 Subject: [PATCH 094/150] debuging perpose --- tests/pyccel/test_pyccel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index e37286c401..88b737e068 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,9 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir) - print(p.stdout) - print(p.stderr) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p.communicate() + assert p.returncode==0 #------------------------------------------------------------------------------ From f71e741be94be7eddb706b482e38a6ccab204a94 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 13:23:05 +0100 Subject: [PATCH 095/150] Examine compilation output --- tests/pyccel/test_pyccel.py | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 88b737e068..e37286c401 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,9 +62,9 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - p.communicate() - + p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir) + print(p.stdout) + print(p.stderr) assert p.returncode==0 #------------------------------------------------------------------------------ From 44f3503ba874e8b37732735c9945f46b5341282d Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 14:26:26 +0200 Subject: [PATCH 096/150] Check files being compiled and existence --- pyccel/codegen/compiling/compilers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index 0d496b9e8d..9ba44fbb23 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -329,6 +329,8 @@ def compile_module(self, compile_obj, output_folder, verbose = False): verbose : bool Indicates whether additional output should be shown. 
""" + print("Compiling : ", compile_obj.source) + print(os.path.exists(compile_obj.source)) if not compile_obj.has_target_file: return From 82f1c695b7185a4ecf539b5d35011228b94f1c1f Mon Sep 17 00:00:00 2001 From: Emily Bourne Date: Fri, 19 Jul 2024 14:29:54 +0200 Subject: [PATCH 097/150] Examine library copy --- pyccel/codegen/utilities.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 9ba3dee163..8553b89195 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -112,6 +112,7 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None): str The location that the files were copied to. """ + print("copy_internal_library : ", lib_folder) # get lib path (stdlib_path/lib_name or ext_path/lib_name) if lib_folder in external_libs: lib_path = os.path.join(ext_path, external_libs[lib_folder], lib_folder) @@ -144,6 +145,10 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None): if to_create: # Copy all files from the source to the destination shutil.copytree(lib_path, lib_dest_path) + dst_files = [os.path.relpath(os.path.join(root, f), lib_dest_path) \ + for root, dirs, files in os.walk(lib_dest_path) \ + for f in files if not f.endswith('.lock')] + print("Created : ", dst_files) # Create any requested extra files if extra_files: for filename, contents in extra_files.items(): From b9e5c949fc4bfe52826eb2fe48bbf6249d7bcf55 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 098/150] Trigger tests on push to devel or main branch --- .github/workflows/anaconda_linux.yml | 2 +- .github/workflows/anaconda_windows.yml | 2 +- .github/workflows/intel.yml | 2 +- .github/workflows/linux.yml | 2 +- .github/workflows/macosx.yml | 2 +- .github/workflows/pickle.yml | 2 +- .github/workflows/pickle_wheel.yml | 2 +- .github/workflows/windows.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/.github/workflows/anaconda_linux.yml b/.github/workflows/anaconda_linux.yml index 5a5384e5ce..525903a54f 100644 --- a/.github/workflows/anaconda_linux.yml +++ b/.github/workflows/anaconda_linux.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/anaconda_windows.yml b/.github/workflows/anaconda_windows.yml index 154a4d01e8..0f3f8a04ed 100644 --- a/.github/workflows/anaconda_windows.yml +++ b/.github/workflows/anaconda_windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 977d5f9afd..5f340e1088 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -29,7 +29,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ad39cee725..664ae3aa60 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -28,7 +28,7 @@ env: jobs: matrix_prep: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: 
diff --git a/.github/workflows/macosx.yml b/.github/workflows/macosx.yml index 4768a64efa..f51041c0b8 100644 --- a/.github/workflows/macosx.yml +++ b/.github/workflows/macosx.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: macos-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/pickle.yml b/.github/workflows/pickle.yml index 052028a5cb..cc3864afd2 100644 --- a/.github/workflows/pickle.yml +++ b/.github/workflows/pickle.yml @@ -31,7 +31,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-matrix.outputs.python_version }} matrix: ${{ steps.set-matrix.outputs.matrix }} diff --git a/.github/workflows/pickle_wheel.yml b/.github/workflows/pickle_wheel.yml index 1dc82af503..718dc13dcc 100644 --- a/.github/workflows/pickle_wheel.yml +++ b/.github/workflows/pickle_wheel.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: ubuntu-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ steps.set-python_version.outputs.python_version }} steps: diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 60c560ffee..827038a279 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -28,7 +28,7 @@ env: jobs: Python_version_picker: runs-on: windows-latest - if: github.event_name != 'push' || github.repository == 'pyccel/pyccel' + if: github.event_name != 'push' || github.repository == 'pyccel/pyccel-cuda' outputs: python_version: ${{ 
steps.set-python_version.outputs.python_version }} steps: From df24e817053a4a1abf925875acb15727cc9850db Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:46:33 +0100 Subject: [PATCH 099/150] Add cuda workflow to test cuda developments on CI --- .github/actions/coverage_install/action.yml | 2 +- .github/actions/linux_install/action.yml | 10 +-- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 17 +++++ .github/actions/python_install/action.yml | 17 +++++ .github/workflows/cuda.yml | 83 +++++++++++++++++++++ ci_tools/bot_messages/show_tests.txt | 1 + ci_tools/bot_tools/bot_funcs.py | 12 +-- ci_tools/devel_branch_tests.py | 1 + ci_tools/json_pytest_output.py | 2 +- 10 files changed, 135 insertions(+), 14 deletions(-) create mode 100644 .github/actions/pytest_run_cuda/action.yml create mode 100644 .github/actions/python_install/action.yml create mode 100644 .github/workflows/cuda.yml diff --git a/.github/actions/coverage_install/action.yml b/.github/actions/coverage_install/action.yml index ac5294e542..5732baee34 100644 --- a/.github/actions/coverage_install/action.yml +++ b/.github/actions/coverage_install/action.yml @@ -15,7 +15,7 @@ runs: - name: Directory Creation run: | INSTALL_DIR=$(cd tests; python -c "import pyccel; print(pyccel.__path__[0])") - SITE_DIR=$(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + SITE_DIR=$(dirname ${INSTALL_DIR}) echo -e "import coverage; coverage.process_startup()" > ${SITE_DIR}/pyccel_cov.pth echo -e "[run]\nparallel = True\nsource = ${INSTALL_DIR}\ndata_file = $(pwd)/.coverage\n[report]\ninclude = ${INSTALL_DIR}/*\n[xml]\noutput = cobertura.xml" > .coveragerc echo "SITE_DIR=${SITE_DIR}" >> $GITHUB_ENV diff --git a/.github/actions/linux_install/action.yml b/.github/actions/linux_install/action.yml index 8fb5cd8505..0ef9a69b8e 100644 --- a/.github/actions/linux_install/action.yml +++ b/.github/actions/linux_install/action.yml @@ -9,22 +9,22 @@ runs: 
shell: bash - name: Install fortran run: - sudo apt-get install gfortran + sudo apt-get install -y gfortran shell: bash - name: Install LaPack run: - sudo apt-get install libblas-dev liblapack-dev + sudo apt-get install -y libblas-dev liblapack-dev shell: bash - name: Install MPI run: | - sudo apt-get install libopenmpi-dev openmpi-bin + sudo apt-get install -y libopenmpi-dev openmpi-bin echo "MPI_OPTS=--oversubscribe" >> $GITHUB_ENV shell: bash - name: Install OpenMP run: - sudo apt-get install libomp-dev libomp5 + sudo apt-get install -y libomp-dev libomp5 shell: bash - name: Install Valgrind run: - sudo apt-get install valgrind + sudo apt-get install -y valgrind shell: bash diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index 0b6f0f988d..b0bdc31f16 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml 
b/.github/actions/pytest_run_cuda/action.yml new file mode 100644 index 0000000000..52092a6e02 --- /dev/null +++ b/.github/actions/pytest_run_cuda/action.yml @@ -0,0 +1,17 @@ +name: 'Pyccel pytest commands generating Ccuda' +inputs: + shell_cmd: + description: 'Specifies the shell command (different for anaconda)' + required: false + default: "bash" + +runs: + using: "composite" + steps: + - name: Ccuda tests with pytest + run: | + # Catch exit 5 (no tests found) + sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + pyccel-clean + shell: ${{ inputs.shell_cmd }} + working-directory: ./tests diff --git a/.github/actions/python_install/action.yml b/.github/actions/python_install/action.yml new file mode 100644 index 0000000000..f9b720e3e1 --- /dev/null +++ b/.github/actions/python_install/action.yml @@ -0,0 +1,17 @@ +name: 'Python installation commands' + +runs: + using: "composite" + steps: + - name: Install python + run: + sudo apt-get -y install python3-dev + shell: bash + - name: python as python3 + run: + sudo apt-get -y install python-is-python3 + shell: bash + - name: Install Pip + run: + sudo apt-get -y install python3-pip + shell: bash diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 0000000000..833ebf5d85 --- /dev/null +++ b/.github/workflows/cuda.yml @@ -0,0 +1,83 @@ +name: Cuda unit tests + +on: + workflow_dispatch: + inputs: + python_version: + required: false + type: string + ref: + required: false + type: string + check_run_id: + required: false + type: string + pr_repo: + required: false + type: string + push: + branches: [devel, main] + +env: + COMMIT: ${{ inputs.ref || github.event.ref }} + PEM: ${{ secrets.BOT_PEM }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_CHECK_RUN_ID: ${{ inputs.check_run_id }} + PR_REPO: ${{ inputs.pr_repo || github.repository }} + +jobs: + Cuda: + + runs-on: 
ubuntu-20.04 + name: Unit tests + + container: nvidia/cuda:11.7.1-devel-ubuntu20.04 + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ env.COMMIT }} + repository: ${{ env.PR_REPO }} + - name: Prepare docker + run: | + apt update && apt install sudo + TZ=Europe/France + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata + shell: bash + - name: Install python (setup-python action doesn't work with containers) + uses: ./.github/actions/python_install + - name: "Setup" + id: token + run: | + pip install jwt requests + python ci_tools/setup_check_run.py cuda + - name: CUDA Version + run: nvcc --version # cuda install check + - name: Install dependencies + uses: ./.github/actions/linux_install + - name: Install Pyccel with tests + run: | + PATH=${PATH}:$HOME/.local/bin + echo "PATH=${PATH}" >> $GITHUB_ENV + python -m pip install --upgrade pip + python -m pip install --user .[test] + shell: bash + - name: Coverage install + uses: ./.github/actions/coverage_install + - name: Ccuda tests with pytest + id: cuda_pytest + uses: ./.github/actions/pytest_run_cuda + - name: Collect coverage information + continue-on-error: True + uses: ./.github/actions/coverage_collection + - name: Save code coverage report + uses: actions/upload-artifact@v3 + with: + name: coverage-artifact + path: .coverage + retention-days: 1 + - name: "Post completed" + if: always() + run: + python ci_tools/complete_check_run.py ${{ steps.cuda_pytest.outcome }} + diff --git a/ci_tools/bot_messages/show_tests.txt b/ci_tools/bot_messages/show_tests.txt index adc07e8431..eb15492d2e 100644 --- a/ci_tools/bot_messages/show_tests.txt +++ b/ci_tools/bot_messages/show_tests.txt @@ -2,6 +2,7 @@ The following is a list of keywords which can be used to run tests. Tests in bol - **linux** : Runs the unit tests on a Linux system. - **windows** : Runs the unit tests on a Windows system. 
- **macosx** : Runs the unit tests on a MacOS X system. +- **cuda** : Runs the cuda unit tests on a Linux system. - **coverage** : Runs the unit tests on a Linux system and checks the coverage of the tests. - **docs** : Checks if the documentation follows the numpydoc format. - **pylint** : Runs pylint on files which are too big to be handled by codacy. diff --git a/ci_tools/bot_tools/bot_funcs.py b/ci_tools/bot_tools/bot_funcs.py index 7084a01bb9..1621d1d089 100644 --- a/ci_tools/bot_tools/bot_funcs.py +++ b/ci_tools/bot_tools/bot_funcs.py @@ -23,7 +23,8 @@ 'pyccel_lint': '3.8', 'pylint': '3.8', 'spelling': '3.8', - 'windows': '3.8' + 'windows': '3.8', + 'cuda': '-' } test_names = { @@ -40,15 +41,16 @@ 'pyccel_lint': "Pyccel best practices", 'pylint': "Python linting", 'spelling': "Spelling verification", - 'windows': "Unit tests on Windows" + 'windows': "Unit tests on Windows", + 'cuda': "Unit tests on Linux with cuda" } -test_dependencies = {'coverage':['linux']} +test_dependencies = {'coverage':['linux', 'cuda']} tests_with_base = ('coverage', 'docs', 'pyccel_lint', 'pylint') pr_test_keys = ('linux', 'windows', 'macosx', 'coverage', 'docs', 'pylint', - 'pyccel_lint', 'spelling') + 'pyccel_lint', 'spelling', 'cuda') review_stage_labels = ["needs_initial_review", "Ready_for_review", "Ready_to_merge"] @@ -420,7 +422,7 @@ def is_test_required(self, commit_log, name, key, state): True if the test should be run, False otherwise. 
""" print("Checking : ", name, key) - if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel'): + if key in ('linux', 'windows', 'macosx', 'anaconda_linux', 'anaconda_windows', 'intel', 'cuda'): has_relevant_change = lambda diff: any((f.startswith('pyccel/') or f.startswith('tests/')) #pylint: disable=unnecessary-lambda-assignment and f.endswith('.py') and f != 'pyccel/version.py' for f in diff) diff --git a/ci_tools/devel_branch_tests.py b/ci_tools/devel_branch_tests.py index 1102ef9e92..ec67b6c49a 100644 --- a/ci_tools/devel_branch_tests.py +++ b/ci_tools/devel_branch_tests.py @@ -15,3 +15,4 @@ bot.run_tests(['anaconda_linux'], '3.10', force_run = True) bot.run_tests(['anaconda_windows'], '3.10', force_run = True) bot.run_tests(['intel'], '3.9', force_run = True) + bot.run_tests(['cuda'], '-', force_run = True) diff --git a/ci_tools/json_pytest_output.py b/ci_tools/json_pytest_output.py index 409ae76d72..b84f4a4c09 100644 --- a/ci_tools/json_pytest_output.py +++ b/ci_tools/json_pytest_output.py @@ -61,7 +61,7 @@ def mini_md_summary(title, outcome, failed_tests): summary = "" failed_pattern = re.compile(r".*FAILED.*") - languages = ('c', 'fortran', 'python') + languages = ('c', 'fortran', 'python', 'cuda') pattern = {lang: re.compile(r".*\["+lang+r"\]\ \_.*") for lang in languages} for i in p_args.tests: From 31d72476d34224b6e25a1f29f1fbefed5956db54 Mon Sep 17 00:00:00 2001 From: EmilyBourne Date: Mon, 11 Mar 2024 11:41:27 +0100 Subject: [PATCH 100/150] Trigger tests on push to devel or main branch --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9111b47d52..cf52b1c624 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -10,7 +10,7 @@ jobs: waitForWorklows: name: Wait for workflows runs-on: ubuntu-latest - if: github.event.workflow_run.head_branch == 'main' + if: 
github.event.workflow_run.head_branch == 'main' && github.repository == 'pyccel/pyccel' steps: - name: Checkout repository uses: actions/checkout@v4 From 17aa0e624d4fcf3819ef710b27024b104c0452c7 Mon Sep 17 00:00:00 2001 From: bauom <40796259+bauom@users.noreply.github.com> Date: Wed, 28 Feb 2024 18:11:50 +0100 Subject: [PATCH 101/150] [init] Adding CUDA language/compiler and CodePrinter (#32) This PR aims to make the C code compilable using nvcc. The cuda language was added as well as a CudaCodePrinter. Changes to stdlib: Wrapped expressions using complex types in an `ifndef __NVCC__` to avoid processing them with the nvcc compiler --------- Co-authored-by: Mouad Elalj, EmilyBourne --- .dict_custom.txt | 1 + .github/actions/pytest_parallel/action.yml | 4 +- .github/actions/pytest_run/action.yml | 4 +- .github/actions/pytest_run_cuda/action.yml | 11 +- CHANGELOG.md | 6 + pyccel/codegen/codegen.py | 8 +- pyccel/codegen/compiling/compilers.py | 5 +- pyccel/codegen/pipeline.py | 5 +- pyccel/codegen/printing/cucode.py | 74 +++++++++++ pyccel/commands/console.py | 2 +- pyccel/compilers/default_compilers.py | 13 +- pyccel/naming/__init__.py | 4 +- pyccel/naming/cudanameclashchecker.py | 92 ++++++++++++++ pyccel/stdlib/numpy/numpy_c.c | 2 + pyccel/stdlib/numpy/numpy_c.h | 2 + pytest.ini | 1 + tests/conftest.py | 11 ++ tests/epyccel/test_base.py | 136 ++++++++++----------- 18 files changed, 298 insertions(+), 83 deletions(-) create mode 100644 pyccel/codegen/printing/cucode.py create mode 100644 pyccel/naming/cudanameclashchecker.py diff --git a/.dict_custom.txt b/.dict_custom.txt index b9240f6215..161337d33b 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -110,6 +110,7 @@ Valgrind variadic subclasses oneAPI +Cuda getter setter bitwise diff --git a/.github/actions/pytest_parallel/action.yml b/.github/actions/pytest_parallel/action.yml index c7c77d99c7..f91d84915b 100644 --- a/.github/actions/pytest_parallel/action.yml +++ b/.github/actions/pytest_parallel/action.yml 
@@ -10,8 +10,8 @@ runs: steps: - name: Test with pytest run: | - mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m parallel -rXx - #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m parallel -rXx + mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel/test_parallel_epyccel.py -v -m "parallel and not cuda" -rXx + #mpiexec -n 4 ${MPI_OPTS} python -m pytest epyccel -v -m "parallel and not cuda" -rXx shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run/action.yml b/.github/actions/pytest_run/action.yml index b0bdc31f16..451fa39e92 100644 --- a/.github/actions/pytest_run/action.yml +++ b/.github/actions/pytest_run/action.yml @@ -51,13 +51,13 @@ runs: working-directory: ./tests id: pytest_3 - name: Test Fortran translations - run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out + run: python -m pytest -n auto -rX ${FLAGS} -m "not (parallel or xdist_incompatible) and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s4_outfile.out shell: ${{ inputs.shell_cmd }} working-directory: ./tests id: pytest_4 - name: Test multi-file Fortran translations run: | - python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or ccuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out + python -m pytest -rX ${FLAGS} -m "xdist_incompatible and not parallel and not (c or python or cuda) ${{ inputs.pytest_mark }}" --ignore=ndarrays 2>&1 | tee s5_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests diff --git a/.github/actions/pytest_run_cuda/action.yml b/.github/actions/pytest_run_cuda/action.yml index 52092a6e02..46f90552ed 100644 --- a/.github/actions/pytest_run_cuda/action.yml +++ b/.github/actions/pytest_run_cuda/action.yml @@ -1,4 +1,4 @@ -name: 'Pyccel pytest 
commands generating Ccuda' +name: 'Pyccel pytest commands generating Cuda' inputs: shell_cmd: description: 'Specifies the shell command (different for anaconda)' @@ -11,7 +11,14 @@ runs: - name: Ccuda tests with pytest run: | # Catch exit 5 (no tests found) - sh -c 'python -m pytest -n auto -rx -m "not (parallel or xdist_incompatible) and ccuda" --ignore=symbolic --ignore=ndarrays; ret=$?; [ $ret = 5 ] && exit 0 || exit $ret' + python -m pytest -rX ${FLAGS} -m "not (xdist_incompatible or parallel) and cuda ${{ inputs.pytest_mark }}" --ignore=symbolic --ignore=ndarrays 2>&1 | tee s1_outfile.out pyccel-clean shell: ${{ inputs.shell_cmd }} working-directory: ./tests + - name: Final step + if: always() + id: status + run: + python ci_tools/json_pytest_output.py -t "Cuda Test Summary" --tests "Cuda tests:${{ steps.pytest_1.outcome }}:tests/s1_outfile.out" + + shell: ${{ inputs.shell_cmd }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c0dbecd66..18fc7f947d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ # Change Log All notable changes to this project will be documented in this file. +## \[Cuda - UNRELEASED\] + +### Added + +- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. 
+ ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/codegen.py b/pyccel/codegen/codegen.py index a7a02d7804..33721a48e8 100644 --- a/pyccel/codegen/codegen.py +++ b/pyccel/codegen/codegen.py @@ -9,16 +9,18 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.printing.ccode import CCodePrinter from pyccel.codegen.printing.pycode import PythonCodePrinter +from pyccel.codegen.printing.cucode import CudaCodePrinter from pyccel.ast.core import FunctionDef, Interface, ModuleHeader from pyccel.utilities.stage import PyccelStage -_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py'} -_header_extension_registry = {'fortran': None, 'c':'h', 'python':None} +_extension_registry = {'fortran': 'f90', 'c':'c', 'python':'py', 'cuda':'cu'} +_header_extension_registry = {'fortran': None, 'c':'h', 'python':None, 'cuda':'h'} printer_registry = { 'fortran':FCodePrinter, 'c':CCodePrinter, - 'python':PythonCodePrinter + 'python':PythonCodePrinter, + 'cuda':CudaCodePrinter } pyccel_stage = PyccelStage() diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index c866ee5b1a..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -444,7 +444,10 @@ def compile_shared_library(self, compile_obj, output_folder, verbose = False, sh # Collect compile information exec_cmd, includes, libs_flags, libdirs_flags, m_code = \ self._get_compile_components(compile_obj, accelerators) - linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] + if self._info['exec'] == 'nvcc': + linker_libdirs_flags = ['-Xcompiler' if l == '-L' else f'"-Wl,-rpath,{l}"' for l in libdirs_flags] + else: + linker_libdirs_flags = ['-Wl,-rpath' if l == '-L' else l for l in libdirs_flags] flags.insert(0,"-shared") diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index 14087fb567..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ 
b/pyccel/codegen/pipeline.py @@ -180,9 +180,10 @@ def handle_error(stage): if language is None: language = 'fortran' - # Choose Fortran compiler + # Choose Default compiler if compiler is None: - compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', 'GNU') + default_compiler_family = 'nvidia' if language == 'cuda' else 'GNU' + compiler = os.environ.get('PYCCEL_DEFAULT_COMPILER', default_compiler_family) fflags = [] if fflags is None else fflags.split() wrapper_flags = [] if wrapper_flags is None else wrapper_flags.split() diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py new file mode 100644 index 0000000000..86146b065b --- /dev/null +++ b/pyccel/codegen/printing/cucode.py @@ -0,0 +1,74 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Provide tools for generating and handling CUDA code. +This module is designed to interface Pyccel's Abstract Syntax Tree (AST) with CUDA, +enabling the direct translation of high-level Pyccel expressions into CUDA code. +""" + +from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers + +from pyccel.ast.core import Import, Module + +from pyccel.errors.errors import Errors + + +errors = Errors() + +__all__ = ["CudaCodePrinter"] + +class CudaCodePrinter(CCodePrinter): + """ + Print code in CUDA format. + + This printer converts Pyccel's Abstract Syntax Tree (AST) into strings of CUDA code. + Navigation through this file utilizes _print_X functions, + as is common with all printers. + + Parameters + ---------- + filename : str + The name of the file being pyccelised. + prefix_module : str + A prefix to be added to the name of the module. 
+ """ + language = "cuda" + + def __init__(self, filename, prefix_module = None): + + errors.set_target(filename) + + super().__init__(filename) + + def _print_Module(self, expr): + self.set_scope(expr.scope) + self._current_module = expr.name + body = ''.join(self._print(i) for i in expr.body) + + global_variables = ''.join(self._print(d) for d in expr.declarations) + + # Print imports last to be sure that all additional_imports have been collected + imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] + c_headers_imports = '' + local_imports = '' + + for imp in imports: + if imp.source in c_library_headers: + c_headers_imports += self._print(imp) + else: + local_imports += self._print(imp) + + imports = f'{c_headers_imports}\ + extern "C"{{\n\ + {local_imports}\ + }}' + + code = f'{imports}\n\ + {global_variables}\n\ + {body}\n' + + self.exit_scope() + return code diff --git a/pyccel/commands/console.py b/pyccel/commands/console.py index 596c440ec0..fcbec009de 100644 --- a/pyccel/commands/console.py +++ b/pyccel/commands/console.py @@ -80,7 +80,7 @@ def pyccel(files=None, mpi=None, openmp=None, openacc=None, output_dir=None, com # ... 
backend compiler options group = parser.add_argument_group('Backend compiler options') - group.add_argument('--language', choices=('fortran', 'c', 'python'), help='Generated language') + group.add_argument('--language', choices=('fortran', 'c', 'python', 'cuda'), help='Generated language') group.add_argument('--compiler', help='Compiler family or json file containing a compiler description {GNU,intel,PGI}') diff --git a/pyccel/compilers/default_compilers.py b/pyccel/compilers/default_compilers.py index 166085d22e..d47856773c 100644 --- a/pyccel/compilers/default_compilers.py +++ b/pyccel/compilers/default_compilers.py @@ -185,6 +185,15 @@ }, 'family': 'nvidia', } +#------------------------------------------------------------ +nvcc_info = {'exec' : 'nvcc', + 'language' : 'cuda', + 'debug_flags' : ("-g",), + 'release_flags': ("-O3",), + 'general_flags': ('--compiler-options', '-fPIC',), + 'family' : 'nvidia' + } + #------------------------------------------------------------ def change_to_lib_flag(lib): @@ -288,6 +297,7 @@ def change_to_lib_flag(lib): pgfortran_info.update(python_info) nvc_info.update(python_info) nvfort_info.update(python_info) +nvcc_info.update(python_info) available_compilers = {('GNU', 'c') : gcc_info, ('GNU', 'fortran') : gfort_info, @@ -296,6 +306,7 @@ def change_to_lib_flag(lib): ('PGI', 'c') : pgcc_info, ('PGI', 'fortran') : pgfortran_info, ('nvidia', 'c') : nvc_info, - ('nvidia', 'fortran') : nvfort_info} + ('nvidia', 'fortran') : nvfort_info, + ('nvidia', 'cuda'): nvcc_info} vendors = ('GNU','intel','PGI','nvidia') diff --git a/pyccel/naming/__init__.py b/pyccel/naming/__init__.py index 72c318d3ad..b3e4bbbe0e 100644 --- a/pyccel/naming/__init__.py +++ b/pyccel/naming/__init__.py @@ -10,7 +10,9 @@ from .fortrannameclashchecker import FortranNameClashChecker from .cnameclashchecker import CNameClashChecker from .pythonnameclashchecker import PythonNameClashChecker +from .cudanameclashchecker import CudaNameClashChecker name_clash_checkers = 
{'fortran':FortranNameClashChecker(), 'c':CNameClashChecker(), - 'python':PythonNameClashChecker()} + 'python':PythonNameClashChecker(), + 'cuda':CudaNameClashChecker()} diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py new file mode 100644 index 0000000000..971204e912 --- /dev/null +++ b/pyccel/naming/cudanameclashchecker.py @@ -0,0 +1,92 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Handles name clash problems in Cuda +""" +from .languagenameclashchecker import LanguageNameClashChecker + +class CudaNameClashChecker(LanguageNameClashChecker): + """ + Class containing functions to help avoid problematic names in Cuda. + + A class which provides functionalities to check or propose variable names and + verify that they do not cause name clashes. Name clashes may be due to + new variables, or due to the use of reserved keywords. 
+ """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword + keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', + 'continue', 'default', 'do', 'double', 'else', 'enum', + 'extern', 'float', 'for', 'goto', 'if', 'inline', 'int', + 'long', 'register', 'restrict', 'return', 'short', 'signed', + 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', + 'unsigned', 'void', 'volatile', 'whie', '_Alignas', + '_Alignof', '_Atomic', '_Bool', '_Complex', 'Decimal128', + '_Decimal32', '_Decimal64', '_Generic', '_Imaginary', + '_Noreturn', '_Static_assert', '_Thread_local', 't_ndarray', + 'array_create', 'new_slice', 'array_slicing', 'alias_assign', + 'transpose_alias_assign', 'array_fill', 't_slice', + 'GET_INDEX_EXP1', 'GET_INDEX_EXP2', 'GET_INDEX_EXP2', + 'GET_INDEX_EXP3', 'GET_INDEX_EXP4', 'GET_INDEX_EXP5', + 'GET_INDEX_EXP6', 'GET_INDEX_EXP7', 'GET_INDEX_EXP8', + 'GET_INDEX_EXP9', 'GET_INDEX_EXP10', 'GET_INDEX_EXP11', + 'GET_INDEX_EXP12', 'GET_INDEX_EXP13', 'GET_INDEX_EXP14', + 'GET_INDEX_EXP15', 'NUM_ARGS_H1', 'NUM_ARGS', + 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', + 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', + 'get_index', 'numpy_to_ndarray_strides', + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + + def has_clash(self, name, symbols): + """ + Indicate whether the proposed name causes any clashes. + + Checks if a suggested name conflicts with predefined + keywords or specified symbols,returning true for a clash. + This method is crucial for maintaining namespace integrity and + preventing naming conflicts in code generation processes. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + bool + True if the name is a collision. + False if the name is collision free. 
+ """ + return any(name == k for k in self.keywords) or \ + any(name == s for s in symbols) + + def get_collisionless_name(self, name, symbols): + """ + Get a valid name which doesn't collision with symbols or Cuda keywords. + + Find a new name based on the suggested name which will not cause + conflicts with Cuda keywords, does not appear in the provided symbols, + and is a valid name in Cuda code. + + Parameters + ---------- + name : str + The suggested name. + symbols : set + Symbols which should be considered as collisions. + + Returns + ------- + str + A new name which is collision free. + """ + if len(name)>4 and all(name[i] == '_' for i in (0,1,-1,-2)): + # Ignore magic methods + return name + if name[0] == '_': + name = 'private'+name + return self._get_collisionless_name(name, symbols) diff --git a/pyccel/stdlib/numpy/numpy_c.c b/pyccel/stdlib/numpy/numpy_c.c index 7c9ecbbf6b..bc56214772 100644 --- a/pyccel/stdlib/numpy/numpy_c.c +++ b/pyccel/stdlib/numpy/numpy_c.c @@ -17,8 +17,10 @@ double fsign(double x) return SIGN(x); } +#ifndef __NVCC__ /* numpy.sign for complex */ double complex csign(double complex x) { return x ? ((!creal(x) && cimag(x) < 0) || (creal(x) < 0) ? 
-1 : 1) : 0; } +#endif diff --git a/pyccel/stdlib/numpy/numpy_c.h b/pyccel/stdlib/numpy/numpy_c.h index e72cf3ad57..c2a16a5516 100644 --- a/pyccel/stdlib/numpy/numpy_c.h +++ b/pyccel/stdlib/numpy/numpy_c.h @@ -15,6 +15,8 @@ long long int isign(long long int x); double fsign(double x); +#ifndef __NVCC__ double complex csign(double complex x); +#endif #endif diff --git a/pytest.ini b/pytest.ini index 42eb0d72ba..3792ab65f9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -9,3 +9,4 @@ markers = python: test to generate python code xdist_incompatible: test which compiles a file also compiled by another test external: test using an external dll (problematic with conda on Windows) + cuda: test to generate cuda code diff --git a/tests/conftest.py b/tests/conftest.py index 79144b6978..a5082ef6e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,17 @@ def language(request): return request.param +@pytest.fixture( params=[ + pytest.param("fortran", marks = pytest.mark.fortran), + pytest.param("c", marks = pytest.mark.c), + pytest.param("python", marks = pytest.mark.python), + pytest.param("cuda", marks = pytest.mark.cuda) + ], + scope = "session" +) +def language_with_cuda(request): + return request.param + def move_coverage(path_dir): for root, _, files in os.walk(path_dir): for name in files: diff --git a/tests/epyccel/test_base.py b/tests/epyccel/test_base.py index c22064d321..413f79eef1 100644 --- a/tests/epyccel/test_base.py +++ b/tests/epyccel/test_base.py @@ -7,128 +7,128 @@ from utilities import epyccel_test -def test_is_false(language): - test = epyccel_test(base.is_false, lang=language) +def test_is_false(language_with_cuda): + test = epyccel_test(base.is_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_is_true(language): - test = epyccel_test(base.is_true, lang=language) +def test_is_true(language_with_cuda): + test = epyccel_test(base.is_true, lang=language_with_cuda) test.compare_epyccel( True ) 
test.compare_epyccel( False ) -def test_compare_is(language): - test = epyccel_test(base.compare_is, lang=language) +def test_compare_is(language_with_cuda): + test = epyccel_test(base.compare_is, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_not(language): - test = epyccel_test(base.compare_is_not, lang=language) +def test_compare_is_not(language_with_cuda): + test = epyccel_test(base.compare_is_not, lang=language_with_cuda) test.compare_epyccel( True, True ) test.compare_epyccel( True, False ) test.compare_epyccel( False, True ) test.compare_epyccel( False, False ) -def test_compare_is_int(language): - test = epyccel_test(base.compare_is_int, lang=language) +def test_compare_is_int(language_with_cuda): + test = epyccel_test(base.compare_is_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_compare_is_not_int(language): - test = epyccel_test(base.compare_is_not_int, lang=language) +def test_compare_is_not_int(language_with_cuda): + test = epyccel_test(base.compare_is_not_int, lang=language_with_cuda) test.compare_epyccel( True, 1 ) test.compare_epyccel( True, 0 ) test.compare_epyccel( False, 1 ) test.compare_epyccel( False, 0 ) -def test_not_false(language): - test = epyccel_test(base.not_false, lang=language) +def test_not_false(language_with_cuda): + test = epyccel_test(base.not_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_true(language): - test = epyccel_test(base.not_true, lang=language) +def test_not_true(language_with_cuda): + test = epyccel_test(base.not_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def 
test_eq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_eq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_eq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_false(language): - test = epyccel_test(base.eq_false, lang=language) +def test_neq_false(language_with_cuda): + test = epyccel_test(base.eq_false, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_neq_true(language): - test = epyccel_test(base.eq_true, lang=language) +def test_neq_true(language_with_cuda): + test = epyccel_test(base.eq_true, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not(language): - test = epyccel_test(base.not_val, lang=language) +def test_not(language_with_cuda): + test = epyccel_test(base.not_val, lang=language_with_cuda) test.compare_epyccel( True ) test.compare_epyccel( False ) -def test_not_int(language): - test = epyccel_test(base.not_int, lang=language) +def test_not_int(language_with_cuda): + test = epyccel_test(base.not_int, lang=language_with_cuda) test.compare_epyccel( 0 ) test.compare_epyccel( 4 ) -def test_compare_is_nil(language): - test = epyccel_test(base.is_nil, lang=language) +def test_compare_is_nil(language_with_cuda): + test = epyccel_test(base.is_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_compare_is_not_nil(language): - test = epyccel_test(base.is_not_nil, lang=language) +def test_compare_is_not_nil(language_with_cuda): + test = epyccel_test(base.is_not_nil, lang=language_with_cuda) test.compare_epyccel( None ) -def test_cast_int(language): - test = epyccel_test(base.cast_int, lang=language) +def test_cast_int(language_with_cuda): + test = epyccel_test(base.cast_int, lang=language_with_cuda) 
test.compare_epyccel( 4 ) - test = epyccel_test(base.cast_float_to_int, lang=language) + test = epyccel_test(base.cast_float_to_int, lang=language_with_cuda) test.compare_epyccel( 4.5 ) -def test_cast_bool(language): - test = epyccel_test(base.cast_bool, lang=language) +def test_cast_bool(language_with_cuda): + test = epyccel_test(base.cast_bool, lang=language_with_cuda) test.compare_epyccel( True ) -def test_cast_float(language): - test = epyccel_test(base.cast_float, lang=language) +def test_cast_float(language_with_cuda): + test = epyccel_test(base.cast_float, lang=language_with_cuda) test.compare_epyccel( 4.5 ) - test = epyccel_test(base.cast_int_to_float, lang=language) + test = epyccel_test(base.cast_int_to_float, lang=language_with_cuda) test.compare_epyccel( 4 ) -def test_if_0_int(language): - test = epyccel_test(base.if_0_int, lang=language) +def test_if_0_int(language_with_cuda): + test = epyccel_test(base.if_0_int, lang=language_with_cuda) test.compare_epyccel( 22 ) test.compare_epyccel( 0 ) -def test_if_0_real(language): - test = epyccel_test(base.if_0_real, lang=language) +def test_if_0_real(language_with_cuda): + test = epyccel_test(base.if_0_real, lang=language_with_cuda) test.compare_epyccel( 22.3 ) test.compare_epyccel( 0.0 ) -def test_same_int(language): - test = epyccel_test(base.is_same_int, lang=language) +def test_same_int(language_with_cuda): + test = epyccel_test(base.is_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) - test = epyccel_test(base.isnot_same_int, lang=language) + test = epyccel_test(base.isnot_same_int, lang=language_with_cuda) test.compare_epyccel( 22 ) -def test_same_float(language): - test = epyccel_test(base.is_same_float, lang=language) +def test_same_float(language_with_cuda): + test = epyccel_test(base.is_same_float, lang=language_with_cuda) test.compare_epyccel( 22.2 ) - test = epyccel_test(base.isnot_same_float, lang=language) + test = epyccel_test(base.isnot_same_float, lang=language_with_cuda) 
test.compare_epyccel( 22.2 ) @pytest.mark.parametrize( 'language', [ @@ -150,28 +150,28 @@ def test_same_complex(language): test = epyccel_test(base.isnot_same_complex, lang=language) test.compare_epyccel( complex(2,3) ) -def test_is_types(language): - test = epyccel_test(base.is_types, lang=language) +def test_is_types(language_with_cuda): + test = epyccel_test(base.is_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_isnot_types(language): - test = epyccel_test(base.isnot_types, lang=language) +def test_isnot_types(language_with_cuda): + test = epyccel_test(base.isnot_types, lang=language_with_cuda) test.compare_epyccel( 1, 1.0 ) -def test_none_is_none(language): - test = epyccel_test(base.none_is_none, lang=language) +def test_none_is_none(language_with_cuda): + test = epyccel_test(base.none_is_none, lang=language_with_cuda) test.compare_epyccel() -def test_none_isnot_none(language): - test = epyccel_test(base.none_isnot_none, lang=language) +def test_none_isnot_none(language_with_cuda): + test = epyccel_test(base.none_isnot_none, lang=language_with_cuda) test.compare_epyccel() -def test_pass_if(language): - test = epyccel_test(base.pass_if, lang=language) +def test_pass_if(language_with_cuda): + test = epyccel_test(base.pass_if, lang=language_with_cuda) test.compare_epyccel(2) -def test_pass2_if(language): - test = epyccel_test(base.pass2_if, lang=language) +def test_pass2_if(language_with_cuda): + test = epyccel_test(base.pass2_if, lang=language_with_cuda) test.compare_epyccel(0.2) test.compare_epyccel(0.0) @@ -192,15 +192,15 @@ def test_use_optional(language): test.compare_epyccel() test.compare_epyccel(6) -def test_none_equality(language): - test = epyccel_test(base.none_equality, lang=language) +def test_none_equality(language_with_cuda): + test = epyccel_test(base.none_equality, lang=language_with_cuda) test.compare_epyccel() test.compare_epyccel(6) -def test_none_none_equality(language): - test = 
epyccel_test(base.none_none_equality, lang=language) +def test_none_none_equality(language_with_cuda): + test = epyccel_test(base.none_none_equality, lang=language_with_cuda) test.compare_epyccel() -def test_none_literal_equality(language): - test = epyccel_test(base.none_literal_equality, lang=language) +def test_none_literal_equality(language_with_cuda): + test = epyccel_test(base.none_literal_equality, lang=language_with_cuda) test.compare_epyccel() From 2c58573886bbd50fc6c715f66de673c743ca2af5 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 15 May 2024 12:58:50 +0100 Subject: [PATCH 102/150] Fix import handling (#49) This pull request fixes https://github.com/pyccel/pyccel-cuda/issues/48, by implementing a tiny wrapper for CUDA and a wrapper for non-CUDA functionalities only with external 'C'. **Commit Summary** - Implemented new header printer for CUDA. - Added CUDA wrapper assignment - Instead of wrapping all local headers, wrap only C functions with extern 'C' --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 3 +- pyccel/codegen/printing/cucode.py | 45 ++++++++---- pyccel/codegen/python_wrapper.py | 4 ++ pyccel/codegen/wrapper/cuda_to_c_wrapper.py | 78 +++++++++++++++++++++ tests/epyccel/modules/cuda_module.py | 13 ++++ tests/epyccel/test_epyccel_modules.py | 13 ++++ 6 files changed, 142 insertions(+), 14 deletions(-) create mode 100644 pyccel/codegen/wrapper/cuda_to_c_wrapper.py create mode 100644 tests/epyccel/modules/cuda_module.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 18fc7f947d..72a8f22ded 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,8 @@ All notable changes to this project will be documented in this file. ### Added -- #32 : add support for `nvcc` Compiler and `cuda` language as a possible option. +- #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. 
+- #48 : Fix incorrect handling of imports in `cuda`. ## \[UNRELEASED\] ### Added diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 86146b065b..277d2a3a6a 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -52,19 +52,7 @@ def _print_Module(self, expr): # Print imports last to be sure that all additional_imports have been collected imports = [Import(expr.name, Module(expr.name,(),())), *self._additional_imports.values()] - c_headers_imports = '' - local_imports = '' - - for imp in imports: - if imp.source in c_library_headers: - c_headers_imports += self._print(imp) - else: - local_imports += self._print(imp) - - imports = f'{c_headers_imports}\ - extern "C"{{\n\ - {local_imports}\ - }}' + imports = ''.join(self._print(i) for i in imports) code = f'{imports}\n\ {global_variables}\n\ @@ -72,3 +60,34 @@ def _print_Module(self, expr): self.exit_scope() return code + + def _print_ModuleHeader(self, expr): + self.set_scope(expr.module.scope) + self._in_header = True + name = expr.module.name + + funcs = "" + cuda_headers = "" + for f in expr.module.funcs: + if not f.is_inline: + if 'kernel' in f.decorators: # Checking for 'kernel' decorator + cuda_headers += self.function_signature(f) + ';\n' + else: + funcs += self.function_signature(f) + ';\n' + global_variables = ''.join('extern '+self._print(d) for d in expr.module.declarations if not d.variable.is_private) + # Print imports last to be sure that all additional_imports have been collected + imports = [*expr.module.imports, *self._additional_imports.values()] + imports = ''.join(self._print(i) for i in imports) + + self._in_header = False + self.exit_scope() + function_declaration = f'{cuda_headers}\n\ + extern "C"{{\n\ + {funcs}\ + }}\n' + return '\n'.join((f"#ifndef {name.upper()}_H", + f"#define {name.upper()}_H", + global_variables, + function_declaration, + f"#endif // {name.upper()}_H\n")) + diff --git a/pyccel/codegen/python_wrapper.py 
b/pyccel/codegen/python_wrapper.py index 9437727042..62c303fa64 100644 --- a/pyccel/codegen/python_wrapper.py +++ b/pyccel/codegen/python_wrapper.py @@ -13,6 +13,7 @@ from pyccel.codegen.printing.fcode import FCodePrinter from pyccel.codegen.wrapper.fortran_to_c_wrapper import FortranToCWrapper from pyccel.codegen.wrapper.c_to_python_wrapper import CToPythonWrapper +from pyccel.codegen.wrapper.cuda_to_c_wrapper import CudaToCWrapper from pyccel.codegen.utilities import recompile_object from pyccel.codegen.utilities import copy_internal_library from pyccel.codegen.utilities import internal_libs @@ -144,6 +145,9 @@ def create_shared_library(codegen, verbose=verbose) timings['Bind C wrapping'] = time.time() - start_bind_c_compiling c_ast = bind_c_mod + elif language == 'cuda': + wrapper = CudaToCWrapper() + c_ast = wrapper.wrap(codegen.ast) else: c_ast = codegen.ast diff --git a/pyccel/codegen/wrapper/cuda_to_c_wrapper.py b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py new file mode 100644 index 0000000000..c0e24c7c09 --- /dev/null +++ b/pyccel/codegen/wrapper/cuda_to_c_wrapper.py @@ -0,0 +1,78 @@ +# coding: utf-8 +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +Module describing the code-wrapping class : CudaToPythonWrapper +which creates an interface exposing Cuda code to C. +""" + +from pyccel.ast.bind_c import BindCModule +from pyccel.errors.errors import Errors +from pyccel.ast.bind_c import BindCVariable +from .wrapper import Wrapper + +errors = Errors() + +class CudaToCWrapper(Wrapper): + """ + Class for creating a wrapper exposing Cuda code to C. + + While CUDA is typically compatible with C by default. 
+ this wrapper becomes necessary in scenarios where specific adaptations + or modifications are required to ensure seamless integration with C. + """ + + def _wrap_Module(self, expr): + """ + Create a Module which is compatible with C. + + Create a Module which provides an interface between C and the + Module described by expr. + + Parameters + ---------- + expr : pyccel.ast.core.Module + The module to be wrapped. + + Returns + ------- + pyccel.ast.core.BindCModule + The C-compatible module. + """ + init_func = expr.init_func + if expr.interfaces: + errors.report("Interface wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + if expr.classes: + errors.report("Class wrapping is not yet supported for Cuda", + severity='warning', symbol=expr) + + variables = [self._wrap(v) for v in expr.variables] + + return BindCModule(expr.name, variables, expr.funcs, + init_func=init_func, + scope = expr.scope, + original_module=expr) + + def _wrap_Variable(self, expr): + """ + Create all objects necessary to expose a module variable to C. + + Create and return the objects which must be printed in the wrapping + module in order to expose the variable to C. + + Parameters + ---------- + expr : pyccel.ast.variables.Variable + The module variable. + + Returns + ------- + pyccel.ast.core.BindCVariable + The C-compatible variable, which must be printed in + the wrapping module to expose the variable. 
+ """ + return expr.clone(expr.name, new_class = BindCVariable) + diff --git a/tests/epyccel/modules/cuda_module.py b/tests/epyccel/modules/cuda_module.py new file mode 100644 index 0000000000..bb7ae6b98a --- /dev/null +++ b/tests/epyccel/modules/cuda_module.py @@ -0,0 +1,13 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import numpy as np + +g = np.float64(9.81) +r0 = np.float32(1.0) +rmin = 0.01 +rmax = 1.0 + +skip_centre = True + +method = 3 + +tiny = np.int32(4) diff --git a/tests/epyccel/test_epyccel_modules.py b/tests/epyccel/test_epyccel_modules.py index ad8ae0bd75..223f741bf0 100644 --- a/tests/epyccel/test_epyccel_modules.py +++ b/tests/epyccel/test_epyccel_modules.py @@ -200,3 +200,16 @@ def test_awkward_names(language): assert mod.function() == modnew.function() assert mod.pure() == modnew.pure() assert mod.allocate(1) == modnew.allocate(1) + +def test_cuda_module(language_with_cuda): + import modules.cuda_module as mod + + modnew = epyccel(mod, language=language_with_cuda) + + atts = ('g', 'r0', 'rmin', 'rmax', 'skip_centre', + 'method', 'tiny') + for att in atts: + mod_att = getattr(mod, att) + modnew_att = getattr(modnew, att) + assert mod_att == modnew_att + assert type(mod_att) is type(modnew_att) From 0d154f8466a6faba4785c9eb644de33b86b9b300 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Thu, 27 Jun 2024 20:31:46 +0100 Subject: [PATCH 103/150] Add support for kernels (#42) This pull request addresses issue #28 by implementing a new feature in Pyccel that allows users to define custom GPU kernels. The syntax for creating these kernels is inspired by Numba. 
and I also need to fix issue #45 for testing purposes **Commit Summary** - Introduced KernelCall class - Added cuda printer methods _print_KernelCall and _print_FunctionDef to generate the corresponding CUDA representation for both kernel calls and definitions - Added IndexedFunctionCall represents an indexed function call - Added CUDA module and cuda.synchronize() - Fixing a bug that I found in the header: it does not import the necessary header for the used function --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> Co-authored-by: Emily Bourne --- .dict_custom.txt | 1 + CHANGELOG.md | 2 + docs/cuda.md | 23 +++ pyccel/ast/core.py | 37 ++++ pyccel/ast/cuda.py | 65 +++++++ pyccel/ast/cudaext.py | 42 +++++ pyccel/ast/utilities.py | 4 +- pyccel/codegen/printing/cucode.py | 46 ++++- pyccel/cuda/__init__.py | 10 + pyccel/cuda/cuda_sync_primitives.py | 16 ++ pyccel/decorators.py | 32 ++++ pyccel/errors/messages.py | 8 + pyccel/parser/semantic.py | 84 ++++++++- pyccel/parser/syntactic.py | 4 + tests/conftest.py | 9 + tests/cuda/test_kernel_semantic.py | 176 ++++++++++++++++++ tests/pyccel/scripts/kernel/hello_kernel.py | 19 ++ .../scripts/kernel/kernel_name_collision.py | 8 + tests/pyccel/test_pyccel.py | 22 ++- 19 files changed, 599 insertions(+), 9 deletions(-) create mode 100644 docs/cuda.md create mode 100644 pyccel/ast/cuda.py create mode 100644 pyccel/ast/cudaext.py create mode 100644 pyccel/cuda/__init__.py create mode 100644 pyccel/cuda/cuda_sync_primitives.py create mode 100644 tests/cuda/test_kernel_semantic.py create mode 100644 tests/pyccel/scripts/kernel/hello_kernel.py create mode 100644 tests/pyccel/scripts/kernel/kernel_name_collision.py diff --git a/.dict_custom.txt b/.dict_custom.txt index 161337d33b..6ddf80b1ff 100644 --- a/.dict_custom.txt +++ b/.dict_custom.txt @@ -120,3 +120,4 @@ indexable traceback STC gFTL +GPUs diff --git a/CHANGELOG.md b/CHANGELOG.md index 72a8f22ded..aacdd31b87 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #42 : Add support for custom kernel in `cuda`. +- #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md new file mode 100644 index 0000000000..de30d52b80 --- /dev/null +++ b/docs/cuda.md @@ -0,0 +1,23 @@ +# Getting started GPU + +Pyccel now supports NVIDIA CUDA, empowering users to accelerate numerical computations on GPUs seamlessly. With Pyccel's high-level syntax and automatic code generation, harnessing the power of CUDA becomes effortless. This documentation provides a quick guide to enabling CUDA in Pyccel. + +## Cuda Decorator + +### kernel + +The kernel decorator allows the user to declare a CUDA kernel. The kernel can be defined in Python, and the syntax is similar to that of Numba. + +```python +from pyccel.decorators import kernel + +@kernel +def my_kernel(): + pass + +blockspergrid = 1 +threadsperblock = 1 +# Call your kernel function +my_kernel[blockspergrid, threadsperblock]() + +``` \ No newline at end of file diff --git a/pyccel/ast/core.py b/pyccel/ast/core.py index 8981ddc160..2758b75be2 100644 --- a/pyccel/ast/core.py +++ b/pyccel/ast/core.py @@ -73,6 +73,7 @@ 'If', 'IfSection', 'Import', + 'IndexedFunctionCall', 'InProgram', 'InlineFunctionDef', 'Interface', @@ -2065,6 +2066,42 @@ def _ignore(cls, c): """ return c is None or isinstance(c, (FunctionDef, *cls._ignored_types)) +class IndexedFunctionCall(FunctionCall): + """ + Represents an indexed function call in the code. + + Class representing indexed function calls, encapsulating all + relevant information for such calls within the code base. + + Parameters + ---------- + func : FunctionDef + The function being called. 
+ + args : iterable of FunctionCallArgument + The arguments passed to the function. + + indexes : iterable of TypedAstNode + The indexes of the function call. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_indexes',) + _attribute_nodes = FunctionCall._attribute_nodes + ('_indexes',) + def __init__(self, func, args, indexes, current_function = None): + self._indexes = indexes + super().__init__(func, args, current_function) + + @property + def indexes(self): + """ + Indexes of function call. + + Represents the indexes of the function call + """ + return self._indexes + class ConstructorCall(FunctionCall): """ diff --git a/pyccel/ast/cuda.py b/pyccel/ast/cuda.py new file mode 100644 index 0000000000..f1e50ef7f0 --- /dev/null +++ b/pyccel/ast/cuda.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Module +This module provides a collection of classes and utilities for CUDA programming. +""" +from pyccel.ast.core import FunctionCall + +__all__ = ( + 'KernelCall', +) + +class KernelCall(FunctionCall): + """ + Represents a kernel function call in the code. + + The class serves as a representation of a kernel + function call within the codebase. + + Parameters + ---------- + func : FunctionDef + The definition of the function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + num_blocks : TypedAstNode + The number of blocks. These objects must have a primitive type of `PrimitiveIntegerType`. + + tp_block : TypedAstNode + The number of threads per block. 
These objects must have a primitive type of `PrimitiveIntegerType`. + + current_function : FunctionDef, optional + The function where the call takes place. + """ + __slots__ = ('_num_blocks','_tp_block') + _attribute_nodes = (*FunctionCall._attribute_nodes, '_num_blocks', '_tp_block') + + def __init__(self, func, args, num_blocks, tp_block, current_function = None): + self._num_blocks = num_blocks + self._tp_block = tp_block + super().__init__(func, args, current_function) + + @property + def num_blocks(self): + """ + The number of blocks in the kernel being called. + + The number of blocks in the kernel being called. + """ + return self._num_blocks + + @property + def tp_block(self): + """ + The number of threads per block. + + The number of threads per block. + """ + return self._tp_block + diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py new file mode 100644 index 0000000000..b540f20993 --- /dev/null +++ b/pyccel/ast/cudaext.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +CUDA Extension Module +Provides CUDA functionality for code generation. +""" +from .internals import PyccelFunction + +from .datatypes import VoidType +from .core import Module, PyccelFunctionDef + +__all__ = ( + 'CudaSynchronize', +) + +class CudaSynchronize(PyccelFunction): + """ + Represents a call to Cuda.synchronize for code generation. + + This class serves as a representation of the Cuda.synchronize method. 
+ """ + __slots__ = () + _attribute_nodes = () + _shape = None + _class_type = VoidType() + def __init__(self): + super().__init__() + +cuda_funcs = { + 'synchronize' : PyccelFunctionDef('synchronize' , CudaSynchronize), +} + +cuda_mod = Module('cuda', + variables=[], + funcs=cuda_funcs.values(), + imports=[] +) + diff --git a/pyccel/ast/utilities.py b/pyccel/ast/utilities.py index 1e6c0422ab..e5cd77b168 100644 --- a/pyccel/ast/utilities.py +++ b/pyccel/ast/utilities.py @@ -25,6 +25,7 @@ from .literals import LiteralInteger, LiteralEllipsis, Nil from .mathext import math_mod from .sysext import sys_mod +from .cudaext import cuda_mod from .numpyext import (NumpyEmpty, NumpyArray, numpy_mod, NumpyTranspose, NumpyLinspace) @@ -49,7 +50,8 @@ decorators_mod = Module('decorators',(), funcs = [PyccelFunctionDef(d, PyccelFunction) for d in pyccel_decorators.__all__]) pyccel_mod = Module('pyccel',(),(), - imports = [Import('decorators', decorators_mod)]) + imports = [Import('decorators', decorators_mod), + Import('cuda', cuda_mod)]) # TODO add documentation builtin_import_registry = Module('__main__', diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 277d2a3a6a..cd26843017 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -9,11 +9,12 @@ enabling the direct translation of high-level Pyccel expressions into CUDA code. """ -from pyccel.codegen.printing.ccode import CCodePrinter, c_library_headers +from pyccel.codegen.printing.ccode import CCodePrinter -from pyccel.ast.core import Import, Module +from pyccel.ast.core import Import, Module +from pyccel.ast.literals import Nil -from pyccel.errors.errors import Errors +from pyccel.errors.errors import Errors errors = Errors() @@ -61,6 +62,44 @@ def _print_Module(self, expr): self.exit_scope() return code + def function_signature(self, expr, print_arg_names = True): + """ + Get the Cuda representation of the function signature. 
+ + Extract from the function definition `expr` all the + information (name, input, output) needed to create the + function signature and return a string describing the + function. + This is not a declaration as the signature does not end + with a semi-colon. + + Parameters + ---------- + expr : FunctionDef + The function definition for which a signature is needed. + + print_arg_names : bool, default : True + Indicates whether argument names should be printed. + + Returns + ------- + str + Signature of the function. + """ + cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + c_function_signature = super().function_signature(expr, print_arg_names) + return f'{cuda_decorater} {c_function_signature}' + + def _print_KernelCall(self, expr): + func = expr.funcdef + args = [a.value or Nil() for a in expr.args] + + args = ', '.join(self._print(a) for a in args) + return f"{func.name}<<<{expr.num_blocks}, {expr.tp_block}>>>({args});\n" + + def _print_CudaSynchronize(self, expr): + return 'cudaDeviceSynchronize();\n' + def _print_ModuleHeader(self, expr): self.set_scope(expr.module.scope) self._in_header = True @@ -87,6 +126,7 @@ def _print_ModuleHeader(self, expr): }}\n' return '\n'.join((f"#ifndef {name.upper()}_H", f"#define {name.upper()}_H", + imports, global_variables, function_declaration, "#endif // {name.upper()}_H\n")) diff --git a/pyccel/cuda/__init__.py b/pyccel/cuda/__init__.py new file mode 100644 index 0000000000..e8542ad5d5 --- /dev/null +++ b/pyccel/cuda/__init__.py @@ -0,0 +1,10 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" + This module is for exposing the CudaSubmodule functions. 
+""" +from .cuda_sync_primitives import synchronize + +__all__ = ['synchronize'] diff --git a/pyccel/cuda/cuda_sync_primitives.py b/pyccel/cuda/cuda_sync_primitives.py new file mode 100644 index 0000000000..f3442fe9e2 --- /dev/null +++ b/pyccel/cuda/cuda_sync_primitives.py @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This submodule contains CUDA methods for Pyccel. +""" + + +def synchronize(): + """ + Synchronize CUDA device execution. + + Synchronize CUDA device execution. + """ + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 1f640043db..77717a991f 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -19,6 +19,7 @@ 'sympy', 'template', 'types', + 'kernel' ) @@ -109,3 +110,34 @@ def allow_negative_index(f,*args): def identity(f): return f return identity + +def kernel(f): + """ + Decorator for marking a Python function as a kernel. + + This class serves as a decorator to mark a Python function + as a kernel function, typically used for GPU computations. + This allows the function to be indexed with the number of blocks and threads. + + Parameters + ---------- + f : function + The function to which the decorator is applied. + + Returns + ------- + KernelAccessor + A class representing the kernel function. + """ + class KernelAccessor: + """ + Class representing the kernel function. + + Class representing the kernel function. 
+ """ + def __init__(self, f): + self._f = f + def __getitem__(self, args): + return self._f + + return KernelAccessor(f) diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 79eccc1df2..09966d810c 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -162,3 +162,11 @@ WRONG_LINSPACE_ENDPOINT = 'endpoint argument must be boolean' NON_LITERAL_KEEP_DIMS = 'keep_dims argument must be a literal, otherwise rank is unknown' NON_LITERAL_AXIS = 'axis argument must be a literal, otherwise pyccel cannot determine which dimension to operate on' +MISSING_KERNEL_CONFIGURATION = 'Kernel launch configuration not specified' +INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' +INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' +INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' + + + + diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index f6e9f34f39..29e851b20c 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -116,6 +116,8 @@ from pyccel.ast.variable import IndexedElement, AnnotatedPyccelSymbol from pyccel.ast.variable import DottedName, DottedVariable +from pyccel.ast.cuda import KernelCall + from pyccel.errors.errors import Errors from pyccel.errors.errors import PyccelSemanticError @@ -133,7 +135,9 @@ PYCCEL_RESTRICTION_LIST_COMPREHENSION_LIMITS, PYCCEL_RESTRICTION_LIST_COMPREHENSION_SIZE, UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, - FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC) + FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, + MISSING_KERNEL_CONFIGURATION, + INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) from pyccel.parser.base import BasicParser from 
pyccel.parser.syntactic import SyntaxParser @@ -1139,6 +1143,67 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun return new_expr + def _handle_kernel(self, expr, func, args): + """ + Create the node representing the kernel function call. + + Create a FunctionCall or an instance of a PyccelInternalFunction + from the function information and arguments. + + Parameters + ---------- + expr : IndexedFunctionCall + Node has all the information about the function call. + + func : FunctionDef | Interface | PyccelInternalFunction type + The function being called. + + args : iterable of FunctionCallArgument + The arguments passed to the function. + + Returns + ------- + Pyccel.ast.cuda.KernelCall + The semantic representation of the kernel call. + """ + if len(expr.indexes) != 2: + errors.report(INVALID_KERNEL_LAUNCH_CONFIG, + symbol=expr, + severity='fatal') + if len(func.results): + errors.report(f"cuda kernel function '{func.name}' returned a value in violation of the laid-down specification", + symbol=expr, + severity='fatal') + if isinstance(func, FunctionDef) and len(args) != len(func.arguments): + errors.report(f"{len(args)} argument types given, but function takes {len(func.arguments)} arguments", + symbol=expr, + severity='fatal') + if not isinstance(expr.indexes[0], (LiteralInteger)): + if isinstance(expr.indexes[0], PyccelSymbol): + num_blocks = self.get_variable(expr.indexes[0]) + + if not isinstance(num_blocks.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + else: + errors.report(INVALID_KERNEL_CALL_BP_GRID, + symbol = expr, + severity='fatal') + if not isinstance(expr.indexes[1], (LiteralInteger)): + if isinstance(expr.indexes[1], PyccelSymbol): + tp_block = self.get_variable(expr.indexes[1]) + if not isinstance(tp_block.dtype, PythonNativeInt): + errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + else: + 
errors.report(INVALID_KERNEL_CALL_TP_BLOCK, + symbol = expr, + severity='fatal') + new_expr = KernelCall(func, args, expr.indexes[0], expr.indexes[1]) + return new_expr + def _sort_function_call_args(self, func_args, args): """ Sort and add the missing call arguments to match the arguments in the function definition. @@ -2852,6 +2917,23 @@ def _visit_Lambda(self, expr): expr = Lambda(tuple(expr.variables), expr_new) return expr + def _visit_IndexedFunctionCall(self, expr): + name = expr.funcdef + name = self.scope.get_expected_name(name) + func = self.scope.find(name, 'functions') + args = self._handle_function_args(expr.args) + + if func is None: + return errors.report(UNDEFINED_FUNCTION, symbol=expr.funcdef, + bounding_box=(self.current_ast_node.lineno, self.current_ast_node.col_offset), + severity='fatal') + + func = self._annotate_the_called_function_def(func) + if 'kernel' in func.decorators : + return self._handle_kernel(expr, func, args) + else: + return errors.report("Unknown function type", + symbol=expr, severity='fatal') def _visit_FunctionCall(self, expr): name = expr.funcdef try: diff --git a/pyccel/parser/syntactic.py b/pyccel/parser/syntactic.py index 318b765703..0cfe895605 100644 --- a/pyccel/parser/syntactic.py +++ b/pyccel/parser/syntactic.py @@ -64,6 +64,8 @@ from pyccel.ast.type_annotations import SyntacticTypeAnnotation, UnionTypeAnnotation +from pyccel.ast.core import IndexedFunctionCall + from pyccel.parser.base import BasicParser from pyccel.parser.extend_tree import extend_tree from pyccel.parser.utilities import get_default_path @@ -1101,6 +1103,8 @@ def _visit_Call(self, stmt): elif isinstance(func, DottedName): func_attr = FunctionCall(func.name[-1], args) func = DottedName(*func.name[:-1], func_attr) + elif isinstance(func,IndexedElement): + func = IndexedFunctionCall(func.base, args, func.indices) else: raise NotImplementedError(f' Unknown function type {type(func)}') diff --git a/tests/conftest.py b/tests/conftest.py index 
a5082ef6e8..4e74d1ec7a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,6 +59,15 @@ def pytest_runtest_teardown(item, nextitem): def pytest_addoption(parser): parser.addoption("--developer-mode", action="store_true", default=github_debugging, help="Show tracebacks when pyccel errors are raised") + parser.addoption("--gpu_available", action="store_true", + default=False, help="enable GPU tests") + +def pytest_generate_tests(metafunc): + if "gpu_available" in metafunc.fixturenames: + if metafunc.config.getoption("gpu_available"): + metafunc.parametrize("gpu_available", [True]) + else: + metafunc.parametrize("gpu_available", [False]) def pytest_sessionstart(session): # setup_stuff diff --git a/tests/cuda/test_kernel_semantic.py b/tests/cuda/test_kernel_semantic.py new file mode 100644 index 0000000000..00b74c3bea --- /dev/null +++ b/tests/cuda/test_kernel_semantic.py @@ -0,0 +1,176 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import kernel +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVALID_KERNEL_CALL_TP_BLOCK, + INVALID_KERNEL_CALL_BP_GRID, + INVALID_KERNEL_LAUNCH_CONFIG) + + +@pytest.mark.cuda +def test_invalid_block_number(): + def invalid_block_number(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1.0 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_block_number, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_BP_GRID == error_info.message + + +@pytest.mark.cuda +def test_invalid_thread_per_block(): + def invalid_thread_per_block(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + 
threads_per_block = 1.0 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_thread_per_block, language="cuda") + assert errors.has_errors() + assert errors.num_messages() == 1 + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_CALL_TP_BLOCK == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_high(): + def invalid_launch_config_high(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + third_param = 1 + kernel_call[blocks_per_grid, threads_per_block, third_param]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_high, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_launch_config_low(): + def invalid_launch_config_low(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + kernel_call[blocks_per_grid]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_launch_config_low, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert INVALID_KERNEL_LAUNCH_CONFIG == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call(): + def invalid_arguments(): + @kernel + def kernel_call(arg : int): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block]() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments, language="cuda") + + assert errors.has_errors() + assert 
errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "0 argument types given, but function takes 1 arguments" == error_info.message + + +@pytest.mark.cuda +def test_invalid_arguments_for_kernel_call_2(): + def invalid_arguments_(): + @kernel + def kernel_call(): + pass + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_arguments_, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "1 argument types given, but function takes 0 arguments" == error_info.message + + +@pytest.mark.cuda +def test_kernel_return(): + def kernel_return(): + @kernel + def kernel_call(): + return 7 + + blocks_per_grid = 1 + threads_per_block = 1 + kernel_call[blocks_per_grid, threads_per_block](1) + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(kernel_return, language="cuda") + + assert errors.has_errors() + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert error_info.symbol.funcdef == 'kernel_call' + assert "cuda kernel function 'kernel_call' returned a value in violation of the laid-down specification" == error_info.message diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py new file mode 100644 index 0000000000..b6901b25a1 --- /dev/null +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -0,0 +1,19 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def say_hello(its_morning : bool): + if(its_morning): + print("Hello and Good morning") + else: + print("Hello and Good afternoon") + +def f(): + 
its_morning = True + say_hello[1,1](its_morning) + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/kernel_name_collision.py b/tests/pyccel/scripts/kernel/kernel_name_collision.py new file mode 100644 index 0000000000..ac7abe25ae --- /dev/null +++ b/tests/pyccel/scripts/kernel/kernel_name_collision.py @@ -0,0 +1,8 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel + +@kernel +def do(): + pass + +do[1,1]() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index ec1e846549..b4757a3c31 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -294,7 +294,7 @@ def compare_pyth_fort_output( p_output, f_output, dtype=float, language=None): #------------------------------------------------------------------------------ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, cwd = None, pyccel_commands = "", output_dtype = float, - language = None, output_dir = None): + language = None, output_dir = None, execute_code = True): """ Run pyccel and compare the output to ensure that the results are equivalent @@ -394,13 +394,14 @@ def pyccel_test(test_file, dependencies = None, compile_with_pyccel = True, compile_fortran(cwd, output_test_file, dependencies) elif language == 'c': compile_c(cwd, output_test_file, dependencies) - - lang_out = get_lang_output(output_test_file, language) - compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) + if execute_code: + lang_out = get_lang_output(output_test_file, language) + compare_pyth_fort_output(pyth_out, lang_out, output_dtype, language) #============================================================================== # UNIT TESTS #============================================================================== + def test_relative_imports_in_project(language): base_dir = os.path.dirname(os.path.realpath(__file__)) @@ -728,6 +729,19 @@ def 
test_multiple_results(language): def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_hello_kernel(gpu_available): + types = str + pyccel_test("scripts/kernel/hello_kernel.py", + language="cuda", output_dtype=types , execute_code=gpu_available) + +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_kernel_collision(gpu_available): + pyccel_test("scripts/kernel/kernel_name_collision.py", + language="cuda", execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From 2ffa7fc9f3b632f149a40c4bc8e7f84dee3cc636 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 17:37:02 +0100 Subject: [PATCH 104/150] Updated CUDA Name Clash Checker By Added CUDA-specific keywords (#60) This pull request addresses issue #59 by adding more CUDA-specific keywords to enhance the checking of variable/function names and prevent name clashes --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- CHANGELOG.md | 1 + pyccel/naming/cudanameclashchecker.py | 36 ++++++++++++++++++++++- pyccel/naming/languagenameclashchecker.py | 5 ++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aacdd31b87..ed9ebc8e21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. 
diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py
index 971204e912..c7aaa4952f 100644
--- a/pyccel/naming/cudanameclashchecker.py
+++ b/pyccel/naming/cudanameclashchecker.py
@@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker):
     verify that they do not cause name clashes. Name clashes may be due
     to new variables, or due to the use of reserved keywords.
     """
+    # Keywords as mentioned on https://en.cppreference.com/w/c/keyword
     keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case',
         'char', 'const', 'continue', 'default', 'do', 'double', 'else',
         'enum',
@@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker):
         'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', 'INDEX',
         'GET_ELEMENT', 'free_array', 'free_pointer',
         'get_index', 'numpy_to_ndarray_strides',
-        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data'])
+        'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data',
+        '__global__', '__device__', '__host__','__constant__', '__shared__',
+        '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim',
+        'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset',
+        'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch',
+        'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc',
+        'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer',
+        'cudaHostGetFlags', 'cudaDeviceSynchronize', 'cudaDeviceReset',
+        'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties',
+        'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice',
+        'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize',
+        'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord',
+        'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet',
+        'cuDeviceGetCount', 'cuDeviceGetName',
+        'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy',
+        'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload',
+        'cuModuleGetFunction', 
'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) def has_clash(self, name, symbols): """ diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. 
From 8eef19d1f0eb02737e9fa3b7d265c80c128880d2 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:04:22 +0100 Subject: [PATCH 105/150] add handle for custom device (#61) This pull request addresses issue https://github.com/pyccel/pyccel-cuda/issues/41 by implementing a new feature in Pyccel that allows users to define a custom device **Commit Summary** - Adding handler for custom device and its code generation. - Adding test --------- Co-authored-by: EmilyBourne --- CHANGELOG.md | 1 + docs/cuda.md | 25 ++++++++++++++++- pyccel/codegen/printing/cucode.py | 7 ++--- pyccel/decorators.py | 19 +++++++++++++ pyccel/errors/messages.py | 2 +- pyccel/parser/semantic.py | 7 ++++- tests/cuda/test_device_semantic.py | 31 ++++++++++++++++++++++ tests/pyccel/scripts/kernel/device_test.py | 18 +++++++++++++ tests/pyccel/test_pyccel.py | 8 ++++++ 9 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 tests/cuda/test_device_semantic.py create mode 100644 tests/pyccel/scripts/kernel/device_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ed9ebc8e21..13434a32b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - #59 : Updated `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in`cuda`. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md index de30d52b80..7643a4ac02 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -20,4 +20,27 @@ threadsperblock = 1 # Call your kernel function my_kernel[blockspergrid, threadsperblock]() -``` \ No newline at end of file +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. 
They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..7c01d93c47 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True): str Signature of the function. """ - cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' c_function_signature = super().function_signature(expr, print_arg_names) - return f'{cuda_decorater} {c_function_signature}' + return f'{cuda_decorator} {c_function_signature}' def _print_KernelCall(self, expr): func = expr.funcdef @@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr): cuda_headers = "" for f in expr.module.funcs: if not f.is_inline: - if 'kernel' in f.decorators: # Checking for 'kernel' decorator + if 'kernel' in f.decorators or 'device' in f.decorators: cuda_headers += self.function_signature(f) + ';\n' else: funcs += self.function_signature(f) + ';\n' diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 77717a991f..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -141,3 +142,21 @@ def __getitem__(self, args): return self._f return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. 
+ + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. + """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 09966d810c..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -166,7 +166,7 @@ INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' - +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index 29e851b20c..6b4143b442 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -136,9 +136,10 @@ UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, - MISSING_KERNEL_CONFIGURATION, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. 
""" + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b4757a3c31..2d55c6e1cb 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available): pyccel_test("scripts/kernel/kernel_name_collision.py", language="cuda", execute_code=gpu_available) +#------------------------------------------------------------------------------ + +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str From c02b661fa59e1e2a68dfe356a12dd303652047fc Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 14:42:08 +0100 Subject: [PATCH 106/150] include cuda_ndarrays.cu in package distribution --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index adffdd13dd..6aec79003f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ include = [ "pyccel/stdlib/**/*.f90", "pyccel/extensions/STC/include", "pyccel/extensions/gFTL/include/v2" + "pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu" ] exclude = [ "pyccel/extensions/STC/src", From bbbf6f8de4c39a30a8a031b1ab765f357c1186e0 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 14:51:19 +0100 Subject: [PATCH 107/150] add , to cuda include --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6aec79003f..29fbba3da1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ include = [ "pyccel/stdlib/**/*.c", "pyccel/stdlib/**/*.f90", "pyccel/extensions/STC/include", - "pyccel/extensions/gFTL/include/v2" + "pyccel/extensions/gFTL/include/v2", "pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu" ] exclude = [ From 88d74d18ae55388819fbae41330e892ef050d270 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 15:17:56 +0100 Subject: [PATCH 108/150] cleaning up 
my PR --- pyccel/codegen/compiling/compilers.py | 2 -- pyccel/codegen/utilities.py | 2 -- pyccel/stdlib/ndarrays/ndarrays.c | 15 ++++++++------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index 9ba44fbb23..0d496b9e8d 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -329,8 +329,6 @@ def compile_module(self, compile_obj, output_folder, verbose = False): verbose : bool Indicates whether additional output should be shown. """ - print("Compiling : ", compile_obj.source) - print(os.path.exists(compile_obj.source)) if not compile_obj.has_target_file: return diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 8553b89195..61b19d93d1 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -112,7 +112,6 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None): str The location that the files were copied to. 
""" - print("copy_internal_library : ", lib_folder) # get lib path (stdlib_path/lib_name or ext_path/lib_name) if lib_folder in external_libs: lib_path = os.path.join(ext_path, external_libs[lib_folder], lib_folder) @@ -148,7 +147,6 @@ def copy_internal_library(lib_folder, pyccel_dirpath, extra_files = None): dst_files = [os.path.relpath(os.path.join(root, f), lib_dest_path) \ for root, dirs, files in os.walk(lib_dest_path) \ for f in files if not f.endswith('.lock')] - print("Created : ", dst_files) # Create any requested extra files if extra_files: for filename, contents in extra_files.items(): diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c index 784b222cbb..1bbad5e29e 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.c +++ b/pyccel/stdlib/ndarrays/ndarrays.c @@ -493,7 +493,7 @@ bool is_same_shape(t_ndarray a, t_ndarray b) } return (true); } - +#ifndef __NVCC__ #define COPY_DATA_FROM_(SRC_TYPE) \ void copy_data_from_##SRC_TYPE(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp) \ { \ @@ -588,24 +588,24 @@ bool is_same_shape(t_ndarray a, t_ndarray b) if(elem_wise_cp == false)\ { \ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cfloat[i + offset] = src.nd_cfloat[i]; \ + dest->nd_cfloat[i + offset] = (float complex)src.nd_##SRC_TYPE[i]; \ }\ else \ {\ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = src.nd_cfloat[element_index(src, i, src.nd)]; \ + dest->nd_cfloat[element_index(*dest, i, dest->nd) + offset] = (float complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \ }\ break; \ case nd_cdouble: \ if(elem_wise_cp == false)\ { \ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cdouble[i + offset] = src.nd_cdouble[i]; \ + dest->nd_cdouble[i + offset] = (double complex)src.nd_##SRC_TYPE[i]; \ }\ else \ {\ for(int64_t i = 0; i < src.length; i++) \ - dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = src.nd_cdouble[element_index(src, i, 
src.nd)]; \ + dest->nd_cdouble[element_index(*dest, i, dest->nd) + offset] = (double complex)src.nd_##SRC_TYPE[element_index(src, i, src.nd)]; \ }\ break; \ } \ @@ -618,10 +618,8 @@ COPY_DATA_FROM_(int32) COPY_DATA_FROM_(int64) COPY_DATA_FROM_(float) COPY_DATA_FROM_(double) -#ifndef __NVCC__ COPY_DATA_FROM_(cfloat) COPY_DATA_FROM_(cdouble) -#endif void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp) { @@ -666,6 +664,8 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp } } +#= + void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset) { unsigned char *d = (unsigned char*)dest->raw_data; @@ -682,6 +682,7 @@ void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset) copy_data(&dest, src, offset, true); } } +#endif /* ** sum of ndarray From 270594970309953fc9600ed21a6d83d9cbe7498a Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 15:26:03 +0100 Subject: [PATCH 109/150] cleaning up my PR --- pyccel/ast/class_defs.py | 2 -- pyccel/stdlib/ndarrays/ndarrays.c | 1 - tests/pyccel/test_pyccel.py | 5 ++--- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pyccel/ast/class_defs.py b/pyccel/ast/class_defs.py index 10dec4694e..4318cd1575 100644 --- a/pyccel/ast/class_defs.py +++ b/pyccel/ast/class_defs.py @@ -266,8 +266,6 @@ def get_cls_base(class_type): return CudaArrayClass elif isinstance(class_type, (NumpyNumericType, NumpyNDArrayType)): return NumpyArrayClass - - # elif isinstance(class_type, StackArrayType): elif isinstance(class_type, TupleType): return TupleClass diff --git a/pyccel/stdlib/ndarrays/ndarrays.c b/pyccel/stdlib/ndarrays/ndarrays.c index 1bbad5e29e..c6502b93bb 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.c +++ b/pyccel/stdlib/ndarrays/ndarrays.c @@ -664,7 +664,6 @@ void copy_data(t_ndarray **ds, t_ndarray src, uint32_t offset, bool elem_wise_cp } } -#= void array_copy_data(t_ndarray *dest, t_ndarray src, uint32_t offset) { diff --git 
a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index e37286c401..689d74de3f 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -62,9 +62,8 @@ def compile_pyccel(path_dir, test_file, options = ""): cmd = [shutil.which("pyccel"), test_file] if options != "": cmd += options.strip().split() - p = subprocess.run(cmd, capture_output = True, universal_newlines=True, cwd=path_dir) - print(p.stdout) - print(p.stderr) + p = subprocess.Popen(cmd, universal_newlines=True, cwd=path_dir) + p.wait() assert p.returncode==0 #------------------------------------------------------------------------------ From 77a93e743302b38c94e5973d18ffd1307b8a0a7b Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 15:53:36 +0100 Subject: [PATCH 110/150] cleaning up my PR --- pyccel/codegen/printing/ccode.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 676d3b37a4..3b1e613da3 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1327,10 +1327,13 @@ def get_declare_type(self, expr): if isinstance(expr.class_type, (HomogeneousSetType, HomogeneousListType)): dtype = self.get_c_type(expr.class_type) return dtype - if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType, CudaArrayType)): + if isinstance(expr.class_type,(HomogeneousTupleType, NumpyNDArrayType)): if expr.rank > 15: errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) + dtype = 't_ndarray' + elif isinstance(expr.class_type, CudaArrayType): + self.add_import(c_imports['ndarrays']) self.add_import(c_imports['cuda_ndarrays']) dtype = 't_ndarray' else: From ff74a4a403ce0a967739a0ae0823258dd9b36a26 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 16:27:46 +0100 Subject: [PATCH 111/150] enable import cucomplex in ndarrays header --- pyccel/stdlib/ndarrays/ndarrays.h | 5 ++++- 1 
file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyccel/stdlib/ndarrays/ndarrays.h b/pyccel/stdlib/ndarrays/ndarrays.h index 2e1b8e793d..c2f1293bef 100644 --- a/pyccel/stdlib/ndarrays/ndarrays.h +++ b/pyccel/stdlib/ndarrays/ndarrays.h @@ -10,7 +10,10 @@ # include # include # include -#include + +# ifdef __NVCC__ + #include +# endif /* mapping the function array_fill to the correct type */ # define array_fill(c, arr) _Generic((c), int64_t : _array_fill_int64,\ From e138ae50cc2888cdf5d970f556483b40f77ce199 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Fri, 19 Jul 2024 17:02:30 +0100 Subject: [PATCH 112/150] cleaning up my PR --- pyccel/codegen/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 61b19d93d1..07be774063 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -43,7 +43,7 @@ "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))), "numpy_f90" : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")), "numpy_c" : ("numpy", CompileObj("numpy_c.c",folder="numpy")), - "STC_Extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h",folder="STC_Extensions", has_target_file = False)), + "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", folder="STC_Extensions", has_target_file = False)), } internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays", accelerators = ('python',), From 8c55fd9fd3056a32a6ddce5ba05925a048a6bd93 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Sun, 21 Jul 2024 18:26:55 +0100 Subject: [PATCH 113/150] adding test for cuda array addition --- pyccel/codegen/printing/cucode.py | 5 +++-- .../scripts/kernel/cuda_host_array_addition.py | 18 ++++++++++++++++++ tests/pyccel/test_pyccel.py | 6 ++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 
tests/pyccel/scripts/kernel/cuda_host_array_addition.py diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 830af67dab..d182e67593 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -147,7 +147,8 @@ def _print_Allocate(self, expr): dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)]) else: raise NotImplementedError(f"Don't know how to index {variable.class_type} type") - shape_Assign = "int64_t shape_Assign [] = {" + shape + "};\n" + shape_Assign = f"int64_t shape_Assign_{expr.variable.name} [] = {{{shape}}};\n" + is_view = 'false' if variable.on_heap else 'true' memory_location = variable.class_type.memory_location if memory_location in ('device', 'host'): @@ -155,7 +156,7 @@ def _print_Allocate(self, expr): else: memory_location = 'managedMemory' self.add_import(c_imports['cuda_ndarrays']) - alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign, {dtype}, {is_view},{memory_location});\n" + alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign_{expr.variable.name}, {dtype}, {is_view},{memory_location});\n" return f'{shape_Assign} {alloc_code}' def _print_Deallocate(self, expr): diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py new file mode 100644 index 0000000000..3734f45c7c --- /dev/null +++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel import cuda + +def addition_cuda_host_arrays(): + a = cuda.host_empty(3) + b = cuda.host_empty(3) + + for i in range(3): + b[i] = 1 + a[i] = 1 + + for i in range(3): + b[i] += a[i] + + print(b) + +if __name__ == '__main__': + addition_cuda_host_arrays() diff --git a/tests/pyccel/test_pyccel.py 
b/tests/pyccel/test_pyccel.py index 689d74de3f..695a3f612f 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -750,7 +750,13 @@ def test_host_array(gpu_available): language="cuda", output_dtype=types, execute_code=gpu_available) #------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_cuda_host_array_addition(gpu_available): + types = float + pyccel_test("scripts/kernel/cuda_host_array_addition.py", + language="cuda", output_dtype=types, execute_code=gpu_available) +#------------------------------------------------------------------------------ @pytest.mark.cuda def test_device_call(gpu_available): types = str From d5b733f4567f15842d95e93d7ac4967523d6c38d Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Sun, 21 Jul 2024 21:55:56 +0100 Subject: [PATCH 114/150] refactoring the code --- pyccel/ast/cudaext.py | 10 ---------- pyccel/codegen/printing/ccode.py | 4 ---- pyccel/codegen/printing/cucode.py | 4 +--- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index f890fc7999..a97623ac15 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -49,16 +49,6 @@ class CudaNewarray(PyccelFunction): __slots__ = ('_class_type', '_init_dtype', '_memory_location') name = 'newarray' - @property - def init_dtype(self): - """ - The dtype provided to the function when it was initialised in Python. - - The dtype provided to the function when it was initialised in Python. - If no dtype was provided then this should equal `None`. 
- """ - return self._init_dtype - def __init__(self, *args ,class_type, init_dtype, memory_location): self._class_type = class_type self._init_dtype = init_dtype diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 3b1e613da3..4dd87b0514 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1332,10 +1332,6 @@ def get_declare_type(self, expr): errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' - elif isinstance(expr.class_type, CudaArrayType): - self.add_import(c_imports['ndarrays']) - self.add_import(c_imports['cuda_ndarrays']) - dtype = 't_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') elif not isinstance(class_type, CustomDataType): diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index d182e67593..de3f97a4a1 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -143,12 +143,10 @@ def _print_Allocate(self, expr): shape = ", ".join(self._print(i) for i in expr.shape) if isinstance(variable.class_type, CudaArrayType): dtype = self.find_in_ndarray_type_registry(variable.dtype) - elif isinstance(variable.class_type, HomogeneousContainerType): - dtype = self.find_in_ndarray_type_registry(numpy_precision_map[(variable.dtype.primitive_type, variable.dtype.precision)]) else: raise NotImplementedError(f"Don't know how to index {variable.class_type} type") shape_Assign = f"int64_t shape_Assign_{expr.variable.name} [] = {{{shape}}};\n" - + is_view = 'false' if variable.on_heap else 'true' memory_location = variable.class_type.memory_location if memory_location in ('device', 'host'): From 1fcb3a2d22d39f110648711446059011f36afeb1 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 11:19:18 +0100 Subject: [PATCH 115/150] adding test for cuda 2d array addition --- pyccel/ast/cudaext.py | 8 ------ 
pyccel/ast/cudatypes.py | 25 ++++++++++++++++++ pyccel/ast/variable.py | 2 +- pyccel/codegen/printing/ccode.py | 4 +++ pyccel/codegen/printing/cucode.py | 26 ------------------- .../kernel/cuda_host_2d_array_addition.py | 19 ++++++++++++++ tests/pyccel/test_pyccel.py | 6 +++++ 7 files changed, 55 insertions(+), 35 deletions(-) create mode 100644 tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index a97623ac15..761454cae0 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -123,14 +123,6 @@ class CudaHostEmpty(CudaFull): def __init__(self, shape, dtype='float', order='C'): memory_location = 'host' super().__init__(shape, Nil(), dtype, order , memory_location) - @property - def fill_value(self): - """ - The value with which the array will be filled on initialisation. - - The value with which the array will be filled on initialisation. - """ - return None class CudaSynchronize(PyccelFunction): """ diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 70c1fc06c8..9d2cab79a9 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -98,7 +98,32 @@ def order(self): this function returns None. """ return self._order + def switch_rank(self, new_rank, new_order = None): + """ + Get a type which is identical to this type in all aspects except the rank and/or order. + + Get a type which is identical to this type in all aspects except the rank and/or order. + The order must be provided if the rank is increased from 1. Otherwise it defaults to the + same order as the current type. + + Parameters + ---------- + new_rank : int + The rank of the new type. + new_order : str, optional + The order of the new type. This should be provided if the rank is increased from 1. + + Returns + ------- + PyccelType + The new type. 
+ """ + if new_rank == 0: + return self.element_type + else: + new_order = (new_order or self._order) if new_rank > 1 else None + return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location) def __repr__(self): dims = ','.join(':'*self._container_rank) order_str = f'(order={self._order})' if self._order else '' diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index 051cf631b7..e55f54b9ab 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ -798,7 +798,7 @@ class IndexedElement(TypedAstNode): _attribute_nodes = ('_label', '_indices', '_shape') def __init__(self, base, *indices): - + if not indices: raise IndexError('Indexed needs at least one index.') diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 4dd87b0514..3b1e613da3 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -1332,6 +1332,10 @@ def get_declare_type(self, expr): errors.report(UNSUPPORTED_ARRAY_RANK, symbol=expr, severity='fatal') self.add_import(c_imports['ndarrays']) dtype = 't_ndarray' + elif isinstance(expr.class_type, CudaArrayType): + self.add_import(c_imports['ndarrays']) + self.add_import(c_imports['cuda_ndarrays']) + dtype = 't_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') elif not isinstance(class_type, CustomDataType): diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index de3f97a4a1..980a9d52ad 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -167,32 +167,6 @@ def _print_Deallocate(self, expr): return f"cuda_free_host({var_code});\n" else: return f"cuda_free({var_code});\n" - def get_declare_type(self, expr): - """ - Get the string which describes the type in a declaration. 
- - This function returns the code which describes the type - of the `expr` object such that the declaration can be written as: - `f"{self.get_declare_type(expr)} {expr.name}"` - - Parameters - ---------- - expr : Variable - The variable whose type should be described. - - Returns - ------- - str - The code describing the type. - """ - class_type = expr.class_type - rank = expr.rank - if not isinstance(class_type, CudaArrayType ) or rank <= 0: - return super().get_declare_type(expr) - self.add_import(c_imports['ndarrays']) - self.add_import(c_imports['cuda_ndarrays']) - dtype = 't_ndarray ' - return dtype def _print_Assign(self, expr): rhs = expr.rhs diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py new file mode 100644 index 0000000000..05fd29769f --- /dev/null +++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py @@ -0,0 +1,19 @@ +from pyccel import cuda + +def addition_cuda_host_2Darrays(): + a = cuda.host_empty((10,10)) + b = cuda.host_empty((10,10)) + + for i in range(10): + for j in range(10): + a[i][j] = 1 + b[i][j] = 1 + b+=a + b+=1 + + for i in range(10): + for j in range(10): + print(b[i][j]) +if __name__ == '__main__': + addition_cuda_host_2Darrays() + diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 695a3f612f..f8638a3b3c 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -755,6 +755,12 @@ def test_cuda_host_array_addition(gpu_available): types = float pyccel_test("scripts/kernel/cuda_host_array_addition.py", language="cuda", output_dtype=types, execute_code=gpu_available) +#------------------------------------------------------------------------------ +@pytest.mark.cuda +def test_cuda_host_2d_array_addition(gpu_available): + types = float + pyccel_test("scripts/kernel/cuda_host_2d_array_addition.py", + language="cuda", output_dtype=types, execute_code=gpu_available) 
#------------------------------------------------------------------------------ @pytest.mark.cuda From c5c9db1e7e68671f88c88ec718bb1ca5edbb1f4f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 11:29:05 +0100 Subject: [PATCH 116/150] cleaning up my PR --- pyccel/ast/variable.py | 2 +- pyccel/codegen/printing/cucode.py | 2 -- tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py | 6 +++--- tests/pyccel/scripts/kernel/cuda_host_array_addition.py | 4 +--- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pyccel/ast/variable.py b/pyccel/ast/variable.py index e55f54b9ab..051cf631b7 100644 --- a/pyccel/ast/variable.py +++ b/pyccel/ast/variable.py @@ -798,7 +798,7 @@ class IndexedElement(TypedAstNode): _attribute_nodes = ('_label', '_indices', '_shape') def __init__(self, base, *indices): - + if not indices: raise IndexError('Indexed needs at least one index.') diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 980a9d52ad..bf43a12b1a 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -16,8 +16,6 @@ from pyccel.errors.errors import Errors from pyccel.ast.cudatypes import CudaArrayType -from pyccel.ast.datatypes import HomogeneousContainerType -from pyccel.ast.numpytypes import numpy_precision_map from pyccel.ast.cudaext import CudaFull from pyccel.codegen.printing.ccode import c_imports diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py index 05fd29769f..80d8785acd 100644 --- a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py +++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py @@ -1,3 +1,4 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring from pyccel import cuda def addition_cuda_host_2Darrays(): @@ -11,9 +12,8 @@ def addition_cuda_host_2Darrays(): b+=a b+=1 - for i in range(10): - for j in range(10): - print(b[i][j]) + print(b) + if 
__name__ == '__main__': addition_cuda_host_2Darrays() diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py index 3734f45c7c..3ad186fd4f 100644 --- a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py +++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py @@ -9,9 +9,7 @@ def addition_cuda_host_arrays(): b[i] = 1 a[i] = 1 - for i in range(3): - b[i] += a[i] - + b += a print(b) if __name__ == '__main__': From bbd46e12c8b0597eb3b5ac12ff731aa4b08f47c7 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 11:51:47 +0100 Subject: [PATCH 117/150] cleaning up my PR --- pyccel/ast/cudatypes.py | 2 ++ pyccel/codegen/printing/ccode.py | 1 - pyccel/parser/semantic.py | 5 ++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 9d2cab79a9..671a9e031e 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -16,6 +16,7 @@ from .numpytypes import NumpyNDArrayType +__all__ = ('CudaArrayType',) class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): """ @@ -36,6 +37,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): The memory location of the new cuda array ('host' or 'device'). 
""" __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') + def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 3b1e613da3..1ffc85cbd2 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -46,7 +46,6 @@ from pyccel.ast.numpytypes import NumpyFloat32Type, NumpyFloat64Type, NumpyComplex64Type, NumpyComplex128Type from pyccel.ast.numpytypes import NumpyNDArrayType, numpy_precision_map from pyccel.ast.cudatypes import CudaArrayType -from pyccel.ast.cudaext import CudaFull from pyccel.ast.type_annotations import VariableTypeAnnotation diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index ee95eb041b..6b4143b442 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -3169,7 +3169,7 @@ def _visit_Assign(self, expr): elif isinstance(rhs, CodeBlock) and len(rhs.body)>1 and isinstance(rhs.body[1], FunctionalFor): return rhs - + elif isinstance(rhs, FunctionCall): func = rhs.funcdef results = func.results @@ -3202,7 +3202,7 @@ def _visit_Assign(self, expr): d_var['memory_handling'] = arg.memory_handling d_var['class_type' ] = arg.class_type d_var['cls_base' ] = arg.cls_base - + elif isinstance(rhs, NumpyTranspose): d_var = self._infer_type(rhs) if d_var['memory_handling'] == 'alias' and not isinstance(lhs, IndexedElement): @@ -3211,7 +3211,6 @@ def _visit_Assign(self, expr): if expr.lhs.is_temp: return rhs else: - raise NotImplementedError("Cannot assign result of a function without a return") else: From 89cd3aab2e13fbffd1eb233c2b247b6df72b45fc Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 11:55:31 +0100 Subject: [PATCH 118/150] cleaning up my PR --- pyccel/ast/cudatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index 671a9e031e..d3687629ab 100644 --- 
a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -37,7 +37,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): The memory location of the new cuda array ('host' or 'device'). """ __slots__ = ('_element_type', '_container_rank', '_order', '_memory_location') - + def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) From 5c49cd383fb764a66cfc79c6b11cc9d1f48903c8 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 14:56:07 +0100 Subject: [PATCH 119/150] cleaning up my PR --- pyccel/ast/cudatypes.py | 7 ++----- tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py | 4 ++-- tests/pyccel/scripts/kernel/cuda_host_array_addition.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index d3687629ab..d48a3de7f3 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -121,11 +121,8 @@ def switch_rank(self, new_rank, new_order = None): PyccelType The new type. 
""" - if new_rank == 0: - return self.element_type - else: - new_order = (new_order or self._order) if new_rank > 1 else None - return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location) + new_order = (new_order or self._order) if new_rank > 1 else None + return CudaArrayType(self.element_type, new_rank, new_order, self.memory_location) def __repr__(self): dims = ','.join(':'*self._container_rank) order_str = f'(order={self._order})' if self._order else '' diff --git a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py index 80d8785acd..308970e9d0 100644 --- a/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py +++ b/tests/pyccel/scripts/kernel/cuda_host_2d_array_addition.py @@ -9,8 +9,8 @@ def addition_cuda_host_2Darrays(): for j in range(10): a[i][j] = 1 b[i][j] = 1 - b+=a - b+=1 + b = b + a + b = b + 1 print(b) diff --git a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py index 3ad186fd4f..d0f61881cf 100644 --- a/tests/pyccel/scripts/kernel/cuda_host_array_addition.py +++ b/tests/pyccel/scripts/kernel/cuda_host_array_addition.py @@ -9,7 +9,7 @@ def addition_cuda_host_arrays(): b[i] = 1 a[i] = 1 - b += a + b = b + a print(b) if __name__ == '__main__': From 4ff9ed2f09e905d46073c8c649130a01cb6144bd Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 15:52:08 +0100 Subject: [PATCH 120/150] pdate Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13434a32b2..a3f9f8a996 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. - #41 : Add support for custom device in`cuda`. +- #64 : Add support for `cuda.device_empty` function. 
## \[UNRELEASED\] From 843429b00d8d7813679b6747c46c769bc4f98c39 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Mon, 22 Jul 2024 16:27:29 +0100 Subject: [PATCH 121/150] refactoring the code --- pyccel/ast/numpyext.py | 3 ++- pyccel/codegen/compiling/compilers.py | 1 + pyccel/codegen/pipeline.py | 1 + pyccel/codegen/printing/codeprinter.py | 1 + pyccel/errors/errors.py | 1 + pyccel/naming/cudanameclashchecker.py | 1 + 6 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyccel/ast/numpyext.py b/pyccel/ast/numpyext.py index eb1ee92e26..861b640282 100644 --- a/pyccel/ast/numpyext.py +++ b/pyccel/ast/numpyext.py @@ -626,6 +626,7 @@ def __init__(self, *args, class_type, init_dtype = None): assert isinstance(class_type, NumpyNDArrayType) self._init_dtype = init_dtype self._class_type = class_type # pylint: disable=no-member + super().__init__(*args) @property @@ -1312,12 +1313,12 @@ def __init__(self, shape, fill_value, dtype=None, order='C'): # Convert shape to PythonTuple shape = process_shape(False, shape) + init_dtype = dtype # If there is no dtype, extract it from fill_value # TODO: must get dtype from an annotated node if dtype is None: dtype = fill_value.dtype - dtype = process_dtype(dtype) # Cast fill_value to correct type diff --git a/pyccel/codegen/compiling/compilers.py b/pyccel/codegen/compiling/compilers.py index 0d496b9e8d..d909a5036e 100644 --- a/pyccel/codegen/compiling/compilers.py +++ b/pyccel/codegen/compiling/compilers.py @@ -493,6 +493,7 @@ def run_command(cmd, verbose): cmd = [os.path.expandvars(c) for c in cmd] if verbose: print(' '.join(cmd)) + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) as p: out, err = p.communicate() diff --git a/pyccel/codegen/pipeline.py b/pyccel/codegen/pipeline.py index ff8d657704..eb357fab74 100644 --- a/pyccel/codegen/pipeline.py +++ b/pyccel/codegen/pipeline.py @@ -404,6 +404,7 @@ def get_module_dependencies(parser, deps): verbose=verbose) timers["Compilation 
without wrapper"] = time.time() - start_compile_target_language + # Create shared library generated_filepath, shared_lib_timers = create_shared_library(codegen, mod_obj, diff --git a/pyccel/codegen/printing/codeprinter.py b/pyccel/codegen/printing/codeprinter.py index 687dd2e378..277aa574dd 100644 --- a/pyccel/codegen/printing/codeprinter.py +++ b/pyccel/codegen/printing/codeprinter.py @@ -50,6 +50,7 @@ def doprint(self, expr): # Do the actual printing lines = self._print(expr).splitlines(True) + # Format the output return ''.join(self._format_code(lines)) diff --git a/pyccel/errors/errors.py b/pyccel/errors/errors.py index 96910b3dfa..b261a81830 100644 --- a/pyccel/errors/errors.py +++ b/pyccel/errors/errors.py @@ -345,6 +345,7 @@ def report(self, traceback = ''.join(tb.format_stack(limit=5)) else: traceback = None + info = ErrorInfo(stage=pyccel_stage.current_stage, filename=filename, message=message, diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py index bd8aa0d2b6..c7aaa4952f 100644 --- a/pyccel/naming/cudanameclashchecker.py +++ b/pyccel/naming/cudanameclashchecker.py @@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker): verify that they do not cause name clashes. Name clashes may be due to new variables, or due to the use of reserved keywords. 
""" + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', From e5feffb6758654970289ff07959fc22485963bca Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 23 Jul 2024 15:53:25 +0100 Subject: [PATCH 122/150] improve kernel decorator --- pyccel/decorators.py | 29 ++++++++++++++++++++- tests/pyccel/scripts/kernel/hello_kernel.py | 2 +- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index ff413fe443..d2fc35ff34 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -130,6 +130,25 @@ def kernel(f): KernelAccessor A class representing the kernel function. """ + class CudaThreadIndexing: + """ + Class representing the CUDA thread indexing. + + Class representing the CUDA thread indexing. + """ + def __init__(self, block_idx, thread_idx): + self._block_idx = block_idx + self._thread_idx = thread_idx + + def threadIdx(self, dim): + return self._thread_idx + + def blockIdx(self, dim): + return self._block_idx + + def blockDim(self, dim): + return 0 + class KernelAccessor: """ Class representing the kernel function. 
@@ -139,7 +158,15 @@ class KernelAccessor: def __init__(self, f): self._f = f def __getitem__(self, args): - return self._f + num_blocks, num_threads = args + def internal_loop(*args, **kwargs): + for b in range(num_blocks): + for t in range(num_threads): + global cu + self._f.__globals__['cu'] = CudaThreadIndexing(b, t) + self._f(*args, **kwargs) + + return internal_loop return KernelAccessor(f) diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py index b6901b25a1..94a5b28b27 100644 --- a/tests/pyccel/scripts/kernel/hello_kernel.py +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -11,7 +11,7 @@ def say_hello(its_morning : bool): def f(): its_morning = True - say_hello[1,1](its_morning) + say_hello[5,5](its_morning) cuda.synchronize() if __name__ == '__main__': From 2547f6e3e904a21e5a96c8999ad5f01406364c7f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 23 Jul 2024 16:33:12 +0100 Subject: [PATCH 123/150] addinf doc string to all CudaThreadIndexing fucntions --- pyccel/decorators.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index d2fc35ff34..9e283fa20e 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -141,12 +141,27 @@ def __init__(self, block_idx, thread_idx): self._thread_idx = thread_idx def threadIdx(self, dim): + """ + Get the thread index. + + Get the thread index. + """ return self._thread_idx def blockIdx(self, dim): + """ + Get the block index. + + Get the block index. + """ return self._block_idx def blockDim(self, dim): + """ + Get the block dimension. + + Get the block dimension. 
+ """ return 0 class KernelAccessor: @@ -162,7 +177,6 @@ def __getitem__(self, args): def internal_loop(*args, **kwargs): for b in range(num_blocks): for t in range(num_threads): - global cu self._f.__globals__['cu'] = CudaThreadIndexing(b, t) self._f(*args, **kwargs) From f80eed5345301b4c3f3af82236a5bbc02920b99f Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 23 Jul 2024 16:39:43 +0100 Subject: [PATCH 124/150] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13434a32b2..767405659b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. - #41 : Add support for custom device in`cuda`. +- #41 : Improve kernel decorator. ## \[UNRELEASED\] From 6df004db4f848440a7be84f300f5d19e88e82844 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 23 Jul 2024 18:11:40 +0100 Subject: [PATCH 125/150] Add missing docstring for internal_loop in KernelAccessor class --- pyccel/decorators.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 9e283fa20e..108d2e0981 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -175,6 +175,16 @@ def __init__(self, f): def __getitem__(self, args): num_blocks, num_threads = args def internal_loop(*args, **kwargs): + """ + The internal loop for kernel execution. + + Parameters + ---------- + *args : tuple + Positional arguments for the kernel function. + **kwargs : dict + Keyword arguments for the kernel function. 
+ """ for b in range(num_blocks): for t in range(num_threads): self._f.__globals__['cu'] = CudaThreadIndexing(b, t) From 048bd16e91be4e34ebe73cb0bbb9e68a05a03dce Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Tue, 23 Jul 2024 18:25:39 +0100 Subject: [PATCH 126/150] Add missing docstring for internal_loop in KernelAccessor class --- pyccel/decorators.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 108d2e0981..fbe4eae90a 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -178,12 +178,7 @@ def internal_loop(*args, **kwargs): """ The internal loop for kernel execution. - Parameters - ---------- - *args : tuple - Positional arguments for the kernel function. - **kwargs : dict - Keyword arguments for the kernel function. + The internal loop for kernel execution. """ for b in range(num_blocks): for t in range(num_threads): From 0e9292e82b815d95c17d20533e8b5b98f27203a5 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 24 Jul 2024 16:17:52 +0100 Subject: [PATCH 127/150] Change doc of cuda+host_empty and fix some errors --- pyccel/ast/cudaext.py | 17 ++++++++++------- pyccel/ast/cudatypes.py | 12 +++++++----- pyccel/codegen/printing/ccode.py | 8 +++++--- pyccel/codegen/printing/cucode.py | 9 ++++++--- pyccel/cuda/cuda_arrays.py | 4 ++-- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu | 14 +++++++++----- pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h | 4 ++-- 7 files changed, 41 insertions(+), 27 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 761454cae0..21a18c1ba5 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -61,7 +61,7 @@ class CudaFull(CudaNewarray): Represents a call to `cuda.full` for code generation. Represents a call to the Cuda function `full` which creates an array - of a specified size and shape filled with a specified value. + filled with a specified value. 
Parameters ---------- @@ -109,14 +109,17 @@ class CudaHostEmpty(CudaFull): Parameters ---------- - shape : tuple of int , int - The shape of the new array. + shape : TypedAstNode + Shape of the new array, e.g., ``(2, 3)`` or ``2``. + For a 1D array this is either a `LiteralInteger` or an expression. + For a cuda ND array this is a `TypedAstNode` with the class type HomogeneousTupleType. - dtype : PythonType, LiteralString, str - The actual dtype passed to the NumPy function. + dtype : PythonType, PyccelFunctionDef, LiteralString, str, optional + Datatype for the constructed array. - order : str , LiteralString - The order passed to the function defoulting to 'C'. + order : {'C', 'F'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. """ __slots__ = () name = 'empty' diff --git a/pyccel/ast/cudatypes.py b/pyccel/ast/cudatypes.py index d48a3de7f3..1edbdb0ccc 100644 --- a/pyccel/ast/cudatypes.py +++ b/pyccel/ast/cudatypes.py @@ -4,7 +4,7 @@ # This file is part of Pyccel which is released under MIT License. See the LICENSE file or # # go to https://github.com/pyccel/pyccel/blob/devel/LICENSE for full license details. 
# #------------------------------------------------------------------------------------------# -""" Module containing types from the numpy module understood by pyccel +""" Module containing types from the cuda module understood by pyccel """ from functools import lru_cache import numpy as np @@ -42,6 +42,7 @@ class CudaArrayType(HomogeneousContainerType, metaclass = ArgumentSingleton): def __init__(self, dtype, rank, order, memory_location): assert isinstance(rank, int) assert order in (None, 'C', 'F') + assert memory_location in ('host', 'device') self._element_type = dtype self._container_rank = rank @@ -60,6 +61,9 @@ def memory_location(self): @lru_cache def __add__(self, other): + if(isinstance(other, CudaArrayType)): + assert self.memory_location == other.memory_location + test_type = np.zeros(1, dtype = pyccel_type_to_original_type[self.element_type]) if isinstance(other, FixedSizeNumericType): comparison_type = pyccel_type_to_original_type[other]() @@ -67,16 +71,14 @@ def __add__(self, other): comparison_type = np.zeros(1, dtype = pyccel_type_to_original_type[other.element_type]) else: return NotImplemented - if(isinstance(other, CudaArrayType)): - assert self.memory_location == other.memory_location result_type = original_type_to_pyccel_type[np.result_type(test_type, comparison_type).type] rank = max(other.rank, self.rank) if rank < 2: order = None else: - other_f_contiguous = other.order in (None, 'F') - self_f_contiguous = self.order in (None, 'F') + other_f_contiguous = other.order == 'F' + self_f_contiguous = self.order == 'F' order = 'F' if other_f_contiguous and self_f_contiguous else 'C' return CudaArrayType(result_type, rank, order, self.memory_location) diff --git a/pyccel/codegen/printing/ccode.py b/pyccel/codegen/printing/ccode.py index 1ffc85cbd2..cdcc4b48a6 100644 --- a/pyccel/codegen/printing/ccode.py +++ b/pyccel/codegen/printing/ccode.py @@ -62,6 +62,7 @@ from pyccel.codegen.printing.codeprinter import CodePrinter + from pyccel.errors.errors 
import Errors from pyccel.errors.messages import (PYCCEL_RESTRICTION_TODO, INCOMPATIBLE_TYPEVAR_TO_FUNC, PYCCEL_RESTRICTION_IS_ISNOT, UNSUPPORTED_ARRAY_RANK) @@ -230,8 +231,7 @@ import_dict = {'omp_lib' : 'omp' } c_imports = {n : Import(n, Module(n, (), ())) for n in - ['cuda_ndarrays', - 'stdlib', + ['stdlib', 'math', 'string', 'ndarrays', @@ -244,6 +244,7 @@ 'assert', 'numpy_c']} + import_header_guard_prefix = {'Set_extensions' : '_TOOLS_SET'} class CCodePrinter(CodePrinter): @@ -1319,6 +1320,7 @@ def get_declare_type(self, expr): >>> self.get_declare_type(v) 't_ndarray*' """ + from pyccel.codegen.printing.cucode import cu_imports class_type = expr.class_type rank = expr.rank @@ -1333,7 +1335,7 @@ def get_declare_type(self, expr): dtype = 't_ndarray' elif isinstance(expr.class_type, CudaArrayType): self.add_import(c_imports['ndarrays']) - self.add_import(c_imports['cuda_ndarrays']) + self.add_import(cu_imports['cuda_ndarrays']) dtype = 't_ndarray' else: errors.report(PYCCEL_RESTRICTION_TODO+' (rank>0)', symbol=expr, severity='fatal') diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index bf43a12b1a..171ecc97fa 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -17,13 +17,16 @@ from pyccel.errors.errors import Errors from pyccel.ast.cudatypes import CudaArrayType from pyccel.ast.cudaext import CudaFull -from pyccel.codegen.printing.ccode import c_imports errors = Errors() __all__ = ["CudaCodePrinter"] +cu_imports = {n : Import(n, Module(n, (), ())) for n in + ['cuda_ndarrays',] + } + class CudaCodePrinter(CCodePrinter): """ Print code in CUDA format. 
@@ -148,10 +151,10 @@ def _print_Allocate(self, expr): is_view = 'false' if variable.on_heap else 'true' memory_location = variable.class_type.memory_location if memory_location in ('device', 'host'): - memory_location = 'allocateMemoryOn' + str(memory_location).capitalize() + memory_location = str(memory_location).capitalize() + 'Memory' else: memory_location = 'managedMemory' - self.add_import(c_imports['cuda_ndarrays']) + self.add_import(cu_imports['cuda_ndarrays']) alloc_code = f"{self._print(expr.variable)} = cuda_array_create({variable.rank}, shape_Assign_{expr.variable.name}, {dtype}, {is_view},{memory_location});\n" return f'{shape_Assign} {alloc_code}' diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py index cbdf938c0c..ff37139b3f 100644 --- a/pyccel/cuda/cuda_arrays.py +++ b/pyccel/cuda/cuda_arrays.py @@ -6,7 +6,7 @@ This submodule contains cuda_arrays methods for Pyccel. """ -def host_empty(shape): +def host_empty(shape, dtype = 'float', order = 'C'): """ Create an empty array on the host. @@ -23,7 +23,7 @@ def host_empty(shape): The empty array on the host. 
""" import numpy as np - a = np.empty(shape) + a = np.empty(shape, dtype = dtype, order = order) return a diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu index 47b0e5d420..348cb146b3 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.cu @@ -1,19 +1,19 @@ #include "cuda_ndarrays.h" -void device_memory(void** devPtr, size_t size) +void allocateMemoryOnDevice(void** devPtr, size_t size) { cudaMalloc(devPtr, size); } -void host_memory(void** devPtr, size_t size) +void allocateMemoryOnHost(void** devPtr, size_t size) { - cudaMallocHost(devPtr, size); + *devPtr = malloc(size); } t_ndarray cuda_array_create(int32_t nd, int64_t *shape, enum e_types type, bool is_view , enum e_memory_locations location) { t_ndarray arr; - void (*fun_ptr_arr[])(void**, size_t) = {host_memory, device_memory}; + void (*fun_ptr_arr[])(void**, size_t) = {allocateMemoryOnHost, allocateMemoryOnDevice}; arr.nd = nd; arr.type = type; @@ -66,10 +66,12 @@ int32_t cuda_free_host(t_ndarray arr) { if (arr.shape == NULL) return (0); - cudaFreeHost(arr.raw_data); + free(arr.raw_data); arr.raw_data = NULL; cudaFree(arr.shape); arr.shape = NULL; + cudaFree(arr.strides); + arr.strides = NULL; return (1); } @@ -82,5 +84,7 @@ int32_t cuda_free(t_ndarray arr) arr.raw_data = NULL; cudaFree(arr.shape); arr.shape = NULL; + cudaFree(arr.strides); + arr.strides = NULL; return (0); } \ No newline at end of file diff --git a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h index e7cbb4581f..9a29be594d 100644 --- a/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h +++ b/pyccel/stdlib/cuda_ndarrays/cuda_ndarrays.h @@ -8,8 +8,8 @@ enum e_memory_locations { - allocateMemoryOnHost, - allocateMemoryOnDevice + HostMemory, + DeviceMemory }; From 8a4106fa0d36bfb8e758a703d278e1232c7600f7 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 24 Jul 2024 16:34:21 +0100 Subject: [PATCH 
128/150] Split a long line to make it readable --- pyccel/codegen/utilities.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyccel/codegen/utilities.py b/pyccel/codegen/utilities.py index 07be774063..e535e284d4 100644 --- a/pyccel/codegen/utilities.py +++ b/pyccel/codegen/utilities.py @@ -40,10 +40,12 @@ "cuda_ndarrays": ("cuda_ndarrays", CompileObj("cuda_ndarrays.cu",folder="cuda_ndarrays")), "pyc_math_f90" : ("math", CompileObj("pyc_math_f90.f90",folder="math")), "pyc_math_c" : ("math", CompileObj("pyc_math_c.c",folder="math")), - "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", accelerators=('python',))), + "cwrapper" : ("cwrapper", CompileObj("cwrapper.c",folder="cwrapper", + accelerators=('python',))), "numpy_f90" : ("numpy", CompileObj("numpy_f90.f90",folder="numpy")), "numpy_c" : ("numpy", CompileObj("numpy_c.c",folder="numpy")), - "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", folder="STC_Extensions", has_target_file = False)), + "Set_extensions" : ("STC_Extensions", CompileObj("Set_Extensions.h", + folder="STC_Extensions", has_target_file = False)), } internal_libs["cwrapper_ndarrays"] = ("cwrapper_ndarrays", CompileObj("cwrapper_ndarrays.c",folder="cwrapper_ndarrays", accelerators = ('python',), From 738371a3dbf2fd73662800b2fb9716f5c24210bd Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Wed, 24 Jul 2024 16:43:06 +0100 Subject: [PATCH 129/150] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 767405659b..7e5c2904da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ All notable changes to this project will be documented in this file. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. - #41 : Add support for custom device in`cuda`. -- #41 : Improve kernel decorator. +- #69 : Improve kernel decorator. 
## \[UNRELEASED\] From eea028adc743f5c0b3792e23a364b868dad20504 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 10:56:13 +0100 Subject: [PATCH 130/150] fix doc string of host_empty --- pyccel/cuda/cuda_arrays.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyccel/cuda/cuda_arrays.py b/pyccel/cuda/cuda_arrays.py index ff37139b3f..5aa34bbf38 100644 --- a/pyccel/cuda/cuda_arrays.py +++ b/pyccel/cuda/cuda_arrays.py @@ -17,6 +17,12 @@ def host_empty(shape, dtype = 'float', order = 'C'): shape : tuple of int or int The shape of the array. + dtype : str, optional + The data type of the array. The default is 'float'. + + order : str, optional + The order of the array. The default is 'C'. + Returns ------- array From c5a508c54ce8e5b652161f840bfa1078714844ac Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 11:17:18 +0100 Subject: [PATCH 131/150] Make sure tests are running successfully --- tests/pyccel/scripts/kernel/device_array.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/pyccel/scripts/kernel/device_array.py b/tests/pyccel/scripts/kernel/device_array.py index 2282b37682..335f90358c 100644 --- a/tests/pyccel/scripts/kernel/device_array.py +++ b/tests/pyccel/scripts/kernel/device_array.py @@ -2,15 +2,16 @@ from pyccel.decorators import kernel @kernel -def kernel_call(a : 'int[:]'): +def kernel_call(a : 'int[:]', size : 'int'): i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0) - if(i == 1): + if(i < size): a[i] = 1 print(a[i]) def f(): + size = 10 x = cuda.device_empty(10) - kernel_call[1,10](x) + kernel_call[1,10](x , size) if __name__ == "__main__": f() \ No newline at end of file From 52ebe93fae22898281a4387f9ada5539d7b10d63 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 12:02:35 +0100 Subject: [PATCH 132/150] refactoring the code --- pyccel/decorators.py | 2 +- tests/pyccel/scripts/kernel/device_array.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff 
--git a/pyccel/decorators.py b/pyccel/decorators.py index fbe4eae90a..90014d16b4 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -182,7 +182,7 @@ def internal_loop(*args, **kwargs): """ for b in range(num_blocks): for t in range(num_threads): - self._f.__globals__['cu'] = CudaThreadIndexing(b, t) + self._f.__globals__['cuda'] = CudaThreadIndexing(b, t) self._f(*args, **kwargs) return internal_loop diff --git a/tests/pyccel/scripts/kernel/device_array.py b/tests/pyccel/scripts/kernel/device_array.py index e6b1f3509d..335f90358c 100644 --- a/tests/pyccel/scripts/kernel/device_array.py +++ b/tests/pyccel/scripts/kernel/device_array.py @@ -3,7 +3,7 @@ @kernel def kernel_call(a : 'int[:]', size : 'int'): - i = cuda.threadIdx(0) + cu.blockIdx(0) * cu.blockDim(0) + i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0) if(i < size): a[i] = 1 print(a[i]) From ec738b3b55f3f9ad23b227262f436f132477af43 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 12:33:22 +0100 Subject: [PATCH 133/150] refactoring the code --- pyccel/decorators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index fbe4eae90a..cab4c0d77e 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -6,6 +6,7 @@ """ This module contains all the provided decorator methods. 
""" +from pyccel.ast.cudaext import cuda_mod import warnings __all__ = ( @@ -182,9 +183,8 @@ def internal_loop(*args, **kwargs): """ for b in range(num_blocks): for t in range(num_threads): - self._f.__globals__['cu'] = CudaThreadIndexing(b, t) + self._f.__globals__['cuda'].CudaThreadIndexing = CudaThreadIndexing(b, t) self._f(*args, **kwargs) - return internal_loop return KernelAccessor(f) From aa76f916848235c095e44be7c820d496cec000a5 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 12:41:10 +0100 Subject: [PATCH 134/150] refactoring the code --- CHANGELOG.md | 1 - pyccel/decorators.py | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e5c2904da..13434a32b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,6 @@ All notable changes to this project will be documented in this file. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. - #41 : Add support for custom device in`cuda`. -- #69 : Improve kernel decorator. ## \[UNRELEASED\] diff --git a/pyccel/decorators.py b/pyccel/decorators.py index cab4c0d77e..084d27b297 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -142,27 +142,12 @@ def __init__(self, block_idx, thread_idx): self._thread_idx = thread_idx def threadIdx(self, dim): - """ - Get the thread index. - - Get the thread index. - """ return self._thread_idx def blockIdx(self, dim): - """ - Get the block index. - - Get the block index. - """ return self._block_idx def blockDim(self, dim): - """ - Get the block dimension. - - Get the block dimension. 
- """ return 0 class KernelAccessor: From 528099f81210532cc16f73fa1d79392b55fd4216 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 14:25:41 +0100 Subject: [PATCH 135/150] move CudaThreadIndexing to pyccel/cuda --- pyccel/cuda/cuda_thread_indexing.py | 79 +++++++++++++++++++++ pyccel/decorators.py | 23 +----- tests/pyccel/scripts/kernel/hello_kernel.py | 2 +- 3 files changed, 82 insertions(+), 22 deletions(-) create mode 100644 pyccel/cuda/cuda_thread_indexing.py diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py new file mode 100644 index 0000000000..1987c07d7f --- /dev/null +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -0,0 +1,79 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This module contains all the CUDA thread indexing methods +""" +class CudaThreadIndexing: + """ + Class representing the CUDA thread indexing. + + Class representing the CUDA thread indexing. + """ + def __init__(self, block_idx, thread_idx): + self._block_idx = block_idx + self._thread_idx = thread_idx + + def threadIdx(self, dim): + """ + Get the thread index. + + Get the thread index. + + Parameters + ----------- + dim : int + The dimension of the indexing. It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ----------------- + int + The index of the thread in the specified dimension of its block. + """ + return self._thread_idx + + def blockIdx(self, dim): + """ + Get the block index. + + Get the block index. + + Parameters + ----------- + dim : int + The dimension of the indexing. 
It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ----------------- + int + The index of the block in the specified dimension. + """ + return self._block_idx + + def blockDim(self, dim): + """ + Get the block dimension. + + Get the block dimension. + + Parameters + ----------- + dim : int + The dimension of the indexing. It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ----------------- + int + The size of the block in the specified dimension. + """ + return 0 \ No newline at end of file diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 084d27b297..4c59808675 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -6,7 +6,7 @@ """ This module contains all the provided decorator methods. """ -from pyccel.ast.cudaext import cuda_mod +from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing import warnings __all__ = ( @@ -131,25 +131,6 @@ def kernel(f): KernelAccessor A class representing the kernel function. """ - class CudaThreadIndexing: - """ - Class representing the CUDA thread indexing. - - Class representing the CUDA thread indexing. - """ - def __init__(self, block_idx, thread_idx): - self._block_idx = block_idx - self._thread_idx = thread_idx - - def threadIdx(self, dim): - return self._thread_idx - - def blockIdx(self, dim): - return self._block_idx - - def blockDim(self, dim): - return 0 - class KernelAccessor: """ Class representing the kernel function. 
@@ -168,7 +149,7 @@ def internal_loop(*args, **kwargs): """ for b in range(num_blocks): for t in range(num_threads): - self._f.__globals__['cuda'].CudaThreadIndexing = CudaThreadIndexing(b, t) + self._f.__globals__['cuda'] = CudaThreadIndexing(b, t) self._f(*args, **kwargs) return internal_loop diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py index 94a5b28b27..27b7625b5b 100644 --- a/tests/pyccel/scripts/kernel/hello_kernel.py +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -1,6 +1,5 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import kernel -from pyccel import cuda @kernel def say_hello(its_morning : bool): @@ -12,6 +11,7 @@ def say_hello(its_morning : bool): def f(): its_morning = True say_hello[5,5](its_morning) + from pyccel import cuda cuda.synchronize() if __name__ == '__main__': From f1f63ef0e63eeb680bd526ba4e8e35792d92f2e3 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 14:28:20 +0100 Subject: [PATCH 136/150] cleaning upmy PR --- pyccel/cuda/cuda_thread_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index 1987c07d7f..0d7584a57a 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -22,7 +22,7 @@ def threadIdx(self, dim): Get the thread index. Parameters - ----------- + ---------- dim : int The dimension of the indexing. It can be: - 0 for the x-dimension @@ -30,7 +30,7 @@ def threadIdx(self, dim): - 2 for the z-dimension Returns - ----------------- + ------- int The index of the thread in the specified dimension of its block. 
""" From 1aa26b15930b9290d17a672ae25012cc531896c6 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 14:31:09 +0100 Subject: [PATCH 137/150] add final new line --- pyccel/cuda/cuda_thread_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index 0d7584a57a..9901601acf 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -76,4 +76,4 @@ def blockDim(self, dim): int The size of the block in the specified dimension. """ - return 0 \ No newline at end of file + return 0 From ea1beb79ae279a3910357e2b7036dd12c2ddaf98 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 14:33:31 +0100 Subject: [PATCH 138/150] add final new line --- pyccel/cuda/cuda_thread_indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index 9901601acf..ec5cbdaac1 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -77,3 +77,4 @@ def blockDim(self, dim): The size of the block in the specified dimension. 
""" return 0 + From 0f076a05b6f50fc2c37c3f24f79dccf30b0f82ce Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 14:45:26 +0100 Subject: [PATCH 139/150] Make sure tests are passing --- tests/pyccel/scripts/kernel/device_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py index a4762a6242..50aba2ed7b 100644 --- a/tests/pyccel/scripts/kernel/device_test.py +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -1,6 +1,5 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import device, kernel -from pyccel import cuda @device def device_call(): @@ -12,6 +11,7 @@ def kernel_call(): def f(): kernel_call[1,1]() + from pyccel import cuda cuda.synchronize() if __name__ == '__main__': From 57f977e3007e1f5bdeb21b2f8f3899b7fbcad009 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 15:57:48 +0100 Subject: [PATCH 140/150] refactoring the code --- pyccel/decorators.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 4c59808675..9ce6367210 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -149,7 +149,10 @@ def internal_loop(*args, **kwargs): """ for b in range(num_blocks): for t in range(num_threads): - self._f.__globals__['cuda'] = CudaThreadIndexing(b, t) + cu = CudaThreadIndexing(b, t) + self._f.__globals__['cuda'].threadIdx = cu.threadIdx + self._f.__globals__['cuda'].blockIdx = cu.blockIdx + self._f.__globals__['cuda'].blockDim = num_threads self._f(*args, **kwargs) return internal_loop From 26fcdc0d5e22929ee45e67ffeef93a2ede936b9e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 16:30:15 +0100 Subject: [PATCH 141/150] adding missing import to device test --- tests/pyccel/scripts/kernel/device_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pyccel/scripts/kernel/device_test.py 
b/tests/pyccel/scripts/kernel/device_test.py index 50aba2ed7b..4ae1b40f5f 100644 --- a/tests/pyccel/scripts/kernel/device_test.py +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -1,5 +1,6 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import device, kernel +from pyccel import cuda @device def device_call(): From 9f58f02aec354fea1c965735837f3b63af4e6aae Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 16:32:13 +0100 Subject: [PATCH 142/150] adding missing import to kernel --- tests/pyccel/scripts/kernel/hello_kernel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py index 27b7625b5b..70cad30598 100644 --- a/tests/pyccel/scripts/kernel/hello_kernel.py +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -1,5 +1,6 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import kernel +from pyccel import cuda @kernel def say_hello(its_morning : bool): From c45c6151cd298c83f1bfca466b756daab6313eaa Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 16:36:17 +0100 Subject: [PATCH 143/150] refactoring the code --- tests/pyccel/scripts/kernel/device_test.py | 1 - tests/pyccel/scripts/kernel/hello_kernel.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py index 4ae1b40f5f..3796a4c2fd 100644 --- a/tests/pyccel/scripts/kernel/device_test.py +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -12,7 +12,6 @@ def kernel_call(): def f(): kernel_call[1,1]() - from pyccel import cuda cuda.synchronize() if __name__ == '__main__': diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py index 70cad30598..99fa9f50e2 100644 --- a/tests/pyccel/scripts/kernel/hello_kernel.py +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -12,7 +12,6 @@ def 
say_hello(its_morning : bool): def f(): its_morning = True say_hello[5,5](its_morning) - from pyccel import cuda cuda.synchronize() if __name__ == '__main__': From 572cdd8e46a944732a502d3c30e78ab02938985e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 16:52:45 +0100 Subject: [PATCH 144/150] refactoring the code --- pyccel/decorators.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 9ce6367210..dbd1013aec 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -150,10 +150,12 @@ def internal_loop(*args, **kwargs): for b in range(num_blocks): for t in range(num_threads): cu = CudaThreadIndexing(b, t) - self._f.__globals__['cuda'].threadIdx = cu.threadIdx - self._f.__globals__['cuda'].blockIdx = cu.blockIdx - self._f.__globals__['cuda'].blockDim = num_threads - self._f(*args, **kwargs) + if 'cuda' in self._f.__globals__: + self._f.__globals__['cuda'].threadIdx = cu.threadIdx + self._f.__globals__['cuda'].blockIdx = cu.blockIdx + self._f.__globals__['cuda'].blockDim = num_threads + else: + self._f.__globals__['cuda'] = cu return internal_loop return KernelAccessor(f) From d969ebb370bb96ce537811975b73beaef3b7b74e Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 17:03:47 +0100 Subject: [PATCH 145/150] update doc --- pyccel/cuda/cuda_thread_indexing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index ec5cbdaac1..3b93bc5788 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -10,6 +10,13 @@ class CudaThreadIndexing: Class representing the CUDA thread indexing. Class representing the CUDA thread indexing. 
+ + Parameters + ---------- + block_idx : int + The index of the block in the x-dimension + thread_idx : int + The index of the thread in the x-dimension """ def __init__(self, block_idx, thread_idx): self._block_idx = block_idx From 2b3085fb2bd89fb3f59bc895348c7fd895d60f21 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 17:06:51 +0100 Subject: [PATCH 146/150] update doc --- pyccel/cuda/cuda_thread_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index 3b93bc5788..cf6687faa7 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -14,9 +14,9 @@ class CudaThreadIndexing: Parameters ---------- block_idx : int - The index of the block in the x-dimension + The index of the block in the x-dimension. thread_idx : int - The index of the thread in the x-dimension + The index of the thread in the x-dimension. """ def __init__(self, block_idx, thread_idx): self._block_idx = block_idx From 34d801f074f21b89c97dff4d93c968c9a951fb61 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 17:14:31 +0100 Subject: [PATCH 147/150] update doc --- pyccel/cuda/cuda_thread_indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index cf6687faa7..574f013731 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -15,6 +15,7 @@ class CudaThreadIndexing: ---------- block_idx : int The index of the block in the x-dimension. + thread_idx : int The index of the thread in the x-dimension. 
""" From e9436a9b5f33022fce5222d8c73e81622e6ff5a7 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 17:22:32 +0100 Subject: [PATCH 148/150] update doc --- pyccel/cuda/cuda_thread_indexing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py index 574f013731..7d8cce3fa5 100644 --- a/pyccel/cuda/cuda_thread_indexing.py +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -51,7 +51,7 @@ def blockIdx(self, dim): Get the block index. Parameters - ----------- + ---------- dim : int The dimension of the indexing. It can be: - 0 for the x-dimension @@ -59,7 +59,7 @@ def blockIdx(self, dim): - 2 for the z-dimension Returns - ----------------- + ------- int The index of the block in the specified dimension. """ @@ -72,7 +72,7 @@ def blockDim(self, dim): Get the block dimension. Parameters - ----------- + ---------- dim : int The dimension of the indexing. It can be: - 0 for the x-dimension @@ -80,7 +80,7 @@ def blockDim(self, dim): - 2 for the z-dimension Returns - ----------------- + ------- int The size of the block in the specified dimension. """ From ac936dedb0cacff4cd7dd2136dce8c8b59ca4e33 Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 22:52:22 +0100 Subject: [PATCH 149/150] work in progress --- pyccel/ast/cudaext.py | 12 +++++++++++- pyccel/codegen/printing/cucode.py | 20 +++++++++++++++++++- tests/pyccel/scripts/kernel/device_array.py | 4 +++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 99d1fc5326..794b727994 100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -153,7 +153,7 @@ def fill_value(self): The value with which the array will be filled on initialisation. """ return None - + class CudaDimFunction(PyccelFunction): """ Represents a call to a CUDA dimension-related function for code generation. 
@@ -170,6 +170,16 @@ def __init__(self, dim=0): @property def dim(self): + """ + Returns the dimension Of The Grid. + + Returns the dimension Of The Grid. + + Returns + ------- + int + The dimension of the grid. + """ return self._dim class threadIdx(CudaDimFunction): diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index 73d970f91c..818c8d4da0 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -178,11 +178,29 @@ def _print_Assign(self, expr): return super()._print_Assign(expr) def _get_cuda_dim(self, dim, prefix): + """ + Get the CUDA representation of the CUDA dimension call. + + Get the CUDA representation of the CUDA dimension call. + + Parameters + ---------- + dim : int + The dimension of the CUDA call (0, 1, or 2). + + prefix : str + The prefix of the CUDA call (e.g., 'block', 'thread'). + + Returns + ------- + str + The CUDA representation of the CUDA dimension call. + """ if dim == 0: return f'{prefix}.x' elif dim == 1: return f'{prefix}.y' - elif dim == 2: + else: return f'{prefix}.z' def _print_threadIdx(self, expr): diff --git a/tests/pyccel/scripts/kernel/device_array.py b/tests/pyccel/scripts/kernel/device_array.py index 335f90358c..0a29e809a6 100644 --- a/tests/pyccel/scripts/kernel/device_array.py +++ b/tests/pyccel/scripts/kernel/device_array.py @@ -1,3 +1,4 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring from pyccel import cuda from pyccel.decorators import kernel @@ -14,4 +15,5 @@ def f(): kernel_call[1,10](x , size) if __name__ == "__main__": - f() \ No newline at end of file + f() + From e3db4c096be120be56e089310e5f278e225aa69c Mon Sep 17 00:00:00 2001 From: smazouz42 Date: Thu, 25 Jul 2024 23:18:19 +0100 Subject: [PATCH 150/150] work in progress --- pyccel/ast/cudaext.py | 2 +- pyccel/decorators.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyccel/ast/cudaext.py b/pyccel/ast/cudaext.py index 794b727994..f96a79b55e 
100644 --- a/pyccel/ast/cudaext.py +++ b/pyccel/ast/cudaext.py @@ -174,7 +174,7 @@ def dim(self): Returns the dimension Of The Grid. Returns the dimension Of The Grid. - + Returns ------- int diff --git a/pyccel/decorators.py b/pyccel/decorators.py index dbd1013aec..1af59fcd18 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -153,9 +153,10 @@ def internal_loop(*args, **kwargs): if 'cuda' in self._f.__globals__: self._f.__globals__['cuda'].threadIdx = cu.threadIdx self._f.__globals__['cuda'].blockIdx = cu.blockIdx - self._f.__globals__['cuda'].blockDim = num_threads + self._f.__globals__['cuda'].blockDim = cu.blockDim else: self._f.__globals__['cuda'] = cu + self._f(*args, **kwargs) return internal_loop return KernelAccessor(f)