diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index fa33ff60..15ebfca7 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -50,12 +50,19 @@ jobs: set -eux [ "$(ghc --numeric-version)" = "${{ matrix.ghc }}" ] cabal update - cabal build --enable-tests --enable-benchmarks - cabal test - cabal bench - cabal haddock + cabal build --enable-tests --enable-benchmarks all + cabal test filepath + cabal bench filepath + cabal haddock filepath + ( + cd filepath cabal check - cabal sdist + ) + ( + cd filepath-internals + cabal check + ) + cabal sdist all shell: bash - if: matrix.os == 'ubuntu-latest' @@ -64,7 +71,7 @@ jobs: set -eux export "PATH=$HOME/.cabal/bin:$PATH" cabal install --overwrite-policy=always --install-method=copy cpphs - make all + make -C filepath all git diff --exit-code i386: @@ -82,8 +89,8 @@ jobs: run: | . ~/.ghcup/env cabal update - cabal test - cabal bench + cabal test filepath + cabal bench filepath # We use github.com/haskell self-hosted runners for ARM testing. # If they become unavailable in future, put ['armv7', 'aarch64'] @@ -107,13 +114,13 @@ jobs: uses: docker://hasufell/arm32v7-ubuntu-haskell:focal name: Run build (arm32v7 linux) with: - args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 filepath && cabal bench -w ghc-9.2.2 filepath" - if: matrix.arch == 'arm64v8' uses: docker://hasufell/arm64v8-ubuntu-haskell:focal name: Run build (arm64v8 linux) with: - args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 filepath && cabal bench -w ghc-9.2.2 filepath" darwin_arm: runs-on: ${{ matrix.os }} @@ -144,7 +151,7 @@ jobs: export RANLIB="$HOME/.brew/opt/llvm@11/bin/llvm-ranlib" . .github/scripts/env.sh curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 BOOTSTRAP_HASKELL_GHC_VERSION=${{ matrix.ghc }} BOOTSTRAP_HASKELL_ADJUST_BASHRC=yes sh - cabal test - cabal bench + cabal test filepath + cabal bench filepath env: HOMEBREW_CHANGE_ARCH_TO_ARM: 1 diff --git a/README.md b/README.md deleted file mode 100644 index 5ec5e51f..00000000 --- a/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# FilePath [![Hackage version](https://img.shields.io/hackage/v/filepath.svg?label=Hackage)](https://hackage.haskell.org/package/filepath) - -The `filepath` package provides functionality for manipulating `FilePath` values, and is shipped with [GHC](https://www.haskell.org/ghc/). -It provides two variants for filepaths: - -1. legacy filepaths: `type FilePath = String` -2. operating system abstracted filepaths (`OsPath`): internally unpinned `ShortByteString` (platform-dependent encoding) - -It is recommended to use `OsPath` when possible, because it is more correct. - -For each variant there are three main modules: - -* `System.FilePath.Posix` / `System.OsPath.Posix` manipulates POSIX\/Linux style `FilePath` values (with `/` as the path separator). -* `System.FilePath.Windows` / `System.OsPath.Windows` manipulates Windows style `FilePath` values (with either `\` or `/` as the path separator, and deals with drives). -* `System.FilePath` / `System.OsPath` for dealing with current platform-specific filepaths - -All three modules provide the same API, and the same documentation (calling out differences in the different variants). - -`System.OsString` is like `System.OsPath`, but more general purpose. Refer to the documentation of -those modules for more information. - -### What is a `FilePath`? - -In Haskell, the legacy definition (used in `base` and Prelude) is `type FilePath = String`, -where a Haskell `String` is a list of Unicode code points. - -The new definition is (simplified) `newtype OsPath = AFP ShortByteString`, where -`ShortByteString` is an unpinned byte array and follows syscall conventions, preserving the encoding. - -On unix, filenames don't have a predefined encoding as per the -[POSIX specification](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170) -and are passed as `char[]` to syscalls. - -On windows (at least the API used by `Win32`) filepaths are UTF-16LE strings. - -You are encouraged to use `OsPath` whenever possible, because it is more correct. - -Also note that this is a low-level library and it makes no attempt at providing a more -type safe variant for filepaths (e.g. by distinguishing between absolute and relative -paths) and ensures no invariants (such as filepath validity). - -For such libraries, check out the following: - -* [hpath](https://hackage.haskell.org/package/hpath) -* [path](https://hackage.haskell.org/package/path) -* [paths](https://hackage.haskell.org/package/paths) -* [strong-path](https://hackage.haskell.org/package/strong-path) diff --git a/README.md b/README.md new file mode 120000 index 00000000..49b170fd --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +filepath/README.md \ No newline at end of file diff --git a/cabal.project b/cabal.project index 6f920794..eaaa0926 100644 --- a/cabal.project +++ b/cabal.project @@ -1 +1,2 @@ -packages: ./ +packages: ./filepath/filepath.cabal + ./filepath-internals/filepath-internals.cabal diff --git a/filepath-internals/.github/scripts/brew.sh b/filepath-internals/.github/scripts/brew.sh new file mode 100644 index 00000000..d59922ee --- /dev/null +++ b/filepath-internals/.github/scripts/brew.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +set -eux + +. .github/scripts/env.sh + +if [ -e "$HOME/.brew" ] ; then + ( + cd "$HOME/.brew" + git fetch --depth 1 + git reset --hard origin/master + ) +else + git clone --depth=1 https://github.com/Homebrew/brew "$HOME/.brew" +fi +export PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$PATH" + +mkdir -p $CI_PROJECT_DIR/.brew_cache +export HOMEBREW_CACHE=$CI_PROJECT_DIR/.brew_cache +mkdir -p $CI_PROJECT_DIR/.brew_logs +export HOMEBREW_LOGS=$CI_PROJECT_DIR/.brew_logs +mkdir -p /private/tmp/.brew_tmp +export HOMEBREW_TEMP=/private/tmp/.brew_tmp + +brew update +brew install ${1+"$@"} + + +set -eux + +. .github/scripts/env.sh + +if [ -e "$HOME/.brew" ] ; then + ( + cd "$HOME/.brew" + git fetch --depth 1 + git reset --hard origin/master + ) +else + git clone --depth=1 https://github.com/Homebrew/brew "$HOME/.brew" +fi +export PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$PATH" + +mkdir -p $CI_PROJECT_DIR/.brew_cache +export HOMEBREW_CACHE=$CI_PROJECT_DIR/.brew_cache +mkdir -p $CI_PROJECT_DIR/.brew_logs +export HOMEBREW_LOGS=$CI_PROJECT_DIR/.brew_logs +mkdir -p /private/tmp/.brew_tmp +export HOMEBREW_TEMP=/private/tmp/.brew_tmp + +brew update +brew install ${1+"$@"} + diff --git a/filepath-internals/.github/scripts/env.sh b/filepath-internals/.github/scripts/env.sh new file mode 100644 index 00000000..088b4c8f --- /dev/null +++ b/filepath-internals/.github/scripts/env.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +if [ "${RUNNER_OS}" = "Windows" ] ; then + ext=".exe" +else + ext='' +fi + +export DEBIAN_FRONTEND=noninteractive +export TZ=Asia/Singapore + +export OS="$RUNNER_OS" +export PATH="$HOME/.local/bin:$PATH" + +if [ "${RUNNER_OS}" = "Windows" ] ; then + # on windows use pwd to get unix style path + CI_PROJECT_DIR="$(pwd)" + export CI_PROJECT_DIR + export GHCUP_INSTALL_BASE_PREFIX="/c" + export GHCUP_BIN="$GHCUP_INSTALL_BASE_PREFIX/ghcup/bin" + export PATH="$GHCUP_BIN:$PATH" + export CABAL_DIR="C:\\Users\\runneradmin\\AppData\\Roaming\\cabal" +else + export CI_PROJECT_DIR="${GITHUB_WORKSPACE}" + export GHCUP_INSTALL_BASE_PREFIX="$CI_PROJECT_DIR" + export GHCUP_BIN="$GHCUP_INSTALL_BASE_PREFIX/.ghcup/bin" + export PATH="$GHCUP_BIN:$PATH" + export CABAL_DIR="$CI_PROJECT_DIR/cabal" + export CABAL_CACHE="$CI_PROJECT_DIR/cabal-cache" +fi diff --git a/filepath-internals/.github/workflows/test.yaml b/filepath-internals/.github/workflows/test.yaml new file mode 100644 index 00000000..fa33ff60 --- /dev/null +++ b/filepath-internals/.github/workflows/test.yaml @@ -0,0 +1,150 @@ +name: Haskell CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + ghc: ['8.0.2', '8.2.2', '8.4.4', '8.6.5', '8.8.4', '8.10.7', '9.0.2', '9.2.7', '9.4.5', '9.6.1'] + cabal: ['3.8.1.0'] + include: + - os: macOS-latest + ghc: '9.4.5' + cabal: '3.8.1.0' + - os: macOS-latest + ghc: '9.6.1' + cabal: '3.8.1.0' + - os: windows-latest + ghc: '9.4.5' + cabal: '3.8.1.0' + - os: windows-latest + ghc: '9.6.1' + cabal: '3.8.1.0' + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies (Ubuntu) + if: runner.os == 'Linux' + run: | + sudo apt-get -y update + sudo apt-get -y install libtinfo5 libtinfo6 libncurses5 libncurses6 + + - name: Install ghc/cabal + run: | + set -eux + ghcup install ghc --set ${{ matrix.ghc }} + ghcup install cabal ${{ matrix.cabal }} + shell: bash + + - name: Build + run: | + set -eux + [ "$(ghc --numeric-version)" = "${{ matrix.ghc }}" ] + cabal update + cabal build --enable-tests --enable-benchmarks + cabal test + cabal bench + cabal haddock + cabal check + cabal sdist + shell: bash + + - if: matrix.os == 'ubuntu-latest' + name: make all + run: | + set -eux + export "PATH=$HOME/.cabal/bin:$PATH" + cabal install --overwrite-policy=always --install-method=copy cpphs + make all + git diff --exit-code + + i386: + runs-on: ubuntu-latest + container: + image: i386/ubuntu:bionic + steps: + - name: Install + run: | + apt-get update -y + apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh + - uses: actions/checkout@v1 + - name: Test + run: | + . ~/.ghcup/env + cabal update + cabal test + cabal bench + + # We use github.com/haskell self-hosted runners for ARM testing. + # If they become unavailable in future, put ['armv7', 'aarch64'] + # back to emulation jobs above. + arm: + runs-on: [self-hosted, Linux, ARM64] + strategy: + fail-fast: true + matrix: + arch: [arm32v7, arm64v8] + steps: + - uses: docker://hasufell/arm64v8-ubuntu-haskell:focal + name: Cleanup + with: + args: "find . -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +" + + - name: Checkout code + uses: actions/checkout@v3 + + - if: matrix.arch == 'arm32v7' + uses: docker://hasufell/arm32v7-ubuntu-haskell:focal + name: Run build (arm32v7 linux) + with: + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + + - if: matrix.arch == 'arm64v8' + uses: docker://hasufell/arm64v8-ubuntu-haskell:focal + name: Run build (arm64v8 linux) + with: + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + + darwin_arm: + runs-on: ${{ matrix.os }} + env: + MACOSX_DEPLOYMENT_TARGET: 10.13 + strategy: + fail-fast: false + matrix: + include: + - os: [self-hosted, macOS, ARM64] + ghc: 8.10.7 + - os: [self-hosted, macOS, ARM64] + ghc: 9.2.6 + - os: [self-hosted, macOS, ARM64] + ghc: 9.4.4 + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Run build + run: | + bash .github/scripts/brew.sh git coreutils llvm@11 autoconf automake + export PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$HOME/.brew/opt/llvm@11/bin:$PATH" + export CC="$HOME/.brew/opt/llvm@11/bin/clang" + export CXX="$HOME/.brew/opt/llvm@11/bin/clang++" + export LD=ld + export AR="$HOME/.brew/opt/llvm@11/bin/llvm-ar" + export RANLIB="$HOME/.brew/opt/llvm@11/bin/llvm-ranlib" + . .github/scripts/env.sh + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 BOOTSTRAP_HASKELL_GHC_VERSION=${{ matrix.ghc }} BOOTSTRAP_HASKELL_ADJUST_BASHRC=yes sh + cabal test + cabal bench + env: + HOMEBREW_CHANGE_ARCH_TO_ARM: 1 diff --git a/filepath-internals/.github/workflows/test.yaml.orig b/filepath-internals/.github/workflows/test.yaml.orig new file mode 100644 index 00000000..0b0084fc --- /dev/null +++ b/filepath-internals/.github/workflows/test.yaml.orig @@ -0,0 +1,218 @@ +name: Haskell CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: +<<<<<<< HEAD + os: [ubuntu-latest] + ghc: ['8.0.2', '8.2.2', '8.4.4', '8.6.5', '8.8.4', '8.10.7', '9.0.2', '9.2.7', '9.4.5', '9.6.1'] + cabal: ['3.8.1.0'] + include: +======= + os: [ubuntu-latest, macOS-latest, windows-latest] + ghc: ['8.0.2', '8.2.2', '8.4.4', '8.6.5', '8.8.4', '8.10.7', '9.0.2', '9.2.5', '9.4.4'] + cabal: ['3.6.2.0'] + include: + - os: ubuntu-latest + ghc: 'recommended' + - os: ubuntu-latest + ghc: 'latest' + cabal: 3.6.2.0 + exclude: +>>>>>>> github/pr/178 + - os: macOS-latest + ghc: '9.4.5' + cabal: '3.8.1.0' + - os: macOS-latest + ghc: '9.6.1' + cabal: '3.8.1.0' + - os: windows-latest + ghc: '9.4.5' + cabal: '3.8.1.0' + - os: windows-latest + ghc: '9.6.1' + cabal: '3.8.1.0' + steps: + - uses: actions/checkout@v3 + + - name: Install dependencies (Ubuntu) + if: runner.os == 'Linux' + run: | + sudo apt-get -y update + sudo apt-get -y install libtinfo5 libtinfo6 libncurses5 libncurses6 + + - name: Install ghc/cabal + run: | + set -eux + ghcup install ghc --set ${{ matrix.ghc }} +<<<<<<< HEAD + ghcup install cabal ${{ matrix.cabal }} +======= + ghcup install cabal --set ${{ matrix.cabal }} +>>>>>>> github/pr/178 + shell: bash + + - name: Build + run: | + set -eux +<<<<<<< HEAD + [ "$(ghc --numeric-version)" = "${{ matrix.ghc }}" ] +======= + [ -e ~/.ghcup/env ] && source ~/.ghcup/env + [ "${{ matrix.ghc }}" == 'recommended' ] || + [ "${{ matrix.ghc }}" == 'latest' ] || + [ "$(ghc --numeric-version)" = "${{ matrix.ghc }}" ] +>>>>>>> github/pr/178 + cabal update + cabal build --enable-tests --enable-benchmarks + cabal test + cabal bench + cabal haddock + cabal check + cabal sdist + shell: bash + + - if: matrix.os == 'ubuntu-latest' + name: make all + run: | + set -eux + export "PATH=$HOME/.cabal/bin:$PATH" + cabal install --overwrite-policy=always --install-method=copy cpphs + make all + git diff --exit-code + +<<<<<<< HEAD + i386: +======= + emulated: + needs: build + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + arch: ['s390x', 'ppc64le', 'armv7', 'aarch64'] + steps: + - uses: actions/checkout@v2 + - uses: uraimo/run-on-arch-action@v2.1.1 + timeout-minutes: 60 + with: + arch: ${{ matrix.arch }} + distro: ubuntu20.04 + githubToken: ${{ github.token }} + install: | + apt-get update -y + apt-get install -y ghc cabal-install libghc-quickcheck2-dev cpphs git make + run: | + cabal update + cabal test + cabal haddock + cabal check + cabal sdist + + emulated-i386: +>>>>>>> github/pr/178 + runs-on: ubuntu-latest + container: + image: i386/ubuntu:bionic + steps: + - name: Install + run: | +<<<<<<< HEAD + apt-get update -y + apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh +======= + apt-get update -y + apt-get install -y ghc cabal-install libghc-quickcheck2-dev cpphs git make + shell: bash +>>>>>>> github/pr/178 + - uses: actions/checkout@v1 + - name: Test + run: | +<<<<<<< HEAD + . ~/.ghcup/env + cabal update + cabal test + cabal bench + + # We use github.com/haskell self-hosted runners for ARM testing. + # If they become unavailable in future, put ['armv7', 'aarch64'] + # back to emulation jobs above. + arm: + runs-on: [self-hosted, Linux, ARM64] + strategy: + fail-fast: true + matrix: + arch: [arm32v7, arm64v8] + steps: + - uses: docker://hasufell/arm64v8-ubuntu-haskell:focal + name: Cleanup + with: + args: "find . -mindepth 1 -maxdepth 1 -exec rm -rf -- {} +" + + - name: Checkout code + uses: actions/checkout@v3 + + - if: matrix.arch == 'arm32v7' + uses: docker://hasufell/arm32v7-ubuntu-haskell:focal + name: Run build (arm32v7 linux) + with: + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + + - if: matrix.arch == 'arm64v8' + uses: docker://hasufell/arm64v8-ubuntu-haskell:focal + name: Run build (arm64v8 linux) + with: + args: sh -c "cabal update && ghcup install ghc --isolate=/usr --force 9.2.2 && cabal test -w ghc-9.2.2 && cabal bench -w ghc-9.2.2" + + darwin_arm: + runs-on: ${{ matrix.os }} + env: + MACOSX_DEPLOYMENT_TARGET: 10.13 + strategy: + fail-fast: false + matrix: + include: + - os: [self-hosted, macOS, ARM64] + ghc: 8.10.7 + - os: [self-hosted, macOS, ARM64] + ghc: 9.2.6 + - os: [self-hosted, macOS, ARM64] + ghc: 9.4.4 + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Run build + run: | + bash .github/scripts/brew.sh git coreutils llvm@11 autoconf automake + export PATH="$HOME/.brew/bin:$HOME/.brew/sbin:$HOME/.brew/opt/llvm@11/bin:$PATH" + export CC="$HOME/.brew/opt/llvm@11/bin/clang" + export CXX="$HOME/.brew/opt/llvm@11/bin/clang++" + export LD=ld + export AR="$HOME/.brew/opt/llvm@11/bin/llvm-ar" + export RANLIB="$HOME/.brew/opt/llvm@11/bin/llvm-ranlib" + . .github/scripts/env.sh + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 BOOTSTRAP_HASKELL_GHC_VERSION=${{ matrix.ghc }} BOOTSTRAP_HASKELL_ADJUST_BASHRC=yes sh + cabal test + cabal bench + env: + HOMEBREW_CHANGE_ARCH_TO_ARM: 1 +======= + cabal update + cabal test + cabal haddock + cabal check + cabal sdist + shell: bash +>>>>>>> github/pr/178 diff --git a/LICENSE b/filepath-internals/LICENSE similarity index 100% rename from LICENSE rename to filepath-internals/LICENSE diff --git a/filepath-internals/README.md b/filepath-internals/README.md new file mode 100644 index 00000000..5968b273 --- /dev/null +++ b/filepath-internals/README.md @@ -0,0 +1,3 @@ +# filepath-internals + +FilePath internals. Use at your own risk. diff --git a/Setup.hs b/filepath-internals/Setup.hs similarity index 100% rename from Setup.hs rename to filepath-internals/Setup.hs diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/filepath-internals/System/OsPath/Data/ByteString/Short/Internal.hs similarity index 100% rename from System/OsPath/Data/ByteString/Short/Internal.hs rename to filepath-internals/System/OsPath/Data/ByteString/Short/Internal.hs diff --git a/filepath-internals/System/OsPath/Data/ByteString/Short/Word16/Internal.hs b/filepath-internals/System/OsPath/Data/ByteString/Short/Word16/Internal.hs new file mode 100644 index 00000000..d6a34c1d --- /dev/null +++ b/filepath-internals/System/OsPath/Data/ByteString/Short/Word16/Internal.hs @@ -0,0 +1,782 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} + +{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} + +-- | +-- Module : System.OsPath.Data.ByteString.Short.Word16 +-- Copyright : © 2022 Julian Ospald +-- License : MIT +-- +-- Maintainer : Julian Ospald +-- Stability : experimental +-- Portability : portable +-- +-- ShortByteStrings encoded as UTF16-LE, suitable for windows FFI calls. +-- +-- Word16s are *always* in BE encoding (both input and output), so e.g. 'pack' +-- takes a list of BE encoded @[Word16]@ and produces a UTF16-LE encoded ShortByteString. +-- +-- Likewise, 'unpack' takes a UTF16-LE encoded ShortByteString and produces a list of BE encoded @[Word16]@. +-- +-- Indices and lengths are always in respect to Word16, not Word8. +-- +-- All functions will error out if the input string is not a valid UTF16 stream (uneven number of bytes). +-- So use this module with caution. +module System.OsPath.Data.ByteString.Short.Word16.Internal +where +import Data.ByteString.Short.Internal (ShortByteString, null, empty) +import System.OsPath.Data.ByteString.Short.Internal +import Data.Bits + ( shiftR + ) +import Data.Word +import Prelude hiding + ( Foldable(..) + , all + , any + , reverse + , break + , concat + , drop + , dropWhile + , filter + , head + , init + , last + , map + , replicate + , span + , splitAt + , tail + , take + , takeWhile + ) +import qualified Data.Foldable as Foldable +import GHC.ST ( ST ) +import GHC.Stack ( HasCallStack ) +import GHC.Exts ( inline ) + +import qualified Data.ByteString.Short.Internal as BS +import qualified Data.List as List + + +-- ----------------------------------------------------------------------------- +-- Introducing and eliminating 'ShortByteString's + +-- | /O(1)/ Convert a 'Word16' into a 'ShortByteString' +singleton :: Word16 -> ShortByteString +singleton = \w -> create 2 (\mba -> writeWord16Array mba 0 w) + + +-- | /O(n)/. Convert a list into a 'ShortByteString' +pack :: [Word16] -> ShortByteString +pack = packWord16 + + +-- | /O(n)/. Convert a 'ShortByteString' into a list. +unpack :: ShortByteString -> [Word16] +unpack = unpackWord16 . assertEven + + +-- --------------------------------------------------------------------- +-- Basic interface + +-- | This is like 'length', but the number of 'Word16', not 'Word8'. +numWord16 :: ShortByteString -> Int +numWord16 = (`shiftR` 1) . BS.length . assertEven + +infixr 5 `cons` --same as list (:) +infixl 5 `snoc` + +-- | /O(n)/ Append a Word16 to the end of a 'ShortByteString' +-- +-- Note: copies the entire byte array +snoc :: ShortByteString -> Word16 -> ShortByteString +snoc = \(assertEven -> sbs) c -> let l = BS.length sbs + nl = l + 2 + in create nl $ \mba -> do + copyByteArray (asBA sbs) 0 mba 0 l + writeWord16Array mba l c + +-- | /O(n)/ 'cons' is analogous to (:) for lists. +-- +-- Note: copies the entire byte array +cons :: Word16 -> ShortByteString -> ShortByteString +cons c = \(assertEven -> sbs) -> let l = BS.length sbs + nl = l + 2 + in create nl $ \mba -> do + writeWord16Array mba 0 c + copyByteArray (asBA sbs) 0 mba 2 l + +-- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +last :: HasCallStack => ShortByteString -> Word16 +last = \(assertEven -> sbs) -> case null sbs of + True -> errorEmptySBS "last" + False -> indexWord16Array (asBA sbs) (BS.length sbs - 2) + +-- | /O(n)/ Extract the elements after the head of a ShortByteString, which must at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +-- +-- Note: copies the entire byte array +tail :: HasCallStack => ShortByteString -> ShortByteString +tail = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if + | l <= 0 -> errorEmptySBS "tail" + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl + +-- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing +-- if it is empty. +uncons :: ShortByteString -> Maybe (Word16, ShortByteString) +uncons = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if | l <= 0 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl + in Just (h, t) + +-- | /O(n)/ Extract first two elements and the rest of a ByteString, +-- returning Nothing if it is shorter than two elements. +uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) +uncons2 = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 4 + in if | l <= 2 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + h' = indexWord16Array (asBA sbs) 2 + t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl + in Just (h, h', t) + +-- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. +-- An exception will be thrown in the case of an empty ShortByteString. +head :: HasCallStack => ShortByteString -> Word16 +head = \(assertEven -> sbs) -> case null sbs of + True -> errorEmptySBS "last" + False -> indexWord16Array (asBA sbs) 0 + +-- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one. +-- An exception will be thrown in the case of an empty ShortByteString. +-- +-- Note: copies the entire byte array +init :: HasCallStack => ShortByteString -> ShortByteString +init = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if + | l <= 0 -> errorEmptySBS "tail" + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + +-- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing +-- if it is empty. +unsnoc :: ShortByteString -> Maybe (ShortByteString, Word16) +unsnoc = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 2 + in if | l <= 0 -> Nothing + | otherwise -> let l' = indexWord16Array (asBA sbs) (l - 2) + i = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + in Just (i, l') + + +-- --------------------------------------------------------------------- +-- Transformations + +-- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each +-- element of @xs@. +map :: (Word16 -> Word16) -> ShortByteString -> ShortByteString +map f = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + in create l (\mba -> go ba mba 0 l) + where + go :: BA -> MBA s -> Int -> Int -> ST s () + go !ba !mba !i !l + | i >= l = return () + | otherwise = do + let w = indexWord16Array ba i + writeWord16Array mba i (f w) + go ba mba (i+2) l + +-- TODO: implement more efficiently +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. +reverse :: ShortByteString -> ShortByteString +reverse = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + in create l (\mba -> go ba mba 0 l) + where + go :: BA -> MBA s -> Int -> Int -> ST s () + go !ba !mba !i !l + | i >= l = return () + | otherwise = do + let w = indexWord16Array ba i + writeWord16Array mba (l - 2 - i) w + go ba mba (i+2) l + + +-- --------------------------------------------------------------------- +-- Special folds + +-- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines +-- if all elements of the 'ShortByteString' satisfy the predicate. +all :: (Word16 -> Bool) -> ShortByteString -> Bool +all k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = True + | otherwise = k (w n) && go (n + 2) + in go 0 + + +-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if +-- any element of the 'ByteString' satisfies the predicate. +any :: (Word16 -> Bool) -> ShortByteString -> Bool +any k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = False + | otherwise = k (w n) || go (n + 2) + in go 0 + + +-- --------------------------------------------------------------------- +-- Unfolds and replicates + + +-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ +-- the value of every element. The following holds: +-- +-- > replicate w c = unfoldr w (\u -> Just (u,u)) c +replicate :: Int -> Word16 -> ShortByteString +replicate w c + | w <= 0 = empty + -- can't use setByteArray here, because we write UTF-16LE + | otherwise = create (w * 2) (`go` 0) + where + go mba ix + | ix < 0 || ix >= w * 2 = pure () + | otherwise = writeWord16Array mba ix c >> go mba (ix + 2) + +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a +-- ShortByteString from a seed value. The function takes the element and +-- returns 'Nothing' if it is done producing the ShortByteString or returns +-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, +-- and @b@ is the seed value for further production. +-- +-- This function is not efficient/safe. It will build a list of @[Word16]@ +-- and run the generator until it returns `Nothing`, otherwise recurse infinitely, +-- then finally create a 'ShortByteString'. +-- +-- Examples: +-- +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 +-- > == pack [0, 1, 2, 3, 4, 5] +-- +unfoldr :: (a -> Maybe (Word16, a)) -> a -> ShortByteString +unfoldr f x0 = packWord16Rev $ go x0 mempty + where + go x words' = case f x of + Nothing -> words' + Just (w, x') -> go x' (w:words') + +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed +-- value. However, the length of the result is limited by the first +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' +-- when the maximum length of the result is known. +-- +-- The following equation relates 'unfoldrN' and 'unfoldr': +-- +-- > fst (unfoldrN n f s) == take n (unfoldr f s) +-- +unfoldrN :: forall a. + Int -- ^ number of 'Word16' + -> (a -> Maybe (Word16, a)) + -> a + -> (ShortByteString, Maybe a) +unfoldrN i f = \x0 -> + if | i < 0 -> (empty, Just x0) + | otherwise -> createAndTrim (i * 2) $ \mba -> go mba x0 0 + + where + go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a) + go !mba !x !n = go' x n + where + go' :: a -> Int -> ST s (Int, Maybe a) + go' !x' !n' + | n' == i * 2 = return (n', Just x') + | otherwise = case f x' of + Nothing -> return (n', Nothing) + Just (w, x'') -> do + writeWord16Array mba n' w + go' x'' (n'+2) + + +-- -------------------------------------------------------------------- +-- Predicates + + + +-- --------------------------------------------------------------------- +-- Substrings + +-- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. +-- +-- Note: copies the entire byte array +take :: Int -- ^ number of Word16 + -> ShortByteString + -> ShortByteString +take = \n (assertEven -> sbs) -> + let sl = numWord16 sbs + len8 = n * 2 + in if | n >= sl -> sbs + | n <= 0 -> empty + | otherwise -> + create len8 $ \mba -> copyByteArray (asBA sbs) 0 mba 0 len8 + + +-- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@. +-- Takes @n@ elements from end of bytestring. +-- +-- >>> takeEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "e\NULf\NULg\NUL" +-- >>> takeEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "" +-- >>> takeEnd 4 "a\NULb\NULc\NUL" +-- "a\NULb\NULc\NUL" +takeEnd :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +takeEnd n = \(assertEven -> sbs) -> + let sl = BS.length sbs + n2 = n * 2 + in if | n2 >= sl -> sbs + | n2 <= 0 -> empty + | otherwise -> create n2 $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n2)) mba 0 n2 + +-- | Similar to 'P.takeWhile', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate. +takeWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +takeWhile f ps = take (findIndexOrLength (not . f) ps) ps + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate. +-- +-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. +takeWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +takeWhileEnd f ps = drop (findFromEndUntil (not . f) ps) ps + + +-- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@. +-- +-- Note: copies the entire byte array +drop :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +drop = \n' (assertEven -> sbs) -> + let len = BS.length sbs + n = n' * 2 + in if | n <= 0 -> sbs + | n >= len -> empty + | otherwise -> + let newLen = len - n + in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen + +-- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@. +-- Drops @n@ elements from end of bytestring. +-- +-- >>> dropEnd 3 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "a\NULb\NULc\NULd\NUL" +-- >>> dropEnd 0 "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- "a\NULb\NULc\NULd\NULe\NULf\NULg\NUL" +-- >>> dropEnd 4 "a\NULb\NULc\NUL" +-- "" +dropEnd :: Int -- ^ number of 'Word16' + -> ShortByteString + -> ShortByteString +dropEnd n' = \(assertEven -> sbs) -> + let sl = BS.length sbs + nl = sl - n + n = n' * 2 + in if | n >= sl -> empty + | n <= 0 -> sbs + | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl + +-- | Similar to 'P.dropWhile', +-- drops the longest (possibly empty) prefix of elements +-- satisfying the predicate and returns the remainder. +-- +-- Note: copies the entire byte array +dropWhile :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +dropWhile f = \(assertEven -> ps) -> drop (findIndexOrLength (not . f) ps) ps + +-- | Similar to 'P.dropWhileEnd', +-- drops the longest (possibly empty) suffix of elements +-- satisfying the predicate and returns the remainder. +-- +-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. +-- +-- @since 0.10.12.0 +dropWhileEnd :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +dropWhileEnd f = \(assertEven -> ps) -> take (findFromEndUntil (not . f) ps) ps + +-- | Returns the longest (possibly empty) suffix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@. +breakEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +breakEnd p = \(assertEven -> sbs) -> splitAt (findFromEndUntil p sbs) sbs + +-- | Similar to 'P.break', +-- returns the longest (possibly empty) prefix of elements which __do not__ +-- satisfy the predicate and the remainder of the string. +-- +-- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@. +break :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +break = \p (assertEven -> ps) -> case findIndexOrLength p ps of n -> splitAt n ps + +-- | Similar to 'P.span', +-- returns the longest (possibly empty) prefix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@. +-- +span :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +{- HLINT ignore "Use span" -} +span p = break (not . p) . assertEven + +-- | Returns the longest (possibly empty) suffix of elements +-- satisfying the predicate and the remainder of the string. +-- +-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@. +-- +-- We have +-- +-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z") +-- +-- and +-- +-- > spanEnd (not . isSpace) ps +-- > == +-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x) +-- +spanEnd :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +spanEnd p = \(assertEven -> ps) -> splitAt (findFromEndUntil (not.p) ps) ps + +-- | /O(n)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. +-- +-- Note: copies the substrings +splitAt :: Int -- ^ number of Word16 + -> ShortByteString + -> (ShortByteString, ShortByteString) +splitAt n' = \(assertEven -> sbs) -> if + | n <= 0 -> (empty, sbs) + | otherwise -> + let slen = BS.length sbs + in if | n >= BS.length sbs -> (sbs, empty) + | otherwise -> + let llen = min slen (max 0 n) + rlen = max 0 (slen - max 0 n) + lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen + rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen + in (lsbs, rsbs) + where + n = n' * 2 + +-- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte +-- argument, consuming the delimiter. I.e. +-- +-- > split 10 "a\nb\nd\ne" == ["a","b","d","e"] -- fromEnum '\n' == 10 +-- > split 97 "aXaXaXa" == ["","X","X","X",""] -- fromEnum 'a' == 97 +-- > split 120 "x" == ["",""] -- fromEnum 'x' == 120 +-- > split undefined "" == [] -- and not [""] +-- +-- and +-- +-- > intercalate [c] . split c == id +-- > split == splitWith . (==) +-- +-- Note: copies the substrings +split :: Word16 -> ShortByteString -> [ShortByteString] +split w = splitWith (== w) . assertEven + + +-- | /O(n)/ Splits a 'ShortByteString' into components delimited by +-- separators, where the predicate returns True for a separator element. +-- The resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. eg. +-- +-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97 +-- > splitWith undefined "" == [] -- and not [""] +-- +splitWith :: (Word16 -> Bool) -> ShortByteString -> [ShortByteString] +splitWith p = \(assertEven -> sbs) -> if + | BS.null sbs -> [] + | otherwise -> go sbs + where + go sbs' + | BS.null sbs' = [mempty] + | otherwise = + case break p sbs' of + (a, b) + | BS.null b -> [a] + | otherwise -> a : go (tail b) + + +-- | Check whether one string is a substring of another. +isInfixOf :: ShortByteString -> ShortByteString -> Bool +isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) + + +-- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 +breakSubstring :: ShortByteString -- ^ String to search for + -> ShortByteString -- ^ String to search in + -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring +breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 + where + lpat = BS.length bPat + linp = BS.length bInp + go ix + | let ix' = ix * 2 + , linp >= ix' + lpat = + if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp + | otherwise -> go (ix + 1) + | otherwise + = (bInp, mempty) + + +-- --------------------------------------------------------------------- +-- Reducing 'ByteString's + +-- | 'foldl', applied to a binary operator, a starting value (typically +-- the left-identity of the operator), and a ShortByteString, reduces the +-- ShortByteString using the binary operator, from left to right. +-- +foldl :: (a -> Word16 -> a) -> a -> ShortByteString -> a +foldl f v = List.foldl f v . unpack . assertEven + +-- | 'foldl'' is like 'foldl', but strict in the accumulator. +-- +foldl' :: (a -> Word16 -> a) -> a -> ShortByteString -> a +foldl' f v = List.foldl' f v . unpack . assertEven + +-- | 'foldr', applied to a binary operator, a starting value +-- (typically the right-identity of the operator), and a ShortByteString, +-- reduces the ShortByteString using the binary operator, from right to left. +foldr :: (Word16 -> a -> a) -> a -> ShortByteString -> a +foldr f v = List.foldr f v . unpack . assertEven + +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +foldr' :: (Word16 -> a -> a) -> a -> ShortByteString -> a +foldr' k v = Foldable.foldr' k v . unpack . assertEven + +-- | 'foldl1' is a variant of 'foldl' that has no starting value +-- argument, and thus must be applied to non-empty 'ShortByteString's. +-- An exception will be thrown in the case of an empty ShortByteString. +foldl1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldl1 k = List.foldl1 k . unpack . assertEven + +-- | 'foldl1'' is like 'foldl1', but strict in the accumulator. +-- An exception will be thrown in the case of an empty ShortByteString. +foldl1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldl1' k = List.foldl1' k . unpack . assertEven + +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, +-- and thus must be applied to non-empty 'ShortByteString's +-- An exception will be thrown in the case of an empty ShortByteString. +foldr1 :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldr1 k = List.foldr1 k . unpack . assertEven + +-- | 'foldr1'' is a variant of 'foldr1', but is strict in the +-- accumulator. +foldr1' :: HasCallStack => (Word16 -> Word16 -> Word16) -> ShortByteString -> Word16 +foldr1' k = \(assertEven -> sbs) -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs) + + +-- -------------------------------------------------------------------- +-- Searching ShortByteString + +-- | /O(1)/ 'ShortByteString' index (subscript) operator, starting from 0. +index :: HasCallStack + => ShortByteString + -> Int -- ^ number of 'Word16' + -> Word16 +index = \(assertEven -> sbs) i -> if + | i >= 0 && i < numWord16 sbs -> unsafeIndex sbs i + | otherwise -> indexError sbs i + +-- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 0.11.0.0 +indexMaybe :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Maybe Word16 +indexMaybe = \(assertEven -> sbs) i -> if + | i >= 0 && i < numWord16 sbs -> Just $! unsafeIndex sbs i + | otherwise -> Nothing +{-# INLINE indexMaybe #-} + +unsafeIndex :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Word16 +unsafeIndex sbs i = indexWord16Array (asBA sbs) (i * 2) + +indexError :: HasCallStack => ShortByteString -> Int -> a +indexError sbs i = + moduleError "index" $ "error in array index: " ++ show i + ++ " not in range [0.." ++ show (numWord16 sbs) ++ "]" + +-- | /O(1)/ 'ShortByteString' index, starting from 0, that returns 'Just' if: +-- +-- > 0 <= n < length bs +-- +-- @since 0.11.0.0 +(!?) :: ShortByteString + -> Int -- ^ number of 'Word16' + -> Maybe Word16 +(!?) = indexMaybe +{-# INLINE (!?) #-} + +-- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate. +elem :: Word16 -> ShortByteString -> Bool +elem c = \(assertEven -> sbs) -> case elemIndex c sbs of Nothing -> False ; _ -> True + +-- | /O(n)/ 'filter', applied to a predicate and a ByteString, +-- returns a ByteString containing those characters that satisfy the +-- predicate. +filter :: (Word16 -> Bool) -> ShortByteString -> ShortByteString +filter k = \(assertEven -> sbs) -> + let l = BS.length sbs + in if | l <= 0 -> sbs + | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l + where + go :: forall s. MBA s -- mutable output bytestring + -> BA -- input bytestring + -> Int -- length of input bytestring + -> ST s Int + go !mba ba !l = go' 0 0 + where + go' :: Int -- bytes read + -> Int -- bytes written + -> ST s Int + go' !br !bw + | br >= l = return bw + | otherwise = do + let w = indexWord16Array ba br + if k w + then do + writeWord16Array mba bw w + go' (br+2) (bw+2) + else + go' (br+2) bw + +-- | /O(n)/ The 'find' function takes a predicate and a ByteString, +-- and returns the first element in matching the predicate, or 'Nothing' +-- if there is no such element. +-- +-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing +-- +find :: (Word16 -> Bool) -> ShortByteString -> Maybe Word16 +find f = \(assertEven -> sbs) -> case findIndex f sbs of + Just n -> Just (sbs `index` n) + _ -> Nothing + +-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns +-- the pair of ByteStrings with elements which do and do not satisfy the +-- predicate, respectively; i.e., +-- +-- > partition p bs == (filter p xs, filter (not . p) xs) +-- +partition :: (Word16 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString) +partition k = \(assertEven -> sbs) -> + let l = BS.length sbs + in if | l <= 0 -> (sbs, sbs) + | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l + where + go :: forall s. + MBA s -- mutable output bytestring1 + -> MBA s -- mutable output bytestring2 + -> BA -- input bytestring + -> Int -- length of input bytestring + -> ST s (Int, Int) -- (length mba1, length mba2) + go !mba1 !mba2 ba !l = go' 0 0 + where + go' :: Int -- bytes read + -> Int -- bytes written to bytestring 1 + -> ST s (Int, Int) -- (length mba1, length mba2) + go' !br !bw1 + | br >= l = return (bw1, br - bw1) + | otherwise = do + let w = indexWord16Array ba br + if k w + then do + writeWord16Array mba1 bw1 w + go' (br+2) (bw1+2) + else do + writeWord16Array mba2 (br - bw1) w + go' (br+2) bw1 + +-- -------------------------------------------------------------------- +-- Indexing ShortByteString + +-- | /O(n)/ The 'elemIndex' function returns the index of the first +-- element in the given 'ShortByteString' which is equal to the query +-- element, or 'Nothing' if there is no such element. +elemIndex :: Word16 + -> ShortByteString + -> Maybe Int -- ^ number of 'Word16' +{- HLINT ignore "Use elemIndex" -} +elemIndex k = findIndex (==k) . assertEven + +-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning +-- the indices of all elements equal to the query element, in ascending order. +elemIndices :: Word16 -> ShortByteString -> [Int] +{- HLINT ignore "Use elemIndices" -} +elemIndices k = findIndices (==k) . assertEven + +-- | count returns the number of times its argument appears in the ShortByteString +count :: Word16 -> ShortByteString -> Int +count w = List.length . elemIndices w . assertEven + +-- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and +-- returns the index of the first element in the ByteString +-- satisfying the predicate. +findIndex :: (Word16 -> Bool) -> ShortByteString -> Maybe Int +findIndex k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = Nothing + | k (w n) = Just (n `shiftR` 1) + | otherwise = go (n + 2) + in go 0 + +-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the +-- indices of all elements satisfying the predicate, in ascending order. +findIndices :: (Word16 -> Bool) -> ShortByteString -> [Int] +findIndices k = \(assertEven -> sbs) -> + let l = BS.length sbs + ba = asBA sbs + w = indexWord16Array ba + go !n | n >= l = [] + | k (w n) = (n `shiftR` 1) : go (n + 2) + | otherwise = go (n + 2) + in go 0 + + diff --git a/System/OsPath/Encoding/Internal.hs b/filepath-internals/System/OsPath/Encoding/Internal.hs similarity index 96% rename from System/OsPath/Encoding/Internal.hs rename to filepath-internals/System/OsPath/Encoding/Internal.hs index 1ae1c85a..abbb0c4f 100644 --- a/System/OsPath/Encoding/Internal.hs +++ b/filepath-internals/System/OsPath/Encoding/Internal.hs @@ -8,8 +8,8 @@ module System.OsPath.Encoding.Internal where -import qualified System.OsPath.Data.ByteString.Short as BS8 -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 +import qualified Data.ByteString.Short.Internal as BS8 +import qualified System.OsPath.Data.ByteString.Short.Internal as BS8 import GHC.Base import GHC.Real @@ -305,14 +305,14 @@ encodeWithBasePosix str = withFilePathPosix str $ \cstr -> BS8.packCStringLen cs -- | This mimics the filepath decoder base uses on windows, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). -decodeWithBaseWindows :: BS16.ShortByteString -> IO String -decodeWithBaseWindows ba = BS16.useAsCWStringLen ba $ \fp -> peekFilePathWin fp +decodeWithBaseWindows :: BS8.ShortByteString -> IO String +decodeWithBaseWindows ba = BS8.useAsCWStringLen ba $ \fp -> peekFilePathWin fp -- | This mimics the filepath dencoder base uses on windows, -- with the small distinction that we're not truncating at NUL bytes (because we're not at -- the outer FFI layer). -encodeWithBaseWindows :: String -> IO BS16.ShortByteString -encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS16.packCWStringLen (cstr, l) +encodeWithBaseWindows :: String -> IO BS8.ShortByteString +encodeWithBaseWindows str = withFilePathWin str $ \l cstr -> BS8.packCWStringLen (cstr, l) -- ----------------------------------------------------------------------------- diff --git a/filepath-internals/System/OsPath/Types/Internal.hs b/filepath-internals/System/OsPath/Types/Internal.hs new file mode 100644 index 00000000..4cdafec8 --- /dev/null +++ b/filepath-internals/System/OsPath/Types/Internal.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE CPP #-} + +module System.OsPath.Types.Internal + ( module System.OsString.Types.Internal + , module System.OsPath.Types.Internal + ) +where + +import System.OsString.Types.Internal + + +-- | Filepaths are @wchar_t*@ data on windows as passed to syscalls. +type WindowsPath = WindowsString + +-- | Filepaths are @char[]@ data on unix as passed to syscalls. +type PosixPath = PosixString + +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +-- | Ifdef around current platform (either 'WindowsPath' or 'PosixPath'). +type PlatformPath = WindowsPath +#else +-- | Ifdef around current platform (either 'WindowsPath' or 'PosixPath'). +type PlatformPath = PosixPath +#endif + + +-- | Type representing filenames\/pathnames. +-- +-- This type doesn't add any guarantees over 'OsString'. +type OsPath = OsString diff --git a/System/OsString/Internal/Types.hs b/filepath-internals/System/OsString/Types/Internal.hs similarity index 97% rename from System/OsString/Internal/Types.hs rename to filepath-internals/System/OsString/Types/Internal.hs index 33f960ff..33505e48 100644 --- a/System/OsString/Internal/Types.hs +++ b/filepath-internals/System/OsString/Types/Internal.hs @@ -8,7 +8,7 @@ {-# LANGUAGE TypeApplications #-} {-# LANGUAGE PatternSynonyms #-} -module System.OsString.Internal.Types +module System.OsString.Types.Internal ( WindowsString(..) , pattern WS @@ -41,8 +41,8 @@ import Data.Semigroup import GHC.Generics (Generic) import System.OsPath.Encoding.Internal -import qualified System.OsPath.Data.ByteString.Short as BS -import qualified System.OsPath.Data.ByteString.Short.Word16 as BS16 +import qualified Data.ByteString.Short.Internal as BS (unpack, pack, ShortByteString, empty) +import qualified System.OsPath.Data.ByteString.Short.Word16.Internal as BS16 (unpack) #if MIN_VERSION_template_haskell(2,16,0) import qualified Language.Haskell.TH.Syntax as TH #endif diff --git a/filepath-internals/changelog.md b/filepath-internals/changelog.md new file mode 100644 index 00000000..5c8ef68f --- /dev/null +++ b/filepath-internals/changelog.md @@ -0,0 +1,8 @@ +# Changelog for [`filepath-internals` package](http://hackage.haskell.org/package/filepath-internals) + +_Note: below all `FilePath` values are unquoted, so `\\` really means two backslashes._ + +## 1.4.100.5 *Aug 2023* + +* Initial split from `filepath` package + diff --git a/filepath-internals/filepath-internals.cabal b/filepath-internals/filepath-internals.cabal new file mode 100644 index 00000000..dc36a360 --- /dev/null +++ b/filepath-internals/filepath-internals.cabal @@ -0,0 +1,72 @@ +cabal-version: 2.2 +name: filepath-internals +version: 1.4.100.5 + +-- NOTE: Don't forget to update ./changelog.md +license: BSD-3-Clause +license-file: LICENSE +author: Neil Mitchell +maintainer: Julian Ospald +copyright: Neil Mitchell 2005-2020, Julain Ospald 2021-2022 +bug-reports: https://github.com/haskell/filepath/issues +homepage: + https://github.com/haskell/filepath/blob/master/README.md + +category: System +build-type: Simple +synopsis: Library for manipulating FilePaths in a cross platform way (internals). +tested-with: + GHC ==8.0.2 + || ==8.2.2 + || ==8.4.4 + || ==8.6.5 + || ==8.8.4 + || ==8.10.7 + || ==9.0.2 + || ==9.2.3 + +description: + FilePath internals. This follows PVP, but makes no attempts at providing a stable interface. + +extra-doc-files: + changelog.md + README.md + +flag cpphs + description: Use cpphs (fixes haddock source links) + default: False + manual: True + +source-repository head + type: git + location: https://github.com/haskell/filepath + +library + exposed-modules: + System.OsPath.Data.ByteString.Short.Internal + System.OsPath.Data.ByteString.Short.Word16.Internal + System.OsPath.Encoding.Internal + System.OsPath.Types.Internal + System.OsString.Types.Internal + + other-extensions: + CPP + PatternGuards + + if impl(ghc >=7.2) + other-extensions: Safe + + default-language: Haskell2010 + build-depends: + , base >=4.9 && <4.20 + , bytestring >=0.11.3.0 + , deepseq + , exceptions + , template-haskell + + ghc-options: -Wall + + if flag(cpphs) + ghc-options: -pgmPcpphs -optP--cpp + build-tool-depends: cpphs:cpphs -any + diff --git a/Generate.hs b/filepath/Generate.hs similarity index 100% rename from Generate.hs rename to filepath/Generate.hs diff --git a/HACKING.md b/filepath/HACKING.md similarity index 100% rename from HACKING.md rename to filepath/HACKING.md diff --git a/filepath/LICENSE b/filepath/LICENSE new file mode 100644 index 00000000..5fc319a5 --- /dev/null +++ b/filepath/LICENSE @@ -0,0 +1,30 @@ +Copyright Neil Mitchell 2005-2020. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Neil Mitchell nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/filepath/Makefile similarity index 100% rename from Makefile rename to filepath/Makefile diff --git a/filepath/README.md b/filepath/README.md new file mode 100644 index 00000000..5ec5e51f --- /dev/null +++ b/filepath/README.md @@ -0,0 +1,47 @@ +# FilePath [![Hackage version](https://img.shields.io/hackage/v/filepath.svg?label=Hackage)](https://hackage.haskell.org/package/filepath) + +The `filepath` package provides functionality for manipulating `FilePath` values, and is shipped with [GHC](https://www.haskell.org/ghc/). +It provides two variants for filepaths: + +1. legacy filepaths: `type FilePath = String` +2. operating system abstracted filepaths (`OsPath`): internally unpinned `ShortByteString` (platform-dependent encoding) + +It is recommended to use `OsPath` when possible, because it is more correct. + +For each variant there are three main modules: + +* `System.FilePath.Posix` / `System.OsPath.Posix` manipulates POSIX\/Linux style `FilePath` values (with `/` as the path separator). +* `System.FilePath.Windows` / `System.OsPath.Windows` manipulates Windows style `FilePath` values (with either `\` or `/` as the path separator, and deals with drives). +* `System.FilePath` / `System.OsPath` for dealing with current platform-specific filepaths + +All three modules provide the same API, and the same documentation (calling out differences in the different variants). + +`System.OsString` is like `System.OsPath`, but more general purpose. Refer to the documentation of +those modules for more information. + +### What is a `FilePath`? + +In Haskell, the legacy definition (used in `base` and Prelude) is `type FilePath = String`, +where a Haskell `String` is a list of Unicode code points. + +The new definition is (simplified) `newtype OsPath = AFP ShortByteString`, where +`ShortByteString` is an unpinned byte array and follows syscall conventions, preserving the encoding. + +On unix, filenames don't have a predefined encoding as per the +[POSIX specification](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170) +and are passed as `char[]` to syscalls. + +On windows (at least the API used by `Win32`) filepaths are UTF-16LE strings. + +You are encouraged to use `OsPath` whenever possible, because it is more correct. + +Also note that this is a low-level library and it makes no attempt at providing a more +type safe variant for filepaths (e.g. by distinguishing between absolute and relative +paths) and ensures no invariants (such as filepath validity). + +For such libraries, check out the following: + +* [hpath](https://hackage.haskell.org/package/hpath) +* [path](https://hackage.haskell.org/package/path) +* [paths](https://hackage.haskell.org/package/paths) +* [strong-path](https://hackage.haskell.org/package/strong-path) diff --git a/filepath/Setup.hs b/filepath/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/filepath/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/System/FilePath.hs b/filepath/System/FilePath.hs similarity index 100% rename from System/FilePath.hs rename to filepath/System/FilePath.hs diff --git a/System/FilePath/Internal.hs b/filepath/System/FilePath/Internal.hs similarity index 100% rename from System/FilePath/Internal.hs rename to filepath/System/FilePath/Internal.hs diff --git a/System/FilePath/Posix.hs b/filepath/System/FilePath/Posix.hs similarity index 100% rename from System/FilePath/Posix.hs rename to filepath/System/FilePath/Posix.hs diff --git a/System/FilePath/Windows.hs b/filepath/System/FilePath/Windows.hs similarity index 100% rename from System/FilePath/Windows.hs rename to filepath/System/FilePath/Windows.hs diff --git a/System/OsPath.hs b/filepath/System/OsPath.hs similarity index 100% rename from System/OsPath.hs rename to filepath/System/OsPath.hs diff --git a/System/OsPath.hs-boot b/filepath/System/OsPath.hs-boot similarity index 100% rename from System/OsPath.hs-boot rename to filepath/System/OsPath.hs-boot diff --git a/System/OsPath/Common.hs b/filepath/System/OsPath/Common.hs similarity index 99% rename from System/OsPath/Common.hs rename to filepath/System/OsPath/Common.hs index 0af0ed60..4e9d0379 100644 --- a/System/OsPath/Common.hs +++ b/filepath/System/OsPath/Common.hs @@ -44,6 +44,7 @@ module System.OsPath , PS.encodeUtf , PS.encodeWith , PS.encodeFS + , unsafeFromBytes #if defined(WINDOWS) || defined(POSIX) , pstr #else @@ -111,6 +112,7 @@ where import System.OsPath.Types import System.OsString.Windows as PS ( unsafeFromChar + , unsafeFromBytes , toChar , decodeUtf , decodeWith @@ -143,6 +145,7 @@ import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import System.OsPath.Types import System.OsString.Posix as PS ( unsafeFromChar + , unsafeFromBytes , toChar , decodeUtf , decodeWith @@ -168,6 +171,7 @@ import System.OsPath.Internal as PS , encodeWith , encodeFS , unpack + , unsafeFromBytes ) import System.OsPath.Types ( OsPath ) @@ -182,7 +186,7 @@ import qualified System.OsPath.Posix as C import Data.Bifunctor ( bimap ) #endif -import System.OsString.Internal.Types +import System.OsString.Types.Internal ------------------------ diff --git a/System/OsPath/Data/ByteString/Short.hs b/filepath/System/OsPath/Data/ByteString/Short.hs similarity index 100% rename from System/OsPath/Data/ByteString/Short.hs rename to filepath/System/OsPath/Data/ByteString/Short.hs diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/filepath/System/OsPath/Data/ByteString/Short/Word16.hs similarity index 100% rename from System/OsPath/Data/ByteString/Short/Word16.hs rename to filepath/System/OsPath/Data/ByteString/Short/Word16.hs diff --git a/System/OsPath/Encoding.hs b/filepath/System/OsPath/Encoding.hs similarity index 100% rename from System/OsPath/Encoding.hs rename to filepath/System/OsPath/Encoding.hs diff --git a/System/OsPath/Internal.hs b/filepath/System/OsPath/Internal.hs similarity index 89% rename from System/OsPath/Internal.hs rename to filepath/System/OsPath/Internal.hs index 3bdf5318..9e64d9e0 100644 --- a/System/OsPath/Internal.hs +++ b/filepath/System/OsPath/Internal.hs @@ -19,7 +19,7 @@ import Language.Haskell.TH.Syntax ( Lift (..), lift ) import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) -import System.OsString.Internal.Types +import System.OsString.Types.Internal import System.OsPath.Encoding import Control.Monad (when) import System.IO @@ -99,13 +99,19 @@ decodeFS = OS.decodeFS -- | Constructs an @OsPath@ from a ByteString. -- --- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. +-- On windows, this expects valid UCS-2LE, on unix it is passed unchanged/unchecked. -- -- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). -fromBytes :: MonadThrow m - => ByteString - -> m OsPath -fromBytes = OS.fromBytes +-- +-- Note: This is rarely what you want, because you have to ensure the input will +-- work on both platforms (unix and windows) equally (Word16 vs Word8 array). +-- This is unlikely to be the case, unless you get the raw bytes via FFI +-- from 'unix' and 'Win32' package or manually construct correct platform +-- specific ByteStrings. +unsafeFromBytes :: MonadThrow m + => ByteString + -> m OsPath +unsafeFromBytes = OS.unsafeFromBytes diff --git a/System/OsPath/Posix.hs b/filepath/System/OsPath/Posix.hs similarity index 100% rename from System/OsPath/Posix.hs rename to filepath/System/OsPath/Posix.hs diff --git a/System/OsPath/Posix/Internal.hs b/filepath/System/OsPath/Posix/Internal.hs similarity index 100% rename from System/OsPath/Posix/Internal.hs rename to filepath/System/OsPath/Posix/Internal.hs diff --git a/System/OsPath/Types.hs b/filepath/System/OsPath/Types.hs similarity index 95% rename from System/OsPath/Types.hs rename to filepath/System/OsPath/Types.hs index 6bf1b774..31324fe9 100644 --- a/System/OsPath/Types.hs +++ b/filepath/System/OsPath/Types.hs @@ -18,7 +18,7 @@ module System.OsPath.Types ) where -import System.OsString.Internal.Types +import System.OsString.Types.Internal -- | Filepaths are @wchar_t*@ data on windows as passed to syscalls. diff --git a/System/OsPath/Windows.hs b/filepath/System/OsPath/Windows.hs similarity index 100% rename from System/OsPath/Windows.hs rename to filepath/System/OsPath/Windows.hs diff --git a/System/OsPath/Windows/Internal.hs b/filepath/System/OsPath/Windows/Internal.hs similarity index 100% rename from System/OsPath/Windows/Internal.hs rename to filepath/System/OsPath/Windows/Internal.hs diff --git a/System/OsString.hs b/filepath/System/OsString.hs similarity index 93% rename from System/OsString.hs rename to filepath/System/OsString.hs index c11a4bdf..cefb22b4 100644 --- a/System/OsString.hs +++ b/filepath/System/OsString.hs @@ -23,6 +23,7 @@ module System.OsString , encodeUtf , encodeWith , encodeFS + , unsafeFromBytes , osstr , pack @@ -45,6 +46,7 @@ where import System.OsString.Internal ( unsafeFromChar + , unsafeFromBytes , toChar , encodeUtf , encodeWith @@ -56,5 +58,5 @@ import System.OsString.Internal , decodeFS , unpack ) -import System.OsString.Internal.Types +import System.OsString.Types.Internal ( OsString, OsChar ) diff --git a/System/OsString/Common.hs b/filepath/System/OsString/Common.hs similarity index 97% rename from System/OsString/Common.hs rename to filepath/System/OsString/Common.hs index 80eb69b5..112537aa 100644 --- a/System/OsString/Common.hs +++ b/filepath/System/OsString/Common.hs @@ -26,7 +26,7 @@ module System.OsString.MODULE_NAME , encodeUtf , encodeWith , encodeFS - , fromBytes + , unsafeFromBytes , pstr , pack @@ -46,7 +46,7 @@ where -import System.OsString.Internal.Types ( +import System.OsString.Types.Internal ( #ifdef WINDOWS WindowsString(..), WindowsChar(..) #else @@ -218,7 +218,7 @@ decodeFS (PosixString ba) = decodeWithBasePosix ba #ifdef WINDOWS_DOC -- | Constructs a platform string from a ByteString. -- --- This ensures valid UCS-2LE. +-- This expects valid UCS-2LE. -- Note that this doesn't expand Word8 to Word16 on windows, so you may get invalid UTF-16. -- -- Throws 'EncodingException' on invalid UCS-2LE (although unlikely). @@ -227,15 +227,15 @@ decodeFS (PosixString ba) = decodeWithBasePosix ba -- -- This is a no-op. #endif -fromBytes :: MonadThrow m - => ByteString - -> m PLATFORM_STRING +unsafeFromBytes :: MonadThrow m + => ByteString + -> m PLATFORM_STRING #ifdef WINDOWS -fromBytes bs = +unsafeFromBytes bs = let ws = WindowsString . BS16.toShort $ bs in either throwM (const . pure $ ws) $ decodeWith ucs2le ws #else -fromBytes = pure . PosixString . BS.toShort +unsafeFromBytes = pure . PosixString . BS.toShort #endif diff --git a/System/OsString/Internal.hs b/filepath/System/OsString/Internal.hs similarity index 90% rename from System/OsString/Internal.hs rename to filepath/System/OsString/Internal.hs index f72fdcb7..5d01425b 100644 --- a/System/OsString/Internal.hs +++ b/filepath/System/OsString/Internal.hs @@ -4,7 +4,7 @@ module System.OsString.Internal where -import System.OsString.Internal.Types +import System.OsString.Types.Internal import Control.Monad.Catch ( MonadThrow ) @@ -107,13 +107,19 @@ decodeFS (OsString x) = PF.decodeFS x -- | Constructs an @OsString@ from a ByteString. -- --- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked. +-- On windows, this expects valid UCS-2LE, on unix it is passed unchanged/unchecked. -- -- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely). -fromBytes :: MonadThrow m - => ByteString - -> m OsString -fromBytes = fmap OsString . PF.fromBytes +-- +-- Note: This is rarely what you want, because you have to ensure the input will +-- work on both platforms (unix and windows) equally (Word16 vs Word8 array). +-- This is unlikely to be the case, unless you get the raw bytes via FFI +-- from 'unix' and 'Win32' package or manually construct correct platform +-- specific ByteStrings. +unsafeFromBytes :: MonadThrow m + => ByteString + -> m OsString +unsafeFromBytes = fmap OsString . PF.unsafeFromBytes -- | QuasiQuote an 'OsString'. This accepts Unicode characters diff --git a/System/OsString/Posix.hs b/filepath/System/OsString/Posix.hs similarity index 100% rename from System/OsString/Posix.hs rename to filepath/System/OsString/Posix.hs diff --git a/System/OsString/Types.hs b/filepath/System/OsString/Types.hs similarity index 100% rename from System/OsString/Types.hs rename to filepath/System/OsString/Types.hs diff --git a/System/OsString/Windows.hs b/filepath/System/OsString/Windows.hs similarity index 100% rename from System/OsString/Windows.hs rename to filepath/System/OsString/Windows.hs diff --git a/bench/BenchFilePath.hs b/filepath/bench/BenchFilePath.hs similarity index 98% rename from bench/BenchFilePath.hs rename to filepath/bench/BenchFilePath.hs index 5319f1c0..04bbccde 100644 --- a/bench/BenchFilePath.hs +++ b/filepath/bench/BenchFilePath.hs @@ -6,7 +6,7 @@ module Main where import System.OsPath.Types import System.OsPath.Encoding ( ucs2le ) -import qualified System.OsString.Internal.Types as OST +import qualified System.OsString.Types.Internal as OST import qualified Data.ByteString.Short as SBS import Test.Tasty.Bench @@ -214,8 +214,8 @@ main = defaultMain , ("pack PlatformString (posix)" , nf APF.pack (APF.unpack posixPathAFPP)) , ("pack PlatformString (windows)" , nf AWF.pack (AWF.unpack windowsPathAFPP)) - , ("fromBytes (posix)" , nf (OSP.fromBytes @Maybe) (SBS.fromShort . OST.getPosixString $ posixPathAFPP)) - , ("fromBytes (windows)" , nf (WSP.fromBytes @Maybe) (SBS.fromShort . OST.getWindowsString $ windowsPathAFPP)) + , ("unsafeFromBytes (posix)" , nf (OSP.unsafeFromBytes @Maybe) (SBS.fromShort . OST.getPosixString $ posixPathAFPP)) + , ("unsafeFromBytes (windows)" , nf (WSP.unsafeFromBytes @Maybe) (SBS.fromShort . OST.getWindowsString $ windowsPathAFPP)) ] ] diff --git a/changelog.md b/filepath/changelog.md similarity index 95% rename from changelog.md rename to filepath/changelog.md index a5bca439..2b68278c 100644 --- a/changelog.md +++ b/filepath/changelog.md @@ -2,6 +2,12 @@ _Note: below all `FilePath` values are unquoted, so `\\` really means two backslashes._ +## 1.4.100.5 *Aug 2023* + +* split internals into `filepath-internals` package +* expose `unsafeFromBytes` in public API (previously `fromBytes`) +* rename `System.OsString.Internal.Types` to `System.OsString.Types.Internal` + ## 1.4.100.4 *Jul 2023* * Fix isInfixOf and breakSubString in Word16, wrt [#195](https://github.com/haskell/filepath/issues/195) diff --git a/filepath.cabal b/filepath/filepath.cabal similarity index 94% rename from filepath.cabal rename to filepath/filepath.cabal index c3b76370..adb1928c 100644 --- a/filepath.cabal +++ b/filepath/filepath.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: filepath -version: 1.4.100.4 +version: 1.4.100.5 -- NOTE: Don't forget to update ./changelog.md license: BSD-3-Clause @@ -78,21 +78,19 @@ library System.FilePath.Windows System.OsPath System.OsPath.Data.ByteString.Short - System.OsPath.Data.ByteString.Short.Internal System.OsPath.Data.ByteString.Short.Word16 System.OsPath.Encoding - System.OsPath.Encoding.Internal - System.OsPath.Internal System.OsPath.Posix - System.OsPath.Posix.Internal System.OsPath.Types System.OsPath.Windows - System.OsPath.Windows.Internal System.OsString - System.OsString.Internal - System.OsString.Internal.Types System.OsString.Posix System.OsString.Windows + other-modules: + System.OsPath.Internal + System.OsPath.Posix.Internal + System.OsPath.Windows.Internal + System.OsString.Internal other-extensions: CPP @@ -107,6 +105,7 @@ library , bytestring >=0.11.3.0 , deepseq , exceptions + , filepath-internals >=1.4.100.5 && < 1.5 , template-haskell ghc-options: -Wall @@ -127,6 +126,7 @@ test-suite filepath-tests , base , bytestring >=0.11.3.0 , filepath + , filepath-internals >=1.4.100.5 && < 1.5 , QuickCheck >=2.7 && <2.15 default-language: Haskell2010 @@ -148,6 +148,7 @@ test-suite filepath-equivalent-tests , base , bytestring >=0.11.3.0 , filepath + , filepath-internals >=1.4.100.5 && < 1.5 , QuickCheck >=2.7 && <2.15 test-suite bytestring-tests @@ -165,6 +166,7 @@ test-suite bytestring-tests , base , bytestring >=0.11.3.0 , filepath + , filepath-internals >=1.4.100.5 && < 1.5 , QuickCheck >=2.7 && <2.15 test-suite abstract-filepath @@ -184,6 +186,7 @@ test-suite abstract-filepath , bytestring >=0.11.3.0 , deepseq , filepath + , filepath-internals >=1.4.100.5 && < 1.5 , QuickCheck >=2.7 && <2.15 , quickcheck-classes-base ^>=0.6.2 @@ -198,6 +201,7 @@ benchmark bench-filepath , bytestring >=0.11.3.0 , deepseq , filepath + , filepath-internals >=1.4.100.5 && < 1.5 , tasty-bench ghc-options: -with-rtsopts=-A32m diff --git a/prologue.txt b/filepath/prologue.txt similarity index 100% rename from prologue.txt rename to filepath/prologue.txt diff --git a/tests/TestUtil.hs b/filepath/tests/TestUtil.hs similarity index 99% rename from tests/TestUtil.hs rename to filepath/tests/TestUtil.hs index 1f926a12..9f0494ac 100644 --- a/tests/TestUtil.hs +++ b/filepath/tests/TestUtil.hs @@ -24,7 +24,7 @@ import qualified System.OsPath.Windows as AFP_W import qualified System.OsPath.Posix as AFP_P import System.OsPath.Types #endif -import System.OsString.Internal.Types +import System.OsString.Types.Internal import System.OsPath.Encoding.Internal import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) diff --git a/tests/abstract-filepath/Arbitrary.hs b/filepath/tests/abstract-filepath/Arbitrary.hs similarity index 98% rename from tests/abstract-filepath/Arbitrary.hs rename to filepath/tests/abstract-filepath/Arbitrary.hs index 7918eb16..576a6b9d 100644 --- a/tests/abstract-filepath/Arbitrary.hs +++ b/filepath/tests/abstract-filepath/Arbitrary.hs @@ -5,7 +5,7 @@ module Arbitrary where import Data.Char import Data.Maybe import System.OsString -import System.OsString.Internal.Types +import System.OsString.Types.Internal import qualified System.OsString.Posix as Posix import qualified System.OsString.Windows as Windows import Data.ByteString ( ByteString ) diff --git a/tests/abstract-filepath/EncodingSpec.hs b/filepath/tests/abstract-filepath/EncodingSpec.hs similarity index 100% rename from tests/abstract-filepath/EncodingSpec.hs rename to filepath/tests/abstract-filepath/EncodingSpec.hs diff --git a/tests/abstract-filepath/OsPathSpec.hs b/filepath/tests/abstract-filepath/OsPathSpec.hs similarity index 99% rename from tests/abstract-filepath/OsPathSpec.hs rename to filepath/tests/abstract-filepath/OsPathSpec.hs index bee6fb57..e3f12327 100644 --- a/tests/abstract-filepath/OsPathSpec.hs +++ b/filepath/tests/abstract-filepath/OsPathSpec.hs @@ -8,11 +8,11 @@ module OsPathSpec where import Data.Maybe import System.OsPath as OSP -import System.OsString.Internal.Types +import System.OsString.Types.Internal import System.OsPath.Posix as Posix import System.OsPath.Windows as Windows import System.OsPath.Encoding -import qualified System.OsString.Internal.Types as OS +import qualified System.OsString.Types.Internal as OS import System.OsPath.Data.ByteString.Short ( toShort ) import System.OsString.Posix as PosixS import System.OsString.Windows as WindowsS diff --git a/tests/abstract-filepath/Test.hs b/filepath/tests/abstract-filepath/Test.hs similarity index 100% rename from tests/abstract-filepath/Test.hs rename to filepath/tests/abstract-filepath/Test.hs diff --git a/tests/bytestring-tests/Main.hs b/filepath/tests/bytestring-tests/Main.hs similarity index 100% rename from tests/bytestring-tests/Main.hs rename to filepath/tests/bytestring-tests/Main.hs diff --git a/tests/bytestring-tests/Properties/Common.hs b/filepath/tests/bytestring-tests/Properties/Common.hs similarity index 100% rename from tests/bytestring-tests/Properties/Common.hs rename to filepath/tests/bytestring-tests/Properties/Common.hs diff --git a/tests/bytestring-tests/Properties/ShortByteString.hs b/filepath/tests/bytestring-tests/Properties/ShortByteString.hs similarity index 100% rename from tests/bytestring-tests/Properties/ShortByteString.hs rename to filepath/tests/bytestring-tests/Properties/ShortByteString.hs diff --git a/tests/bytestring-tests/Properties/ShortByteString/Word16.hs b/filepath/tests/bytestring-tests/Properties/ShortByteString/Word16.hs similarity index 100% rename from tests/bytestring-tests/Properties/ShortByteString/Word16.hs rename to filepath/tests/bytestring-tests/Properties/ShortByteString/Word16.hs diff --git a/tests/filepath-equivalent-tests/Legacy/System/FilePath.hs b/filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath.hs similarity index 100% rename from tests/filepath-equivalent-tests/Legacy/System/FilePath.hs rename to filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath.hs diff --git a/tests/filepath-equivalent-tests/Legacy/System/FilePath/Posix.hs b/filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath/Posix.hs similarity index 100% rename from tests/filepath-equivalent-tests/Legacy/System/FilePath/Posix.hs rename to filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath/Posix.hs diff --git a/tests/filepath-equivalent-tests/Legacy/System/FilePath/Windows.hs b/filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath/Windows.hs similarity index 100% rename from tests/filepath-equivalent-tests/Legacy/System/FilePath/Windows.hs rename to filepath/tests/filepath-equivalent-tests/Legacy/System/FilePath/Windows.hs diff --git a/tests/filepath-equivalent-tests/TestEquiv.hs b/filepath/tests/filepath-equivalent-tests/TestEquiv.hs similarity index 100% rename from tests/filepath-equivalent-tests/TestEquiv.hs rename to filepath/tests/filepath-equivalent-tests/TestEquiv.hs diff --git a/tests/filepath-tests/Test.hs b/filepath/tests/filepath-tests/Test.hs similarity index 100% rename from tests/filepath-tests/Test.hs rename to filepath/tests/filepath-tests/Test.hs diff --git a/tests/filepath-tests/TestGen.hs b/filepath/tests/filepath-tests/TestGen.hs similarity index 99% rename from tests/filepath-tests/TestGen.hs rename to filepath/tests/filepath-tests/TestGen.hs index 2075e7f0..9f525a9e 100755 --- a/tests/filepath-tests/TestGen.hs +++ b/filepath/tests/filepath-tests/TestGen.hs @@ -14,7 +14,7 @@ import Data.String import GHC.IO.Encoding.Failure ( CodingFailureMode(..) ) import GHC.IO.Encoding.UTF16 ( mkUTF16le ) import GHC.IO.Encoding.UTF8 ( mkUTF8 ) -import System.OsString.Internal.Types +import System.OsString.Types.Internal import System.OsPath.Encoding.Internal import qualified Data.Char as C import qualified System.OsPath.Data.ByteString.Short as SBS diff --git a/stack.yaml b/stack.yaml index e3290669..7dd8a9f2 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,6 +1,7 @@ resolver: lts-18.28 packages: -- . +- filepath +- filepath-internals extra-deps: - bytestring-0.11.3.1@sha256:0cc97b237df9a34d12de37973ad306391e24b0c262387c4d846bb8d1d65699dd,7339 - text-1.2.5.0@sha256:791f0f6c97ed96113f17ab520cf0efe1a3a4f883a8c85910a5660567c8241c40,7895