From bf47a49bb806ee8393a3cc51dd9bd14fe106bca0 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 5 Aug 2024 13:24:42 -0700 Subject: [PATCH 01/52] Add Hedgehog roundtrip tests for cbor encoding --- .../U/Codebase/Sqlite/Branch/Format.hs | 3 +- .../U/Codebase/Sqlite/Causal.hs | 1 + .../U/Codebase/Sqlite/Decl/Format.hs | 2 + .../U/Codebase/Sqlite/Entity.hs | 1 + .../U/Codebase/Sqlite/LocalIds.hs | 2 +- .../U/Codebase/Sqlite/Patch/Format.hs | 2 + .../U/Codebase/Sqlite/Term/Format.hs | 2 + hie.yaml | 3 + unison-cli/src/Unison/Share/SyncV2.hs | 742 ++++++++++++++++++ unison-cli/unison-cli.cabal | 1 + unison-share-api/package.yaml | 15 + unison-share-api/src/Unison/Server/Orphans.hs | 63 ++ unison-share-api/tests/Main.hs | 23 + .../tests/Unison/Test/Sync/Gen.hs | 52 ++ .../tests/Unison/Test/Sync/Roundtrip.hs | 29 + unison-share-api/unison-share-api.cabal | 108 +++ 16 files changed, 1047 insertions(+), 2 deletions(-) create mode 100644 unison-cli/src/Unison/Share/SyncV2.hs create mode 100644 unison-share-api/tests/Main.hs create mode 100644 unison-share-api/tests/Unison/Test/Sync/Gen.hs create mode 100644 unison-share-api/tests/Unison/Test/Sync/Roundtrip.hs diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Branch/Format.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Branch/Format.hs index ce07a487fb..2a2300329f 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Branch/Format.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Branch/Format.hs @@ -101,7 +101,7 @@ data BranchLocalIds' t d p c = LocalIds branchPatchLookup :: Vector p, branchChildLookup :: Vector c } - deriving (Show) + deriving (Show, Eq) -- | Bytes encoding a LocalBranch newtype LocalBranchBytes = LocalBranchBytes ByteString @@ -110,6 +110,7 @@ newtype LocalBranchBytes = LocalBranchBytes ByteString data SyncBranchFormat' parent text defn patch child = SyncFull (BranchLocalIds' text defn patch child) LocalBranchBytes | SyncDiff parent (BranchLocalIds' text defn patch child) LocalBranchBytes + deriving (Eq, Show) type SyncBranchFormat = SyncBranchFormat' BranchObjectId TextId ObjectId PatchObjectId (BranchObjectId, CausalHashId) diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Causal.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Causal.hs index 582bfc65a3..87f532bf25 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Causal.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Causal.hs @@ -22,5 +22,6 @@ data SyncCausalFormat' causalHash valueHash = SyncCausalFormat { valueHash :: valueHash, parents :: Vector causalHash } + deriving stock (Eq, Show) type SyncCausalFormat = SyncCausalFormat' CausalHashId BranchHashId diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Decl/Format.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Decl/Format.hs index 5a6f401964..5752d2dd87 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Decl/Format.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Decl/Format.hs @@ -36,9 +36,11 @@ type SyncDeclFormat = data SyncDeclFormat' t d = SyncDecl (SyncLocallyIndexedComponent' t d) + deriving stock (Eq, Show) newtype SyncLocallyIndexedComponent' t d = SyncLocallyIndexedComponent (Vector (LocalIds' t d, ByteString)) + deriving stock (Eq, Show) -- [OldDecl] ==map==> [NewDecl] ==number==> [(NewDecl, Int)] ==sort==> [(NewDecl, Int)] ==> permutation is map snd of that diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Entity.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Entity.hs index 3b93fd4b16..92cbb58828 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Entity.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Entity.hs @@ -24,6 +24,7 @@ data SyncEntity' text hash defn patch branchh branch causal | N (Namespace.SyncBranchFormat' branch text defn patch (branch, causal)) | P (Patch.SyncPatchFormat' patch text hash defn) | C (Causal.SyncCausalFormat' causal branchh) + deriving stock (Eq, Show) entityType :: SyncEntity' text hash defn patch branchh branch causal -> TempEntityType entityType = \case diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/LocalIds.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/LocalIds.hs index d8645b81ae..f68016de78 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/LocalIds.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/LocalIds.hs @@ -15,7 +15,7 @@ data LocalIds' t h = LocalIds { textLookup :: Vector t, defnLookup :: Vector h } - deriving (Functor, Show) + deriving stock (Functor, Show, Eq) type LocalIds = LocalIds' TextId ObjectId diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Patch/Format.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Patch/Format.hs index 7defa50234..452df27904 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Patch/Format.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Patch/Format.hs @@ -40,6 +40,7 @@ data PatchLocalIds' t h d = LocalIds patchHashLookup :: Vector h, patchDefnLookup :: Vector d } + deriving stock (Eq, Show) type SyncPatchFormat = SyncPatchFormat' PatchObjectId TextId HashId ObjectId @@ -47,6 +48,7 @@ data SyncPatchFormat' parent text hash defn = SyncFull (PatchLocalIds' text hash defn) ByteString | -- | p is the identity of the thing that the diff is relative to SyncDiff parent (PatchLocalIds' text hash defn) ByteString + deriving stock (Eq, Show) -- | Apply a list of patch diffs to a patch, left to right. applyPatchDiffs :: Patch -> [PatchDiff] -> Patch diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Term/Format.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Term/Format.hs index e50d215ecf..f06fc70ec3 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Term/Format.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Term/Format.hs @@ -49,6 +49,7 @@ newtype LocallyIndexedComponent' t d = LocallyIndexedComponent newtype SyncLocallyIndexedComponent' t d = SyncLocallyIndexedComponent (Vector (LocalIds' t d, ByteString)) + deriving stock (Eq, Show) {- message = "hello, world" -> ABT { ... { Term.F.Text "hello, world" } } -> hashes to (#abc, 0) @@ -127,6 +128,7 @@ data TermFormat' t d = Term (LocallyIndexedComponent' t d) type SyncTermFormat = SyncTermFormat' TextId ObjectId data SyncTermFormat' t d = SyncTerm (SyncLocallyIndexedComponent' t d) + deriving stock (Eq, Show) data WatchResultFormat = WatchResult WatchLocalIds Term diff --git a/hie.yaml b/hie.yaml index ce2a6418a5..e3ebb06c22 100644 --- a/hie.yaml +++ b/hie.yaml @@ -135,6 +135,9 @@ cradle: - path: "unison-share-api/src" component: "unison-share-api:lib" + - path: "unison-share-api/tests" + component: "unison-share-api:test:unison-share-api-tests" + - path: "unison-share-projects-api/src" component: "unison-share-projects-api:lib" diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs new file mode 100644 index 0000000000..d79d9cc3b2 --- /dev/null +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -0,0 +1,742 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE TypeOperators #-} + +module Unison.Share.SyncV2 + ( -- ** Get causal hash by path + getCausalHashByPath, + GetCausalHashByPathError (..), + + -- ** Pull/Download + pull, + PullError (..), + downloadEntities, + ) +where + +import Control.Concurrent.STM +import Control.Lens +import Control.Monad.Except +import Control.Monad.Reader (ask) +import Control.Monad.Trans.Reader (ReaderT, runReaderT) +import Control.Monad.Trans.Reader qualified as Reader +import Data.Map qualified as Map +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Proxy +import Data.Set qualified as Set +import Data.Set.NonEmpty (NESet) +import Data.Set.NonEmpty qualified as NESet +import Data.Text.Lazy qualified as Text.Lazy +import Data.Text.Lazy.Encoding qualified as Text.Lazy +import GHC.IO (unsafePerformIO) +import Ki qualified +import Network.HTTP.Client qualified as Http.Client +import Network.HTTP.Types qualified as HTTP +import Servant.API qualified as Servant ((:<|>) (..), (:>)) +import Servant.Client (BaseUrl) +import Servant.Client qualified as Servant +import System.Environment (lookupEnv) +import U.Codebase.HashTags (CausalHash) +import U.Codebase.Sqlite.Queries qualified as Q +import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) +import Unison.Auth.HTTPClient (AuthenticatedHttpClient) +import Unison.Auth.HTTPClient qualified as Auth +import Unison.Cli.Monad (Cli) +import Unison.Cli.Monad qualified as Cli +import Unison.Codebase qualified as Codebase +import Unison.Debug qualified as Debug +import Unison.Hash32 (Hash32) +import Unison.Prelude +import Unison.Share.API.Hash qualified as Share +import Unison.Share.ExpectedHashMismatches (expectedCausalHashMismatches, expectedComponentHashMismatches) +import Unison.Share.Sync.Types +import Unison.Sqlite qualified as Sqlite +import Unison.Sync.API qualified as Share (API) +import Unison.Sync.Common (entityToTempEntity, expectEntity, hash32ToCausalHash) +import Unison.Sync.EntityValidation qualified as EV +import Unison.Sync.Types qualified as Share +import Unison.Util.Monoid (foldMapM) + +------------------------------------------------------------------------------------------------------------------------ +-- Pile of constants + +-- | The maximum number of downloader threads, during a pull. +maxSimultaneousPullDownloaders :: Int +maxSimultaneousPullDownloaders = unsafePerformIO $ do + lookupEnv "UNISON_PULL_WORKERS" <&> \case + Just n -> read n + Nothing -> 5 +{-# NOINLINE maxSimultaneousPullDownloaders #-} + +-- | The maximum number of push workers at a time. Each push worker reads from the database and uploads entities. +-- Share currently parallelizes on it's own in the backend, and any more than one push worker +-- just results in serialization conflicts which slow things down. +maxSimultaneousPushWorkers :: Int +maxSimultaneousPushWorkers = unsafePerformIO $ do + lookupEnv "UNISON_PUSH_WORKERS" <&> \case + Just n -> read n + Nothing -> 1 +{-# NOINLINE maxSimultaneousPushWorkers #-} + +syncChunkSize :: Int +syncChunkSize = unsafePerformIO $ do + lookupEnv "UNISON_SYNC_CHUNK_SIZE" <&> \case + Just n -> read n + Nothing -> 50 +{-# NOINLINE syncChunkSize #-} + +------------------------------------------------------------------------------------------------------------------------ +-- Pull + +pull :: + -- | The Unison Share URL. + BaseUrl -> + -- | The repo+path to pull from. + Share.Path -> + -- | Callback that's given a number of entities we just downloaded. + (Int -> IO ()) -> + Cli (Either (SyncError PullError) CausalHash) +pull unisonShareUrl repoPath downloadedCallback = + getCausalHashByPath unisonShareUrl repoPath >>= \case + Left err -> pure (Left (PullError'GetCausalHash <$> err)) + -- There's nothing at the remote path, so there's no causal to pull. + Right Nothing -> pure (Left (SyncError (PullError'NoHistoryAtPath repoPath))) + Right (Just hashJwt) -> + downloadEntities unisonShareUrl (Share.pathRepoInfo repoPath) hashJwt downloadedCallback <&> \case + Left err -> Left (PullError'DownloadEntities <$> err) + Right () -> Right (hash32ToCausalHash (Share.hashJWTHash hashJwt)) + +------------------------------------------------------------------------------------------------------------------------ +-- Download entities + +downloadEntities :: + -- | The Unison Share URL. + BaseUrl -> + -- | The repo to download from. + Share.RepoInfo -> + -- | The hash to download. + Share.HashJWT -> + -- | Callback that's given a number of entities we just downloaded. + (Int -> IO ()) -> + Cli (Either (SyncError Share.DownloadEntitiesError) ()) +downloadEntities unisonShareUrl repoInfo hashJwt downloadedCallback = do + Cli.Env {authHTTPClient, codebase} <- ask + + Cli.label \done -> do + let failed :: SyncError Share.DownloadEntitiesError -> Cli void + failed = done . Left + + let hash = Share.hashJWTHash hashJwt + + maybeTempEntities <- + Cli.runTransaction (Q.entityLocation hash) >>= \case + Just Q.EntityInMainStorage -> pure Nothing + Just Q.EntityInTempStorage -> pure (Just (NESet.singleton hash)) + Nothing -> do + let request = + httpDownloadEntities + authHTTPClient + unisonShareUrl + Share.DownloadEntitiesRequest {repoInfo, hashes = NESet.singleton hashJwt} + entities <- + liftIO request >>= \case + Left err -> failed (TransportError err) + Right (Share.DownloadEntitiesFailure err) -> failed (SyncError err) + Right (Share.DownloadEntitiesSuccess entities) -> pure entities + case validateEntities entities of + Left err -> failed . SyncError . Share.DownloadEntitiesEntityValidationFailure $ err + Right () -> pure () + tempEntities <- Cli.runTransaction (insertEntities entities) + liftIO (downloadedCallback 1) + pure (NESet.nonEmptySet tempEntities) + + whenJust maybeTempEntities \tempEntities -> do + let doCompleteTempEntities = + completeTempEntities + authHTTPClient + unisonShareUrl + ( \action -> + Codebase.withConnection codebase \conn -> + action (Sqlite.runTransaction conn) + ) + repoInfo + downloadedCallback + tempEntities + liftIO doCompleteTempEntities & onLeftM \err -> + failed err + -- Since we may have just inserted and then deleted many temp entities, we attempt to recover some disk space by + -- vacuuming after each pull. If the vacuum fails due to another open transaction on this connection, that's ok, + -- we'll try vacuuming again next pull. + _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) + pure (Right ()) + +-- | Validates the provided entities if and only if the environment variable `UNISON_ENTITY_VALIDATION` is set to "true". +validateEntities :: NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT) -> Either Share.EntityValidationError () +validateEntities entities = + when shouldValidateEntities $ do + ifor_ (NEMap.toMap entities) \hash entity -> do + let entityWithHashes = entity & Share.entityHashes_ %~ Share.hashJWTHash + case EV.validateEntity hash entityWithHashes of + Nothing -> pure () + Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> + let expectedMismatches = case et of + Share.TermComponentType -> expectedComponentHashMismatches + Share.DeclComponentType -> expectedComponentHashMismatches + Share.CausalType -> expectedCausalHashMismatches + _ -> mempty + in case Map.lookup supplied expectedMismatches of + Just expected + | expected == computed -> pure () + _ -> do + Left err + Just err -> do + Left err + +-- | Validate entities received from the server unless this flag is set to false. +validationEnvKey :: String +validationEnvKey = "UNISON_ENTITY_VALIDATION" + +shouldValidateEntities :: Bool +shouldValidateEntities = unsafePerformIO $ do + lookupEnv validationEnvKey <&> \case + Just "false" -> False + _ -> True +{-# NOINLINE shouldValidateEntities #-} + +type WorkerCount = + TVar Int + +newWorkerCount :: IO WorkerCount +newWorkerCount = + newTVarIO 0 + +recordWorking :: WorkerCount -> STM () +recordWorking sem = + modifyTVar' sem (+ 1) + +recordNotWorking :: WorkerCount -> STM () +recordNotWorking sem = + modifyTVar' sem \n -> n - 1 + +-- What the dispatcher is to do +data DispatcherJob + = DispatcherForkWorker (NESet Share.HashJWT) + | DispatcherReturnEarlyBecauseDownloaderFailed (SyncError Share.DownloadEntitiesError) + | DispatcherDone + +-- | Finish downloading entities from Unison Share (or return the first failure to download something). +-- +-- Precondition: the entities were *already* downloaded at some point in the past, and are now sitting in the +-- `temp_entity` table, waiting for their dependencies to arrive so they can be flushed to main storage. +completeTempEntities :: + AuthenticatedHttpClient -> + BaseUrl -> + (forall a. ((forall x. Sqlite.Transaction x -> IO x) -> IO a) -> IO a) -> + Share.RepoInfo -> + (Int -> IO ()) -> + NESet Hash32 -> + IO (Either (SyncError Share.DownloadEntitiesError) ()) +completeTempEntities httpClient unisonShareUrl connect repoInfo downloadedCallback initialNewTempEntities = do + -- The set of hashes we still need to download + hashesVar <- newTVarIO Set.empty + + -- The set of hashes that we haven't inserted yet, but will soon, because we've committed to downloading them. + uninsertedHashesVar <- newTVarIO Set.empty + + -- The entities payloads (along with the jwts that we used to download them) that we've downloaded + entitiesQueue <- newTQueueIO + + -- The sets of new (at the time of inserting, anyway) temp entity rows, which we need to elaborate, then download. + newTempEntitiesQueue <- newTQueueIO + + -- How many workers (downloader / inserter / elaborator) are currently doing stuff. + workerCount <- newWorkerCount + + -- The first download error seen by a downloader, if any. + downloaderFailedVar <- newEmptyTMVarIO + + -- Kick off the cycle of inserter->elaborator->dispatcher->downloader by giving the elaborator something to do + atomically (writeTQueue newTempEntitiesQueue (Set.empty, Just initialNewTempEntities)) + + Ki.scoped \scope -> do + Ki.fork_ scope (inserter entitiesQueue newTempEntitiesQueue workerCount) + Ki.fork_ scope (elaborator hashesVar uninsertedHashesVar newTempEntitiesQueue workerCount) + dispatcher hashesVar uninsertedHashesVar entitiesQueue newTempEntitiesQueue workerCount downloaderFailedVar + where + -- Dispatcher thread: "dequeue" from `hashesVar`, fork one-shot downloaders. + -- + -- We stop when either all of the following are true: + -- + -- - There are no outstanding workers (downloaders, inserter, elaboraror) + -- - The inserter thread doesn't have any outstanding work enqueued (in `entitiesQueue`) + -- - The elaborator thread doesn't have any outstanding work enqueued (in `newTempEntitiesQueue`) + -- + -- Or: + -- + -- - Some downloader failed to download something + dispatcher :: + TVar (Set Share.HashJWT) -> + TVar (Set Share.HashJWT) -> + TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> + TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> + WorkerCount -> + TMVar (SyncError Share.DownloadEntitiesError) -> + IO (Either (SyncError Share.DownloadEntitiesError) ()) + dispatcher hashesVar uninsertedHashesVar entitiesQueue newTempEntitiesQueue workerCount downloaderFailedVar = + Ki.scoped \scope -> + let loop :: IO (Either (SyncError Share.DownloadEntitiesError) ()) + loop = + atomically (checkIfDownloaderFailedMode <|> dispatchWorkMode <|> checkIfDoneMode) >>= \case + DispatcherDone -> pure (Right ()) + DispatcherReturnEarlyBecauseDownloaderFailed err -> pure (Left err) + DispatcherForkWorker hashes -> do + atomically do + -- Limit number of simultaneous downloaders (plus 2, for inserter and elaborator) + workers <- readTVar workerCount + check (workers < maxSimultaneousPullDownloaders + 2) + -- we do need to record the downloader as working outside of the worker thread, not inside. + -- otherwise, we might erroneously fall through the teardown logic below and conclude there's + -- nothing more for the dispatcher to do, when in fact a downloader thread just hasn't made it as + -- far as recording its own existence + recordWorking workerCount + _ <- + Ki.fork @() scope do + downloader entitiesQueue workerCount hashes & onLeftM \err -> + void (atomically (tryPutTMVar downloaderFailedVar err)) + loop + in loop + where + checkIfDownloaderFailedMode :: STM DispatcherJob + checkIfDownloaderFailedMode = + DispatcherReturnEarlyBecauseDownloaderFailed <$> readTMVar downloaderFailedVar + + dispatchWorkMode :: STM DispatcherJob + dispatchWorkMode = do + hashes <- readTVar hashesVar + check (not (Set.null hashes)) + let (hashes1, hashes2) = Set.splitAt syncChunkSize hashes + modifyTVar' uninsertedHashesVar (Set.union hashes1) + writeTVar hashesVar hashes2 + pure (DispatcherForkWorker (NESet.unsafeFromSet hashes1)) + + -- Check to see if there are no hashes left to download, no outstanding workers, and no work in either queue + checkIfDoneMode :: STM DispatcherJob + checkIfDoneMode = do + workers <- readTVar workerCount + check (workers == 0) + isEmptyTQueue entitiesQueue >>= check + isEmptyTQueue newTempEntitiesQueue >>= check + pure DispatcherDone + + -- Downloader thread: download entities, (if successful) enqueue to `entitiesQueue` + downloader :: + TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> + WorkerCount -> + NESet Share.HashJWT -> + IO (Either (SyncError Share.DownloadEntitiesError) ()) + downloader entitiesQueue workerCount hashes = do + httpDownloadEntities httpClient unisonShareUrl Share.DownloadEntitiesRequest {repoInfo, hashes} >>= \case + Left err -> do + atomically (recordNotWorking workerCount) + pure (Left (TransportError err)) + Right (Share.DownloadEntitiesFailure err) -> do + atomically (recordNotWorking workerCount) + pure (Left (SyncError err)) + Right (Share.DownloadEntitiesSuccess entities) -> do + downloadedCallback (NESet.size hashes) + case validateEntities entities of + Left err -> pure . Left . SyncError . Share.DownloadEntitiesEntityValidationFailure $ err + Right () -> do + atomically do + writeTQueue entitiesQueue (hashes, entities) + recordNotWorking workerCount + pure (Right ()) + + -- Inserter thread: dequeue from `entitiesQueue`, insert entities, enqueue to `newTempEntitiesQueue` + inserter :: + TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> + TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> + WorkerCount -> + IO Void + inserter entitiesQueue newTempEntitiesQueue workerCount = + connect \runTransaction -> + forever do + (hashJwts, entities) <- + atomically do + entities <- readTQueue entitiesQueue + recordWorking workerCount + pure entities + newTempEntities0 <- + runTransaction do + NEMap.toList entities & foldMapM \(hash, entity) -> + upsertEntitySomewhere hash entity <&> \case + Q.EntityInMainStorage -> Set.empty + Q.EntityInTempStorage -> Set.singleton hash + atomically do + writeTQueue newTempEntitiesQueue (NESet.toSet hashJwts, NESet.nonEmptySet newTempEntities0) + recordNotWorking workerCount + + -- Elaborator thread: dequeue from `newTempEntitiesQueue`, elaborate, "enqueue" to `hashesVar` + elaborator :: + TVar (Set Share.HashJWT) -> + TVar (Set Share.HashJWT) -> + TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> + WorkerCount -> + IO Void + elaborator hashesVar uninsertedHashesVar newTempEntitiesQueue workerCount = + connect \runTransaction -> + forever do + maybeNewTempEntities <- + atomically do + (hashJwts, mayNewTempEntities) <- readTQueue newTempEntitiesQueue + -- Avoid unnecessary retaining of these hashes to keep memory usage more stable. This algorithm would + -- still be correct if we never delete from `uninsertedHashes`. + -- + -- We remove the inserted hashes from uninsertedHashesVar at this point rather than right after insertion + -- in order to ensure that no running transaction of the elaborator is viewing a snapshot that precedes + -- the snapshot that inserted those hashes. + modifyTVar' uninsertedHashesVar \uninsertedHashes -> Set.difference uninsertedHashes hashJwts + case mayNewTempEntities of + Nothing -> pure Nothing + Just newTempEntities -> do + recordWorking workerCount + pure (Just newTempEntities) + whenJust maybeNewTempEntities \newTempEntities -> do + newElaboratedHashes <- runTransaction (elaborateHashes newTempEntities) + atomically do + uninsertedHashes <- readTVar uninsertedHashesVar + hashes0 <- readTVar hashesVar + writeTVar hashesVar $! Set.union (Set.difference newElaboratedHashes uninsertedHashes) hashes0 + recordNotWorking workerCount + +-- | Insert entities into the database, and return the subset that went into temp storage (`temp_entitiy`) rather than +-- of main storage (`object` / `causal`) due to missing dependencies. +insertEntities :: NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT) -> Sqlite.Transaction (Set Hash32) +insertEntities entities = + NEMap.toList entities & foldMapM \(hash, entity) -> + upsertEntitySomewhere hash entity <&> \case + Q.EntityInMainStorage -> Set.empty + Q.EntityInTempStorage -> Set.singleton hash + +------------------------------------------------------------------------------------------------------------------------ +-- Get causal hash by path + +-- | Get the causal hash of a path hosted on Unison Share. +getCausalHashByPath :: + -- | The Unison Share URL. + BaseUrl -> + Share.Path -> + Cli (Either (SyncError GetCausalHashByPathError) (Maybe Share.HashJWT)) +getCausalHashByPath unisonShareUrl repoPath = do + Cli.Env {authHTTPClient} <- ask + liftIO (httpGetCausalHashByPath authHTTPClient unisonShareUrl (Share.GetCausalHashByPathRequest repoPath)) <&> \case + Left err -> Left (TransportError err) + Right (Share.GetCausalHashByPathSuccess maybeHashJwt) -> Right maybeHashJwt + Right (Share.GetCausalHashByPathNoReadPermission _) -> + Left (SyncError (GetCausalHashByPathErrorNoReadPermission repoPath)) + Right (Share.GetCausalHashByPathInvalidRepoInfo err repoInfo) -> + Left (SyncError (GetCausalHashByPathErrorInvalidRepoInfo err repoInfo)) + Right Share.GetCausalHashByPathUserNotFound -> + Left (SyncError $ GetCausalHashByPathErrorUserNotFound (Share.pathRepoInfo repoPath)) + +------------------------------------------------------------------------------------------------------------------------ +-- Upload entities + +data UploadDispatcherJob + = UploadDispatcherReturnFailure (SyncError Share.UploadEntitiesError) + | UploadDispatcherForkWorkerWhenAvailable (NESet Hash32) + | UploadDispatcherForkWorker (NESet Hash32) + | UploadDispatcherDone + +-- | Upload a set of entities to Unison Share. If the server responds that it cannot yet store any hash(es) due to +-- missing dependencies, send those dependencies too, and on and on, until the server stops responding that it's missing +-- anything. +-- +-- Returns true on success, false on failure (because the user does not have write permission). +uploadEntities :: + BaseUrl -> + Share.RepoInfo -> + NESet Hash32 -> + (Int -> IO ()) -> + Cli (Either (SyncError Share.UploadEntitiesError) ()) +uploadEntities unisonShareUrl repoInfo hashes0 uploadedCallback = do + Cli.Env {authHTTPClient, codebase} <- ask + + liftIO do + hashesVar <- newTVarIO (NESet.toSet hashes0) + -- Semantically, this is the set of hashes we've uploaded so far, but we do delete from it when it's safe to, so it + -- doesn't grow unbounded. It's used to filter out hashes that would be duplicate uploads: the server, when + -- responding to any particular upload request, may declare that it still needs some hashes that we're in the + -- process of uploading from another thread. + dedupeVar <- newTVarIO Set.empty + nextWorkerIdVar <- newTVarIO 0 + workersVar <- newTVarIO Set.empty + workerFailedVar <- newEmptyTMVarIO + + Ki.scoped \scope -> + dispatcher + scope + authHTTPClient + (Codebase.runTransaction codebase) + hashesVar + dedupeVar + nextWorkerIdVar + workersVar + workerFailedVar + where + dispatcher :: + Ki.Scope -> + AuthenticatedHttpClient -> + (forall a. Sqlite.Transaction a -> IO a) -> + TVar (Set Hash32) -> + TVar (Set Hash32) -> + TVar Int -> + TVar (Set Int) -> + TMVar (SyncError Share.UploadEntitiesError) -> + IO (Either (SyncError Share.UploadEntitiesError) ()) + dispatcher scope httpClient runTransaction hashesVar dedupeVar nextWorkerIdVar workersVar workerFailedVar = do + loop + where + loop :: IO (Either (SyncError Share.UploadEntitiesError) ()) + loop = + doJob [checkForFailureMode, dispatchWorkMode, checkIfDoneMode] + + doJob :: [STM UploadDispatcherJob] -> IO (Either (SyncError Share.UploadEntitiesError) ()) + doJob jobs = + atomically (asum jobs) >>= \case + UploadDispatcherReturnFailure err -> pure (Left err) + UploadDispatcherForkWorkerWhenAvailable hashes -> doJob [forkWorkerMode hashes, checkForFailureMode] + UploadDispatcherForkWorker hashes -> do + workerId <- + atomically do + workerId <- readTVar nextWorkerIdVar + writeTVar nextWorkerIdVar $! workerId + 1 + modifyTVar' workersVar (Set.insert workerId) + pure workerId + _ <- + Ki.fork @() scope do + worker httpClient runTransaction hashesVar dedupeVar workersVar workerFailedVar workerId hashes + loop + UploadDispatcherDone -> pure (Right ()) + + checkForFailureMode :: STM UploadDispatcherJob + checkForFailureMode = do + err <- readTMVar workerFailedVar + pure (UploadDispatcherReturnFailure err) + + dispatchWorkMode :: STM UploadDispatcherJob + dispatchWorkMode = do + hashes <- readTVar hashesVar + when (Set.null hashes) retry + let (hashes1, hashes2) = Set.splitAt syncChunkSize hashes + modifyTVar' dedupeVar (Set.union hashes1) + writeTVar hashesVar hashes2 + pure (UploadDispatcherForkWorkerWhenAvailable (NESet.unsafeFromSet hashes1)) + + forkWorkerMode :: NESet Hash32 -> STM UploadDispatcherJob + forkWorkerMode hashes = do + workers <- readTVar workersVar + when (Set.size workers >= maxSimultaneousPushWorkers) retry + pure (UploadDispatcherForkWorker hashes) + + checkIfDoneMode :: STM UploadDispatcherJob + checkIfDoneMode = do + workers <- readTVar workersVar + when (not (Set.null workers)) retry + pure UploadDispatcherDone + + worker :: + AuthenticatedHttpClient -> + (forall a. Sqlite.Transaction a -> IO a) -> + TVar (Set Hash32) -> + TVar (Set Hash32) -> + TVar (Set Int) -> + TMVar (SyncError Share.UploadEntitiesError) -> + Int -> + NESet Hash32 -> + IO () + worker httpClient runTransaction hashesVar dedupeVar workersVar workerFailedVar workerId hashes = do + entities <- + fmap NEMap.fromAscList do + runTransaction do + for (NESet.toAscList hashes) \hash -> do + entity <- expectEntity hash + pure (hash, entity) + + result <- + httpUploadEntities httpClient unisonShareUrl Share.UploadEntitiesRequest {entities, repoInfo} <&> \case + Left err -> Left (TransportError err) + Right response -> + case response of + Share.UploadEntitiesSuccess -> Right Set.empty + Share.UploadEntitiesFailure err -> + case err of + Share.UploadEntitiesError'NeedDependencies (Share.NeedDependencies moreHashes) -> + Right (NESet.toSet moreHashes) + err -> Left (SyncError err) + + case result of + Left err -> void (atomically (tryPutTMVar workerFailedVar err)) + Right moreHashes -> do + uploadedCallback (NESet.size hashes) + maybeYoungestWorkerThatWasAlive <- + atomically do + -- Record ourselves as "dead". The only work we have left to do is remove the hashes we just uploaded from + -- the `dedupe` set, but whether or not we are "alive" is relevant only to: + -- + -- - The main dispatcher thread, which terminates when there are no more hashes to upload, and no alive + -- workers. It is not important for us to delete from the `dedupe` set in this case. + -- + -- - Other worker threads, each of which independently decides when it is safe to delete the set of + -- hashes they just uploaded from the `dedupe` set (as we are doing now). + !workers <- Set.delete workerId <$> readTVar workersVar + writeTVar workersVar workers + -- Add more work (i.e. hashes to upload) to the work queue (really a work set), per the response we just + -- got from the server. Remember to only add hashes that aren't in the `dedupe` set (see the comment on + -- the dedupe set above for more info). + when (not (Set.null moreHashes)) do + dedupe <- readTVar dedupeVar + hashes0 <- readTVar hashesVar + writeTVar hashesVar $! Set.union (Set.difference moreHashes dedupe) hashes0 + pure (Set.lookupMax workers) + -- Block until we are sure that the server does not have any uncommitted transactions that see a version of + -- the database that does not include the entities we just uploaded. After that point, it's fine to remove the + -- hashes of the entities we just uploaded from the `dedupe` set, because they will never be relevant for any + -- subsequent deduping operations. If we didn't delete from the `dedupe` set, this algorithm would still be + -- correct, it would just use an unbounded amount of memory to remember all the hashes we've uploaded so far. + whenJust maybeYoungestWorkerThatWasAlive \youngestWorkerThatWasAlive -> do + atomically do + workers <- readTVar workersVar + whenJust (Set.lookupMin workers) \oldestWorkerAlive -> + when (oldestWorkerAlive <= youngestWorkerThatWasAlive) retry + atomically (modifyTVar' dedupeVar (`Set.difference` (NESet.toSet hashes))) + +------------------------------------------------------------------------------------------------------------------------ +-- Database operations + +-- | "Elaborate" a set of `temp_entity` hashes. +-- +-- For each hash, then we ought to instead download its missing dependencies (which themselves are +-- elaborated by this same procedure, in case we have any of *them* already in temp storage, too. +-- 3. If it's in main storage, we should ignore it. +-- +-- In the end, we return a set of hashes that correspond to entities we actually need to download. +elaborateHashes :: NESet Hash32 -> Sqlite.Transaction (Set Share.HashJWT) +elaborateHashes hashes = + Q.elaborateHashes (NESet.toList hashes) <&> Set.fromList . coerce @[Text] @[Share.HashJWT] + +-- | Upsert a downloaded entity "somewhere" - +-- +-- 1. Nowhere if we already had the entity (in main or temp storage). +-- 2. In main storage if we already have all of its dependencies in main storage. +-- 3. In temp storage otherwise. +upsertEntitySomewhere :: + Hash32 -> + Share.Entity Text Hash32 Share.HashJWT -> + Sqlite.Transaction Q.EntityLocation +upsertEntitySomewhere hash entity = + Q.entityLocation hash >>= \case + Just location -> pure location + Nothing -> do + missingDependencies1 :: Map Hash32 Share.HashJWT <- + Share.entityDependencies entity + & foldMapM + ( \hashJwt -> do + let hash = Share.hashJWTHash hashJwt + Q.entityExists hash <&> \case + True -> Map.empty + False -> Map.singleton hash hashJwt + ) + case NEMap.nonEmptyMap missingDependencies1 of + Nothing -> do + _id <- Q.saveTempEntityInMain v2HashHandle hash (entityToTempEntity Share.hashJWTHash entity) + pure Q.EntityInMainStorage + Just missingDependencies -> do + Q.insertTempEntity + hash + (entityToTempEntity Share.hashJWTHash entity) + ( coerce + @(NEMap Hash32 Share.HashJWT) + @(NEMap Hash32 Text) + missingDependencies + ) + pure Q.EntityInTempStorage + +------------------------------------------------------------------------------------------------------------------------ +-- HTTP calls + +httpGetCausalHashByPath :: + Auth.AuthenticatedHttpClient -> + BaseUrl -> + Share.GetCausalHashByPathRequest -> + IO (Either CodeserverTransportError Share.GetCausalHashByPathResponse) +httpDownloadEntities :: + Auth.AuthenticatedHttpClient -> + BaseUrl -> + Share.DownloadEntitiesRequest -> + IO (Either CodeserverTransportError Share.DownloadEntitiesResponse) +httpUploadEntities :: + Auth.AuthenticatedHttpClient -> + BaseUrl -> + Share.UploadEntitiesRequest -> + IO (Either CodeserverTransportError Share.UploadEntitiesResponse) +( httpGetCausalHashByPath, + httpDownloadEntities, + httpUploadEntities + ) = + let ( httpGetCausalHashByPath + Servant.:<|> httpDownloadEntities + Servant.:<|> httpUploadEntities + ) = + let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> Share.API) + pp = Proxy + in Servant.hoistClient pp hoist (Servant.client pp) + in ( go httpGetCausalHashByPath, + go httpDownloadEntities, + go httpUploadEntities + ) + where + hoist :: Servant.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a + hoist m = do + clientEnv <- Reader.ask + liftIO (Servant.runClientM m clientEnv) >>= \case + Right a -> pure a + Left err -> do + Debug.debugLogM Debug.Sync (show err) + throwError case err of + Servant.FailureResponse _req resp -> + case HTTP.statusCode $ Servant.responseStatusCode resp of + 401 -> Unauthenticated (Servant.baseUrl clientEnv) + -- The server should provide semantically relevant permission-denied messages + -- when possible, but this should catch any we miss. + 403 -> PermissionDenied (Text.Lazy.toStrict . Text.Lazy.decodeUtf8 $ Servant.responseBody resp) + 408 -> Timeout + 429 -> RateLimitExceeded + 504 -> Timeout + _ -> UnexpectedResponse resp + Servant.DecodeFailure msg resp -> DecodeFailure msg resp + Servant.UnsupportedContentType _ct resp -> UnexpectedResponse resp + Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp + Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) + + go :: + (req -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) resp) -> + Auth.AuthenticatedHttpClient -> + BaseUrl -> + req -> + IO (Either CodeserverTransportError resp) + go f (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = + (Servant.mkClientEnv httpClient unisonShareUrl) + { Servant.makeClientRequest = \url request -> + -- Disable client-side timeouts + (Servant.defaultMakeClientRequest url request) + <&> \r -> + r + { Http.Client.responseTimeout = Http.Client.responseTimeoutNone + } + } + & runReaderT (f req) + & runExceptT + + + diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 6a3df61e73..02c4926a5d 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -152,6 +152,7 @@ library Unison.Share.ExpectedHashMismatches Unison.Share.Sync Unison.Share.Sync.Types + Unison.Share.SyncV2 Unison.Util.HTTP Unison.Version hs-source-dirs: diff --git a/unison-share-api/package.yaml b/unison-share-api/package.yaml index 132a623041..795268354f 100644 --- a/unison-share-api/package.yaml +++ b/unison-share-api/package.yaml @@ -8,6 +8,19 @@ library: - condition: false other-modules: Paths_unison_share_api +tests: + unison-share-api-tests: + when: + - condition: false + other-modules: Paths_unison_share_api + dependencies: + - code-page + - easytest + - hedgehog + - unison-share-api + main: Main.hs + source-dirs: tests + dependencies: - aeson >= 2.0.0.0 - async @@ -15,6 +28,7 @@ dependencies: - binary - bytes - bytestring + - cborg - containers - cryptonite - Diff @@ -35,6 +49,7 @@ dependencies: - nonempty-containers - openapi3 - regex-tdfa + - serialise - servant - servant-docs - servant-openapi3 diff --git a/unison-share-api/src/Unison/Server/Orphans.hs b/unison-share-api/src/Unison/Server/Orphans.hs index bab2d26fef..020d96c4fc 100644 --- a/unison-share-api/src/Unison/Server/Orphans.hs +++ b/unison-share-api/src/Unison/Server/Orphans.hs @@ -3,6 +3,10 @@ module Unison.Server.Orphans where +import Codec.CBOR.Encoding qualified as CBOR +import Codec.Serialise (Serialise (..)) +import Codec.Serialise qualified as CBOR +import Codec.Serialise.Class qualified as CBOR import Control.Lens import Data.Aeson import Data.Aeson qualified as Aeson @@ -12,9 +16,19 @@ import Data.List.NonEmpty (NonEmpty (..)) import Data.OpenApi import Data.Proxy import Data.Text qualified as Text +import Data.Vector qualified as Vector import Servant import Servant.Docs (DocCapture (DocCapture), DocQueryParam (..), ParamKind (..), ToCapture (..), ToParam (..)) import U.Codebase.HashTags +import U.Codebase.Sqlite.Branch.Format qualified as BranchFormat +import U.Codebase.Sqlite.Causal qualified as SqliteCausal +import U.Codebase.Sqlite.Decl.Format qualified as DeclFormat +import U.Codebase.Sqlite.Entity qualified as Entity +import U.Codebase.Sqlite.LocalIds qualified as LocalIds +import U.Codebase.Sqlite.Patch.Format qualified as PatchFormat +import U.Codebase.Sqlite.TempEntity (TempEntity) +import U.Codebase.Sqlite.Term.Format qualified as TermFormat +import U.Util.Base32Hex (Base32Hex (..)) import Unison.Codebase.Editor.DisplayObject import Unison.Codebase.Path qualified as Path import Unison.Codebase.Path.Parse qualified as Path @@ -25,6 +39,7 @@ import Unison.ConstructorType qualified as CT import Unison.Core.Project (ProjectBranchName (..), ProjectName (..)) import Unison.Hash (Hash (..)) import Unison.Hash qualified as Hash +import Unison.Hash32 (Hash32 (..)) import Unison.HashQualified qualified as HQ import Unison.HashQualifiedPrime qualified as HQ' import Unison.Name (Name) @@ -424,3 +439,51 @@ instance ToCapture (Capture "branch-name" ProjectBranchName) where "The name of a branch in a project. E.g. @handle/name" deriving via Text instance ToJSON ProjectBranchName + +-- CBOR encodings + +deriving via Text instance Serialise Hash32 + +termComponentTag, declComponentTag, patchTag, namespaceTag, causalTag :: CBOR.Encoding +(termComponentTag, declComponentTag, patchTag, namespaceTag, causalTag) = over each CBOR.encodeWord (0, 1, 2, 3, 4) + +instance Serialise TempEntity where + encode = \case + Entity.TC (TermFormat.SyncTerm (TermFormat.SyncLocallyIndexedComponent elements)) -> + termComponentTag + <> encodeVectorWith encodeElement elements + Entity.DC (DeclFormat.SyncDecl (DeclFormat.SyncLocallyIndexedComponent elements)) -> + declComponentTag + <> encodeVectorWith encodeElement elements + Entity.P (PatchFormat.SyncDiff {}) -> error "Serializing Diffs are not supported" + Entity.P (PatchFormat.SyncFull (PatchFormat.LocalIds {patchTextLookup, patchHashLookup, patchDefnLookup}) bytes) -> + patchTag + <> CBOR.encodeVector patchTextLookup + <> CBOR.encodeVector patchHashLookup + <> CBOR.encodeVector patchDefnLookup + <> CBOR.encodeBytes bytes + Entity.N (BranchFormat.SyncDiff {}) -> error "Serializing Diffs are not supported" + Entity.N (BranchFormat.SyncFull (BranchFormat.LocalIds {branchTextLookup, branchDefnLookup, branchPatchLookup, branchChildLookup}) (BranchFormat.LocalBranchBytes bytes)) -> + namespaceTag + <> CBOR.encodeVector branchTextLookup + <> CBOR.encodeVector branchDefnLookup + <> CBOR.encodeVector branchPatchLookup + <> CBOR.encodeVector branchChildLookup + <> CBOR.encodeBytes bytes + Entity.C (SqliteCausal.SyncCausalFormat {valueHash, parents}) -> + causalTag + <> CBOR.encode valueHash + <> CBOR.encodeVector parents + where + encodeElement :: (Serialise t, Serialise d) => (LocalIds.LocalIds' t d, ByteString) -> CBOR.Encoding + encodeElement (LocalIds.LocalIds {textLookup, defnLookup}, bytes) = + CBOR.encodeVector textLookup + <> CBOR.encodeVector defnLookup + <> CBOR.encodeBytes bytes + + decode = error "Decoding Share.Entity not supported" + +encodeVectorWith :: (a -> CBOR.Encoding) -> Vector.Vector a -> CBOR.Encoding +encodeVectorWith f xs = + CBOR.encodeListLen (fromIntegral $ Vector.length xs) + <> (foldr (\a b -> f a <> b) mempty xs) diff --git a/unison-share-api/tests/Main.hs b/unison-share-api/tests/Main.hs new file mode 100644 index 0000000000..232452d79b --- /dev/null +++ b/unison-share-api/tests/Main.hs @@ -0,0 +1,23 @@ +module Main where + +import EasyTest +import System.Environment (getArgs) +import System.IO +import System.IO.CodePage (withCP65001) +import Unison.Test.Sync.Roundtrip qualified as SyncRoundtrip + +test :: Test () +test = + tests + [ SyncRoundtrip.test + ] + +main :: IO () +main = withCP65001 do + args <- getArgs + mapM_ (`hSetEncoding` utf8) [stdout, stdin, stderr] + case args of + [] -> runOnly "" test + [prefix] -> runOnly prefix test + [seed, prefix] -> rerunOnly (read seed) prefix test + _ -> error "expected no args, a prefix, or a seed and a prefix" diff --git a/unison-share-api/tests/Unison/Test/Sync/Gen.hs b/unison-share-api/tests/Unison/Test/Sync/Gen.hs new file mode 100644 index 0000000000..ffdf42b035 --- /dev/null +++ b/unison-share-api/tests/Unison/Test/Sync/Gen.hs @@ -0,0 +1,52 @@ +-- | Hedghog generators for Sync types. +module Unison.Test.Sync.Gen + ( genTempEntity, + ) +where + +import Data.ByteString (ByteString) +import Data.ByteString.Short qualified as BShort +import Data.Text (Text) +import Data.Vector qualified as Vector +import Hedgehog +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range +import U.Codebase.Sqlite.Entity qualified as Entity +import U.Codebase.Sqlite.LocalIds qualified as LocalIds +import U.Codebase.Sqlite.TempEntity (TempEntity) +import U.Codebase.Sqlite.Term.Format qualified as TermFormat +import Unison.Hash (Hash (..)) +import Unison.Hash32 (Hash32) +import Unison.Hash32 qualified as Hash32 + +genTempEntity :: Gen TempEntity +genTempEntity = do + Gen.choice + [ Entity.TC <$> genSyncTermFormat + ] + +genSyncTermFormat :: Gen (TermFormat.SyncTermFormat' Text Hash32) +genSyncTermFormat = do + elems <- Gen.list (Range.linear 1 4) do + localIds <- genLocalIds + term <- genBodyBytes + pure (localIds, term) + pure $ TermFormat.SyncTerm $ TermFormat.SyncLocallyIndexedComponent $ Vector.fromList elems + +genBodyBytes :: Gen ByteString +genBodyBytes = Gen.bytes (Range.linear 0 100) + +genLocalIds :: Gen (LocalIds.LocalIds' Text Hash32) +genLocalIds = do + textLookup <- Vector.fromList <$> Gen.list (Range.linear 0 10) genTextLiteral + defnLookup <- Vector.fromList <$> Gen.list (Range.linear 0 10) genHash32 + pure $ LocalIds.LocalIds {textLookup, defnLookup} + +genHash32 :: Gen Hash32 +genHash32 = Hash32.fromHash <$> genHash + +genHash :: Gen Hash +genHash = Hash . BShort.toShort <$> Gen.bytes (Range.singleton 64) + +genTextLiteral :: Gen Text +genTextLiteral = Gen.text (Range.linear 0 100) Gen.unicodeAll diff --git a/unison-share-api/tests/Unison/Test/Sync/Roundtrip.hs b/unison-share-api/tests/Unison/Test/Sync/Roundtrip.hs new file mode 100644 index 0000000000..fb83748817 --- /dev/null +++ b/unison-share-api/tests/Unison/Test/Sync/Roundtrip.hs @@ -0,0 +1,29 @@ +{-# LANGUAGE OverloadedStrings #-} + +-- | Roundtrip tests for types used in sync. +module Unison.Test.Sync.Roundtrip (Unison.Test.Sync.Roundtrip.test) where + +import Codec.Serialise qualified as Serialise +import EasyTest qualified as EasyTest +import Hedgehog hiding (Test, test) +import Unison.Prelude +import Unison.Server.Orphans () +import Unison.Test.Sync.Gen qualified as Gen + +test :: EasyTest.Test () +test = + void . EasyTest.scope "syncv2.roundtrip" $ do + success <- + EasyTest.io $ + checkParallel $ + Group + "syncv2.roundtrip" + [ ("termComponentRoundtrip", termComponentRoundtrip) + ] + EasyTest.expect success + +termComponentRoundtrip :: Property +termComponentRoundtrip = + property $ do + te <- forAll $ Gen.genTempEntity + (Serialise.deserialise . Serialise.serialise $ te) === te diff --git a/unison-share-api/unison-share-api.cabal b/unison-share-api/unison-share-api.cabal index e3878a9e7f..20f22f054a 100644 --- a/unison-share-api/unison-share-api.cabal +++ b/unison-share-api/unison-share-api.cabal @@ -90,6 +90,7 @@ library , binary , bytes , bytestring + , cborg , containers , cryptonite , directory @@ -109,6 +110,7 @@ library , nonempty-containers , openapi3 , regex-tdfa + , serialise , servant , servant-auth , servant-docs @@ -142,3 +144,109 @@ library , warp , yaml default-language: Haskell2010 + +test-suite unison-share-api-tests + type: exitcode-stdio-1.0 + main-is: Main.hs + other-modules: + Unison.Test.Sync.Gen + Unison.Test.Sync.Roundtrip + hs-source-dirs: + tests + default-extensions: + BlockArguments + ConstraintKinds + DeriveAnyClass + DeriveFunctor + DeriveGeneric + DerivingStrategies + DerivingVia + DoAndIfThenElse + DuplicateRecordFields + FlexibleContexts + FlexibleInstances + GADTs + GeneralizedNewtypeDeriving + KindSignatures + ImportQualifiedPost + LambdaCase + MultiParamTypeClasses + NamedFieldPuns + NumericUnderscores + OverloadedStrings + PatternSynonyms + RankNTypes + ScopedTypeVariables + StandaloneDeriving + TupleSections + TypeApplications + TypeOperators + ViewPatterns + ImportQualifiedPost + ghc-options: -Wall + build-depends: + Diff + , aeson >=2.0.0.0 + , async + , base + , binary + , bytes + , bytestring + , cborg + , code-page + , containers + , cryptonite + , directory + , easytest + , errors + , extra + , filepath + , fuzzyfind + , hedgehog + , http-media + , http-types + , jose + , jwt + , lens + , lucid + , memory + , mtl + , mwc-random + , nonempty-containers + , openapi3 + , regex-tdfa + , serialise + , servant + , servant-auth + , servant-docs + , servant-openapi3 + , servant-server + , text + , transformers + , unison-codebase + , unison-codebase-sqlite + , unison-codebase-sqlite-hashing-v2 + , unison-core + , unison-core1 + , unison-hash + , unison-hash-orphans-aeson + , unison-hashing-v2 + , unison-parser-typechecker + , unison-prelude + , unison-pretty-printer + , unison-share-api + , unison-share-projects-api + , unison-sqlite + , unison-syntax + , unison-util-base32hex + , unison-util-relation + , unliftio + , unordered-containers + , uri-encode + , utf8-string + , vector + , wai + , wai-cors + , warp + , yaml + default-language: Haskell2010 From 32285825a91fa3c3ad06d08b6b8f04a3041b97ca Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 5 Aug 2024 15:35:37 -0700 Subject: [PATCH 02/52] Fill in remaining instances for temp entity format --- .../tests/Unison/Test/Sync/Gen.hs | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/unison-share-api/tests/Unison/Test/Sync/Gen.hs b/unison-share-api/tests/Unison/Test/Sync/Gen.hs index ffdf42b035..8e45bc1445 100644 --- a/unison-share-api/tests/Unison/Test/Sync/Gen.hs +++ b/unison-share-api/tests/Unison/Test/Sync/Gen.hs @@ -11,8 +11,12 @@ import Data.Vector qualified as Vector import Hedgehog import Hedgehog.Gen qualified as Gen import Hedgehog.Range qualified as Range +import U.Codebase.Sqlite.Branch.Format qualified as BranchFormat +import U.Codebase.Sqlite.Causal qualified as CausalFormat +import U.Codebase.Sqlite.Decl.Format qualified as DeclFormat import U.Codebase.Sqlite.Entity qualified as Entity import U.Codebase.Sqlite.LocalIds qualified as LocalIds +import U.Codebase.Sqlite.Patch.Format qualified as PatchFormat import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.Term.Format qualified as TermFormat import Unison.Hash (Hash (..)) @@ -22,7 +26,11 @@ import Unison.Hash32 qualified as Hash32 genTempEntity :: Gen TempEntity genTempEntity = do Gen.choice - [ Entity.TC <$> genSyncTermFormat + [ Entity.TC <$> genSyncTermFormat, + Entity.DC <$> genSyncDeclFormat, + Entity.P <$> genPatchFormat, + Entity.N <$> genNamespaceFormat, + Entity.C <$> genCausalFormat ] genSyncTermFormat :: Gen (TermFormat.SyncTermFormat' Text Hash32) @@ -33,6 +41,39 @@ genSyncTermFormat = do pure (localIds, term) pure $ TermFormat.SyncTerm $ TermFormat.SyncLocallyIndexedComponent $ Vector.fromList elems +genSyncDeclFormat :: Gen (DeclFormat.SyncDeclFormat' Text Hash32) +genSyncDeclFormat = do + elems <- Gen.list (Range.linear 1 4) do + localIds <- genLocalIds + decl <- genBodyBytes + pure (localIds, decl) + pure $ DeclFormat.SyncDecl $ DeclFormat.SyncLocallyIndexedComponent $ Vector.fromList elems + +genPatchFormat :: Gen (PatchFormat.SyncPatchFormat' Hash32 Text Hash32 Hash32) +genPatchFormat = do + patchTextLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genTextLiteral + patchHashLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genHash32 + patchDefnLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genHash32 + let localIds = PatchFormat.LocalIds {patchTextLookup, patchHashLookup, patchDefnLookup} + body <- genBodyBytes + pure $ PatchFormat.SyncFull localIds body + +genNamespaceFormat :: Gen (BranchFormat.SyncBranchFormat' Hash32 Text Hash32 Hash32 (Hash32, Hash32)) +genNamespaceFormat = do + branchTextLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genTextLiteral + branchDefnLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genHash32 + branchPatchLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) genHash32 + branchChildLookup <- Vector.fromList <$> Gen.list (Range.linear 0 5) ((,) <$> genHash32 <*> genHash32) + let branchLocalIds = BranchFormat.LocalIds {branchTextLookup, branchDefnLookup, branchPatchLookup, branchChildLookup} + body <- BranchFormat.LocalBranchBytes <$> genBodyBytes + pure $ BranchFormat.SyncFull branchLocalIds body + +genCausalFormat :: Gen (CausalFormat.SyncCausalFormat' Hash32 Hash32) +genCausalFormat = do + valueHash <- genHash32 + parents <- Vector.fromList <$> Gen.list (Range.linear 0 5) genHash32 + pure $ CausalFormat.SyncCausalFormat {valueHash, parents} + genBodyBytes :: Gen ByteString genBodyBytes = Gen.bytes (Range.linear 0 100) From 53b1701deecb084ebd5bc8fa57406dfe2668fb56 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 5 Aug 2024 15:50:24 -0700 Subject: [PATCH 03/52] Implement CBOR decoder for TempEntities --- unison-share-api/src/Unison/Server/Orphans.hs | 92 +++++++++++++++---- 1 file changed, 75 insertions(+), 17 deletions(-) diff --git a/unison-share-api/src/Unison/Server/Orphans.hs b/unison-share-api/src/Unison/Server/Orphans.hs index 020d96c4fc..6a12fc5a5b 100644 --- a/unison-share-api/src/Unison/Server/Orphans.hs +++ b/unison-share-api/src/Unison/Server/Orphans.hs @@ -1,8 +1,10 @@ {-# LANGUAGE DataKinds #-} +{-# LANGUAGE MultiWayIf #-} {-# OPTIONS_GHC -Wno-orphans #-} module Unison.Server.Orphans where +import Codec.CBOR.Decoding qualified as CBOR import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) import Codec.Serialise qualified as CBOR @@ -16,6 +18,7 @@ import Data.List.NonEmpty (NonEmpty (..)) import Data.OpenApi import Data.Proxy import Data.Text qualified as Text +import Data.Vector (Vector) import Data.Vector qualified as Vector import Servant import Servant.Docs (DocCapture (DocCapture), DocQueryParam (..), ParamKind (..), ToCapture (..), ToParam (..)) @@ -444,44 +447,99 @@ deriving via Text instance ToJSON ProjectBranchName deriving via Text instance Serialise Hash32 -termComponentTag, declComponentTag, patchTag, namespaceTag, causalTag :: CBOR.Encoding -(termComponentTag, declComponentTag, patchTag, namespaceTag, causalTag) = over each CBOR.encodeWord (0, 1, 2, 3, 4) +data SyncTag + = TermComponentTag + | DeclComponentTag + | PatchTag + | NamespaceTag + | CausalTag + deriving (Eq, Show) + +instance Serialise SyncTag where + encode = \case + TermComponentTag -> CBOR.encodeWord 0 + DeclComponentTag -> CBOR.encodeWord 1 + PatchTag -> CBOR.encodeWord 2 + NamespaceTag -> CBOR.encodeWord 3 + CausalTag -> CBOR.encodeWord 4 + + decode = do + tag <- CBOR.decodeWord + case tag of + 0 -> pure TermComponentTag + 1 -> pure DeclComponentTag + 2 -> pure PatchTag + 3 -> pure NamespaceTag + 4 -> pure CausalTag + _ -> fail $ "Unknown tag: " <> show tag + +newtype ComponentBody t d = ComponentBody {unComponentBody :: (LocalIds.LocalIds' t d, ByteString)} + +instance (Serialise t, Serialise d) => Serialise (ComponentBody t d) where + encode (ComponentBody (LocalIds.LocalIds {textLookup, defnLookup}, bytes)) = + CBOR.encodeVector textLookup + <> CBOR.encodeVector defnLookup + <> CBOR.encodeBytes bytes + + decode = do + textLookup <- CBOR.decodeVector + defnLookup <- CBOR.decodeVector + bytes <- CBOR.decodeBytes + pure $ ComponentBody (LocalIds.LocalIds {textLookup, defnLookup}, bytes) instance Serialise TempEntity where encode = \case Entity.TC (TermFormat.SyncTerm (TermFormat.SyncLocallyIndexedComponent elements)) -> - termComponentTag - <> encodeVectorWith encodeElement elements + CBOR.encode TermComponentTag + <> CBOR.encodeVector (coerce @(Vector (LocalIds.LocalIds' Text Hash32, ByteString)) @(Vector (ComponentBody Text Hash32)) elements) Entity.DC (DeclFormat.SyncDecl (DeclFormat.SyncLocallyIndexedComponent elements)) -> - declComponentTag - <> encodeVectorWith encodeElement elements + CBOR.encode DeclComponentTag + <> CBOR.encodeVector (coerce @(Vector (LocalIds.LocalIds' Text Hash32, ByteString)) @(Vector (ComponentBody Text Hash32)) elements) Entity.P (PatchFormat.SyncDiff {}) -> error "Serializing Diffs are not supported" Entity.P (PatchFormat.SyncFull (PatchFormat.LocalIds {patchTextLookup, patchHashLookup, patchDefnLookup}) bytes) -> - patchTag + CBOR.encode PatchTag <> CBOR.encodeVector patchTextLookup <> CBOR.encodeVector patchHashLookup <> CBOR.encodeVector patchDefnLookup <> CBOR.encodeBytes bytes Entity.N (BranchFormat.SyncDiff {}) -> error "Serializing Diffs are not supported" Entity.N (BranchFormat.SyncFull (BranchFormat.LocalIds {branchTextLookup, branchDefnLookup, branchPatchLookup, branchChildLookup}) (BranchFormat.LocalBranchBytes bytes)) -> - namespaceTag + CBOR.encode NamespaceTag <> CBOR.encodeVector branchTextLookup <> CBOR.encodeVector branchDefnLookup <> CBOR.encodeVector branchPatchLookup <> CBOR.encodeVector branchChildLookup <> CBOR.encodeBytes bytes Entity.C (SqliteCausal.SyncCausalFormat {valueHash, parents}) -> - causalTag + CBOR.encode CausalTag <> CBOR.encode valueHash <> CBOR.encodeVector parents - where - encodeElement :: (Serialise t, Serialise d) => (LocalIds.LocalIds' t d, ByteString) -> CBOR.Encoding - encodeElement (LocalIds.LocalIds {textLookup, defnLookup}, bytes) = - CBOR.encodeVector textLookup - <> CBOR.encodeVector defnLookup - <> CBOR.encodeBytes bytes - - decode = error "Decoding Share.Entity not supported" + + decode = do + CBOR.decode >>= \case + TermComponentTag -> do + elements <- coerce @(Vector (ComponentBody Text Hash32)) @(Vector (LocalIds.LocalIds' Text Hash32, ByteString)) <$> CBOR.decodeVector + pure $ Entity.TC (TermFormat.SyncTerm (TermFormat.SyncLocallyIndexedComponent elements)) + DeclComponentTag -> do + elements <- coerce @(Vector (ComponentBody Text Hash32)) @(Vector (LocalIds.LocalIds' Text Hash32, ByteString)) <$> CBOR.decodeVector + pure $ Entity.DC (DeclFormat.SyncDecl (DeclFormat.SyncLocallyIndexedComponent elements)) + PatchTag -> do + patchTextLookup <- CBOR.decodeVector + patchHashLookup <- CBOR.decodeVector + patchDefnLookup <- CBOR.decodeVector + bytes <- CBOR.decodeBytes + pure $ Entity.P (PatchFormat.SyncFull (PatchFormat.LocalIds {patchTextLookup, patchHashLookup, patchDefnLookup}) bytes) + NamespaceTag -> do + branchTextLookup <- CBOR.decodeVector + branchDefnLookup <- CBOR.decodeVector + branchPatchLookup <- CBOR.decodeVector + branchChildLookup <- CBOR.decodeVector + bytes <- CBOR.decodeBytes + pure $ Entity.N (BranchFormat.SyncFull (BranchFormat.LocalIds {branchTextLookup, branchDefnLookup, branchPatchLookup, branchChildLookup}) (BranchFormat.LocalBranchBytes bytes)) + CausalTag -> do + valueHash <- CBOR.decode + parents <- CBOR.decodeVector + pure $ Entity.C (SqliteCausal.SyncCausalFormat {valueHash, parents}) encodeVectorWith :: (a -> CBOR.Encoding) -> Vector.Vector a -> CBOR.Encoding encodeVectorWith f xs = From 97858843840420ea84ff32719f352f37377b08f3 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 6 Aug 2024 09:03:57 -0700 Subject: [PATCH 04/52] WIP syncv2 client --- unison-cli/src/Unison/Share/SyncV2.hs | 92 +++++++++---------- unison-share-api/src/Unison/Server/Orphans.hs | 3 + unison-share-api/src/Unison/SyncV2/API.hs | 40 ++++++++ unison-share-api/src/Unison/SyncV2/Types.hs | 49 ++++++++++ .../src/Unison/Util/Servant/CBOR.hs | 54 +++++++++++ unison-share-api/unison-share-api.cabal | 3 + 6 files changed, 191 insertions(+), 50 deletions(-) create mode 100644 unison-share-api/src/Unison/SyncV2/API.hs create mode 100644 unison-share-api/src/Unison/SyncV2/Types.hs create mode 100644 unison-share-api/src/Unison/Util/Servant/CBOR.hs diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index d79d9cc3b2..edd487c4f6 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -32,12 +32,15 @@ import GHC.IO (unsafePerformIO) import Ki qualified import Network.HTTP.Client qualified as Http.Client import Network.HTTP.Types qualified as HTTP -import Servant.API qualified as Servant ((:<|>) (..), (:>)) +import Servant.API qualified as Servant ((:>)) import Servant.Client (BaseUrl) import Servant.Client qualified as Servant +import Servant.Client.Streaming qualified as ServantStreaming +import Servant.Types.SourceT qualified as Servant import System.Environment (lookupEnv) import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q +import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) import Unison.Auth.HTTPClient (AuthenticatedHttpClient) import Unison.Auth.HTTPClient qualified as Auth @@ -51,33 +54,13 @@ import Unison.Share.API.Hash qualified as Share import Unison.Share.ExpectedHashMismatches (expectedCausalHashMismatches, expectedComponentHashMismatches) import Unison.Share.Sync.Types import Unison.Sqlite qualified as Sqlite -import Unison.Sync.API qualified as Share (API) import Unison.Sync.Common (entityToTempEntity, expectEntity, hash32ToCausalHash) import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share +import Unison.SyncV2.API qualified as SyncV2 +import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) ------------------------------------------------------------------------------------------------------------------------- --- Pile of constants - --- | The maximum number of downloader threads, during a pull. -maxSimultaneousPullDownloaders :: Int -maxSimultaneousPullDownloaders = unsafePerformIO $ do - lookupEnv "UNISON_PULL_WORKERS" <&> \case - Just n -> read n - Nothing -> 5 -{-# NOINLINE maxSimultaneousPullDownloaders #-} - --- | The maximum number of push workers at a time. Each push worker reads from the database and uploads entities. --- Share currently parallelizes on it's own in the backend, and any more than one push worker --- just results in serialization conflicts which slow things down. -maxSimultaneousPushWorkers :: Int -maxSimultaneousPushWorkers = unsafePerformIO $ do - lookupEnv "UNISON_PUSH_WORKERS" <&> \case - Just n -> read n - Nothing -> 1 -{-# NOINLINE maxSimultaneousPushWorkers #-} - syncChunkSize :: Int syncChunkSize = unsafePerformIO $ do lookupEnv "UNISON_SYNC_CHUNK_SIZE" <&> \case @@ -170,6 +153,11 @@ downloadEntities unisonShareUrl repoInfo hashJwt downloadedCallback = do _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) pure (Right ()) +-- | The caller is responsible for forking. +streamCausalAndDependencies :: TBQueue (Hash32, TempEntity) -> IO () +streamCausalAndDependencies queue = do + _ + -- | Validates the provided entities if and only if the environment variable `UNISON_ENTITY_VALIDATION` is set to "true". validateEntities :: NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT) -> Either Share.EntityValidationError () validateEntities entities = @@ -429,14 +417,14 @@ getCausalHashByPath :: Cli (Either (SyncError GetCausalHashByPathError) (Maybe Share.HashJWT)) getCausalHashByPath unisonShareUrl repoPath = do Cli.Env {authHTTPClient} <- ask - liftIO (httpGetCausalHashByPath authHTTPClient unisonShareUrl (Share.GetCausalHashByPathRequest repoPath)) <&> \case + liftIO (httpGetCausalHash authHTTPClient unisonShareUrl (SyncV2.GetCausalHashByPathRequest repoPath)) <&> \case Left err -> Left (TransportError err) - Right (Share.GetCausalHashByPathSuccess maybeHashJwt) -> Right maybeHashJwt - Right (Share.GetCausalHashByPathNoReadPermission _) -> + Right (SyncV2.GetCausalHashByPathSuccess maybeHashJwt) -> Right maybeHashJwt + Right (SyncV2.GetCausalHashByPathNoReadPermission _) -> Left (SyncError (GetCausalHashByPathErrorNoReadPermission repoPath)) - Right (Share.GetCausalHashByPathInvalidRepoInfo err repoInfo) -> + Right (SyncV2.GetCausalHashByPathInvalidRepoInfo err repoInfo) -> Left (SyncError (GetCausalHashByPathErrorInvalidRepoInfo err repoInfo)) - Right Share.GetCausalHashByPathUserNotFound -> + Right SyncV2.GetCausalHashByPathUserNotFound -> Left (SyncError $ GetCausalHashByPathErrorUserNotFound (Share.pathRepoInfo repoPath)) ------------------------------------------------------------------------------------------------------------------------ @@ -665,41 +653,48 @@ upsertEntitySomewhere hash entity = ------------------------------------------------------------------------------------------------------------------------ -- HTTP calls -httpGetCausalHashByPath :: +-- syncV2Client :: SyncV2.Routes (Servant.AsClientT ServantStreaming.ClientM) +-- syncV2Client = ServantStreaming.client SyncV2.api -- remember: api :: Proxy MovieCatalogAPI + +-- syncV2Client :: SyncV2.Routes (Servant.AsClientT ServantStreaming.ClientM) +-- Routes {} = ServantStreaming.client SyncV2.api + +httpGetCausalHash :: Auth.AuthenticatedHttpClient -> BaseUrl -> - Share.GetCausalHashByPathRequest -> - IO (Either CodeserverTransportError Share.GetCausalHashByPathResponse) + SyncV2.GetCausalHashRequest -> + IO (Either CodeserverTransportError SyncV2.GetCausalHashResponse) httpDownloadEntities :: Auth.AuthenticatedHttpClient -> BaseUrl -> - Share.DownloadEntitiesRequest -> - IO (Either CodeserverTransportError Share.DownloadEntitiesResponse) + SyncV2.DownloadEntitiesRequest -> + (IO (Either CodeserverTransportError (Servant.SourceT IO ByteString))) httpUploadEntities :: Auth.AuthenticatedHttpClient -> BaseUrl -> - Share.UploadEntitiesRequest -> - IO (Either CodeserverTransportError Share.UploadEntitiesResponse) -( httpGetCausalHashByPath, + SyncV2.UploadEntitiesRequest -> + IO (Either CodeserverTransportError (Servant.SourceT IO ByteString)) +( httpGetCausalHash, httpDownloadEntities, httpUploadEntities ) = - let ( httpGetCausalHashByPath - Servant.:<|> httpDownloadEntities - Servant.:<|> httpUploadEntities - ) = - let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> Share.API) + let SyncV2.Routes + { getCausalHash, + downloadEntitiesStream, + uploadEntitiesStream + } = + let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> SyncV2.API) pp = Proxy - in Servant.hoistClient pp hoist (Servant.client pp) - in ( go httpGetCausalHashByPath, - go httpDownloadEntities, - go httpUploadEntities + in ServantStreaming.hoistClient pp hoist (ServantStreaming.client SyncV2.api) + in ( go getCausalHash, + go downloadEntitiesStream, + go uploadEntitiesStream ) where - hoist :: Servant.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a + hoist :: ServantStreaming.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a hoist m = do clientEnv <- Reader.ask - liftIO (Servant.runClientM m clientEnv) >>= \case + liftIO (ServantStreaming.runClientM m clientEnv) >>= \case Right a -> pure a Left err -> do Debug.debugLogM Debug.Sync (show err) @@ -737,6 +732,3 @@ httpUploadEntities :: } & runReaderT (f req) & runExceptT - - - diff --git a/unison-share-api/src/Unison/Server/Orphans.hs b/unison-share-api/src/Unison/Server/Orphans.hs index 6a12fc5a5b..c19804f1b2 100644 --- a/unison-share-api/src/Unison/Server/Orphans.hs +++ b/unison-share-api/src/Unison/Server/Orphans.hs @@ -52,6 +52,7 @@ import Unison.Prelude import Unison.Project import Unison.Reference qualified as Reference import Unison.Referent qualified as Referent +import Unison.Share.API.Hash (HashJWT (..)) import Unison.ShortHash (ShortHash) import Unison.ShortHash qualified as SH import Unison.Syntax.HashQualified qualified as HQ (parseText) @@ -447,6 +448,8 @@ deriving via Text instance ToJSON ProjectBranchName deriving via Text instance Serialise Hash32 +deriving via Text instance Serialise HashJWT + data SyncTag = TermComponentTag | DeclComponentTag diff --git a/unison-share-api/src/Unison/SyncV2/API.hs b/unison-share-api/src/Unison/SyncV2/API.hs new file mode 100644 index 0000000000..54e98a3abf --- /dev/null +++ b/unison-share-api/src/Unison/SyncV2/API.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE DataKinds #-} + +module Unison.SyncV2.API + ( API, + api, + Routes (..), + ) +where + +import Data.ByteString (ByteString) +import Data.Proxy +import GHC.Generics (Generic) +import Servant.API +import Unison.SyncV2.Types +import Unison.Util.Servant.CBOR (CBOR) + +api :: Proxy API +api = Proxy + +type API = NamedRoutes Routes + +type DownloadEntitiesStream = + -- | The causal hash the client needs. The server should provide it and all of its dependencies + ReqBody '[CBOR] DownloadEntitiesRequest + :> StreamPost NoFraming OctetStream (SourceIO ByteString) + +type UploadEntitiesStream = + ReqBody '[CBOR] UploadEntitiesRequest + :> StreamPost NoFraming OctetStream (SourceIO ByteString) + +type GetCausalHashEndpoint = + ReqBody '[CBOR] GetCausalHashRequest + :> Post '[CBOR] GetCausalHashResponse + +data Routes mode = Routes + { getCausalHash :: mode :- "path" :> "get" :> GetCausalHashEndpoint, + downloadEntitiesStream :: mode :- "entities" :> "download" :> DownloadEntitiesStream, + uploadEntitiesStream :: mode :- "entities" :> "upload" :> UploadEntitiesStream + } + deriving stock (Generic) diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs new file mode 100644 index 0000000000..336f781fd0 --- /dev/null +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -0,0 +1,49 @@ +module Unison.SyncV2.Types + ( GetCausalHashRequest (..), + GetCausalHashResponse (..), + DownloadEntitiesRequest (..), + UploadEntitiesRequest (..), + BranchRef (..), + ) +where + +import Codec.Serialise (Serialise (..)) +import Data.Text (Text) +import Unison.Hash32 (Hash32) +import Unison.Server.Orphans () +import Unison.Share.API.Hash (HashJWT) + +newtype BranchRef = BranchRef Text + deriving (Serialise) via Text + +data GetCausalHashRequest = GetCausalHashRequest + { branchRef :: BranchRef + } + +instance Serialise GetCausalHashRequest where + encode (GetCausalHashRequest {branchRef}) = + encode branchRef + decode = GetCausalHashRequest <$> decode + +data GetCausalHashResponse = GetCausalHashResponse + { causalHash :: HashJWT + } + +instance Serialise GetCausalHashResponse where + encode (GetCausalHashResponse {causalHash}) = + encode causalHash + decode = GetCausalHashResponse <$> decode + +data DownloadEntitiesRequest = DownloadEntitiesRequest + { causalHash :: HashJWT, + knownHashes :: [Hash32] + } + +instance Serialise DownloadEntitiesRequest where + encode (DownloadEntitiesRequest {causalHash, knownHashes}) = + encode causalHash <> encode knownHashes + decode = DownloadEntitiesRequest <$> decode <*> decode + +data UploadEntitiesRequest = UploadEntitiesRequest + { branchRef :: BranchRef + } diff --git a/unison-share-api/src/Unison/Util/Servant/CBOR.hs b/unison-share-api/src/Unison/Util/Servant/CBOR.hs new file mode 100644 index 0000000000..160b4ea151 --- /dev/null +++ b/unison-share-api/src/Unison/Util/Servant/CBOR.hs @@ -0,0 +1,54 @@ +-- | Servant configuration for the CBOR media type +-- +-- Adapted from https://hackage.haskell.org/package/servant-serialization-0.3/docs/Servant-API-ContentTypes-SerialiseCBOR.html via MIT license +module Unison.Util.Servant.CBOR (CBOR) where + +import Codec.CBOR.Read (DeserialiseFailure (..)) +import Codec.Serialise (Serialise, deserialiseOrFail, serialise) +import Data.ByteString.Lazy qualified as BL +import Data.List.NonEmpty qualified as NonEmpty +import Network.HTTP.Media.MediaType qualified as MediaType +import Servant + +-- | Content-type for encoding and decoding objects as their CBOR representations +data CBOR + +-- | Mime-type for CBOR and additional ones using the word "hackage" and the +-- name of the package "serialise". +instance Accept CBOR where + contentTypes Proxy = + NonEmpty.singleton ("application" MediaType.// "cbor") + +-- | +-- +-- >>> mimeRender (Proxy :: Proxy CBOR) ("Hello" :: String) +-- "eHello" +instance (Serialise a) => MimeRender CBOR a where + mimeRender Proxy = serialise + +-- | +-- +-- >>> let bsl = mimeRender (Proxy :: Proxy CBOR) (3.14 :: Float) +-- >>> mimeUnrender (Proxy :: Proxy CBOR) bsl :: Either String Float +-- Right 3.14 +-- +-- >>> mimeUnrender (Proxy :: Proxy CBOR) (bsl <> "trailing garbage") :: Either String Float +-- Right 3.14 +-- +-- >>> mimeUnrender (Proxy :: Proxy CBOR) ("preceding garbage" <> bsl) :: Either String Float +-- Left "Codec.Serialise.deserialiseOrFail: expected float at byte-offset 0" +instance (Serialise a) => MimeUnrender CBOR a where + mimeUnrender Proxy = mapLeft prettyErr . deserialiseOrFail + where + mapLeft f = either (Left . f) Right + prettyErr (DeserialiseFailure offset err) = + "Codec.Serialise.deserialiseOrFail: " ++ err ++ " at byte-offset " ++ show offset + +-- | Wrapper for CBOR data that has already been serialized +newtype RawCBOR = RawCBOR BL.ByteString + +instance {-# OVERLAPPING #-} MimeRender CBOR RawCBOR where + mimeRender Proxy (RawCBOR bs) = bs + +instance {-# OVERLAPPING #-} MimeUnrender CBOR RawCBOR where + mimeUnrender Proxy bs = Right (RawCBOR bs) diff --git a/unison-share-api/unison-share-api.cabal b/unison-share-api/unison-share-api.cabal index 20f22f054a..d0a49365b1 100644 --- a/unison-share-api/unison-share-api.cabal +++ b/unison-share-api/unison-share-api.cabal @@ -48,7 +48,10 @@ library Unison.Sync.Common Unison.Sync.EntityValidation Unison.Sync.Types + Unison.SyncV2.API + Unison.SyncV2.Types Unison.Util.Find + Unison.Util.Servant.CBOR hs-source-dirs: src default-extensions: From bc42c139ff96ae9c1481fd47765ddaa3e31c84dc Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 6 Aug 2024 09:35:41 -0700 Subject: [PATCH 05/52] Implement Serialise for Download Chunks --- unison-share-api/src/Unison/Sync/Types.hs | 110 ++++++++++++++++++ unison-share-api/src/Unison/SyncV2/API.hs | 2 +- unison-share-api/src/Unison/SyncV2/Types.hs | 43 ++++++- .../src/Unison/Util/Servant/CBOR.hs | 10 -- 4 files changed, 152 insertions(+), 13 deletions(-) diff --git a/unison-share-api/src/Unison/Sync/Types.hs b/unison-share-api/src/Unison/Sync/Types.hs index 35d7030cc8..df956dfba4 100644 --- a/unison-share-api/src/Unison/Sync/Types.hs +++ b/unison-share-api/src/Unison/Sync/Types.hs @@ -56,6 +56,10 @@ module Unison.Sync.Types ) where +import Codec.CBOR.Decoding qualified as CBOR +import Codec.CBOR.Encoding qualified as CBOR +import Codec.Serialise +import Codec.Serialise qualified as CBOR import Control.Lens (both, traverseOf) import Data.Aeson import Data.Aeson qualified as Aeson @@ -73,6 +77,7 @@ import U.Codebase.Sqlite.Branch.Format (LocalBranchBytes (..)) import Unison.Hash32 (Hash32) import Unison.Hash32.Orphans.Aeson () import Unison.Prelude +import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) import Unison.Util.Set qualified as Set @@ -91,6 +96,7 @@ instance FromJSON Base64Bytes where newtype RepoInfo = RepoInfo {unRepoInfo :: Text} deriving newtype (Show, Eq, Ord, ToJSON, FromJSON) + deriving (Serialise) via Text data Path = Path { -- This is a nonempty list, where we require the first segment to be the repo name / user name / whatever, @@ -482,6 +488,27 @@ data EntityType | CausalType deriving stock (Eq, Ord, Show) +instance Serialise EntityType where + encode = \case + TermComponentType -> CBOR.encodeWord8 0 + DeclComponentType -> CBOR.encodeWord8 1 + PatchType -> CBOR.encodeWord8 2 + PatchDiffType -> CBOR.encodeWord8 3 + NamespaceType -> CBOR.encodeWord8 4 + NamespaceDiffType -> CBOR.encodeWord8 5 + CausalType -> CBOR.encodeWord8 6 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure TermComponentType + 1 -> pure DeclComponentType + 2 -> pure PatchType + 3 -> pure PatchDiffType + 4 -> pure NamespaceType + 5 -> pure NamespaceDiffType + 6 -> pure CausalType + _ -> fail "invalid tag" + instance ToJSON EntityType where toJSON = String . \case @@ -590,6 +617,48 @@ data DownloadEntitiesError | DownloadEntitiesEntityValidationFailure EntityValidationError deriving stock (Eq, Show) +data DownloadEntitiesErrorTag + = NoReadPermissionTag + | InvalidRepoInfoTag + | UserNotFoundTag + | ProjectNotFoundTag + | EntityValidationFailureTag + deriving stock (Eq, Show) + +instance Serialise DownloadEntitiesErrorTag where + encode = \case + NoReadPermissionTag -> CBOR.encodeWord8 0 + InvalidRepoInfoTag -> CBOR.encodeWord8 1 + UserNotFoundTag -> CBOR.encodeWord8 2 + ProjectNotFoundTag -> CBOR.encodeWord8 3 + EntityValidationFailureTag -> CBOR.encodeWord8 4 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure NoReadPermissionTag + 1 -> pure InvalidRepoInfoTag + 2 -> pure UserNotFoundTag + 3 -> pure ProjectNotFoundTag + 4 -> pure EntityValidationFailureTag + _ -> fail "invalid tag" + +instance Serialise DownloadEntitiesError where + encode = \case + DownloadEntitiesNoReadPermission repoInfo -> CBOR.encode NoReadPermissionTag <> CBOR.encode repoInfo + DownloadEntitiesInvalidRepoInfo msg repoInfo -> CBOR.encode InvalidRepoInfoTag <> CBOR.encode (msg, repoInfo) + DownloadEntitiesUserNotFound userHandle -> CBOR.encode UserNotFoundTag <> CBOR.encode userHandle + DownloadEntitiesProjectNotFound projectShorthand -> CBOR.encode ProjectNotFoundTag <> CBOR.encode projectShorthand + DownloadEntitiesEntityValidationFailure err -> CBOR.encode EntityValidationFailureTag <> CBOR.encode err + + decode = do + tag <- CBOR.decode + case tag of + NoReadPermissionTag -> DownloadEntitiesNoReadPermission <$> CBOR.decode + InvalidRepoInfoTag -> uncurry DownloadEntitiesInvalidRepoInfo <$> CBOR.decode + UserNotFoundTag -> DownloadEntitiesUserNotFound <$> CBOR.decode + ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode + EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode + instance ToJSON DownloadEntitiesResponse where toJSON = \case DownloadEntitiesSuccess entities -> jsonUnion "success" entities @@ -618,6 +687,43 @@ data EntityValidationError deriving stock (Show, Eq, Ord) deriving anyclass (Exception) +data EntityValidationErrorTag + = HashMismatchTag + | UnsupportedTypeTag + | InvalidByteEncodingTag + | HashResolutionFailureTag + deriving stock (Eq, Show) + +instance Serialise EntityValidationErrorTag where + encode = \case + HashMismatchTag -> CBOR.encodeWord8 0 + UnsupportedTypeTag -> CBOR.encodeWord8 1 + InvalidByteEncodingTag -> CBOR.encodeWord8 2 + HashResolutionFailureTag -> CBOR.encodeWord8 3 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure HashMismatchTag + 1 -> pure UnsupportedTypeTag + 2 -> pure InvalidByteEncodingTag + 3 -> pure HashResolutionFailureTag + _ -> fail "invalid tag" + +instance Serialise EntityValidationError where + encode = \case + EntityHashMismatch typ mismatch -> CBOR.encode HashMismatchTag <> CBOR.encode typ <> CBOR.encode mismatch + UnsupportedEntityType hash typ -> CBOR.encode UnsupportedTypeTag <> CBOR.encode hash <> CBOR.encode typ + InvalidByteEncoding hash typ errMsg -> CBOR.encode InvalidByteEncodingTag <> CBOR.encode hash <> CBOR.encode typ <> CBOR.encode errMsg + HashResolutionFailure hash -> CBOR.encode HashResolutionFailureTag <> CBOR.encode hash + + decode = do + tag <- CBOR.decode + case tag of + HashMismatchTag -> EntityHashMismatch <$> CBOR.decode <*> CBOR.decode + UnsupportedTypeTag -> UnsupportedEntityType <$> CBOR.decode <*> CBOR.decode + InvalidByteEncodingTag -> InvalidByteEncoding <$> CBOR.decode <*> CBOR.decode <*> CBOR.decode + HashResolutionFailureTag -> HashResolutionFailure <$> CBOR.decode + instance ToJSON EntityValidationError where toJSON = \case EntityHashMismatch typ mismatch -> jsonUnion "mismatched_hash" (object ["type" .= typ, "mismatch" .= mismatch]) @@ -693,6 +799,10 @@ data HashMismatchForEntity = HashMismatchForEntity } deriving stock (Show, Eq, Ord) +instance Serialise HashMismatchForEntity where + encode (HashMismatchForEntity supplied computed) = CBOR.encode supplied <> CBOR.encode computed + decode = HashMismatchForEntity <$> CBOR.decode <*> CBOR.decode + instance ToJSON UploadEntitiesResponse where toJSON = \case UploadEntitiesSuccess -> jsonUnion "success" (Object mempty) diff --git a/unison-share-api/src/Unison/SyncV2/API.hs b/unison-share-api/src/Unison/SyncV2/API.hs index 54e98a3abf..6562185d62 100644 --- a/unison-share-api/src/Unison/SyncV2/API.hs +++ b/unison-share-api/src/Unison/SyncV2/API.hs @@ -22,7 +22,7 @@ type API = NamedRoutes Routes type DownloadEntitiesStream = -- | The causal hash the client needs. The server should provide it and all of its dependencies ReqBody '[CBOR] DownloadEntitiesRequest - :> StreamPost NoFraming OctetStream (SourceIO ByteString) + :> StreamPost NoFraming CBOR (SourceIO DownloadEntitiesChunk) type UploadEntitiesStream = ReqBody '[CBOR] UploadEntitiesRequest diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 336f781fd0..4e8dd13b3b 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -2,16 +2,22 @@ module Unison.SyncV2.Types ( GetCausalHashRequest (..), GetCausalHashResponse (..), DownloadEntitiesRequest (..), + DownloadEntitiesChunk (..), + CBORBytes (..), UploadEntitiesRequest (..), BranchRef (..), ) where +import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) +import Codec.Serialise.Decoding qualified as CBOR +import Data.ByteString.Lazy qualified as BL import Data.Text (Text) import Unison.Hash32 (Hash32) import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) +import Unison.Sync.Types qualified as SyncV1 newtype BranchRef = BranchRef Text deriving (Serialise) via Text @@ -44,6 +50,39 @@ instance Serialise DownloadEntitiesRequest where encode causalHash <> encode knownHashes decode = DownloadEntitiesRequest <$> decode <*> decode +-- | Wrapper for CBOR data that has already been serialized. +-- In our case, we use this because we may load pre-serialized CBOR directly from the database, +-- but it's also useful in allowing us to more quickly seek through a CBOR stream, since we only need to decode the CBOR when/if we actually need to use it, and can skip past it using a byte offset otherwise. +newtype CBORBytes = CBORBytes BL.ByteString + deriving (Serialise) via (BL.ByteString) + +-- | A chunk of the download entities response stream. +data DownloadEntitiesChunk + = EntityChunk {hash :: Hash32, entityCBOR :: CBORBytes} + | ErrorChunk {err :: SyncV1.DownloadEntitiesError} + +data DownloadEntitiesChunkTag = EntityChunkTag | ErrorChunkTag + +instance Serialise DownloadEntitiesChunkTag where + encode EntityChunkTag = CBOR.encodeWord8 0 + encode ErrorChunkTag = CBOR.encodeWord8 1 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure EntityChunkTag + 1 -> pure ErrorChunkTag + _ -> fail "invalid tag" + +instance Serialise DownloadEntitiesChunk where + encode (EntityChunk {hash, entityCBOR}) = + encode EntityChunkTag <> encode hash <> encode entityCBOR + encode (ErrorChunk {err}) = + encode ErrorChunkTag <> encode err + decode = do + tag <- decode + case tag of + EntityChunkTag -> EntityChunk <$> decode <*> decode + ErrorChunkTag -> ErrorChunk <$> decode + +-- TODO data UploadEntitiesRequest = UploadEntitiesRequest - { branchRef :: BranchRef - } diff --git a/unison-share-api/src/Unison/Util/Servant/CBOR.hs b/unison-share-api/src/Unison/Util/Servant/CBOR.hs index 160b4ea151..481cbc20b3 100644 --- a/unison-share-api/src/Unison/Util/Servant/CBOR.hs +++ b/unison-share-api/src/Unison/Util/Servant/CBOR.hs @@ -5,7 +5,6 @@ module Unison.Util.Servant.CBOR (CBOR) where import Codec.CBOR.Read (DeserialiseFailure (..)) import Codec.Serialise (Serialise, deserialiseOrFail, serialise) -import Data.ByteString.Lazy qualified as BL import Data.List.NonEmpty qualified as NonEmpty import Network.HTTP.Media.MediaType qualified as MediaType import Servant @@ -43,12 +42,3 @@ instance (Serialise a) => MimeUnrender CBOR a where mapLeft f = either (Left . f) Right prettyErr (DeserialiseFailure offset err) = "Codec.Serialise.deserialiseOrFail: " ++ err ++ " at byte-offset " ++ show offset - --- | Wrapper for CBOR data that has already been serialized -newtype RawCBOR = RawCBOR BL.ByteString - -instance {-# OVERLAPPING #-} MimeRender CBOR RawCBOR where - mimeRender Proxy (RawCBOR bs) = bs - -instance {-# OVERLAPPING #-} MimeUnrender CBOR RawCBOR where - mimeUnrender Proxy bs = Right (RawCBOR bs) From f2b62de39a7ca3d5f5684e9a1d0a85046ce9ccb0 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 7 Aug 2024 08:53:01 -0700 Subject: [PATCH 06/52] SyncV2 WIP --- unison-cli/package.yaml | 2 + unison-cli/src/Unison/Share/SyncV2.hs | 297 ++++++-------------- unison-cli/unison-cli.cabal | 6 + unison-share-api/src/Unison/SyncV2/Types.hs | 90 +++++- 4 files changed, 167 insertions(+), 228 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 23b18fa9d9..ddebe6ae98 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -23,6 +23,7 @@ dependencies: - co-log-core - code-page - concurrent-output + - conduit - configurator - containers >= 0.6.3 - cryptonite @@ -66,6 +67,7 @@ dependencies: - regex-tdfa - semialign - semigroups + - serialise - servant - servant-client - shellmet diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index edd487c4f6..77fb5fc7dd 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -13,12 +13,15 @@ module Unison.Share.SyncV2 ) where +import Conduit (ConduitT) import Control.Concurrent.STM import Control.Lens import Control.Monad.Except -import Control.Monad.Reader (ask) +import Control.Monad.Reader (MonadReader, ask) import Control.Monad.Trans.Reader (ReaderT, runReaderT) import Control.Monad.Trans.Reader qualified as Reader +import Data.Conduit ((.|)) +import Data.Conduit.Combinators qualified as Conduit import Data.Map qualified as Map import Data.Map.NonEmpty (NEMap) import Data.Map.NonEmpty qualified as NEMap @@ -32,7 +35,7 @@ import GHC.IO (unsafePerformIO) import Ki qualified import Network.HTTP.Client qualified as Http.Client import Network.HTTP.Types qualified as HTTP -import Servant.API qualified as Servant ((:>)) +import Servant.API qualified as Servant import Servant.Client (BaseUrl) import Servant.Client qualified as Servant import Servant.Client.Streaming qualified as ServantStreaming @@ -54,7 +57,7 @@ import Unison.Share.API.Hash qualified as Share import Unison.Share.ExpectedHashMismatches (expectedCausalHashMismatches, expectedComponentHashMismatches) import Unison.Share.Sync.Types import Unison.Sqlite qualified as Sqlite -import Unison.Sync.Common (entityToTempEntity, expectEntity, hash32ToCausalHash) +import Unison.Sync.Common (entityToTempEntity, hash32ToCausalHash) import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share import Unison.SyncV2.API qualified as SyncV2 @@ -76,12 +79,13 @@ pull :: BaseUrl -> -- | The repo+path to pull from. Share.Path -> + SyncV2.BranchRef -> -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError PullError) CausalHash) -pull unisonShareUrl repoPath downloadedCallback = - getCausalHashByPath unisonShareUrl repoPath >>= \case - Left err -> pure (Left (PullError'GetCausalHash <$> err)) +pull unisonShareUrl repoPath branchRef downloadedCallback = + getCausalHashByPath unisonShareUrl branchRef >>= \case + Left err -> pure (Left (SyncV2.PullError'GetCausalHash <$> err)) -- There's nothing at the remote path, so there's no causal to pull. Right Nothing -> pure (Left (SyncError (PullError'NoHistoryAtPath repoPath))) Right (Just hashJwt) -> @@ -99,10 +103,11 @@ downloadEntities :: Share.RepoInfo -> -- | The hash to download. Share.HashJWT -> + Set Hash32 -> -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError Share.DownloadEntitiesError) ()) -downloadEntities unisonShareUrl repoInfo hashJwt downloadedCallback = do +downloadEntities unisonShareUrl repoInfo hashJwt knownHashes downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask Cli.label \done -> do @@ -120,15 +125,15 @@ downloadEntities unisonShareUrl repoInfo hashJwt downloadedCallback = do httpDownloadEntities authHTTPClient unisonShareUrl - Share.DownloadEntitiesRequest {repoInfo, hashes = NESet.singleton hashJwt} + SyncV2.DownloadEntitiesRequest {repoInfo, causalHash = hashJwt, knownHashes} entities <- liftIO request >>= \case Left err -> failed (TransportError err) - Right (Share.DownloadEntitiesFailure err) -> failed (SyncError err) - Right (Share.DownloadEntitiesSuccess entities) -> pure entities - case validateEntities entities of - Left err -> failed . SyncError . Share.DownloadEntitiesEntityValidationFailure $ err - Right () -> pure () + Right source -> do + conduit <- liftIO $ Servant.fromSourceIO source + let entityPipeline = conduit .| unpackEntities .| entityValidator .| entityInserter + _ + tempEntities <- Cli.runTransaction (insertEntities entities) liftIO (downloadedCallback 1) pure (NESet.nonEmptySet tempEntities) @@ -153,32 +158,42 @@ downloadEntities unisonShareUrl repoInfo hashJwt downloadedCallback = do _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) pure (Right ()) --- | The caller is responsible for forking. -streamCausalAndDependencies :: TBQueue (Hash32, TempEntity) -> IO () -streamCausalAndDependencies queue = do - _ - --- | Validates the provided entities if and only if the environment variable `UNISON_ENTITY_VALIDATION` is set to "true". -validateEntities :: NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT) -> Either Share.EntityValidationError () -validateEntities entities = - when shouldValidateEntities $ do - ifor_ (NEMap.toMap entities) \hash entity -> do - let entityWithHashes = entity & Share.entityHashes_ %~ Share.hashJWTHash - case EV.validateEntity hash entityWithHashes of - Nothing -> pure () - Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> - let expectedMismatches = case et of - Share.TermComponentType -> expectedComponentHashMismatches - Share.DeclComponentType -> expectedComponentHashMismatches - Share.CausalType -> expectedCausalHashMismatches - _ -> mempty - in case Map.lookup supplied expectedMismatches of - Just expected - | expected == computed -> pure () - _ -> do - Left err - Just err -> do - Left err +entityValidator :: (MonadError Share.EntityValidationError m) => ConduitT (Hash32, (Share.Entity Text Hash32 Hash32)) (Hash32, (Share.Entity Text Hash32 Hash32)) m () +entityValidator = Conduit.iterM $ \(hash, entity) -> + -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. + case EV.validateEntity hash entity of + Nothing -> pure () + Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> + let expectedMismatches = case et of + Share.TermComponentType -> expectedComponentHashMismatches + Share.DeclComponentType -> expectedComponentHashMismatches + Share.CausalType -> expectedCausalHashMismatches + _ -> mempty + in case Map.lookup supplied expectedMismatches of + Just expected + | expected == computed -> pure () + _ -> do + throwError err + Just err -> do + throwError err + +entityInserter :: (MonadIO m, MonadReader Cli.Env m) => ConduitT (Hash32, (Share.Entity Text Hash32 Hash32)) () m () +entityInserter = Conduit.mapM \(hash, entity) -> do + Cli.Env {codebase} <- ask + liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity + pure hash + +unpackEntities :: (MonadError e m) => ConduitT SyncV2.DownloadEntitiesChunk (Hash32, (Share.Entity Text Hash32 Hash32)) m () +unpackEntities = Conduit.mapM_ $ \case + SyncV2.EntityChunk {hash, entityCBOR = SyncV2.CBORBytes entityBytes} -> + case CBOR.deserialiseOrFail entityBytes of + Left err -> do + throwError err + Right (_, entity) -> do + pure (hash, entity) + SyncV2.ErrorChunk {err} -> do + Debug.debugLogM Debug.Sync (show err) + pure () -- | Validate entities received from the server unless this flag is set to false. validationEnvKey :: String @@ -413,191 +428,38 @@ insertEntities entities = getCausalHashByPath :: -- | The Unison Share URL. BaseUrl -> - Share.Path -> - Cli (Either (SyncError GetCausalHashByPathError) (Maybe Share.HashJWT)) -getCausalHashByPath unisonShareUrl repoPath = do + SyncV2.BranchRef -> + Cli (Either (SyncError SyncV2.GetCausalHashError) (Maybe Share.HashJWT)) +getCausalHashByPath unisonShareUrl branchRef = do Cli.Env {authHTTPClient} <- ask - liftIO (httpGetCausalHash authHTTPClient unisonShareUrl (SyncV2.GetCausalHashByPathRequest repoPath)) <&> \case + liftIO (httpGetCausalHash authHTTPClient unisonShareUrl (SyncV2.GetCausalHashRequest branchRef)) <&> \case Left err -> Left (TransportError err) - Right (SyncV2.GetCausalHashByPathSuccess maybeHashJwt) -> Right maybeHashJwt - Right (SyncV2.GetCausalHashByPathNoReadPermission _) -> - Left (SyncError (GetCausalHashByPathErrorNoReadPermission repoPath)) - Right (SyncV2.GetCausalHashByPathInvalidRepoInfo err repoInfo) -> - Left (SyncError (GetCausalHashByPathErrorInvalidRepoInfo err repoInfo)) - Right SyncV2.GetCausalHashByPathUserNotFound -> - Left (SyncError $ GetCausalHashByPathErrorUserNotFound (Share.pathRepoInfo repoPath)) + Right (SyncV2.GetCausalHashSuccess hashJWT) -> Right hashJWT + Right (SyncV2.GetCausalHashError err) -> + case err of + SyncV2.GetCausalHashNoReadPermission repoInfo -> + Left (SyncError (SyncV2.GetCausalHashNoReadPermission repoInfo)) + SyncV2.GetCausalHashInvalidRepoInfo err repoInfo -> + Left (SyncError (SyncV2.GetCausalHashInvalidRepoInfo err repoInfo)) + SyncV2.GetCausalHashUserNotFound -> + Left (SyncError SyncV2.GetCausalHashUserNotFound) ------------------------------------------------------------------------------------------------------------------------ -- Upload entities -data UploadDispatcherJob - = UploadDispatcherReturnFailure (SyncError Share.UploadEntitiesError) - | UploadDispatcherForkWorkerWhenAvailable (NESet Hash32) - | UploadDispatcherForkWorker (NESet Hash32) - | UploadDispatcherDone - -- | Upload a set of entities to Unison Share. If the server responds that it cannot yet store any hash(es) due to -- missing dependencies, send those dependencies too, and on and on, until the server stops responding that it's missing -- anything. -- -- Returns true on success, false on failure (because the user does not have write permission). -uploadEntities :: +_uploadEntities :: BaseUrl -> Share.RepoInfo -> NESet Hash32 -> (Int -> IO ()) -> Cli (Either (SyncError Share.UploadEntitiesError) ()) -uploadEntities unisonShareUrl repoInfo hashes0 uploadedCallback = do - Cli.Env {authHTTPClient, codebase} <- ask - - liftIO do - hashesVar <- newTVarIO (NESet.toSet hashes0) - -- Semantically, this is the set of hashes we've uploaded so far, but we do delete from it when it's safe to, so it - -- doesn't grow unbounded. It's used to filter out hashes that would be duplicate uploads: the server, when - -- responding to any particular upload request, may declare that it still needs some hashes that we're in the - -- process of uploading from another thread. - dedupeVar <- newTVarIO Set.empty - nextWorkerIdVar <- newTVarIO 0 - workersVar <- newTVarIO Set.empty - workerFailedVar <- newEmptyTMVarIO - - Ki.scoped \scope -> - dispatcher - scope - authHTTPClient - (Codebase.runTransaction codebase) - hashesVar - dedupeVar - nextWorkerIdVar - workersVar - workerFailedVar - where - dispatcher :: - Ki.Scope -> - AuthenticatedHttpClient -> - (forall a. Sqlite.Transaction a -> IO a) -> - TVar (Set Hash32) -> - TVar (Set Hash32) -> - TVar Int -> - TVar (Set Int) -> - TMVar (SyncError Share.UploadEntitiesError) -> - IO (Either (SyncError Share.UploadEntitiesError) ()) - dispatcher scope httpClient runTransaction hashesVar dedupeVar nextWorkerIdVar workersVar workerFailedVar = do - loop - where - loop :: IO (Either (SyncError Share.UploadEntitiesError) ()) - loop = - doJob [checkForFailureMode, dispatchWorkMode, checkIfDoneMode] - - doJob :: [STM UploadDispatcherJob] -> IO (Either (SyncError Share.UploadEntitiesError) ()) - doJob jobs = - atomically (asum jobs) >>= \case - UploadDispatcherReturnFailure err -> pure (Left err) - UploadDispatcherForkWorkerWhenAvailable hashes -> doJob [forkWorkerMode hashes, checkForFailureMode] - UploadDispatcherForkWorker hashes -> do - workerId <- - atomically do - workerId <- readTVar nextWorkerIdVar - writeTVar nextWorkerIdVar $! workerId + 1 - modifyTVar' workersVar (Set.insert workerId) - pure workerId - _ <- - Ki.fork @() scope do - worker httpClient runTransaction hashesVar dedupeVar workersVar workerFailedVar workerId hashes - loop - UploadDispatcherDone -> pure (Right ()) - - checkForFailureMode :: STM UploadDispatcherJob - checkForFailureMode = do - err <- readTMVar workerFailedVar - pure (UploadDispatcherReturnFailure err) - - dispatchWorkMode :: STM UploadDispatcherJob - dispatchWorkMode = do - hashes <- readTVar hashesVar - when (Set.null hashes) retry - let (hashes1, hashes2) = Set.splitAt syncChunkSize hashes - modifyTVar' dedupeVar (Set.union hashes1) - writeTVar hashesVar hashes2 - pure (UploadDispatcherForkWorkerWhenAvailable (NESet.unsafeFromSet hashes1)) - - forkWorkerMode :: NESet Hash32 -> STM UploadDispatcherJob - forkWorkerMode hashes = do - workers <- readTVar workersVar - when (Set.size workers >= maxSimultaneousPushWorkers) retry - pure (UploadDispatcherForkWorker hashes) - - checkIfDoneMode :: STM UploadDispatcherJob - checkIfDoneMode = do - workers <- readTVar workersVar - when (not (Set.null workers)) retry - pure UploadDispatcherDone - - worker :: - AuthenticatedHttpClient -> - (forall a. Sqlite.Transaction a -> IO a) -> - TVar (Set Hash32) -> - TVar (Set Hash32) -> - TVar (Set Int) -> - TMVar (SyncError Share.UploadEntitiesError) -> - Int -> - NESet Hash32 -> - IO () - worker httpClient runTransaction hashesVar dedupeVar workersVar workerFailedVar workerId hashes = do - entities <- - fmap NEMap.fromAscList do - runTransaction do - for (NESet.toAscList hashes) \hash -> do - entity <- expectEntity hash - pure (hash, entity) - - result <- - httpUploadEntities httpClient unisonShareUrl Share.UploadEntitiesRequest {entities, repoInfo} <&> \case - Left err -> Left (TransportError err) - Right response -> - case response of - Share.UploadEntitiesSuccess -> Right Set.empty - Share.UploadEntitiesFailure err -> - case err of - Share.UploadEntitiesError'NeedDependencies (Share.NeedDependencies moreHashes) -> - Right (NESet.toSet moreHashes) - err -> Left (SyncError err) - - case result of - Left err -> void (atomically (tryPutTMVar workerFailedVar err)) - Right moreHashes -> do - uploadedCallback (NESet.size hashes) - maybeYoungestWorkerThatWasAlive <- - atomically do - -- Record ourselves as "dead". The only work we have left to do is remove the hashes we just uploaded from - -- the `dedupe` set, but whether or not we are "alive" is relevant only to: - -- - -- - The main dispatcher thread, which terminates when there are no more hashes to upload, and no alive - -- workers. It is not important for us to delete from the `dedupe` set in this case. - -- - -- - Other worker threads, each of which independently decides when it is safe to delete the set of - -- hashes they just uploaded from the `dedupe` set (as we are doing now). - !workers <- Set.delete workerId <$> readTVar workersVar - writeTVar workersVar workers - -- Add more work (i.e. hashes to upload) to the work queue (really a work set), per the response we just - -- got from the server. Remember to only add hashes that aren't in the `dedupe` set (see the comment on - -- the dedupe set above for more info). - when (not (Set.null moreHashes)) do - dedupe <- readTVar dedupeVar - hashes0 <- readTVar hashesVar - writeTVar hashesVar $! Set.union (Set.difference moreHashes dedupe) hashes0 - pure (Set.lookupMax workers) - -- Block until we are sure that the server does not have any uncommitted transactions that see a version of - -- the database that does not include the entities we just uploaded. After that point, it's fine to remove the - -- hashes of the entities we just uploaded from the `dedupe` set, because they will never be relevant for any - -- subsequent deduping operations. If we didn't delete from the `dedupe` set, this algorithm would still be - -- correct, it would just use an unbounded amount of memory to remember all the hashes we've uploaded so far. - whenJust maybeYoungestWorkerThatWasAlive \youngestWorkerThatWasAlive -> do - atomically do - workers <- readTVar workersVar - whenJust (Set.lookupMin workers) \oldestWorkerAlive -> - when (oldestWorkerAlive <= youngestWorkerThatWasAlive) retry - atomically (modifyTVar' dedupeVar (`Set.difference` (NESet.toSet hashes))) +_uploadEntities _unisonShareUrl _repoInfo _hashes0 _uploadedCallback = do + error "syncv2 uploadEntities not implemented" ------------------------------------------------------------------------------------------------------------------------ -- Database operations @@ -620,20 +482,19 @@ elaborateHashes hashes = -- 3. In temp storage otherwise. upsertEntitySomewhere :: Hash32 -> - Share.Entity Text Hash32 Share.HashJWT -> + Share.Entity Text Hash32 Hash32 -> Sqlite.Transaction Q.EntityLocation upsertEntitySomewhere hash entity = Q.entityLocation hash >>= \case Just location -> pure location Nothing -> do - missingDependencies1 :: Map Hash32 Share.HashJWT <- + missingDependencies1 :: Set Hash32 <- Share.entityDependencies entity & foldMapM - ( \hashJwt -> do - let hash = Share.hashJWTHash hashJwt - Q.entityExists hash <&> \case - True -> Map.empty - False -> Map.singleton hash hashJwt + ( \depHash -> do + Q.entityExists depHash <&> \case + True -> mempty + False -> Set.singleton depHash ) case NEMap.nonEmptyMap missingDependencies1 of Nothing -> do @@ -668,15 +529,15 @@ httpDownloadEntities :: Auth.AuthenticatedHttpClient -> BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (IO (Either CodeserverTransportError (Servant.SourceT IO ByteString))) -httpUploadEntities :: + (IO (Either CodeserverTransportError (Servant.SourceT IO SyncV2.DownloadEntitiesChunk))) +_httpUploadEntities :: Auth.AuthenticatedHttpClient -> BaseUrl -> SyncV2.UploadEntitiesRequest -> IO (Either CodeserverTransportError (Servant.SourceT IO ByteString)) ( httpGetCausalHash, httpDownloadEntities, - httpUploadEntities + _httpUploadEntities ) = let SyncV2.Routes { getCausalHash, diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 02c4926a5d..5791c8c42c 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -206,6 +206,7 @@ library , co-log-core , code-page , concurrent-output + , conduit , configurator , containers >=0.6.3 , cryptonite @@ -249,6 +250,7 @@ library , regex-tdfa , semialign , semigroups + , serialise , servant , servant-client , shellmet @@ -348,6 +350,7 @@ executable transcripts , co-log-core , code-page , concurrent-output + , conduit , configurator , containers >=0.6.3 , cryptonite @@ -392,6 +395,7 @@ executable transcripts , regex-tdfa , semialign , semigroups + , serialise , servant , servant-client , shellmet @@ -497,6 +501,7 @@ test-suite cli-tests , co-log-core , code-page , concurrent-output + , conduit , configurator , containers >=0.6.3 , cryptonite @@ -542,6 +547,7 @@ test-suite cli-tests , regex-tdfa , semialign , semigroups + , serialise , servant , servant-client , shellmet diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 4e8dd13b3b..3ea29e1272 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -1,11 +1,13 @@ module Unison.SyncV2.Types ( GetCausalHashRequest (..), GetCausalHashResponse (..), + GetCausalHashError (..), DownloadEntitiesRequest (..), DownloadEntitiesChunk (..), CBORBytes (..), UploadEntitiesRequest (..), BranchRef (..), + PullError (..), ) where @@ -13,6 +15,7 @@ import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) import Codec.Serialise.Decoding qualified as CBOR import Data.ByteString.Lazy qualified as BL +import Data.Set (Set) import Data.Text (Text) import Unison.Hash32 (Hash32) import Unison.Server.Orphans () @@ -31,24 +34,85 @@ instance Serialise GetCausalHashRequest where encode branchRef decode = GetCausalHashRequest <$> decode -data GetCausalHashResponse = GetCausalHashResponse - { causalHash :: HashJWT - } +data GetCausalHashResponse + = GetCausalHashSuccess (Maybe HashJWT) + | GetCausalHashError GetCausalHashError + deriving stock (Show, Eq, Ord) instance Serialise GetCausalHashResponse where - encode (GetCausalHashResponse {causalHash}) = - encode causalHash - decode = GetCausalHashResponse <$> decode + encode (GetCausalHashSuccess hash) = + encode GetCausalHashSuccessTag <> encode hash + encode (GetCausalHashError err) = + encode GetCausalHashErrorTag <> encode err + decode = do + tag <- decode + case tag of + GetCausalHashSuccessTag -> GetCausalHashSuccess <$> decode + GetCausalHashErrorTag -> GetCausalHashError <$> decode + +data GetCausalHashResponseTag + = GetCausalHashSuccessTag + | GetCausalHashErrorTag + deriving stock (Show, Eq, Ord) + +instance Serialise GetCausalHashResponseTag where + encode GetCausalHashSuccessTag = CBOR.encodeWord8 0 + encode GetCausalHashErrorTag = CBOR.encodeWord8 1 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure GetCausalHashSuccessTag + 1 -> pure GetCausalHashErrorTag + _ -> fail "invalid tag" + +data GetCausalHashError + = GetCausalHashNoReadPermission SyncV1.RepoInfo + | GetCausalHashUserNotFound + | GetCausalHashInvalidRepoInfo Text SyncV1.RepoInfo + deriving stock (Show, Eq, Ord) + +instance Serialise GetCausalHashError where + encode (GetCausalHashNoReadPermission repoInfo) = + encode GetCausalHashNoReadPermissionTag <> encode repoInfo + encode GetCausalHashUserNotFound = + encode GetCausalHashUserNotFoundTag + encode (GetCausalHashInvalidRepoInfo err repoInfo) = + encode GetCausalHashInvalidRepoInfoTag <> encode err <> encode repoInfo + decode = do + tag <- decode + case tag of + GetCausalHashNoReadPermissionTag -> GetCausalHashNoReadPermission <$> decode + GetCausalHashUserNotFoundTag -> pure GetCausalHashUserNotFound + GetCausalHashInvalidRepoInfoTag -> GetCausalHashInvalidRepoInfo <$> decode <*> decode + +data GetCausalHashErrorTag + = GetCausalHashNoReadPermissionTag + | GetCausalHashUserNotFoundTag + | GetCausalHashInvalidRepoInfoTag + deriving stock (Show, Eq, Ord) + +instance Serialise GetCausalHashErrorTag where + encode GetCausalHashNoReadPermissionTag = CBOR.encodeWord8 0 + encode GetCausalHashUserNotFoundTag = CBOR.encodeWord8 1 + encode GetCausalHashInvalidRepoInfoTag = CBOR.encodeWord8 2 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure GetCausalHashNoReadPermissionTag + 1 -> pure GetCausalHashUserNotFoundTag + 2 -> pure GetCausalHashInvalidRepoInfoTag + _ -> fail "invalid tag" data DownloadEntitiesRequest = DownloadEntitiesRequest { causalHash :: HashJWT, - knownHashes :: [Hash32] + repoInfo :: SyncV1.RepoInfo, + knownHashes :: Set Hash32 } instance Serialise DownloadEntitiesRequest where - encode (DownloadEntitiesRequest {causalHash, knownHashes}) = - encode causalHash <> encode knownHashes - decode = DownloadEntitiesRequest <$> decode <*> decode + encode (DownloadEntitiesRequest {causalHash, repoInfo, knownHashes}) = + encode causalHash <> encode repoInfo <> encode knownHashes + decode = DownloadEntitiesRequest <$> decode <*> decode <*> decode -- | Wrapper for CBOR data that has already been serialized. -- In our case, we use this because we may load pre-serialized CBOR directly from the database, @@ -86,3 +150,9 @@ instance Serialise DownloadEntitiesChunk where -- TODO data UploadEntitiesRequest = UploadEntitiesRequest + +-- | An error occurred while pulling code from Unison Share. +data PullError + = PullError'DownloadEntities SyncV1.DownloadEntitiesError + | PullError'GetCausalHash GetCausalHashError + deriving stock (Show) From 86754c12ff5263c3c92760d2ba64a02f14e31f4d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 12 Aug 2024 11:15:20 -0700 Subject: [PATCH 07/52] Client is compiling --- .../U/Codebase/Sqlite/Queries.hs | 31 ++ .../sql/015-syncv2-temp-entity-tables.sql | 53 +++ .../unison-codebase-sqlite.cabal | 3 +- unison-cli/package.yaml | 2 + unison-cli/src/Unison/Share/SyncV2.hs | 397 +++--------------- unison-cli/unison-cli.cabal | 6 + .../src/Unison/Sync/EntityValidation.hs | 10 +- unison-share-api/src/Unison/Sync/Types.hs | 42 -- unison-share-api/src/Unison/SyncV2/Types.hs | 85 +++- 9 files changed, 251 insertions(+), 378 deletions(-) create mode 100644 codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs index 822cdd125e..d47a6e840d 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs @@ -228,6 +228,7 @@ module U.Codebase.Sqlite.Queries expectEntity, syncToTempEntity, insertTempEntity, + insertTempEntityV2, saveTempEntityInMain, expectTempEntity, deleteTempEntity, @@ -315,6 +316,7 @@ import Data.Map.NonEmpty qualified as NEMap import Data.Maybe qualified as Maybe import Data.Sequence qualified as Seq import Data.Set qualified as Set +import Data.Set.NonEmpty (NESet) import Data.Text qualified as Text import Data.Text.Encoding qualified as Text import Data.Text.Lazy qualified as Text.Lazy @@ -2980,6 +2982,35 @@ insertTempEntity entityHash entity missingDependencies = do entityType = Entity.entityType entity +-- | Insert a new `temp_entity` row, and its associated 1+ `temp_entity_missing_dependency` rows. +-- +-- Preconditions: +-- 1. The entity does not already exist in "main" storage (`object` / `causal`) +-- 2. The entity does not already exist in `temp_entity`. +insertTempEntityV2 :: Hash32 -> TempEntity -> NESet Hash32 -> Transaction () +insertTempEntityV2 entityHash entity missingDependencies = do + execute + [sql| + INSERT INTO temp_entity (hash, blob, type_id) + VALUES (:entityHash, :entityBlob, :entityType) + ON CONFLICT DO NOTHING + |] + + for_ missingDependencies \depHash -> + execute + [sql| + INSERT INTO temp_entity_missing_dependency (dependent, dependency) + VALUES (:entityHash, :depHash) + |] + where + entityBlob :: ByteString + entityBlob = + runPutS (Serialization.putTempEntity entity) + + entityType :: TempEntityType + entityType = + Entity.entityType entity + -- | Delete a row from the `temp_entity` table, if it exists. deleteTempEntity :: Hash32 -> Transaction () deleteTempEntity hash = diff --git a/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql b/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql new file mode 100644 index 0000000000..e125539afa --- /dev/null +++ b/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql @@ -0,0 +1,53 @@ +-- Copy-paste of the original temp entity tables, but without hashjwts since syncv2 doesn't use them. +DROP TABLE temp_entity_missing_dependency; +DROP TABLE temp_entity; + +-- A "temp entity" is a term/decl/namespace/patch/causal that we cannot store in the database proper due to missing +-- dependencies. +-- +-- The existence of each `temp_entity` row implies the existence of one or more corresponding +-- `temp_entity_missing_dependency` rows: it does not make sense to make a `temp_entity` row for a thing that has no +-- missing dependencies! +-- +-- Similarly, each `temp_entity` row implies we do not have the entity in the database proper. When and if we *do* store +-- an entity proper (after storing all of its dependencies), we should always atomically delete the corresponding +-- `temp_entity` row, if any. +create table if not exists temp_entity ( + hash text primary key not null, + blob bytes not null, + type_id integer not null references temp_entity_type_description(id) +); + +-- A many-to-many relationship between `temp_entity` (entities we can't yet store due to missing dependencies), and the +-- non-empty set of hashes of each entity's dependencies. +-- +-- We store with each missing dependency the JWT that Unison Share provided us to download that dependency. For +-- downloading a particular dependency #bar, we only need one JWT, even if it's in the table multiple times. (In fact, +-- in this case, there is one "best" JWT - the one with the latest expiry time). +-- +-- The JWTs are also encoded in the local ids part of entity itself (`temp_entity.blob`), but we don't want to have to +-- keep going back there there to decode during a pull. +-- +-- For example, if we wanted to store term #foo, but couldn't because it depends on term #bar which we don't have yet, +-- we would end up with the following rows. +-- +-- temp_entity +-- +------------------------+ +-- | hash | blob | type_id | +-- |========================| +-- | #foo | ... | 0 (term) | +-- +------------------------+ +-- +-- temp_entity_missing_dependency +-- +----------------------------------------+ +-- | dependent | dependency | dependencyJwt | +-- |========================================| +-- | #foo | #bar | aT.Eb.cx | +-- +----------------------------------------+ +create table if not exists temp_entity_missing_dependency ( + dependent text not null references temp_entity(hash), + dependency text not null, + unique (dependent, dependency) +); +create index if not exists temp_entity_missing_dependency_ix_dependent on temp_entity_missing_dependency (dependent); +create index if not exists temp_entity_missing_dependency_ix_dependency on temp_entity_missing_dependency (dependency); diff --git a/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal b/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal index f5211b310d..86f7466997 100644 --- a/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal +++ b/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.35.2. +-- This file has been generated from package.yaml by hpack version 0.36.0. -- -- see: https://github.com/sol/hpack @@ -24,6 +24,7 @@ extra-source-files: sql/012-add-current-project-path-table.sql sql/013-add-project-branch-reflog-table.sql sql/014-add-project-branch-causal-hash-id.sql + sql/015-syncv2-temp-entity-tables.sql sql/create.sql source-repository head diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index ddebe6ae98..0bba34555e 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -27,6 +27,7 @@ dependencies: - configurator - containers >= 0.6.3 - cryptonite + - deepseq - directory - either - errors @@ -70,6 +71,7 @@ dependencies: - serialise - servant - servant-client + - servant-conduit - shellmet - stm - template-haskell diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 77fb5fc7dd..4d6f9ebc1e 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -3,7 +3,7 @@ module Unison.Share.SyncV2 ( -- ** Get causal hash by path - getCausalHashByPath, + getCausalHash, GetCausalHashByPathError (..), -- ** Pull/Download @@ -14,38 +14,33 @@ module Unison.Share.SyncV2 where import Conduit (ConduitT) -import Control.Concurrent.STM import Control.Lens import Control.Monad.Except import Control.Monad.Reader (MonadReader, ask) import Control.Monad.Trans.Reader (ReaderT, runReaderT) import Control.Monad.Trans.Reader qualified as Reader import Data.Conduit ((.|)) +import Data.Conduit qualified as Conduit import Data.Conduit.Combinators qualified as Conduit import Data.Map qualified as Map -import Data.Map.NonEmpty (NEMap) -import Data.Map.NonEmpty qualified as NEMap import Data.Proxy import Data.Set qualified as Set import Data.Set.NonEmpty (NESet) import Data.Set.NonEmpty qualified as NESet import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy -import GHC.IO (unsafePerformIO) -import Ki qualified import Network.HTTP.Client qualified as Http.Client import Network.HTTP.Types qualified as HTTP import Servant.API qualified as Servant import Servant.Client (BaseUrl) import Servant.Client qualified as Servant import Servant.Client.Streaming qualified as ServantStreaming +import Servant.Conduit () import Servant.Types.SourceT qualified as Servant -import System.Environment (lookupEnv) import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) -import Unison.Auth.HTTPClient (AuthenticatedHttpClient) import Unison.Auth.HTTPClient qualified as Auth import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli @@ -57,20 +52,14 @@ import Unison.Share.API.Hash qualified as Share import Unison.Share.ExpectedHashMismatches (expectedCausalHashMismatches, expectedComponentHashMismatches) import Unison.Share.Sync.Types import Unison.Sqlite qualified as Sqlite -import Unison.Sync.Common (entityToTempEntity, hash32ToCausalHash) +import Unison.Sync.Common (hash32ToCausalHash, tempEntityToEntity) import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share import Unison.SyncV2.API qualified as SyncV2 +import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) -syncChunkSize :: Int -syncChunkSize = unsafePerformIO $ do - lookupEnv "UNISON_SYNC_CHUNK_SIZE" <&> \case - Just n -> read n - Nothing -> 50 -{-# NOINLINE syncChunkSize #-} - ------------------------------------------------------------------------------------------------------------------------ -- Pull @@ -82,15 +71,16 @@ pull :: SyncV2.BranchRef -> -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> - Cli (Either (SyncError PullError) CausalHash) + Cli (Either (SyncError SyncV2.PullError) CausalHash) pull unisonShareUrl repoPath branchRef downloadedCallback = - getCausalHashByPath unisonShareUrl branchRef >>= \case + getCausalHash unisonShareUrl branchRef >>= \case Left err -> pure (Left (SyncV2.PullError'GetCausalHash <$> err)) -- There's nothing at the remote path, so there's no causal to pull. - Right Nothing -> pure (Left (SyncError (PullError'NoHistoryAtPath repoPath))) - Right (Just hashJwt) -> - downloadEntities unisonShareUrl (Share.pathRepoInfo repoPath) hashJwt downloadedCallback <&> \case - Left err -> Left (PullError'DownloadEntities <$> err) + Right hashJwt -> do + -- TODO: include known hashes + let knownHashes = Set.empty + downloadEntities unisonShareUrl (Share.pathRepoInfo repoPath) hashJwt knownHashes downloadedCallback <&> \case + Left err -> Left err Right () -> Right (hash32ToCausalHash (Share.hashJWTHash hashJwt)) ------------------------------------------------------------------------------------------------------------------------ @@ -106,62 +96,52 @@ downloadEntities :: Set Hash32 -> -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> - Cli (Either (SyncError Share.DownloadEntitiesError) ()) + Cli (Either (SyncError SyncV2.PullError) ()) downloadEntities unisonShareUrl repoInfo hashJwt knownHashes downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask Cli.label \done -> do - let failed :: SyncError Share.DownloadEntitiesError -> Cli void + let failed :: SyncError SyncV2.PullError -> Cli void failed = done . Left let hash = Share.hashJWTHash hashJwt - maybeTempEntities <- - Cli.runTransaction (Q.entityLocation hash) >>= \case - Just Q.EntityInMainStorage -> pure Nothing - Just Q.EntityInTempStorage -> pure (Just (NESet.singleton hash)) - Nothing -> do - let request = - httpDownloadEntities - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {repoInfo, causalHash = hashJwt, knownHashes} - entities <- - liftIO request >>= \case - Left err -> failed (TransportError err) - Right source -> do - conduit <- liftIO $ Servant.fromSourceIO source - let entityPipeline = conduit .| unpackEntities .| entityValidator .| entityInserter - _ - - tempEntities <- Cli.runTransaction (insertEntities entities) - liftIO (downloadedCallback 1) - pure (NESet.nonEmptySet tempEntities) - - whenJust maybeTempEntities \tempEntities -> do - let doCompleteTempEntities = - completeTempEntities - authHTTPClient - unisonShareUrl - ( \action -> - Codebase.withConnection codebase \conn -> - action (Sqlite.runTransaction conn) - ) - repoInfo - downloadedCallback - tempEntities - liftIO doCompleteTempEntities & onLeftM \err -> - failed err - -- Since we may have just inserted and then deleted many temp entities, we attempt to recover some disk space by - -- vacuuming after each pull. If the vacuum fails due to another open transaction on this connection, that's ok, + Cli.runTransaction (Q.entityLocation hash) >>= \case + Just Q.EntityInMainStorage -> pure () + -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" + _ -> do + let request = + httpDownloadEntities + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {repoInfo, causalHash = hashJwt, knownHashes} + liftIO request >>= \case + Left err -> failed (TransportError err) + Right source -> do + conduit <- liftIO $ Servant.fromSourceIO source + let entityPipeline :: ConduitT () c (ExceptT SyncV2.PullError Cli) () + entityPipeline = conduit .| unpackEntities downloadedCallback .| entityValidator .| entityInserter + runExceptT (Conduit.runConduit entityPipeline) >>= \case + Left err -> failed (SyncError err) + Right () -> pure () + didCausalSuccessfullyImport codebase hash >>= \case + False -> do + failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) + True -> pure () -- we'll try vacuuming again next pull. _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) pure (Right ()) + where + -- Verify that the expected hash made it into main storage. + didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> Cli Bool + didCausalSuccessfullyImport codebase hash = do + let expectedHash = hash32ToCausalHash hash + isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) -entityValidator :: (MonadError Share.EntityValidationError m) => ConduitT (Hash32, (Share.Entity Text Hash32 Hash32)) (Hash32, (Share.Entity Text Hash32 Hash32)) m () +entityValidator :: (MonadError SyncV2.PullError m) => ConduitT (Hash32, TempEntity) (Hash32, TempEntity) m () entityValidator = Conduit.iterM $ \(hash, entity) -> -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. - case EV.validateEntity hash entity of + case EV.validateTempEntity hash entity of Nothing -> pure () Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> let expectedMismatches = case et of @@ -173,264 +153,38 @@ entityValidator = Conduit.iterM $ \(hash, entity) -> Just expected | expected == computed -> pure () _ -> do - throwError err + throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err Just err -> do - throwError err + throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err -entityInserter :: (MonadIO m, MonadReader Cli.Env m) => ConduitT (Hash32, (Share.Entity Text Hash32 Hash32)) () m () -entityInserter = Conduit.mapM \(hash, entity) -> do +entityInserter :: (MonadIO m, MonadReader Cli.Env m) => ConduitT (Hash32, TempEntity) o m () +entityInserter = Conduit.mapM_ \(hash, entity) -> do Cli.Env {codebase} <- ask liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity - pure hash + pure () -unpackEntities :: (MonadError e m) => ConduitT SyncV2.DownloadEntitiesChunk (Hash32, (Share.Entity Text Hash32 Hash32)) m () -unpackEntities = Conduit.mapM_ $ \case - SyncV2.EntityChunk {hash, entityCBOR = SyncV2.CBORBytes entityBytes} -> - case CBOR.deserialiseOrFail entityBytes of +unpackEntities :: (MonadError SyncV2.PullError m, MonadIO m) => (Int -> IO ()) -> ConduitT SyncV2.DownloadEntitiesChunk (Hash32, TempEntity) m () +unpackEntities downloadedCallback = Conduit.mapM $ \case + SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> + case CBOR.deserialiseOrFailCBORBytes entityBytes of Left err -> do - throwError err - Right (_, entity) -> do + throwError (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> do + liftIO (downloadedCallback 1) pure (hash, entity) SyncV2.ErrorChunk {err} -> do - Debug.debugLogM Debug.Sync (show err) - pure () - --- | Validate entities received from the server unless this flag is set to false. -validationEnvKey :: String -validationEnvKey = "UNISON_ENTITY_VALIDATION" - -shouldValidateEntities :: Bool -shouldValidateEntities = unsafePerformIO $ do - lookupEnv validationEnvKey <&> \case - Just "false" -> False - _ -> True -{-# NOINLINE shouldValidateEntities #-} - -type WorkerCount = - TVar Int - -newWorkerCount :: IO WorkerCount -newWorkerCount = - newTVarIO 0 - -recordWorking :: WorkerCount -> STM () -recordWorking sem = - modifyTVar' sem (+ 1) - -recordNotWorking :: WorkerCount -> STM () -recordNotWorking sem = - modifyTVar' sem \n -> n - 1 - --- What the dispatcher is to do -data DispatcherJob - = DispatcherForkWorker (NESet Share.HashJWT) - | DispatcherReturnEarlyBecauseDownloaderFailed (SyncError Share.DownloadEntitiesError) - | DispatcherDone - --- | Finish downloading entities from Unison Share (or return the first failure to download something). --- --- Precondition: the entities were *already* downloaded at some point in the past, and are now sitting in the --- `temp_entity` table, waiting for their dependencies to arrive so they can be flushed to main storage. -completeTempEntities :: - AuthenticatedHttpClient -> - BaseUrl -> - (forall a. ((forall x. Sqlite.Transaction x -> IO x) -> IO a) -> IO a) -> - Share.RepoInfo -> - (Int -> IO ()) -> - NESet Hash32 -> - IO (Either (SyncError Share.DownloadEntitiesError) ()) -completeTempEntities httpClient unisonShareUrl connect repoInfo downloadedCallback initialNewTempEntities = do - -- The set of hashes we still need to download - hashesVar <- newTVarIO Set.empty - - -- The set of hashes that we haven't inserted yet, but will soon, because we've committed to downloading them. - uninsertedHashesVar <- newTVarIO Set.empty - - -- The entities payloads (along with the jwts that we used to download them) that we've downloaded - entitiesQueue <- newTQueueIO - - -- The sets of new (at the time of inserting, anyway) temp entity rows, which we need to elaborate, then download. - newTempEntitiesQueue <- newTQueueIO - - -- How many workers (downloader / inserter / elaborator) are currently doing stuff. - workerCount <- newWorkerCount - - -- The first download error seen by a downloader, if any. - downloaderFailedVar <- newEmptyTMVarIO - - -- Kick off the cycle of inserter->elaborator->dispatcher->downloader by giving the elaborator something to do - atomically (writeTQueue newTempEntitiesQueue (Set.empty, Just initialNewTempEntities)) - - Ki.scoped \scope -> do - Ki.fork_ scope (inserter entitiesQueue newTempEntitiesQueue workerCount) - Ki.fork_ scope (elaborator hashesVar uninsertedHashesVar newTempEntitiesQueue workerCount) - dispatcher hashesVar uninsertedHashesVar entitiesQueue newTempEntitiesQueue workerCount downloaderFailedVar - where - -- Dispatcher thread: "dequeue" from `hashesVar`, fork one-shot downloaders. - -- - -- We stop when either all of the following are true: - -- - -- - There are no outstanding workers (downloaders, inserter, elaboraror) - -- - The inserter thread doesn't have any outstanding work enqueued (in `entitiesQueue`) - -- - The elaborator thread doesn't have any outstanding work enqueued (in `newTempEntitiesQueue`) - -- - -- Or: - -- - -- - Some downloader failed to download something - dispatcher :: - TVar (Set Share.HashJWT) -> - TVar (Set Share.HashJWT) -> - TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> - TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> - WorkerCount -> - TMVar (SyncError Share.DownloadEntitiesError) -> - IO (Either (SyncError Share.DownloadEntitiesError) ()) - dispatcher hashesVar uninsertedHashesVar entitiesQueue newTempEntitiesQueue workerCount downloaderFailedVar = - Ki.scoped \scope -> - let loop :: IO (Either (SyncError Share.DownloadEntitiesError) ()) - loop = - atomically (checkIfDownloaderFailedMode <|> dispatchWorkMode <|> checkIfDoneMode) >>= \case - DispatcherDone -> pure (Right ()) - DispatcherReturnEarlyBecauseDownloaderFailed err -> pure (Left err) - DispatcherForkWorker hashes -> do - atomically do - -- Limit number of simultaneous downloaders (plus 2, for inserter and elaborator) - workers <- readTVar workerCount - check (workers < maxSimultaneousPullDownloaders + 2) - -- we do need to record the downloader as working outside of the worker thread, not inside. - -- otherwise, we might erroneously fall through the teardown logic below and conclude there's - -- nothing more for the dispatcher to do, when in fact a downloader thread just hasn't made it as - -- far as recording its own existence - recordWorking workerCount - _ <- - Ki.fork @() scope do - downloader entitiesQueue workerCount hashes & onLeftM \err -> - void (atomically (tryPutTMVar downloaderFailedVar err)) - loop - in loop - where - checkIfDownloaderFailedMode :: STM DispatcherJob - checkIfDownloaderFailedMode = - DispatcherReturnEarlyBecauseDownloaderFailed <$> readTMVar downloaderFailedVar - - dispatchWorkMode :: STM DispatcherJob - dispatchWorkMode = do - hashes <- readTVar hashesVar - check (not (Set.null hashes)) - let (hashes1, hashes2) = Set.splitAt syncChunkSize hashes - modifyTVar' uninsertedHashesVar (Set.union hashes1) - writeTVar hashesVar hashes2 - pure (DispatcherForkWorker (NESet.unsafeFromSet hashes1)) - - -- Check to see if there are no hashes left to download, no outstanding workers, and no work in either queue - checkIfDoneMode :: STM DispatcherJob - checkIfDoneMode = do - workers <- readTVar workerCount - check (workers == 0) - isEmptyTQueue entitiesQueue >>= check - isEmptyTQueue newTempEntitiesQueue >>= check - pure DispatcherDone - - -- Downloader thread: download entities, (if successful) enqueue to `entitiesQueue` - downloader :: - TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> - WorkerCount -> - NESet Share.HashJWT -> - IO (Either (SyncError Share.DownloadEntitiesError) ()) - downloader entitiesQueue workerCount hashes = do - httpDownloadEntities httpClient unisonShareUrl Share.DownloadEntitiesRequest {repoInfo, hashes} >>= \case - Left err -> do - atomically (recordNotWorking workerCount) - pure (Left (TransportError err)) - Right (Share.DownloadEntitiesFailure err) -> do - atomically (recordNotWorking workerCount) - pure (Left (SyncError err)) - Right (Share.DownloadEntitiesSuccess entities) -> do - downloadedCallback (NESet.size hashes) - case validateEntities entities of - Left err -> pure . Left . SyncError . Share.DownloadEntitiesEntityValidationFailure $ err - Right () -> do - atomically do - writeTQueue entitiesQueue (hashes, entities) - recordNotWorking workerCount - pure (Right ()) - - -- Inserter thread: dequeue from `entitiesQueue`, insert entities, enqueue to `newTempEntitiesQueue` - inserter :: - TQueue (NESet Share.HashJWT, NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT)) -> - TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> - WorkerCount -> - IO Void - inserter entitiesQueue newTempEntitiesQueue workerCount = - connect \runTransaction -> - forever do - (hashJwts, entities) <- - atomically do - entities <- readTQueue entitiesQueue - recordWorking workerCount - pure entities - newTempEntities0 <- - runTransaction do - NEMap.toList entities & foldMapM \(hash, entity) -> - upsertEntitySomewhere hash entity <&> \case - Q.EntityInMainStorage -> Set.empty - Q.EntityInTempStorage -> Set.singleton hash - atomically do - writeTQueue newTempEntitiesQueue (NESet.toSet hashJwts, NESet.nonEmptySet newTempEntities0) - recordNotWorking workerCount - - -- Elaborator thread: dequeue from `newTempEntitiesQueue`, elaborate, "enqueue" to `hashesVar` - elaborator :: - TVar (Set Share.HashJWT) -> - TVar (Set Share.HashJWT) -> - TQueue (Set Share.HashJWT, Maybe (NESet Hash32)) -> - WorkerCount -> - IO Void - elaborator hashesVar uninsertedHashesVar newTempEntitiesQueue workerCount = - connect \runTransaction -> - forever do - maybeNewTempEntities <- - atomically do - (hashJwts, mayNewTempEntities) <- readTQueue newTempEntitiesQueue - -- Avoid unnecessary retaining of these hashes to keep memory usage more stable. This algorithm would - -- still be correct if we never delete from `uninsertedHashes`. - -- - -- We remove the inserted hashes from uninsertedHashesVar at this point rather than right after insertion - -- in order to ensure that no running transaction of the elaborator is viewing a snapshot that precedes - -- the snapshot that inserted those hashes. - modifyTVar' uninsertedHashesVar \uninsertedHashes -> Set.difference uninsertedHashes hashJwts - case mayNewTempEntities of - Nothing -> pure Nothing - Just newTempEntities -> do - recordWorking workerCount - pure (Just newTempEntities) - whenJust maybeNewTempEntities \newTempEntities -> do - newElaboratedHashes <- runTransaction (elaborateHashes newTempEntities) - atomically do - uninsertedHashes <- readTVar uninsertedHashesVar - hashes0 <- readTVar hashesVar - writeTVar hashesVar $! Set.union (Set.difference newElaboratedHashes uninsertedHashes) hashes0 - recordNotWorking workerCount - --- | Insert entities into the database, and return the subset that went into temp storage (`temp_entitiy`) rather than --- of main storage (`object` / `causal`) due to missing dependencies. -insertEntities :: NEMap Hash32 (Share.Entity Text Hash32 Share.HashJWT) -> Sqlite.Transaction (Set Hash32) -insertEntities entities = - NEMap.toList entities & foldMapM \(hash, entity) -> - upsertEntitySomewhere hash entity <&> \case - Q.EntityInMainStorage -> Set.empty - Q.EntityInTempStorage -> Set.singleton hash + throwError (SyncV2.PullError'DownloadEntities err) ------------------------------------------------------------------------------------------------------------------------ -- Get causal hash by path -- | Get the causal hash of a path hosted on Unison Share. -getCausalHashByPath :: +getCausalHash :: -- | The Unison Share URL. BaseUrl -> SyncV2.BranchRef -> - Cli (Either (SyncError SyncV2.GetCausalHashError) (Maybe Share.HashJWT)) -getCausalHashByPath unisonShareUrl branchRef = do + Cli (Either (SyncError SyncV2.GetCausalHashError) Share.HashJWT) +getCausalHash unisonShareUrl branchRef = do Cli.Env {authHTTPClient} <- ask liftIO (httpGetCausalHash authHTTPClient unisonShareUrl (SyncV2.GetCausalHashRequest branchRef)) <&> \case Left err -> Left (TransportError err) @@ -464,17 +218,6 @@ _uploadEntities _unisonShareUrl _repoInfo _hashes0 _uploadedCallback = do ------------------------------------------------------------------------------------------------------------------------ -- Database operations --- | "Elaborate" a set of `temp_entity` hashes. --- --- For each hash, then we ought to instead download its missing dependencies (which themselves are --- elaborated by this same procedure, in case we have any of *them* already in temp storage, too. --- 3. If it's in main storage, we should ignore it. --- --- In the end, we return a set of hashes that correspond to entities we actually need to download. -elaborateHashes :: NESet Hash32 -> Sqlite.Transaction (Set Share.HashJWT) -elaborateHashes hashes = - Q.elaborateHashes (NESet.toList hashes) <&> Set.fromList . coerce @[Text] @[Share.HashJWT] - -- | Upsert a downloaded entity "somewhere" - -- -- 1. Nowhere if we already had the entity (in main or temp storage). @@ -482,33 +225,29 @@ elaborateHashes hashes = -- 3. In temp storage otherwise. upsertEntitySomewhere :: Hash32 -> - Share.Entity Text Hash32 Hash32 -> + TempEntity -> Sqlite.Transaction Q.EntityLocation upsertEntitySomewhere hash entity = Q.entityLocation hash >>= \case Just location -> pure location Nothing -> do missingDependencies1 :: Set Hash32 <- - Share.entityDependencies entity + Share.entityDependencies (tempEntityToEntity entity) & foldMapM ( \depHash -> do Q.entityExists depHash <&> \case True -> mempty False -> Set.singleton depHash ) - case NEMap.nonEmptyMap missingDependencies1 of + case NESet.nonEmptySet missingDependencies1 of Nothing -> do - _id <- Q.saveTempEntityInMain v2HashHandle hash (entityToTempEntity Share.hashJWTHash entity) + _id <- Q.saveTempEntityInMain v2HashHandle hash entity pure Q.EntityInMainStorage Just missingDependencies -> do - Q.insertTempEntity + Q.insertTempEntityV2 hash - (entityToTempEntity Share.hashJWTHash entity) - ( coerce - @(NEMap Hash32 Share.HashJWT) - @(NEMap Hash32 Text) - missingDependencies - ) + entity + missingDependencies pure Q.EntityInTempStorage ------------------------------------------------------------------------------------------------------------------------ @@ -555,7 +294,7 @@ _httpUploadEntities :: hoist :: ServantStreaming.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a hoist m = do clientEnv <- Reader.ask - liftIO (ServantStreaming.runClientM m clientEnv) >>= \case + (lift . lift $ ServantStreaming.withClientM m clientEnv pure) >>= \case Right a -> pure a Left err -> do Debug.debugLogM Debug.Sync (show err) diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 5791c8c42c..a4be1a6afe 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -210,6 +210,7 @@ library , configurator , containers >=0.6.3 , cryptonite + , deepseq , directory , either , errors @@ -253,6 +254,7 @@ library , serialise , servant , servant-client + , servant-conduit , shellmet , stm , template-haskell @@ -354,6 +356,7 @@ executable transcripts , configurator , containers >=0.6.3 , cryptonite + , deepseq , directory , easytest , either @@ -398,6 +401,7 @@ executable transcripts , serialise , servant , servant-client + , servant-conduit , shellmet , silently , stm @@ -505,6 +509,7 @@ test-suite cli-tests , configurator , containers >=0.6.3 , cryptonite + , deepseq , directory , easytest , either @@ -550,6 +555,7 @@ test-suite cli-tests , serialise , servant , servant-client + , servant-conduit , shellmet , stm , template-haskell diff --git a/unison-share-api/src/Unison/Sync/EntityValidation.hs b/unison-share-api/src/Unison/Sync/EntityValidation.hs index 02ad6d8330..4e8c854407 100644 --- a/unison-share-api/src/Unison/Sync/EntityValidation.hs +++ b/unison-share-api/src/Unison/Sync/EntityValidation.hs @@ -4,6 +4,7 @@ -- | Module for validating hashes of entities received/sent via sync. module Unison.Sync.EntityValidation ( validateEntity, + validateTempEntity, ) where @@ -21,6 +22,7 @@ import U.Codebase.Sqlite.HashHandle qualified as HH import U.Codebase.Sqlite.Orphans () import U.Codebase.Sqlite.Patch.Format qualified as PatchFormat import U.Codebase.Sqlite.Serialization qualified as Serialization +import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.Term.Format qualified as TermFormat import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) import Unison.Hash (Hash) @@ -35,7 +37,13 @@ import Unison.Sync.Types qualified as Share -- We should add more validation as more entities are shared. validateEntity :: Hash32 -> Share.Entity Text Hash32 Hash32 -> Maybe Share.EntityValidationError validateEntity expectedHash32 entity = do - case Share.entityToTempEntity id entity of + validateTempEntity expectedHash32 $ Share.entityToTempEntity id entity + +-- | Note: We currently only validate Namespace hashes. +-- We should add more validation as more entities are shared. +validateTempEntity :: Hash32 -> TempEntity -> Maybe Share.EntityValidationError +validateTempEntity expectedHash32 tempEntity = do + case tempEntity of Entity.TC (TermFormat.SyncTerm localComp) -> do validateTerm expectedHash localComp Entity.DC (DeclFormat.SyncDecl localComp) -> do diff --git a/unison-share-api/src/Unison/Sync/Types.hs b/unison-share-api/src/Unison/Sync/Types.hs index df956dfba4..3e52c6e103 100644 --- a/unison-share-api/src/Unison/Sync/Types.hs +++ b/unison-share-api/src/Unison/Sync/Types.hs @@ -617,48 +617,6 @@ data DownloadEntitiesError | DownloadEntitiesEntityValidationFailure EntityValidationError deriving stock (Eq, Show) -data DownloadEntitiesErrorTag - = NoReadPermissionTag - | InvalidRepoInfoTag - | UserNotFoundTag - | ProjectNotFoundTag - | EntityValidationFailureTag - deriving stock (Eq, Show) - -instance Serialise DownloadEntitiesErrorTag where - encode = \case - NoReadPermissionTag -> CBOR.encodeWord8 0 - InvalidRepoInfoTag -> CBOR.encodeWord8 1 - UserNotFoundTag -> CBOR.encodeWord8 2 - ProjectNotFoundTag -> CBOR.encodeWord8 3 - EntityValidationFailureTag -> CBOR.encodeWord8 4 - decode = do - tag <- CBOR.decodeWord8 - case tag of - 0 -> pure NoReadPermissionTag - 1 -> pure InvalidRepoInfoTag - 2 -> pure UserNotFoundTag - 3 -> pure ProjectNotFoundTag - 4 -> pure EntityValidationFailureTag - _ -> fail "invalid tag" - -instance Serialise DownloadEntitiesError where - encode = \case - DownloadEntitiesNoReadPermission repoInfo -> CBOR.encode NoReadPermissionTag <> CBOR.encode repoInfo - DownloadEntitiesInvalidRepoInfo msg repoInfo -> CBOR.encode InvalidRepoInfoTag <> CBOR.encode (msg, repoInfo) - DownloadEntitiesUserNotFound userHandle -> CBOR.encode UserNotFoundTag <> CBOR.encode userHandle - DownloadEntitiesProjectNotFound projectShorthand -> CBOR.encode ProjectNotFoundTag <> CBOR.encode projectShorthand - DownloadEntitiesEntityValidationFailure err -> CBOR.encode EntityValidationFailureTag <> CBOR.encode err - - decode = do - tag <- CBOR.decode - case tag of - NoReadPermissionTag -> DownloadEntitiesNoReadPermission <$> CBOR.decode - InvalidRepoInfoTag -> uncurry DownloadEntitiesInvalidRepoInfo <$> CBOR.decode - UserNotFoundTag -> DownloadEntitiesUserNotFound <$> CBOR.decode - ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode - EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode - instance ToJSON DownloadEntitiesResponse where toJSON = \case DownloadEntitiesSuccess entities -> jsonUnion "success" entities diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 3ea29e1272..ff244606ea 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -4,7 +4,10 @@ module Unison.SyncV2.Types GetCausalHashError (..), DownloadEntitiesRequest (..), DownloadEntitiesChunk (..), + SyncError (..), + DownloadEntitiesError (..), CBORBytes (..), + deserialiseOrFailCBORBytes, UploadEntitiesRequest (..), BranchRef (..), PullError (..), @@ -13,10 +16,13 @@ where import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) +import Codec.Serialise qualified as CBOR import Codec.Serialise.Decoding qualified as CBOR import Data.ByteString.Lazy qualified as BL import Data.Set (Set) import Data.Text (Text) +import U.Codebase.HashTags (CausalHash) +import U.Codebase.Sqlite.TempEntity (TempEntity) import Unison.Hash32 (Hash32) import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) @@ -35,7 +41,7 @@ instance Serialise GetCausalHashRequest where decode = GetCausalHashRequest <$> decode data GetCausalHashResponse - = GetCausalHashSuccess (Maybe HashJWT) + = GetCausalHashSuccess HashJWT | GetCausalHashError GetCausalHashError deriving stock (Show, Eq, Ord) @@ -117,13 +123,72 @@ instance Serialise DownloadEntitiesRequest where -- | Wrapper for CBOR data that has already been serialized. -- In our case, we use this because we may load pre-serialized CBOR directly from the database, -- but it's also useful in allowing us to more quickly seek through a CBOR stream, since we only need to decode the CBOR when/if we actually need to use it, and can skip past it using a byte offset otherwise. -newtype CBORBytes = CBORBytes BL.ByteString +-- +-- The 't' phantom type is the type of the data encoded in the bytestring. +newtype CBORBytes t = CBORBytes BL.ByteString deriving (Serialise) via (BL.ByteString) +-- | Deserialize a 'CBORBytes' value into its tagged type, throwing an error if the deserialization fails. +deserialiseOrFailCBORBytes :: (Serialise t) => CBORBytes t -> Either CBOR.DeserialiseFailure t +deserialiseOrFailCBORBytes (CBORBytes bs) = CBOR.deserialiseOrFail bs + +data DownloadEntitiesError + = DownloadEntitiesNoReadPermission SyncV1.RepoInfo + | -- | msg, repoInfo + DownloadEntitiesInvalidRepoInfo Text SyncV1.RepoInfo + | -- | userHandle + DownloadEntitiesUserNotFound Text + | -- | project shorthand + DownloadEntitiesProjectNotFound Text + | DownloadEntitiesEntityValidationFailure SyncV1.EntityValidationError + deriving stock (Eq, Show) + +data DownloadEntitiesErrorTag + = NoReadPermissionTag + | InvalidRepoInfoTag + | UserNotFoundTag + | ProjectNotFoundTag + | EntityValidationFailureTag + deriving stock (Eq, Show) + +instance Serialise DownloadEntitiesErrorTag where + encode = \case + NoReadPermissionTag -> CBOR.encodeWord8 0 + InvalidRepoInfoTag -> CBOR.encodeWord8 1 + UserNotFoundTag -> CBOR.encodeWord8 2 + ProjectNotFoundTag -> CBOR.encodeWord8 3 + EntityValidationFailureTag -> CBOR.encodeWord8 4 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure NoReadPermissionTag + 1 -> pure InvalidRepoInfoTag + 2 -> pure UserNotFoundTag + 3 -> pure ProjectNotFoundTag + 4 -> pure EntityValidationFailureTag + _ -> fail "invalid tag" + +instance Serialise DownloadEntitiesError where + encode = \case + DownloadEntitiesNoReadPermission repoInfo -> CBOR.encode NoReadPermissionTag <> CBOR.encode repoInfo + DownloadEntitiesInvalidRepoInfo msg repoInfo -> CBOR.encode InvalidRepoInfoTag <> CBOR.encode (msg, repoInfo) + DownloadEntitiesUserNotFound userHandle -> CBOR.encode UserNotFoundTag <> CBOR.encode userHandle + DownloadEntitiesProjectNotFound projectShorthand -> CBOR.encode ProjectNotFoundTag <> CBOR.encode projectShorthand + DownloadEntitiesEntityValidationFailure err -> CBOR.encode EntityValidationFailureTag <> CBOR.encode err + + decode = do + tag <- CBOR.decode + case tag of + NoReadPermissionTag -> DownloadEntitiesNoReadPermission <$> CBOR.decode + InvalidRepoInfoTag -> uncurry DownloadEntitiesInvalidRepoInfo <$> CBOR.decode + UserNotFoundTag -> DownloadEntitiesUserNotFound <$> CBOR.decode + ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode + EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode + -- | A chunk of the download entities response stream. data DownloadEntitiesChunk - = EntityChunk {hash :: Hash32, entityCBOR :: CBORBytes} - | ErrorChunk {err :: SyncV1.DownloadEntitiesError} + = EntityChunk {hash :: Hash32, entityCBOR :: CBORBytes TempEntity} + | ErrorChunk {err :: DownloadEntitiesError} data DownloadEntitiesChunkTag = EntityChunkTag | ErrorChunkTag @@ -151,8 +216,18 @@ instance Serialise DownloadEntitiesChunk where -- TODO data UploadEntitiesRequest = UploadEntitiesRequest +instance Serialise UploadEntitiesRequest where + encode _ = mempty + decode = pure UploadEntitiesRequest + -- | An error occurred while pulling code from Unison Share. data PullError - = PullError'DownloadEntities SyncV1.DownloadEntitiesError + = PullError'DownloadEntities DownloadEntitiesError | PullError'GetCausalHash GetCausalHashError + | PullError'Sync SyncError + deriving stock (Show) + +data SyncError + = SyncErrorExpectedResultNotInMain CausalHash + | SyncErrorDeserializationFailure CBOR.DeserialiseFailure deriving stock (Show) From 3aee5e7c439f0275422fd9698b9b4d46b0bcbd4b Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 12 Aug 2024 15:22:00 -0700 Subject: [PATCH 08/52] SyncV2 WIP --- unison-cli/src/Unison/Share/SyncV2.hs | 207 ++++++-------------- unison-share-api/src/Unison/SyncV2/API.hs | 13 +- unison-share-api/src/Unison/SyncV2/Types.hs | 126 +++++------- 3 files changed, 101 insertions(+), 245 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 4d6f9ebc1e..a14c524e1f 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -2,14 +2,7 @@ {-# LANGUAGE TypeOperators #-} module Unison.Share.SyncV2 - ( -- ** Get causal hash by path - getCausalHash, - GetCausalHashByPathError (..), - - -- ** Pull/Download - pull, - PullError (..), - downloadEntities, + ( downloadEntities, ) where @@ -25,7 +18,6 @@ import Data.Conduit.Combinators qualified as Conduit import Data.Map qualified as Map import Data.Proxy import Data.Set qualified as Set -import Data.Set.NonEmpty (NESet) import Data.Set.NonEmpty qualified as NESet import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy @@ -37,7 +29,6 @@ import Servant.Client qualified as Servant import Servant.Client.Streaming qualified as ServantStreaming import Servant.Conduit () import Servant.Types.SourceT qualified as Servant -import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) @@ -60,44 +51,21 @@ import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) ------------------------------------------------------------------------------------------------------------------------- --- Pull - -pull :: - -- | The Unison Share URL. - BaseUrl -> - -- | The repo+path to pull from. - Share.Path -> - SyncV2.BranchRef -> - -- | Callback that's given a number of entities we just downloaded. - (Int -> IO ()) -> - Cli (Either (SyncError SyncV2.PullError) CausalHash) -pull unisonShareUrl repoPath branchRef downloadedCallback = - getCausalHash unisonShareUrl branchRef >>= \case - Left err -> pure (Left (SyncV2.PullError'GetCausalHash <$> err)) - -- There's nothing at the remote path, so there's no causal to pull. - Right hashJwt -> do - -- TODO: include known hashes - let knownHashes = Set.empty - downloadEntities unisonShareUrl (Share.pathRepoInfo repoPath) hashJwt knownHashes downloadedCallback <&> \case - Left err -> Left err - Right () -> Right (hash32ToCausalHash (Share.hashJWTHash hashJwt)) - ------------------------------------------------------------------------------------------------------------------------ -- Download entities downloadEntities :: -- | The Unison Share URL. BaseUrl -> - -- | The repo to download from. - Share.RepoInfo -> + -- | The branch to download from. + SyncV2.BranchRef -> -- | The hash to download. Share.HashJWT -> Set Hash32 -> -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -downloadEntities unisonShareUrl repoInfo hashJwt knownHashes downloadedCallback = do +downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask Cli.label \done -> do @@ -114,7 +82,7 @@ downloadEntities unisonShareUrl repoInfo hashJwt knownHashes downloadedCallback httpDownloadEntities authHTTPClient unisonShareUrl - SyncV2.DownloadEntitiesRequest {repoInfo, causalHash = hashJwt, knownHashes} + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} liftIO request >>= \case Left err -> failed (TransportError err) Right source -> do @@ -175,46 +143,6 @@ unpackEntities downloadedCallback = Conduit.mapM $ \case SyncV2.ErrorChunk {err} -> do throwError (SyncV2.PullError'DownloadEntities err) ------------------------------------------------------------------------------------------------------------------------- --- Get causal hash by path - --- | Get the causal hash of a path hosted on Unison Share. -getCausalHash :: - -- | The Unison Share URL. - BaseUrl -> - SyncV2.BranchRef -> - Cli (Either (SyncError SyncV2.GetCausalHashError) Share.HashJWT) -getCausalHash unisonShareUrl branchRef = do - Cli.Env {authHTTPClient} <- ask - liftIO (httpGetCausalHash authHTTPClient unisonShareUrl (SyncV2.GetCausalHashRequest branchRef)) <&> \case - Left err -> Left (TransportError err) - Right (SyncV2.GetCausalHashSuccess hashJWT) -> Right hashJWT - Right (SyncV2.GetCausalHashError err) -> - case err of - SyncV2.GetCausalHashNoReadPermission repoInfo -> - Left (SyncError (SyncV2.GetCausalHashNoReadPermission repoInfo)) - SyncV2.GetCausalHashInvalidRepoInfo err repoInfo -> - Left (SyncError (SyncV2.GetCausalHashInvalidRepoInfo err repoInfo)) - SyncV2.GetCausalHashUserNotFound -> - Left (SyncError SyncV2.GetCausalHashUserNotFound) - ------------------------------------------------------------------------------------------------------------------------- --- Upload entities - --- | Upload a set of entities to Unison Share. If the server responds that it cannot yet store any hash(es) due to --- missing dependencies, send those dependencies too, and on and on, until the server stops responding that it's missing --- anything. --- --- Returns true on success, false on failure (because the user does not have write permission). -_uploadEntities :: - BaseUrl -> - Share.RepoInfo -> - NESet Hash32 -> - (Int -> IO ()) -> - Cli (Either (SyncError Share.UploadEntitiesError) ()) -_uploadEntities _unisonShareUrl _repoInfo _hashes0 _uploadedCallback = do - error "syncv2 uploadEntities not implemented" - ------------------------------------------------------------------------------------------------------------------------ -- Database operations @@ -253,82 +181,59 @@ upsertEntitySomewhere hash entity = ------------------------------------------------------------------------------------------------------------------------ -- HTTP calls --- syncV2Client :: SyncV2.Routes (Servant.AsClientT ServantStreaming.ClientM) --- syncV2Client = ServantStreaming.client SyncV2.api -- remember: api :: Proxy MovieCatalogAPI - --- syncV2Client :: SyncV2.Routes (Servant.AsClientT ServantStreaming.ClientM) --- Routes {} = ServantStreaming.client SyncV2.api - -httpGetCausalHash :: - Auth.AuthenticatedHttpClient -> - BaseUrl -> - SyncV2.GetCausalHashRequest -> - IO (Either CodeserverTransportError SyncV2.GetCausalHashResponse) httpDownloadEntities :: Auth.AuthenticatedHttpClient -> BaseUrl -> SyncV2.DownloadEntitiesRequest -> (IO (Either CodeserverTransportError (Servant.SourceT IO SyncV2.DownloadEntitiesChunk))) -_httpUploadEntities :: - Auth.AuthenticatedHttpClient -> - BaseUrl -> - SyncV2.UploadEntitiesRequest -> - IO (Either CodeserverTransportError (Servant.SourceT IO ByteString)) -( httpGetCausalHash, - httpDownloadEntities, - _httpUploadEntities - ) = - let SyncV2.Routes - { getCausalHash, - downloadEntitiesStream, - uploadEntitiesStream - } = - let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> SyncV2.API) - pp = Proxy - in ServantStreaming.hoistClient pp hoist (ServantStreaming.client SyncV2.api) - in ( go getCausalHash, - go downloadEntitiesStream, - go uploadEntitiesStream - ) - where - hoist :: ServantStreaming.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a - hoist m = do - clientEnv <- Reader.ask - (lift . lift $ ServantStreaming.withClientM m clientEnv pure) >>= \case - Right a -> pure a - Left err -> do - Debug.debugLogM Debug.Sync (show err) - throwError case err of - Servant.FailureResponse _req resp -> - case HTTP.statusCode $ Servant.responseStatusCode resp of - 401 -> Unauthenticated (Servant.baseUrl clientEnv) - -- The server should provide semantically relevant permission-denied messages - -- when possible, but this should catch any we miss. - 403 -> PermissionDenied (Text.Lazy.toStrict . Text.Lazy.decodeUtf8 $ Servant.responseBody resp) - 408 -> Timeout - 429 -> RateLimitExceeded - 504 -> Timeout - _ -> UnexpectedResponse resp - Servant.DecodeFailure msg resp -> DecodeFailure msg resp - Servant.UnsupportedContentType _ct resp -> UnexpectedResponse resp - Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp - Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) - - go :: - (req -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) resp) -> - Auth.AuthenticatedHttpClient -> - BaseUrl -> - req -> - IO (Either CodeserverTransportError resp) - go f (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = - (Servant.mkClientEnv httpClient unisonShareUrl) - { Servant.makeClientRequest = \url request -> - -- Disable client-side timeouts - (Servant.defaultMakeClientRequest url request) - <&> \r -> - r - { Http.Client.responseTimeout = Http.Client.responseTimeoutNone - } - } - & runReaderT (f req) - & runExceptT +httpDownloadEntities = + let SyncV2.Routes + { downloadEntitiesStream + } = + let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> SyncV2.API) + pp = Proxy + in ServantStreaming.hoistClient pp hoist (ServantStreaming.client SyncV2.api) + in ( go downloadEntitiesStream + ) + where + hoist :: ServantStreaming.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a + hoist m = do + clientEnv <- Reader.ask + (lift . lift $ ServantStreaming.withClientM m clientEnv pure) >>= \case + Right a -> pure a + Left err -> do + Debug.debugLogM Debug.Sync (show err) + throwError case err of + Servant.FailureResponse _req resp -> + case HTTP.statusCode $ Servant.responseStatusCode resp of + 401 -> Unauthenticated (Servant.baseUrl clientEnv) + -- The server should provide semantically relevant permission-denied messages + -- when possible, but this should catch any we miss. + 403 -> PermissionDenied (Text.Lazy.toStrict . Text.Lazy.decodeUtf8 $ Servant.responseBody resp) + 408 -> Timeout + 429 -> RateLimitExceeded + 504 -> Timeout + _ -> UnexpectedResponse resp + Servant.DecodeFailure msg resp -> DecodeFailure msg resp + Servant.UnsupportedContentType _ct resp -> UnexpectedResponse resp + Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp + Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) + + go :: + (req -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) resp) -> + Auth.AuthenticatedHttpClient -> + BaseUrl -> + req -> + IO (Either CodeserverTransportError resp) + go f (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = + (Servant.mkClientEnv httpClient unisonShareUrl) + { Servant.makeClientRequest = \url request -> + -- Disable client-side timeouts + (Servant.defaultMakeClientRequest url request) + <&> \r -> + r + { Http.Client.responseTimeout = Http.Client.responseTimeoutNone + } + } + & runReaderT (f req) + & runExceptT diff --git a/unison-share-api/src/Unison/SyncV2/API.hs b/unison-share-api/src/Unison/SyncV2/API.hs index 6562185d62..84bf6f17b8 100644 --- a/unison-share-api/src/Unison/SyncV2/API.hs +++ b/unison-share-api/src/Unison/SyncV2/API.hs @@ -7,7 +7,6 @@ module Unison.SyncV2.API ) where -import Data.ByteString (ByteString) import Data.Proxy import GHC.Generics (Generic) import Servant.API @@ -24,17 +23,7 @@ type DownloadEntitiesStream = ReqBody '[CBOR] DownloadEntitiesRequest :> StreamPost NoFraming CBOR (SourceIO DownloadEntitiesChunk) -type UploadEntitiesStream = - ReqBody '[CBOR] UploadEntitiesRequest - :> StreamPost NoFraming OctetStream (SourceIO ByteString) - -type GetCausalHashEndpoint = - ReqBody '[CBOR] GetCausalHashRequest - :> Post '[CBOR] GetCausalHashResponse - data Routes mode = Routes - { getCausalHash :: mode :- "path" :> "get" :> GetCausalHashEndpoint, - downloadEntitiesStream :: mode :- "entities" :> "download" :> DownloadEntitiesStream, - uploadEntitiesStream :: mode :- "entities" :> "upload" :> UploadEntitiesStream + { downloadEntitiesStream :: mode :- "entities" :> "download" :> DownloadEntitiesStream } deriving stock (Generic) diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index ff244606ea..cf40801b67 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -1,12 +1,10 @@ module Unison.SyncV2.Types - ( GetCausalHashRequest (..), - GetCausalHashResponse (..), - GetCausalHashError (..), - DownloadEntitiesRequest (..), + ( DownloadEntitiesRequest (..), DownloadEntitiesChunk (..), SyncError (..), DownloadEntitiesError (..), CBORBytes (..), + EntityKind (..), deserialiseOrFailCBORBytes, UploadEntitiesRequest (..), BranchRef (..), @@ -28,96 +26,36 @@ import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) import Unison.Sync.Types qualified as SyncV1 -newtype BranchRef = BranchRef Text - deriving (Serialise) via Text - -data GetCausalHashRequest = GetCausalHashRequest - { branchRef :: BranchRef - } - -instance Serialise GetCausalHashRequest where - encode (GetCausalHashRequest {branchRef}) = - encode branchRef - decode = GetCausalHashRequest <$> decode - -data GetCausalHashResponse - = GetCausalHashSuccess HashJWT - | GetCausalHashError GetCausalHashError - deriving stock (Show, Eq, Ord) - -instance Serialise GetCausalHashResponse where - encode (GetCausalHashSuccess hash) = - encode GetCausalHashSuccessTag <> encode hash - encode (GetCausalHashError err) = - encode GetCausalHashErrorTag <> encode err - decode = do - tag <- decode - case tag of - GetCausalHashSuccessTag -> GetCausalHashSuccess <$> decode - GetCausalHashErrorTag -> GetCausalHashError <$> decode - -data GetCausalHashResponseTag - = GetCausalHashSuccessTag - | GetCausalHashErrorTag - deriving stock (Show, Eq, Ord) - -instance Serialise GetCausalHashResponseTag where - encode GetCausalHashSuccessTag = CBOR.encodeWord8 0 - encode GetCausalHashErrorTag = CBOR.encodeWord8 1 - decode = do - tag <- CBOR.decodeWord8 - case tag of - 0 -> pure GetCausalHashSuccessTag - 1 -> pure GetCausalHashErrorTag - _ -> fail "invalid tag" - -data GetCausalHashError - = GetCausalHashNoReadPermission SyncV1.RepoInfo - | GetCausalHashUserNotFound - | GetCausalHashInvalidRepoInfo Text SyncV1.RepoInfo - deriving stock (Show, Eq, Ord) - -instance Serialise GetCausalHashError where - encode (GetCausalHashNoReadPermission repoInfo) = - encode GetCausalHashNoReadPermissionTag <> encode repoInfo - encode GetCausalHashUserNotFound = - encode GetCausalHashUserNotFoundTag - encode (GetCausalHashInvalidRepoInfo err repoInfo) = - encode GetCausalHashInvalidRepoInfoTag <> encode err <> encode repoInfo - decode = do - tag <- decode - case tag of - GetCausalHashNoReadPermissionTag -> GetCausalHashNoReadPermission <$> decode - GetCausalHashUserNotFoundTag -> pure GetCausalHashUserNotFound - GetCausalHashInvalidRepoInfoTag -> GetCausalHashInvalidRepoInfo <$> decode <*> decode +newtype BranchRef = BranchRef {unBranchRef :: Text} + deriving (Serialise, Eq, Show, Ord) via Text data GetCausalHashErrorTag = GetCausalHashNoReadPermissionTag | GetCausalHashUserNotFoundTag - | GetCausalHashInvalidRepoInfoTag + | GetCausalHashInvalidBranchRefTag deriving stock (Show, Eq, Ord) instance Serialise GetCausalHashErrorTag where encode GetCausalHashNoReadPermissionTag = CBOR.encodeWord8 0 encode GetCausalHashUserNotFoundTag = CBOR.encodeWord8 1 - encode GetCausalHashInvalidRepoInfoTag = CBOR.encodeWord8 2 + encode GetCausalHashInvalidBranchRefTag = CBOR.encodeWord8 2 decode = do tag <- CBOR.decodeWord8 case tag of 0 -> pure GetCausalHashNoReadPermissionTag 1 -> pure GetCausalHashUserNotFoundTag - 2 -> pure GetCausalHashInvalidRepoInfoTag + 2 -> pure GetCausalHashInvalidBranchRefTag _ -> fail "invalid tag" data DownloadEntitiesRequest = DownloadEntitiesRequest { causalHash :: HashJWT, - repoInfo :: SyncV1.RepoInfo, + branchRef :: BranchRef, knownHashes :: Set Hash32 } instance Serialise DownloadEntitiesRequest where - encode (DownloadEntitiesRequest {causalHash, repoInfo, knownHashes}) = - encode causalHash <> encode repoInfo <> encode knownHashes + encode (DownloadEntitiesRequest {causalHash, branchRef, knownHashes}) = + encode causalHash <> encode branchRef <> encode knownHashes decode = DownloadEntitiesRequest <$> decode <*> decode <*> decode -- | Wrapper for CBOR data that has already been serialized. @@ -133,9 +71,9 @@ deserialiseOrFailCBORBytes :: (Serialise t) => CBORBytes t -> Either CBOR.Deseri deserialiseOrFailCBORBytes (CBORBytes bs) = CBOR.deserialiseOrFail bs data DownloadEntitiesError - = DownloadEntitiesNoReadPermission SyncV1.RepoInfo - | -- | msg, repoInfo - DownloadEntitiesInvalidRepoInfo Text SyncV1.RepoInfo + = DownloadEntitiesNoReadPermission BranchRef + | -- | msg, branchRef + DownloadEntitiesInvalidBranchRef Text BranchRef | -- | userHandle DownloadEntitiesUserNotFound Text | -- | project shorthand @@ -145,7 +83,7 @@ data DownloadEntitiesError data DownloadEntitiesErrorTag = NoReadPermissionTag - | InvalidRepoInfoTag + | InvalidBranchRefTag | UserNotFoundTag | ProjectNotFoundTag | EntityValidationFailureTag @@ -154,7 +92,7 @@ data DownloadEntitiesErrorTag instance Serialise DownloadEntitiesErrorTag where encode = \case NoReadPermissionTag -> CBOR.encodeWord8 0 - InvalidRepoInfoTag -> CBOR.encodeWord8 1 + InvalidBranchRefTag -> CBOR.encodeWord8 1 UserNotFoundTag -> CBOR.encodeWord8 2 ProjectNotFoundTag -> CBOR.encodeWord8 3 EntityValidationFailureTag -> CBOR.encodeWord8 4 @@ -162,7 +100,7 @@ instance Serialise DownloadEntitiesErrorTag where tag <- CBOR.decodeWord8 case tag of 0 -> pure NoReadPermissionTag - 1 -> pure InvalidRepoInfoTag + 1 -> pure InvalidBranchRefTag 2 -> pure UserNotFoundTag 3 -> pure ProjectNotFoundTag 4 -> pure EntityValidationFailureTag @@ -170,8 +108,8 @@ instance Serialise DownloadEntitiesErrorTag where instance Serialise DownloadEntitiesError where encode = \case - DownloadEntitiesNoReadPermission repoInfo -> CBOR.encode NoReadPermissionTag <> CBOR.encode repoInfo - DownloadEntitiesInvalidRepoInfo msg repoInfo -> CBOR.encode InvalidRepoInfoTag <> CBOR.encode (msg, repoInfo) + DownloadEntitiesNoReadPermission branchRef -> CBOR.encode NoReadPermissionTag <> CBOR.encode branchRef + DownloadEntitiesInvalidBranchRef msg branchRef -> CBOR.encode InvalidBranchRefTag <> CBOR.encode (msg, branchRef) DownloadEntitiesUserNotFound userHandle -> CBOR.encode UserNotFoundTag <> CBOR.encode userHandle DownloadEntitiesProjectNotFound projectShorthand -> CBOR.encode ProjectNotFoundTag <> CBOR.encode projectShorthand DownloadEntitiesEntityValidationFailure err -> CBOR.encode EntityValidationFailureTag <> CBOR.encode err @@ -180,7 +118,7 @@ instance Serialise DownloadEntitiesError where tag <- CBOR.decode case tag of NoReadPermissionTag -> DownloadEntitiesNoReadPermission <$> CBOR.decode - InvalidRepoInfoTag -> uncurry DownloadEntitiesInvalidRepoInfo <$> CBOR.decode + InvalidBranchRefTag -> uncurry DownloadEntitiesInvalidBranchRef <$> CBOR.decode UserNotFoundTag -> DownloadEntitiesUserNotFound <$> CBOR.decode ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode @@ -223,7 +161,6 @@ instance Serialise UploadEntitiesRequest where -- | An error occurred while pulling code from Unison Share. data PullError = PullError'DownloadEntities DownloadEntitiesError - | PullError'GetCausalHash GetCausalHashError | PullError'Sync SyncError deriving stock (Show) @@ -231,3 +168,28 @@ data SyncError = SyncErrorExpectedResultNotInMain CausalHash | SyncErrorDeserializationFailure CBOR.DeserialiseFailure deriving stock (Show) + +data EntityKind + = CausalEntity + | NamespaceEntity + | TermEntity + | TypeEntity + | PatchEntity + deriving (Show, Eq, Ord) + +instance Serialise EntityKind where + encode = \case + CausalEntity -> CBOR.encodeWord8 0 + NamespaceEntity -> CBOR.encodeWord8 1 + TermEntity -> CBOR.encodeWord8 2 + TypeEntity -> CBOR.encodeWord8 3 + PatchEntity -> CBOR.encodeWord8 4 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure CausalEntity + 1 -> pure NamespaceEntity + 2 -> pure TermEntity + 3 -> pure TypeEntity + 4 -> pure PatchEntity + _ -> fail "invalid tag" From b4d7ea8b5e3056917a0d6d5f0caf1a66bac088c5 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 12 Aug 2024 15:38:25 -0700 Subject: [PATCH 09/52] Fix exports --- unison-share-api/src/Unison/SyncV2/Types.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index cf40801b67..845401f43e 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -5,6 +5,7 @@ module Unison.SyncV2.Types DownloadEntitiesError (..), CBORBytes (..), EntityKind (..), + serialiseCBORBytes, deserialiseOrFailCBORBytes, UploadEntitiesRequest (..), BranchRef (..), @@ -70,6 +71,9 @@ newtype CBORBytes t = CBORBytes BL.ByteString deserialiseOrFailCBORBytes :: (Serialise t) => CBORBytes t -> Either CBOR.DeserialiseFailure t deserialiseOrFailCBORBytes (CBORBytes bs) = CBOR.deserialiseOrFail bs +serialiseCBORBytes :: (Serialise t) => t -> CBORBytes t +serialiseCBORBytes = CBORBytes . CBOR.serialise + data DownloadEntitiesError = DownloadEntitiesNoReadPermission BranchRef | -- | msg, branchRef From 109248df7ccf4352d3c6427ebcebacd9b95dec9d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 12 Aug 2024 16:13:21 -0700 Subject: [PATCH 10/52] WIP --- unison-cli/src/Unison/Cli/DownloadUtils.hs | 14 +++++++++++--- unison-cli/src/Unison/Share/SyncV2.hs | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index 343ebfeeb5..1b8932ad8b 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -10,6 +10,7 @@ where import Control.Concurrent.STM (atomically) import Control.Concurrent.STM.TVar (modifyTVar', newTVarIO, readTVar, readTVarIO) import Data.List.NonEmpty (pattern (:|)) +import Data.Set qualified as Set import System.Console.Regions qualified as Console.Regions import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Queries @@ -28,9 +29,11 @@ import Unison.Share.API.Hash qualified as Share import Unison.Share.Codeserver qualified as Codeserver import Unison.Share.Sync qualified as Share import Unison.Share.Sync.Types qualified as Share +import Unison.Share.SyncV2 qualified as SyncV2 import Unison.Share.Types (codeserverBaseURL) import Unison.Sync.Common qualified as Sync.Common import Unison.Sync.Types qualified as Share +import Unison.SyncV2.Types qualified as SyncV2 -- | Download a project/branch from Share. downloadProjectBranchFromShare :: @@ -41,7 +44,6 @@ downloadProjectBranchFromShare :: downloadProjectBranchFromShare useSquashed branch = Cli.labelE \done -> do let remoteProjectBranchName = branch.branchName - let repoInfo = Share.RepoInfo (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) causalHashJwt <- case (useSquashed, branch.squashedBranchHead) of (Share.IncludeSquashedHead, Nothing) -> done Output.ShareExpectedSquashedHead @@ -51,12 +53,18 @@ downloadProjectBranchFromShare useSquashed branch = when (not exists) do (result, numDownloaded) <- Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do - result <- Share.downloadEntities Share.hardCodedBaseUrl repoInfo causalHashJwt downloadedCallback + let branchRef = SyncV2.BranchRef (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) + -- TODO: Fill this in. + let knownHashes = Set.empty + result <- SyncV2.downloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback numDownloaded <- liftIO getNumDownloaded pure (result, numDownloaded) result & onLeft \err0 -> do done case err0 of - Share.SyncError err -> Output.ShareErrorDownloadEntities err + Share.SyncError err -> + -- TODO: Fix this + error (show err) + -- Output.ShareErrorDownloadEntities err Share.TransportError err -> Output.ShareErrorTransport err Cli.respond (Output.DownloadedEntities numDownloaded) pure (Sync.Common.hash32ToCausalHash (Share.hashJWTHash causalHashJwt)) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index a14c524e1f..51133bad26 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -190,9 +190,9 @@ httpDownloadEntities = let SyncV2.Routes { downloadEntitiesStream } = - let pp :: Proxy ("ucm" Servant.:> "v1" Servant.:> "sync" Servant.:> SyncV2.API) + let pp :: Proxy ("ucm" Servant.:> "v2" Servant.:> "sync" Servant.:> SyncV2.API) pp = Proxy - in ServantStreaming.hoistClient pp hoist (ServantStreaming.client SyncV2.api) + in ServantStreaming.hoistClient pp hoist (ServantStreaming.client pp) in ( go downloadEntitiesStream ) where From dd5aee906c27f1048078e672329bdba4ba7ed20a Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 13 Aug 2024 00:39:57 -0700 Subject: [PATCH 11/52] Fix use of withStream --- unison-cli/src/Unison/Share/SyncV2.hs | 164 ++++++++++---------- unison-share-api/src/Unison/SyncV2/API.hs | 4 +- unison-share-api/src/Unison/SyncV2/Types.hs | 18 ++- 3 files changed, 105 insertions(+), 81 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 51133bad26..a4f9edf9c0 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -9,9 +9,7 @@ where import Conduit (ConduitT) import Control.Lens import Control.Monad.Except -import Control.Monad.Reader (MonadReader, ask) -import Control.Monad.Trans.Reader (ReaderT, runReaderT) -import Control.Monad.Trans.Reader qualified as Reader +import Control.Monad.Reader (ask) import Data.Conduit ((.|)) import Data.Conduit qualified as Conduit import Data.Conduit.Combinators qualified as Conduit @@ -24,9 +22,7 @@ import Data.Text.Lazy.Encoding qualified as Text.Lazy import Network.HTTP.Client qualified as Http.Client import Network.HTTP.Types qualified as HTTP import Servant.API qualified as Servant -import Servant.Client (BaseUrl) -import Servant.Client qualified as Servant -import Servant.Client.Streaming qualified as ServantStreaming +import Servant.Client.Streaming qualified as Servant import Servant.Conduit () import Servant.Types.SourceT qualified as Servant import U.Codebase.Sqlite.Queries qualified as Q @@ -46,6 +42,7 @@ import Unison.Sqlite qualified as Sqlite import Unison.Sync.Common (hash32ToCausalHash, tempEntityToEntity) import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share +import Unison.SyncV2.API (Routes (downloadEntitiesStream)) import Unison.SyncV2.API qualified as SyncV2 import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 @@ -56,7 +53,7 @@ import Unison.Util.Monoid (foldMapM) downloadEntities :: -- | The Unison Share URL. - BaseUrl -> + Servant.BaseUrl -> -- | The branch to download from. SyncV2.BranchRef -> -- | The hash to download. @@ -78,20 +75,24 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback Just Q.EntityInMainStorage -> pure () -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do - let request = - httpDownloadEntities - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - liftIO request >>= \case + result <- liftIO $ + httpDownloadEntities + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} + \conduit -> do + Debug.debugLogM Debug.Temp $ "Kicking off sync request" + let entityPipeline :: ConduitT () c (ExceptT SyncV2.PullError IO) () + entityPipeline = Conduit.transPipe liftIO conduit .| unpackEntities downloadedCallback .| entityValidator .| entityInserter codebase + Debug.debugLogM Debug.Temp $ "Running conduit" + runExceptT (Conduit.runConduit entityPipeline) + -- >>= \case + -- Left err -> failed (SyncError err) + -- Right () -> pure () + case result of Left err -> failed (TransportError err) - Right source -> do - conduit <- liftIO $ Servant.fromSourceIO source - let entityPipeline :: ConduitT () c (ExceptT SyncV2.PullError Cli) () - entityPipeline = conduit .| unpackEntities downloadedCallback .| entityValidator .| entityInserter - runExceptT (Conduit.runConduit entityPipeline) >>= \case - Left err -> failed (SyncError err) - Right () -> pure () + Right (Left syncErr) -> failed (SyncError syncErr) + Right (Right ()) -> pure () didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -107,7 +108,9 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) entityValidator :: (MonadError SyncV2.PullError m) => ConduitT (Hash32, TempEntity) (Hash32, TempEntity) m () -entityValidator = Conduit.iterM $ \(hash, entity) -> +entityValidator = Conduit.iterM $ \(hash, entity) -> do + Debug.debugLogM Debug.Temp $ "Validating entity" + -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. case EV.validateTempEntity hash entity of Nothing -> pure () @@ -125,15 +128,16 @@ entityValidator = Conduit.iterM $ \(hash, entity) -> Just err -> do throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err -entityInserter :: (MonadIO m, MonadReader Cli.Env m) => ConduitT (Hash32, TempEntity) o m () -entityInserter = Conduit.mapM_ \(hash, entity) -> do - Cli.Env {codebase} <- ask +entityInserter :: (MonadIO m) => Codebase.Codebase IO v a -> ConduitT (Hash32, TempEntity) o m () +entityInserter codebase = Conduit.mapM_ \(hash, entity) -> do + Debug.debugLogM Debug.Temp $ "Inserting entity" liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity pure () unpackEntities :: (MonadError SyncV2.PullError m, MonadIO m) => (Int -> IO ()) -> ConduitT SyncV2.DownloadEntitiesChunk (Hash32, TempEntity) m () unpackEntities downloadedCallback = Conduit.mapM $ \case - SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> + SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do + Debug.debugLogM Debug.Temp $ "Got entity chunk" case CBOR.deserialiseOrFailCBORBytes entityBytes of Left err -> do throwError (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) @@ -141,6 +145,7 @@ unpackEntities downloadedCallback = Conduit.mapM $ \case liftIO (downloadedCallback 1) pure (hash, entity) SyncV2.ErrorChunk {err} -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" throwError (SyncV2.PullError'DownloadEntities err) ------------------------------------------------------------------------------------------------------------------------ @@ -181,59 +186,62 @@ upsertEntitySomewhere hash entity = ------------------------------------------------------------------------------------------------------------------------ -- HTTP calls +type SyncAPI = ("ucm" Servant.:> "v2" Servant.:> "sync" Servant.:> SyncV2.API) + +syncAPI :: Proxy SyncAPI +syncAPI = Proxy @SyncAPI + +downloadEntitiesStreamClientM :: SyncV2.DownloadEntitiesRequest -> Servant.ClientM (Servant.SourceT IO SyncV2.DownloadEntitiesChunk) +SyncV2.Routes + { downloadEntitiesStream = downloadEntitiesStreamClientM + } = Servant.client syncAPI + +-- | Helper for running clientM that returns a stream of entities. +-- You MUST consume the stream within the callback, it will be closed when the callback returns. +handleStream :: (MonadIO m) => Servant.ClientEnv -> (ConduitT () o m () -> IO r) -> Servant.ClientM (Servant.SourceT IO o) -> IO (Either Servant.ClientError r) +handleStream clientEnv consumeStream clientM = do + Servant.withClientM clientM clientEnv $ \case + Left err -> pure $ Left err + Right source -> do + conduit <- Servant.fromSourceIO source + Right <$> consumeStream conduit + +handleClientError :: Servant.ClientEnv -> Servant.ClientError -> CodeserverTransportError +handleClientError clientEnv err = + case err of + Servant.FailureResponse _req resp -> + case HTTP.statusCode $ Servant.responseStatusCode resp of + 401 -> Unauthenticated (Servant.baseUrl clientEnv) + -- The server should provide semantically relevant permission-denied messages + -- when possible, but this should catch any we miss. + 403 -> PermissionDenied (Text.Lazy.toStrict . Text.Lazy.decodeUtf8 $ Servant.responseBody resp) + 408 -> Timeout + 429 -> RateLimitExceeded + 504 -> Timeout + _ -> UnexpectedResponse resp + Servant.DecodeFailure msg resp -> DecodeFailure msg resp + Servant.UnsupportedContentType _ct resp -> UnexpectedResponse resp + Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp + Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) + httpDownloadEntities :: + (MonadIO m) => Auth.AuthenticatedHttpClient -> - BaseUrl -> + Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (IO (Either CodeserverTransportError (Servant.SourceT IO SyncV2.DownloadEntitiesChunk))) -httpDownloadEntities = - let SyncV2.Routes - { downloadEntitiesStream - } = - let pp :: Proxy ("ucm" Servant.:> "v2" Servant.:> "sync" Servant.:> SyncV2.API) - pp = Proxy - in ServantStreaming.hoistClient pp hoist (ServantStreaming.client pp) - in ( go downloadEntitiesStream - ) - where - hoist :: ServantStreaming.ClientM a -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) a - hoist m = do - clientEnv <- Reader.ask - (lift . lift $ ServantStreaming.withClientM m clientEnv pure) >>= \case - Right a -> pure a - Left err -> do - Debug.debugLogM Debug.Sync (show err) - throwError case err of - Servant.FailureResponse _req resp -> - case HTTP.statusCode $ Servant.responseStatusCode resp of - 401 -> Unauthenticated (Servant.baseUrl clientEnv) - -- The server should provide semantically relevant permission-denied messages - -- when possible, but this should catch any we miss. - 403 -> PermissionDenied (Text.Lazy.toStrict . Text.Lazy.decodeUtf8 $ Servant.responseBody resp) - 408 -> Timeout - 429 -> RateLimitExceeded - 504 -> Timeout - _ -> UnexpectedResponse resp - Servant.DecodeFailure msg resp -> DecodeFailure msg resp - Servant.UnsupportedContentType _ct resp -> UnexpectedResponse resp - Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp - Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) - - go :: - (req -> ReaderT Servant.ClientEnv (ExceptT CodeserverTransportError IO) resp) -> - Auth.AuthenticatedHttpClient -> - BaseUrl -> - req -> - IO (Either CodeserverTransportError resp) - go f (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = - (Servant.mkClientEnv httpClient unisonShareUrl) - { Servant.makeClientRequest = \url request -> - -- Disable client-side timeouts - (Servant.defaultMakeClientRequest url request) - <&> \r -> - r - { Http.Client.responseTimeout = Http.Client.responseTimeoutNone - } - } - & runReaderT (f req) - & runExceptT + ( ConduitT () SyncV2.DownloadEntitiesChunk m () -> + IO r + ) -> + (IO (Either CodeserverTransportError r)) +httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req f = do + let clientEnv = + (Servant.mkClientEnv httpClient unisonShareUrl) + { Servant.makeClientRequest = \url request -> + -- Disable client-side timeouts + (Servant.defaultMakeClientRequest url request) + <&> \r -> + r + { Http.Client.responseTimeout = Http.Client.responseTimeoutNone + } + } + mapLeft (handleClientError clientEnv) <$> handleStream clientEnv f (downloadEntitiesStreamClientM req) diff --git a/unison-share-api/src/Unison/SyncV2/API.hs b/unison-share-api/src/Unison/SyncV2/API.hs index 84bf6f17b8..71ea8693d3 100644 --- a/unison-share-api/src/Unison/SyncV2/API.hs +++ b/unison-share-api/src/Unison/SyncV2/API.hs @@ -20,8 +20,8 @@ type API = NamedRoutes Routes type DownloadEntitiesStream = -- | The causal hash the client needs. The server should provide it and all of its dependencies - ReqBody '[CBOR] DownloadEntitiesRequest - :> StreamPost NoFraming CBOR (SourceIO DownloadEntitiesChunk) + ReqBody '[CBOR, JSON] DownloadEntitiesRequest + :> StreamPost NetstringFraming CBOR (SourceIO DownloadEntitiesChunk) data Routes mode = Routes { downloadEntitiesStream :: mode :- "entities" :> "download" :> DownloadEntitiesStream diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 845401f43e..05b4fb99ab 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -13,6 +13,7 @@ module Unison.SyncV2.Types ) where +import Data.Aeson (FromJSON (..), ToJSON (..), object, withObject, (.:), (.=)) import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) import Codec.Serialise qualified as CBOR @@ -28,7 +29,7 @@ import Unison.Share.API.Hash (HashJWT) import Unison.Sync.Types qualified as SyncV1 newtype BranchRef = BranchRef {unBranchRef :: Text} - deriving (Serialise, Eq, Show, Ord) via Text + deriving (Serialise, Eq, Show, Ord, ToJSON, FromJSON) via Text data GetCausalHashErrorTag = GetCausalHashNoReadPermissionTag @@ -59,6 +60,21 @@ instance Serialise DownloadEntitiesRequest where encode causalHash <> encode branchRef <> encode knownHashes decode = DownloadEntitiesRequest <$> decode <*> decode <*> decode +instance FromJSON DownloadEntitiesRequest where + parseJSON = withObject "DownloadEntitiesRequest" $ \o -> do + causalHash <- o .: "causalHash" + branchRef <- o .: "branchRef" + knownHashes <- o .: "knownHashes" + pure DownloadEntitiesRequest {causalHash, branchRef, knownHashes} + +instance ToJSON DownloadEntitiesRequest where + toJSON (DownloadEntitiesRequest {causalHash, branchRef, knownHashes}) = + object + [ "causalHash" .= causalHash, + "branchRef" .= branchRef, + "knownHashes" .= knownHashes + ] + -- | Wrapper for CBOR data that has already been serialized. -- In our case, we use this because we may load pre-serialized CBOR directly from the database, -- but it's also useful in allowing us to more quickly seek through a CBOR stream, since we only need to decode the CBOR when/if we actually need to use it, and can skip past it using a byte offset otherwise. From 98a65c9a0c97d461eff227219602734c843585b8 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 13 Aug 2024 10:44:17 -0700 Subject: [PATCH 12/52] Switch from conduit to foreach --- .../src/Unison/CommandLine/OutputMessages.hs | 5 + unison-cli/src/Unison/Share/Sync/Types.hs | 2 + unison-cli/src/Unison/Share/SyncV2.hs | 135 +++++++++--------- unison-share-api/src/Unison/SyncV2/Types.hs | 4 +- 4 files changed, 77 insertions(+), 69 deletions(-) diff --git a/unison-cli/src/Unison/CommandLine/OutputMessages.hs b/unison-cli/src/Unison/CommandLine/OutputMessages.hs index 00875cdcbc..8fe38e4599 100644 --- a/unison-cli/src/Unison/CommandLine/OutputMessages.hs +++ b/unison-cli/src/Unison/CommandLine/OutputMessages.hs @@ -2316,6 +2316,11 @@ prettyTransportError = \case in if Text.null body then Nothing else Just (P.newline <> "Response body: " <> P.text body), responseRequestId resp <&> \responseId -> P.newline <> "Request ID: " <> P.blue (P.text responseId) ] + StreamingError err -> + P.lines + [ ( "We encountered an error while streaming data from the code server: " <> P.text err), + P.red (P.text err) + ] where -- Dig the request id out of a response header. responseRequestId :: Servant.Response -> Maybe Text diff --git a/unison-cli/src/Unison/Share/Sync/Types.hs b/unison-cli/src/Unison/Share/Sync/Types.hs index a53d14acbb..1d7066688c 100644 --- a/unison-cli/src/Unison/Share/Sync/Types.hs +++ b/unison-cli/src/Unison/Share/Sync/Types.hs @@ -39,6 +39,8 @@ data CodeserverTransportError | Unauthenticated Servant.BaseUrl | UnexpectedResponse Servant.Response | UnreachableCodeserver Servant.BaseUrl + | -- I wish Servant gave us more detail, but it's just Text. I don't think we ever hit these errors though. + StreamingError Text deriving stock (Show) deriving anyclass (Exception) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index a4f9edf9c0..b02204ef87 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -6,17 +6,14 @@ module Unison.Share.SyncV2 ) where -import Conduit (ConduitT) import Control.Lens import Control.Monad.Except import Control.Monad.Reader (ask) -import Data.Conduit ((.|)) -import Data.Conduit qualified as Conduit -import Data.Conduit.Combinators qualified as Conduit import Data.Map qualified as Map import Data.Proxy import Data.Set qualified as Set import Data.Set.NonEmpty qualified as NESet +import Data.Text qualified as Text import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy import Network.HTTP.Client qualified as Http.Client @@ -25,6 +22,7 @@ import Servant.API qualified as Servant import Servant.Client.Streaming qualified as Servant import Servant.Conduit () import Servant.Types.SourceT qualified as Servant +import Servant.Types.SourceT qualified as SourceT import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) @@ -44,9 +42,11 @@ import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share import Unison.SyncV2.API (Routes (downloadEntitiesStream)) import Unison.SyncV2.API qualified as SyncV2 +import Unison.SyncV2.Types (CBORBytes) import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) +import UnliftIO qualified ------------------------------------------------------------------------------------------------------------------------ -- Download entities @@ -75,24 +75,18 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback Just Q.EntityInMainStorage -> pure () -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do - result <- liftIO $ + Debug.debugLogM Debug.Temp $ "Kicking off sync request" + result <- liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do httpDownloadEntities authHTTPClient unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - \conduit -> do - Debug.debugLogM Debug.Temp $ "Kicking off sync request" - let entityPipeline :: ConduitT () c (ExceptT SyncV2.PullError IO) () - entityPipeline = Conduit.transPipe liftIO conduit .| unpackEntities downloadedCallback .| entityValidator .| entityInserter codebase - Debug.debugLogM Debug.Temp $ "Running conduit" - runExceptT (Conduit.runConduit entityPipeline) - -- >>= \case - -- Left err -> failed (SyncError err) - -- Right () -> pure () + \chunk -> do + liftIO (downloadedCallback 1) + throwExceptT $ handleChunk codebase downloadedCallback chunk case result of - Left err -> failed (TransportError err) - Right (Left syncErr) -> failed (SyncError syncErr) - Right (Right ()) -> pure () + Left err -> failed err + Right () -> pure () didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -107,46 +101,51 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) -entityValidator :: (MonadError SyncV2.PullError m) => ConduitT (Hash32, TempEntity) (Hash32, TempEntity) m () -entityValidator = Conduit.iterM $ \(hash, entity) -> do - Debug.debugLogM Debug.Temp $ "Validating entity" - - -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. - case EV.validateTempEntity hash entity of - Nothing -> pure () - Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> - let expectedMismatches = case et of - Share.TermComponentType -> expectedComponentHashMismatches - Share.DeclComponentType -> expectedComponentHashMismatches - Share.CausalType -> expectedCausalHashMismatches - _ -> mempty - in case Map.lookup supplied expectedMismatches of - Just expected - | expected == computed -> pure () - _ -> do - throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - Just err -> do - throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - -entityInserter :: (MonadIO m) => Codebase.Codebase IO v a -> ConduitT (Hash32, TempEntity) o m () -entityInserter codebase = Conduit.mapM_ \(hash, entity) -> do - Debug.debugLogM Debug.Temp $ "Inserting entity" - liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity - pure () - -unpackEntities :: (MonadError SyncV2.PullError m, MonadIO m) => (Int -> IO ()) -> ConduitT SyncV2.DownloadEntitiesChunk (Hash32, TempEntity) m () -unpackEntities downloadedCallback = Conduit.mapM $ \case - SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do - Debug.debugLogM Debug.Temp $ "Got entity chunk" - case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> do - throwError (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> do - liftIO (downloadedCallback 1) - pure (hash, entity) +handleChunk :: forall m v a. (MonadError SyncV2.PullError m, MonadIO m) => Codebase.Codebase IO v a -> (Int -> IO ()) -> SyncV2.DownloadEntitiesChunk -> m () +handleChunk codebase downloadedCallback = \case SyncV2.ErrorChunk {err} -> do Debug.debugLogM Debug.Temp $ "Got error chunk" throwError (SyncV2.PullError'DownloadEntities err) + SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do + liftIO (downloadedCallback 1) + -- Only want entities we don't already have + liftIO (Codebase.runTransaction codebase (Q.entityLocation hash)) >>= \case + Just {} -> pure () + Nothing -> do + tempEntity <- unpackEntity entityBytes + validateEntity hash tempEntity + insertEntity hash tempEntity + where + unpackEntity :: (CBORBytes TempEntity) -> m TempEntity + unpackEntity entityBytes = do + case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> do throwError (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity + validateEntity :: Hash32 -> TempEntity -> m () + validateEntity hash entity = do + Debug.debugLogM Debug.Temp $ "Validating entity" + -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. + case EV.validateTempEntity hash entity of + Nothing -> pure () + Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> + let expectedMismatches = case et of + Share.TermComponentType -> expectedComponentHashMismatches + Share.DeclComponentType -> expectedComponentHashMismatches + Share.CausalType -> expectedCausalHashMismatches + _ -> mempty + in case Map.lookup supplied expectedMismatches of + Just expected + | expected == computed -> pure () + _ -> do + throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + Just err -> do + throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + + insertEntity :: Hash32 -> TempEntity -> m () + insertEntity hash entity = do + Debug.debugLogM Debug.Temp $ "Inserting entity" + liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity + pure () ------------------------------------------------------------------------------------------------------------------------ -- Database operations @@ -198,13 +197,15 @@ SyncV2.Routes -- | Helper for running clientM that returns a stream of entities. -- You MUST consume the stream within the callback, it will be closed when the callback returns. -handleStream :: (MonadIO m) => Servant.ClientEnv -> (ConduitT () o m () -> IO r) -> Servant.ClientM (Servant.SourceT IO o) -> IO (Either Servant.ClientError r) -handleStream clientEnv consumeStream clientM = do - Servant.withClientM clientM clientEnv $ \case - Left err -> pure $ Left err - Right source -> do - conduit <- Servant.fromSourceIO source - Right <$> consumeStream conduit +handleStream :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> (o -> m ()) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError ()) +handleStream clientEnv callback clientM = do + toIO <- UnliftIO.askRunInIO + liftIO $ Servant.withClientM clientM clientEnv $ \case + Left err -> pure (Left $ handleClientError clientEnv err) + Right sourceT -> do + sourceTM <- liftIO $ Servant.fromSourceIO @o @(SourceT.SourceT m o) sourceT + (Right <$> toIO (SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) callback sourceTM)) + & UnliftIO.handle @_ @CodeserverTransportError (pure . Left) handleClientError :: Servant.ClientEnv -> Servant.ClientError -> CodeserverTransportError handleClientError clientEnv err = @@ -225,15 +226,13 @@ handleClientError clientEnv err = Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) httpDownloadEntities :: - (MonadIO m) => + (MonadUnliftIO m) => Auth.AuthenticatedHttpClient -> Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - ( ConduitT () SyncV2.DownloadEntitiesChunk m () -> - IO r - ) -> - (IO (Either CodeserverTransportError r)) -httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req f = do + (SyncV2.DownloadEntitiesChunk -> m ()) -> + m (Either CodeserverTransportError ()) +httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do let clientEnv = (Servant.mkClientEnv httpClient unisonShareUrl) { Servant.makeClientRequest = \url request -> @@ -244,4 +243,4 @@ httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl re { Http.Client.responseTimeout = Http.Client.responseTimeoutNone } } - mapLeft (handleClientError clientEnv) <$> handleStream clientEnv f (downloadEntitiesStreamClientM req) + handleStream clientEnv callback (downloadEntitiesStreamClientM req) diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 05b4fb99ab..8c46cb3983 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -13,11 +13,12 @@ module Unison.SyncV2.Types ) where -import Data.Aeson (FromJSON (..), ToJSON (..), object, withObject, (.:), (.=)) import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise (Serialise (..)) import Codec.Serialise qualified as CBOR import Codec.Serialise.Decoding qualified as CBOR +import Control.Exception (Exception) +import Data.Aeson (FromJSON (..), ToJSON (..), object, withObject, (.:), (.=)) import Data.ByteString.Lazy qualified as BL import Data.Set (Set) import Data.Text (Text) @@ -183,6 +184,7 @@ data PullError = PullError'DownloadEntities DownloadEntitiesError | PullError'Sync SyncError deriving stock (Show) + deriving anyclass (Exception) data SyncError = SyncErrorExpectedResultNotInMain CausalHash From 01c41afe2c70ec2daa316662ae000c56659a7f91 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 15 Aug 2024 09:27:54 -0700 Subject: [PATCH 13/52] More experiments --- unison-cli/package.yaml | 1 + unison-cli/src/Unison/Share/SyncV2.hs | 113 ++++++++++++++++++++------ unison-cli/unison-cli.cabal | 3 + 3 files changed, 91 insertions(+), 26 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 0bba34555e..a4055184a5 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -74,6 +74,7 @@ dependencies: - servant-conduit - shellmet - stm + - stm-chans - template-haskell - temporary - text diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index b02204ef87..6faf2b6bf0 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeOperators #-} +{-# OPTIONS_GHC -Wno-unused-top-binds #-} module Unison.Share.SyncV2 ( downloadEntities, @@ -9,6 +10,7 @@ where import Control.Lens import Control.Monad.Except import Control.Monad.Reader (ask) +import Data.Graph qualified as Graph import Data.Map qualified as Map import Data.Proxy import Data.Set qualified as Set @@ -46,6 +48,7 @@ import Unison.SyncV2.Types (CBORBytes) import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) +import Unison.Util.Timing qualified as Timing import UnliftIO qualified ------------------------------------------------------------------------------------------------------------------------ @@ -76,17 +79,21 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do Debug.debugLogM Debug.Temp $ "Kicking off sync request" - result <- liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do - httpDownloadEntities - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - \chunk -> do - liftIO (downloadedCallback 1) - throwExceptT $ handleChunk codebase downloadedCallback chunk - case result of - Left err -> failed err - Right () -> pure () + results <- Timing.time "Entity Download" $ do + liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do + httpDownloadEntitiesAsList + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} + allResults <- either failed pure results + liftIO $ downloadedCallback (length allResults) + allEntities <- Timing.time "Unpacking chunks" $ do (unpackChunks codebase (failed . SyncError) allResults) + sortedEntities <- Timing.time "Sorting Entities" $ UnliftIO.evaluate $ topSortEntities allEntities + Timing.time "Inserting entities" $ for_ sortedEntities \(hash, entity) -> do + r <- liftIO $ Codebase.runTransaction codebase $ Right <$> insertEntity hash entity + void $ either (failed . SyncError) pure r + pure () + didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -100,28 +107,52 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback didCausalSuccessfullyImport codebase hash = do let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) + unpackChunks :: Codebase.Codebase IO v a -> (forall x. SyncV2.PullError -> Cli x) -> [SyncV2.DownloadEntitiesChunk] -> Cli [(Hash32, TempEntity)] + unpackChunks codebase _handleErr xs = do + xs + & ( UnliftIO.pooledMapConcurrently \case + SyncV2.ErrorChunk {err} -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" + error $ show err + SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do + -- Only want entities we don't already have in main + (Codebase.runTransaction codebase (Q.entityLocation hash)) >>= \case + Just Q.EntityInMainStorage -> pure Nothing + _ -> do + tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity + pure $ Just (hash, tempEntity) + ) + & liftIO + <&> catMaybes + +topSortEntities :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] +topSortEntities entities = do + let adjList = entities <&> \(hash32, entity) -> ((hash32, entity), hash32, Set.toList $ Share.entityDependencies (tempEntityToEntity entity)) + (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList + in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) -handleChunk :: forall m v a. (MonadError SyncV2.PullError m, MonadIO m) => Codebase.Codebase IO v a -> (Int -> IO ()) -> SyncV2.DownloadEntitiesChunk -> m () -handleChunk codebase downloadedCallback = \case +handleChunk :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () +handleChunk rollback = \case SyncV2.ErrorChunk {err} -> do Debug.debugLogM Debug.Temp $ "Got error chunk" - throwError (SyncV2.PullError'DownloadEntities err) + rollback $ SyncV2.PullError'DownloadEntities err SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do - liftIO (downloadedCallback 1) -- Only want entities we don't already have - liftIO (Codebase.runTransaction codebase (Q.entityLocation hash)) >>= \case + Q.entityLocation hash >>= \case Just {} -> pure () Nothing -> do tempEntity <- unpackEntity entityBytes validateEntity hash tempEntity insertEntity hash tempEntity where - unpackEntity :: (CBORBytes TempEntity) -> m TempEntity + unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction TempEntity unpackEntity entityBytes = do case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> do throwError (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Left err -> do rollback (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) Right entity -> pure entity - validateEntity :: Hash32 -> TempEntity -> m () + validateEntity :: Hash32 -> TempEntity -> Sqlite.Transaction () validateEntity hash entity = do Debug.debugLogM Debug.Temp $ "Validating entity" -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. @@ -137,15 +168,15 @@ handleChunk codebase downloadedCallback = \case Just expected | expected == computed -> pure () _ -> do - throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err Just err -> do - throwError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - insertEntity :: Hash32 -> TempEntity -> m () - insertEntity hash entity = do - Debug.debugLogM Debug.Temp $ "Inserting entity" - liftIO . Codebase.runTransaction codebase $ upsertEntitySomewhere hash entity - pure () +insertEntity :: Hash32 -> TempEntity -> Sqlite.Transaction () +insertEntity hash entity = do + Debug.debugLogM Debug.Temp $ "Inserting entity" + upsertEntitySomewhere hash entity + pure () ------------------------------------------------------------------------------------------------------------------------ -- Database operations @@ -244,3 +275,33 @@ httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl re } } handleStream clientEnv callback (downloadEntitiesStreamClientM req) + +httpDownloadEntitiesAsList :: + (MonadUnliftIO m) => + Auth.AuthenticatedHttpClient -> + Servant.BaseUrl -> + SyncV2.DownloadEntitiesRequest -> + m (Either CodeserverTransportError [SyncV2.DownloadEntitiesChunk]) +httpDownloadEntitiesAsList (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = do + let clientEnv = + (Servant.mkClientEnv httpClient unisonShareUrl) + { Servant.makeClientRequest = \url request -> + -- Disable client-side timeouts + (Servant.defaultMakeClientRequest url request) + <&> \r -> + r + { Http.Client.responseTimeout = Http.Client.responseTimeoutNone + } + } + handleStreamAsList clientEnv (downloadEntitiesStreamClientM req) + +-- | Helper for running clientM that returns a stream of entities. +-- You MUST consume the stream within the callback, it will be closed when the callback returns. +handleStreamAsList :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError [o]) +handleStreamAsList clientEnv clientM = do + toIO <- UnliftIO.askRunInIO + liftIO $ Servant.withClientM clientM clientEnv $ \case + Left err -> pure (Left $ handleClientError clientEnv err) + Right sourceT -> do + sourceTM <- liftIO $ Servant.fromSourceIO @o @(SourceT.SourceT m o) sourceT + mapLeft (StreamingError . Text.pack) <$> toIO (runExceptT $ SourceT.runSourceT sourceTM) diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index a4be1a6afe..2955a288da 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -257,6 +257,7 @@ library , servant-conduit , shellmet , stm + , stm-chans , template-haskell , temporary , text @@ -405,6 +406,7 @@ executable transcripts , shellmet , silently , stm + , stm-chans , template-haskell , temporary , text @@ -558,6 +560,7 @@ test-suite cli-tests , servant-conduit , shellmet , stm + , stm-chans , template-haskell , temporary , text From fb391fb42889bee381e63d47d212f713c8a3e2d0 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sat, 17 Aug 2024 23:00:30 -0700 Subject: [PATCH 14/52] Write all entities in a single transaction, big boost! --- unison-cli/src/Unison/Share/SyncV2.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 6faf2b6bf0..791f09c07f 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -89,9 +89,8 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback liftIO $ downloadedCallback (length allResults) allEntities <- Timing.time "Unpacking chunks" $ do (unpackChunks codebase (failed . SyncError) allResults) sortedEntities <- Timing.time "Sorting Entities" $ UnliftIO.evaluate $ topSortEntities allEntities - Timing.time "Inserting entities" $ for_ sortedEntities \(hash, entity) -> do - r <- liftIO $ Codebase.runTransaction codebase $ Right <$> insertEntity hash entity - void $ either (failed . SyncError) pure r + liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do + insertEntity hash entity pure () didCausalSuccessfullyImport codebase hash >>= \case From 62ab29b9f3f2c812df76d72f8ba0ae8f0a2f738f Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sat, 17 Aug 2024 23:04:02 -0700 Subject: [PATCH 15/52] Unparallelize unpacking, but run in a single transaction. --- unison-cli/src/Unison/Share/SyncV2.hs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 791f09c07f..25e135f056 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -87,7 +87,7 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} allResults <- either failed pure results liftIO $ downloadedCallback (length allResults) - allEntities <- Timing.time "Unpacking chunks" $ do (unpackChunks codebase (failed . SyncError) allResults) + allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) sortedEntities <- Timing.time "Sorting Entities" $ UnliftIO.evaluate $ topSortEntities allEntities liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do insertEntity hash entity @@ -106,16 +106,16 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback didCausalSuccessfullyImport codebase hash = do let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) - unpackChunks :: Codebase.Codebase IO v a -> (forall x. SyncV2.PullError -> Cli x) -> [SyncV2.DownloadEntitiesChunk] -> Cli [(Hash32, TempEntity)] - unpackChunks codebase _handleErr xs = do + unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] + unpackChunks xs = do xs - & ( UnliftIO.pooledMapConcurrently \case + & ( traverse \case SyncV2.ErrorChunk {err} -> do Debug.debugLogM Debug.Temp $ "Got error chunk" error $ show err SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do -- Only want entities we don't already have in main - (Codebase.runTransaction codebase (Q.entityLocation hash)) >>= \case + ((Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure Nothing _ -> do tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of @@ -123,7 +123,6 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback Right entity -> pure entity pure $ Just (hash, tempEntity) ) - & liftIO <&> catMaybes topSortEntities :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] From 831f5daa965b3ffb5ab13576fdeb00090224dec5 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sat, 17 Aug 2024 23:12:27 -0700 Subject: [PATCH 16/52] Remove download callback --- unison-cli/src/Unison/Cli/DownloadUtils.hs | 18 ++++++++++-------- unison-cli/src/Unison/Share/SyncV2.hs | 4 ++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index 1b8932ad8b..e0ad96970c 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -51,14 +51,16 @@ downloadProjectBranchFromShare useSquashed branch = (Share.NoSquashedHead, _) -> pure branch.branchHead exists <- Cli.runTransaction (Queries.causalExistsByHash32 (Share.hashJWTHash causalHashJwt)) when (not exists) do - (result, numDownloaded) <- - Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do - let branchRef = SyncV2.BranchRef (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) - -- TODO: Fill this in. - let knownHashes = Set.empty - result <- SyncV2.downloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback - numDownloaded <- liftIO getNumDownloaded - pure (result, numDownloaded) + (result, numDownloaded) <- do + -- Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do + let branchRef = SyncV2.BranchRef (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) + -- TODO: Fill this in. + let knownHashes = Set.empty + let downloadedCallback = \_ -> pure () + result <- SyncV2.downloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback + -- numDownloaded <- liftIO getNumDownloaded + let numDownloaded = 0 + pure (result, numDownloaded) result & onLeft \err0 -> do done case err0 of Share.SyncError err -> diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 25e135f056..b1ff2de2e6 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -65,7 +65,7 @@ downloadEntities :: -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback = do +downloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask Cli.label \done -> do @@ -86,7 +86,7 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes downloadedCallback unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} allResults <- either failed pure results - liftIO $ downloadedCallback (length allResults) + -- liftIO $ downloadedCallback (length allResults) allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) sortedEntities <- Timing.time "Sorting Entities" $ UnliftIO.evaluate $ topSortEntities allEntities liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do From 941dc04933d3d50cd1d18c4805f20261c20f11e0 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sat, 17 Aug 2024 22:25:34 -0700 Subject: [PATCH 17/52] Add a buncha sqlite pragmas --- lib/unison-sqlite/src/Unison/Sqlite/Connection.hs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs b/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs index 48167980db..bf0a6d88a4 100644 --- a/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs +++ b/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs @@ -106,6 +106,11 @@ openConnection name file = do let conn = Connection {conn = conn0, file, name} execute conn [Sql.sql| PRAGMA foreign_keys = ON |] execute conn [Sql.sql| PRAGMA busy_timeout = 60000 |] + execute conn [Sql.sql| PRAGMA synchronous = normal |] + execute conn [Sql.sql| PRAGMA journal_size_limit = 6144000 |] + execute conn [Sql.sql| PRAGMA cache_size = -64000 |] + execute conn [Sql.sql| PRAGMA temp_store = 2 |] + pure conn -- Close a connection opened with 'openConnection'. From 0c692b3777581ed1fdb07cd03d93e888bbab4684 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sat, 17 Aug 2024 22:51:02 -0700 Subject: [PATCH 18/52] Add prepared statement cache. --- lib/unison-sqlite/package.yaml | 1 + .../src/Unison/Sqlite/Connection.hs | 57 ++++++++++++++----- .../src/Unison/Sqlite/Connection/Internal.hs | 8 ++- lib/unison-sqlite/unison-sqlite.cabal | 2 + 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/lib/unison-sqlite/package.yaml b/lib/unison-sqlite/package.yaml index 7d58258134..1e7896b116 100644 --- a/lib/unison-sqlite/package.yaml +++ b/lib/unison-sqlite/package.yaml @@ -28,6 +28,7 @@ tests: dependencies: - base + - containers - direct-sqlite - exceptions - generic-lens diff --git a/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs b/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs index bf0a6d88a4..726cac860e 100644 --- a/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs +++ b/lib/unison-sqlite/src/Unison/Sqlite/Connection.hs @@ -58,6 +58,7 @@ module Unison.Sqlite.Connection ) where +import Data.Map qualified as Map import Database.SQLite.Simple qualified as Sqlite import Database.SQLite.Simple.FromField qualified as Sqlite import Database.SQLite3 qualified as Direct.Sqlite @@ -71,7 +72,10 @@ import Unison.Sqlite.Connection.Internal (Connection (..)) import Unison.Sqlite.Exception import Unison.Sqlite.Sql (Sql (..)) import Unison.Sqlite.Sql qualified as Sql +import UnliftIO (atomically) import UnliftIO.Exception +import UnliftIO.STM (readTVar) +import UnliftIO.STM qualified as STM -- | Perform an action with a connection to a SQLite database. -- @@ -103,7 +107,8 @@ openConnection name file = do Just "" -> file _ -> "file:" <> file <> "?mode=ro" conn0 <- Sqlite.open sqliteURI `catch` rethrowAsSqliteConnectException name file - let conn = Connection {conn = conn0, file, name} + statementCache <- STM.newTVarIO Map.empty + let conn = Connection {conn = conn0, file, name, statementCache} execute conn [Sql.sql| PRAGMA foreign_keys = ON |] execute conn [Sql.sql| PRAGMA busy_timeout = 60000 |] execute conn [Sql.sql| PRAGMA synchronous = normal |] @@ -115,12 +120,34 @@ openConnection name file = do -- Close a connection opened with 'openConnection'. closeConnection :: Connection -> IO () -closeConnection (Connection _ _ conn) = +closeConnection conn@(Connection {conn = conn0}) = do -- FIXME if this throws an exception, it won't be under `SomeSqliteException` -- Possible fixes: -- 1. Add close exception to the hierarchy, e.g. `SqliteCloseException` -- 2. Always ignore exceptions thrown by `close` (Mitchell prefers this one) - Sqlite.close conn + closeAllStatements conn + Sqlite.close conn0 + +withStatement :: Connection -> Text -> (Sqlite.Statement -> IO a) -> IO a +withStatement conn sql action = do + bracket (prepareStatement conn sql) Sqlite.reset action + where + prepareStatement :: Connection -> Text -> IO Sqlite.Statement + prepareStatement Connection {conn, statementCache} sql = do + cached <- atomically $ do + cache <- STM.readTVar statementCache + pure $ Map.lookup sql cache + case cached of + Just stmt -> pure stmt + Nothing -> do + stmt <- Sqlite.openStatement conn (coerce @Text @Sqlite.Query sql) + atomically $ STM.modifyTVar statementCache (Map.insert sql stmt) + pure stmt + +closeAllStatements :: Connection -> IO () +closeAllStatements Connection {statementCache} = do + cache <- atomically $ readTVar statementCache + for_ cache Sqlite.closeStatement -- An internal type, for making prettier debug logs @@ -157,7 +184,7 @@ logQuery (Sql sql params) result = -- Without results execute :: (HasCallStack) => Connection -> Sql -> IO () -execute conn@(Connection _ _ conn0) sql@(Sql s params) = do +execute conn sql@(Sql s params) = do logQuery sql Nothing doExecute `catch` \(exception :: Sqlite.SQLError) -> throwSqliteQueryException @@ -168,16 +195,16 @@ execute conn@(Connection _ _ conn0) sql@(Sql s params) = do } where doExecute :: IO () - doExecute = - Sqlite.withStatement conn0 (coerce s) \(Sqlite.Statement statement) -> do - bindParameters statement params - void (Direct.Sqlite.step statement) + doExecute = do + withStatement conn s \statement -> do + bindParameters (coerce statement) params + void (Direct.Sqlite.step $ coerce statement) -- | Execute one or more semicolon-delimited statements. -- -- This function does not support parameters, and is mostly useful for executing DDL and migrations. executeStatements :: (HasCallStack) => Connection -> Text -> IO () -executeStatements conn@(Connection _ _ (Sqlite.Connection database _tempNameCounter)) sql = do +executeStatements conn@(Connection {conn = Sqlite.Connection database _tempNameCounter}) sql = do logQuery (Sql sql []) Nothing Direct.Sqlite.exec database sql `catch` \(exception :: Sqlite.SQLError) -> throwSqliteQueryException @@ -190,7 +217,7 @@ executeStatements conn@(Connection _ _ (Sqlite.Connection database _tempNameCoun -- With results, without checks queryStreamRow :: (HasCallStack, Sqlite.FromRow a) => Connection -> Sql -> (IO (Maybe a) -> IO r) -> IO r -queryStreamRow conn@(Connection _ _ conn0) sql@(Sql s params) callback = +queryStreamRow conn sql@(Sql s params) callback = run `catch` \(exception :: Sqlite.SQLError) -> throwSqliteQueryException SqliteQueryExceptionInfo @@ -199,8 +226,8 @@ queryStreamRow conn@(Connection _ _ conn0) sql@(Sql s params) callback = sql } where - run = - bracket (Sqlite.openStatement conn0 (coerce s)) Sqlite.closeStatement \statement -> do + run = do + withStatement conn s \statement -> do Sqlite.bind statement params callback (Sqlite.nextRow statement) @@ -218,7 +245,7 @@ queryStreamCol = queryStreamRow queryListRow :: forall a. (Sqlite.FromRow a, HasCallStack) => Connection -> Sql -> IO [a] -queryListRow conn@(Connection _ _ conn0) sql@(Sql s params) = do +queryListRow conn sql@(Sql s params) = do result <- doQuery `catch` \(exception :: Sqlite.SQLError) -> @@ -233,7 +260,7 @@ queryListRow conn@(Connection _ _ conn0) sql@(Sql s params) = do where doQuery :: IO [a] doQuery = - Sqlite.withStatement conn0 (coerce s) \statement -> do + withStatement conn (coerce s) \statement -> do bindParameters (coerce statement) params let loop :: [a] -> IO [a] loop rows = @@ -352,7 +379,7 @@ queryOneColCheck conn s check = -- Rows modified rowsModified :: Connection -> IO Int -rowsModified (Connection _ _ conn) = +rowsModified (Connection {conn}) = Sqlite.changes conn -- Vacuum diff --git a/lib/unison-sqlite/src/Unison/Sqlite/Connection/Internal.hs b/lib/unison-sqlite/src/Unison/Sqlite/Connection/Internal.hs index 5f80151f94..579c37cfb9 100644 --- a/lib/unison-sqlite/src/Unison/Sqlite/Connection/Internal.hs +++ b/lib/unison-sqlite/src/Unison/Sqlite/Connection/Internal.hs @@ -3,15 +3,19 @@ module Unison.Sqlite.Connection.Internal ) where +import Data.Map (Map) +import Data.Text (Text) import Database.SQLite.Simple qualified as Sqlite +import UnliftIO.STM (TVar) -- | A /non-thread safe/ connection to a SQLite database. data Connection = Connection { name :: String, file :: FilePath, - conn :: Sqlite.Connection + conn :: Sqlite.Connection, + statementCache :: TVar (Map Text Sqlite.Statement) } instance Show Connection where - show (Connection name file _conn) = + show (Connection name file _conn _statementCache) = "Connection { name = " ++ show name ++ ", file = " ++ show file ++ " }" diff --git a/lib/unison-sqlite/unison-sqlite.cabal b/lib/unison-sqlite/unison-sqlite.cabal index 5a5561c5ef..a228883ca9 100644 --- a/lib/unison-sqlite/unison-sqlite.cabal +++ b/lib/unison-sqlite/unison-sqlite.cabal @@ -64,6 +64,7 @@ library ghc-options: -Wall build-depends: base + , containers , direct-sqlite , exceptions , generic-lens @@ -126,6 +127,7 @@ test-suite tests build-depends: base , code-page + , containers , direct-sqlite , easytest , exceptions From 890905d36774c6683a12d2f17e042a7d458d0328 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 15 Aug 2024 09:27:54 -0700 Subject: [PATCH 19/52] Allow nulls in temp_entity table --- codebase2/codebase-sqlite/sql/001-temp-entity-tables.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codebase2/codebase-sqlite/sql/001-temp-entity-tables.sql b/codebase2/codebase-sqlite/sql/001-temp-entity-tables.sql index 0ae13812b1..6651d4a6fe 100644 --- a/codebase2/codebase-sqlite/sql/001-temp-entity-tables.sql +++ b/codebase2/codebase-sqlite/sql/001-temp-entity-tables.sql @@ -56,7 +56,8 @@ create table if not exists temp_entity ( create table if not exists temp_entity_missing_dependency ( dependent text not null references temp_entity(hash), dependency text not null, - dependencyJwt text not null, + -- TODO: this is just for testing + dependencyJwt text null, unique (dependent, dependency) ); create index if not exists temp_entity_missing_dependency_ix_dependent on temp_entity_missing_dependency (dependent); From b31473c408cf812d5c8ed0cc22291a1998f115fa Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 15 Aug 2024 09:27:54 -0700 Subject: [PATCH 20/52] Remove temp_entity checking --- .../U/Codebase/Sqlite/Queries.hs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs index d47a6e840d..615b790616 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs @@ -688,7 +688,7 @@ saveObject :: ObjectType -> ByteString -> Transaction ObjectId -saveObject hh h t blob = do +saveObject _hh h t blob = do execute [sql| INSERT INTO object (primary_hash_id, type_id, bytes) @@ -699,9 +699,9 @@ saveObject hh h t blob = do saveHashObject h oId 2 -- todo: remove this from here, and add it to other relevant places once there are v1 and v2 hashes rowsModified >>= \case 0 -> pure () - _ -> do - hash <- expectHash32 h - tryMoveTempEntityDependents hh hash + _ -> pure () + -- hash <- expectHash32 h + -- tryMoveTempEntityDependents hh hash pure oId expectObject :: SqliteExceptionReason e => ObjectId -> (ByteString -> Either e a) -> Transaction a @@ -959,7 +959,7 @@ saveCausal :: BranchHashId -> [CausalHashId] -> Transaction () -saveCausal hh self value parents = do +saveCausal _hh self value parents = do execute [sql| INSERT INTO causal (self_hash_id, value_hash_id) @@ -975,15 +975,15 @@ saveCausal hh self value parents = do INSERT INTO causal_parent (causal_id, parent_id) VALUES (:self, :parent) |] - flushCausalDependents hh self + -- flushCausalDependents hh self -flushCausalDependents :: +_flushCausalDependents :: HashHandle -> CausalHashId -> Transaction () -flushCausalDependents hh chId = do +_flushCausalDependents hh chId = do hash <- expectHash32 (unCausalHashId chId) - tryMoveTempEntityDependents hh hash + _tryMoveTempEntityDependents hh hash -- | `tryMoveTempEntityDependents #foo` does this: -- 0. Precondition: We just inserted object #foo. @@ -991,11 +991,11 @@ flushCausalDependents hh chId = do -- 2. Delete #foo as dependency from temp_entity_missing_dependency. e.g. (#bar, #foo), (#baz, #foo) -- 3. For each like #bar and #baz with no more rows in temp_entity_missing_dependency, -- insert_entity them. -tryMoveTempEntityDependents :: +_tryMoveTempEntityDependents :: HashHandle -> Hash32 -> Transaction () -tryMoveTempEntityDependents hh dependency = do +_tryMoveTempEntityDependents hh dependency = do dependents <- queryListCol [sql| From 1b637b2eb4d95959ea344ae9098de2eeeeafdacb Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sun, 18 Aug 2024 21:03:05 -0700 Subject: [PATCH 21/52] Smarter text and hash saving queries --- .gitignore | 1 + .../U/Codebase/Sqlite/Queries.hs | 39 +++++++++---------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 94b29b69e8..0379b0b953 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ dist-newstyle *.hie *.prof *.prof.html +*.profiterole.* /.direnv/ /.envrc diff --git a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs index 615b790616..389907dd3d 100644 --- a/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs +++ b/codebase2/codebase-sqlite/U/Codebase/Sqlite/Queries.hs @@ -534,23 +534,18 @@ countWatches = queryOneCol [sql| SELECT COUNT(*) FROM watch |] saveHash :: Hash32 -> Transaction HashId saveHash hash = do - execute - [sql| - INSERT INTO hash (base32) VALUES (:hash) - ON CONFLICT DO NOTHING - |] - expectHashId hash + loadHashId hash >>= \case + Just h -> pure h + Nothing -> do + queryOneCol + [sql| + INSERT INTO hash (base32) VALUES (:hash) + RETURNING id + |] saveHashes :: Traversable f => f Hash32 -> Transaction (f HashId) saveHashes hashes = do - for_ hashes \hash -> - execute - [sql| - INSERT INTO hash (base32) - VALUES (:hash) - ON CONFLICT DO NOTHING - |] - traverse expectHashId hashes + for hashes saveHash saveHashHash :: Hash -> Transaction HashId saveHashHash = saveHash . Hash32.fromHash @@ -625,13 +620,15 @@ expectBranchHashForCausalHash ch = do saveText :: Text -> Transaction TextId saveText t = do - execute - [sql| - INSERT INTO text (text) - VALUES (:t) - ON CONFLICT DO NOTHING - |] - expectTextId t + loadTextId t >>= \case + Just h -> pure h + Nothing -> do + queryOneCol + [sql| + INSERT INTO text (text) + VALUES (:t) + RETURNING id + |] saveTexts :: Traversable f => f Text -> Transaction (f TextId) saveTexts = From 6cf6f4cefda3c34e20d19b3af5db19b678d37d66 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Sun, 18 Aug 2024 22:49:59 -0700 Subject: [PATCH 22/52] Skip dependency checking because we're in sorted order. --- unison-cli/src/Unison/Share/SyncV2.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index b1ff2de2e6..0c23cf7a3d 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -88,9 +88,9 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallbac allResults <- either failed pure results -- liftIO $ downloadedCallback (length allResults) allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) - sortedEntities <- Timing.time "Sorting Entities" $ UnliftIO.evaluate $ topSortEntities allEntities + let sortedEntities = topSortEntities allEntities liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do - insertEntity hash entity + Q.saveTempEntityInMain v2HashHandle hash entity pure () didCausalSuccessfullyImport codebase hash >>= \case From c32d0f4c1a580456d3a3a74da7188b0dd12d388d Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 3 Dec 2024 15:55:50 -0800 Subject: [PATCH 23/52] Fix up merge --- lib/unison-sqlite/package.yaml | 1 + lib/unison-sqlite/unison-sqlite.cabal | 1 + unison-cli/package.yaml | 1 + unison-cli/src/Unison/CommandLine/OutputMessages.hs | 2 +- unison-cli/unison-cli.cabal | 1 + unison-share-api/package.yaml | 1 + unison-share-api/unison-share-api.cabal | 2 ++ 7 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/unison-sqlite/package.yaml b/lib/unison-sqlite/package.yaml index 84d0201eab..b90bd2aa57 100644 --- a/lib/unison-sqlite/package.yaml +++ b/lib/unison-sqlite/package.yaml @@ -9,6 +9,7 @@ library: dependencies: - base + - containers - direct-sqlite - megaparsec - pretty-simple diff --git a/lib/unison-sqlite/unison-sqlite.cabal b/lib/unison-sqlite/unison-sqlite.cabal index d4569c8e6f..329a05c5d8 100644 --- a/lib/unison-sqlite/unison-sqlite.cabal +++ b/lib/unison-sqlite/unison-sqlite.cabal @@ -64,6 +64,7 @@ library ghc-options: -Wall build-depends: base + , containers , direct-sqlite , megaparsec , pretty-simple diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 098c48f302..585419ea3c 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -67,6 +67,7 @@ library: - semialign - servant - servant-client + - servant-conduit - stm - temporary - text-ansi diff --git a/unison-cli/src/Unison/CommandLine/OutputMessages.hs b/unison-cli/src/Unison/CommandLine/OutputMessages.hs index ba367d8afd..cba6f2a1da 100644 --- a/unison-cli/src/Unison/CommandLine/OutputMessages.hs +++ b/unison-cli/src/Unison/CommandLine/OutputMessages.hs @@ -2375,7 +2375,7 @@ prettyTransportError = \case in if Text.null body then Nothing else Just (P.newline <> "Response body: " <> P.text body), responseRequestId resp <&> \responseId -> P.newline <> "Request ID: " <> P.blue (P.text responseId) ] - StreamingError err -> + Share.StreamingError err -> P.lines [ ( "We encountered an error while streaming data from the code server: " <> P.text err), P.red (P.text err) diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 8f82804198..76f9e1e11a 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -242,6 +242,7 @@ library , semialign , servant , servant-client + , servant-conduit , stm , temporary , text diff --git a/unison-share-api/package.yaml b/unison-share-api/package.yaml index acbb020908..92ed3369d4 100644 --- a/unison-share-api/package.yaml +++ b/unison-share-api/package.yaml @@ -66,6 +66,7 @@ dependencies: - unison-pretty-printer - unison-runtime - unison-util-relation + - unison-util-base32hex - unison-share-projects-api - unison-sqlite - unison-syntax diff --git a/unison-share-api/unison-share-api.cabal b/unison-share-api/unison-share-api.cabal index 5226322629..96c9111ec9 100644 --- a/unison-share-api/unison-share-api.cabal +++ b/unison-share-api/unison-share-api.cabal @@ -132,6 +132,7 @@ library , unison-share-projects-api , unison-sqlite , unison-syntax + , unison-util-base32hex , unison-util-relation , unliftio , uri-encode @@ -233,6 +234,7 @@ test-suite unison-share-api-tests , unison-share-projects-api , unison-sqlite , unison-syntax + , unison-util-base32hex , unison-util-relation , unliftio , uri-encode From e5c175b875cccdd573cdcbbf5c08eaad4828d333 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 13 Dec 2024 12:24:16 -0800 Subject: [PATCH 24/52] Update cabal --- unison-share-api/unison-share-api.cabal | 1 - 1 file changed, 1 deletion(-) diff --git a/unison-share-api/unison-share-api.cabal b/unison-share-api/unison-share-api.cabal index b1435895bb..5013cc64d2 100644 --- a/unison-share-api/unison-share-api.cabal +++ b/unison-share-api/unison-share-api.cabal @@ -193,7 +193,6 @@ test-suite unison-share-api-tests , cborg , code-page , containers - , cryptonite , directory , easytest , errors From a3750a6e60faa94dd60269d4b60dd491f06dcbe3 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 13 Dec 2024 17:37:56 -0800 Subject: [PATCH 25/52] Handle initial chunks --- unison-cli/src/Unison/Share/SyncV2.hs | 157 ++++++++++++++++-- unison-cli/unison-cli.cabal | 1 + unison-share-api/src/Unison/Server/Orphans.hs | 3 + unison-share-api/src/Unison/SyncV2/Types.hs | 140 ++++++++++++---- .../src/Unison/Util/Servant/CBOR.hs | 46 ++++- 5 files changed, 295 insertions(+), 52 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 0c23cf7a3d..d90ba9010f 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -3,13 +3,14 @@ {-# OPTIONS_GHC -Wno-unused-top-binds #-} module Unison.Share.SyncV2 - ( downloadEntities, + ( downloadEntitiesAllAtOnce, + streamDownloadEntities, ) where import Control.Lens import Control.Monad.Except -import Control.Monad.Reader (ask) +import Control.Monad.Reader (ReaderT (..), ask) import Data.Graph qualified as Graph import Data.Map qualified as Map import Data.Proxy @@ -54,7 +55,7 @@ import UnliftIO qualified ------------------------------------------------------------------------------------------------------------------------ -- Download entities -downloadEntities :: +downloadEntitiesAllAtOnce :: -- | The Unison Share URL. Servant.BaseUrl -> -- | The branch to download from. @@ -65,9 +66,8 @@ downloadEntities :: -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -downloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do +downloadEntitiesAllAtOnce unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask - Cli.label \done -> do let failed :: SyncError SyncV2.PullError -> Cli void failed = done . Left @@ -87,7 +87,85 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallbac SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} allResults <- either failed pure results -- liftIO $ downloadedCallback (length allResults) - allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) + ((version, entitySorting, numEntities), chunks) <- peelInitialChunk allResults + allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks chunks) + let sortedEntities = topSortEntities allEntities + liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do + Q.saveTempEntityInMain v2HashHandle hash entity + pure () + + didCausalSuccessfullyImport codebase hash >>= \case + False -> do + failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) + True -> pure () + -- we'll try vacuuming again next pull. + _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) + pure (Right ()) + where + -- Verify that the expected hash made it into main storage. + didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> Cli Bool + didCausalSuccessfullyImport codebase hash = do + let expectedHash = hash32ToCausalHash hash + isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) + peelInitialChunk :: [SyncV2.DownloadEntitiesChunk] -> Cli (SyncV2.Version, SyncV2.EntitySorting, Word64) + peelInitialChunk [SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities})] = pure (version, entitySorting, numEntities) + peelInitialChunk _ = error "Expected initial chunk" + unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] + unpackChunks xs = do + xs + & ( traverse \case + SyncV2.InitialC {} -> do + Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" + error "Expected entity chunk" + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" + error $ show err + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + -- Only want entities we don't already have in main + ((Q.entityLocation hash)) >>= \case + Just Q.EntityInMainStorage -> pure Nothing + _ -> do + tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity + pure $ Just (hash, tempEntity) + ) + <&> catMaybes + +streamDownloadEntities :: + -- | The Unison Share URL. + Servant.BaseUrl -> + -- | The branch to download from. + SyncV2.BranchRef -> + -- | The hash to download. + Share.HashJWT -> + Set Hash32 -> + -- | Callback that's given a number of entities we just downloaded. + (Int -> IO ()) -> + Cli (Either (SyncError SyncV2.PullError) ()) +streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do + Cli.Env {authHTTPClient, codebase} <- ask + Cli.label \done -> do + let failed :: SyncError SyncV2.PullError -> Cli void + failed = done . Left + let hash = Share.hashJWTHash hashJwt + Cli.runTransaction (Q.entityLocation hash) >>= \case + Just Q.EntityInMainStorage -> pure () + -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" + _ -> do + Debug.debugLogM Debug.Temp $ "Kicking off sync request" + results <- Timing.time "Entity Download" $ do + liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do + httpStreamEntities + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} + \chunk -> do + _ + allResults <- either failed pure results + -- liftIO $ downloadedCallback (length allResults) + ((version, entitySorting, numEntities), chunks) <- peelInitialChunk allResults + allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks chunks) let sortedEntities = topSortEntities allEntities liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do Q.saveTempEntityInMain v2HashHandle hash entity @@ -106,14 +184,19 @@ downloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallbac didCausalSuccessfullyImport codebase hash = do let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) + peelInitialChunk :: [SyncV2.DownloadEntitiesChunk] -> Cli (SyncV2.Version, SyncV2.EntitySorting, Word64) + peelInitialChunk [SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities})] = pure (version, entitySorting, numEntities) + peelInitialChunk _ = error "Expected initial chunk" unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] unpackChunks xs = do xs & ( traverse \case - SyncV2.ErrorChunk {err} -> do + SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities}) -> do + _ + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do Debug.debugLogM Debug.Temp $ "Got error chunk" error $ show err - SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do -- Only want entities we don't already have in main ((Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure Nothing @@ -133,10 +216,10 @@ topSortEntities entities = do handleChunk :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () handleChunk rollback = \case - SyncV2.ErrorChunk {err} -> do + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do Debug.debugLogM Debug.Temp $ "Got error chunk" rollback $ SyncV2.PullError'DownloadEntities err - SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do -- Only want entities we don't already have Q.entityLocation hash >>= \case Just {} -> pure () @@ -224,16 +307,36 @@ SyncV2.Routes { downloadEntitiesStream = downloadEntitiesStreamClientM } = Servant.client syncAPI +nextStep :: SourceT.StepT m a -> m (Maybe (Either Text (a, SourceT.StepT m a))) +nextStep = \case + SourceT.Stop -> pure Nothing + SourceT.Error err -> pure (Just (Left (Text.pack (show err)))) + SourceT.Skip s -> nextStep s + SourceT.Yield a s -> pure (Just $ Right (a, s)) + SourceT.Effect m -> m >>= nextStep + +peelFirstSourceT :: (Monad m) => SourceT.SourceT m a -> m (Maybe (Either Text (a, SourceT.SourceT m a))) +peelFirstSourceT (SourceT.SourceT handleStepT) = + handleStepT nextStep >>= \case + Nothing -> pure Nothing + Just (Left err) -> pure (Just (Left err)) + Just (Right (a, s)) -> pure (Just (Right (a, SourceT.SourceT ($ s)))) + -- | Helper for running clientM that returns a stream of entities. -- You MUST consume the stream within the callback, it will be closed when the callback returns. handleStream :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> (o -> m ()) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError ()) handleStream clientEnv callback clientM = do + handleSourceT clientEnv (SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) callback) clientM + +handleSourceT :: forall m o r. (MonadUnliftIO m) => Servant.ClientEnv -> (SourceT.SourceT m o -> m r) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError r) +handleSourceT clientEnv callback clientM = do toIO <- UnliftIO.askRunInIO liftIO $ Servant.withClientM clientM clientEnv $ \case Left err -> pure (Left $ handleClientError clientEnv err) Right sourceT -> do sourceTM <- liftIO $ Servant.fromSourceIO @o @(SourceT.SourceT m o) sourceT - (Right <$> toIO (SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) callback sourceTM)) + (Right <$> callback sourceTM) + & toIO & UnliftIO.handle @_ @CodeserverTransportError (pure . Left) handleClientError :: Servant.ClientEnv -> Servant.ClientError -> CodeserverTransportError @@ -254,14 +357,20 @@ handleClientError clientEnv err = Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) -httpDownloadEntities :: +data StreamError + = ShareError SyncV2.DownloadEntitiesError + | MissingInitialChunk + | StreamFailure Text + +httpStreamEntities :: + forall m. (MonadUnliftIO m) => Auth.AuthenticatedHttpClient -> Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (SyncV2.DownloadEntitiesChunk -> m ()) -> - m (Either CodeserverTransportError ()) -httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do + (SyncV2.DownloadEntitiesChunk -> ReaderT SyncV2.StreamInitInfo m ()) -> + m (Either CodeserverTransportError (Maybe (Either StreamError ()))) +httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do let clientEnv = (Servant.mkClientEnv httpClient unisonShareUrl) { Servant.makeClientRequest = \url request -> @@ -272,7 +381,20 @@ httpDownloadEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl re { Http.Client.responseTimeout = Http.Client.responseTimeoutNone } } - handleStream clientEnv callback (downloadEntitiesStreamClientM req) + handleSourceT clientEnv go (downloadEntitiesStreamClientM req) + where + go :: SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> m (Maybe (Either StreamError ())) + go st = do + peelFirstSourceT st >>= \case + Nothing -> pure Nothing + Just (Left err) -> pure (Just (Left $ StreamFailure err)) + Just (Right (chunk, k)) -> do + case chunk of + SyncV2.InitialC info -> do + SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) (flip runReaderT info . callback) k + pure $ Just $ Right () + SyncV2.EntityC _ -> pure . Just . Left $ MissingInitialChunk + SyncV2.ErrorC (SyncV2.ErrorChunk err) -> pure . Just . Left . ShareError $ err httpDownloadEntitiesAsList :: (MonadUnliftIO m) => @@ -293,8 +415,7 @@ httpDownloadEntitiesAsList (Auth.AuthenticatedHttpClient httpClient) unisonShare } handleStreamAsList clientEnv (downloadEntitiesStreamClientM req) --- | Helper for running clientM that returns a stream of entities. --- You MUST consume the stream within the callback, it will be closed when the callback returns. +-- | Helper for running clientM that collects all entities in a stream into a list. handleStreamAsList :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError [o]) handleStreamAsList clientEnv clientM = do toIO <- UnliftIO.askRunInIO diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 76f9e1e11a..6c62358a69 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -227,6 +227,7 @@ library , lsp-types >=2.0.2.0 , megaparsec , memory + , mmorph , mtl , network-simple , network-uri diff --git a/unison-share-api/src/Unison/Server/Orphans.hs b/unison-share-api/src/Unison/Server/Orphans.hs index c19804f1b2..003e6d1675 100644 --- a/unison-share-api/src/Unison/Server/Orphans.hs +++ b/unison-share-api/src/Unison/Server/Orphans.hs @@ -548,3 +548,6 @@ encodeVectorWith :: (a -> CBOR.Encoding) -> Vector.Vector a -> CBOR.Encoding encodeVectorWith f xs = CBOR.encodeListLen (fromIntegral $ Vector.length xs) <> (foldr (\a b -> f a <> b) mempty xs) + +instance Ord CBOR.DeserialiseFailure where + compare (CBOR.DeserialiseFailure o s) (CBOR.DeserialiseFailure o' s') = compare (o, s) (o', s') diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 8c46cb3983..5f29ffacef 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -1,6 +1,9 @@ module Unison.SyncV2.Types ( DownloadEntitiesRequest (..), DownloadEntitiesChunk (..), + EntityChunk (..), + ErrorChunk (..), + StreamInitInfo (..), SyncError (..), DownloadEntitiesError (..), CBORBytes (..), @@ -10,6 +13,8 @@ module Unison.SyncV2.Types UploadEntitiesRequest (..), BranchRef (..), PullError (..), + EntitySorting (..), + Version (..), ) where @@ -19,15 +24,19 @@ import Codec.Serialise qualified as CBOR import Codec.Serialise.Decoding qualified as CBOR import Control.Exception (Exception) import Data.Aeson (FromJSON (..), ToJSON (..), object, withObject, (.:), (.=)) -import Data.ByteString.Lazy qualified as BL +import Data.Map (Map) +import Data.Map qualified as Map import Data.Set (Set) import Data.Text (Text) +import Data.Text qualified as Text +import Data.Word (Word16, Word64) import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.TempEntity (TempEntity) import Unison.Hash32 (Hash32) import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) import Unison.Sync.Types qualified as SyncV1 +import Unison.Util.Servant.CBOR newtype BranchRef = BranchRef {unBranchRef :: Text} deriving (Serialise, Eq, Show, Ord, ToJSON, FromJSON) via Text @@ -76,21 +85,6 @@ instance ToJSON DownloadEntitiesRequest where "knownHashes" .= knownHashes ] --- | Wrapper for CBOR data that has already been serialized. --- In our case, we use this because we may load pre-serialized CBOR directly from the database, --- but it's also useful in allowing us to more quickly seek through a CBOR stream, since we only need to decode the CBOR when/if we actually need to use it, and can skip past it using a byte offset otherwise. --- --- The 't' phantom type is the type of the data encoded in the bytestring. -newtype CBORBytes t = CBORBytes BL.ByteString - deriving (Serialise) via (BL.ByteString) - --- | Deserialize a 'CBORBytes' value into its tagged type, throwing an error if the deserialization fails. -deserialiseOrFailCBORBytes :: (Serialise t) => CBORBytes t -> Either CBOR.DeserialiseFailure t -deserialiseOrFailCBORBytes (CBORBytes bs) = CBOR.deserialiseOrFail bs - -serialiseCBORBytes :: (Serialise t) => t -> CBORBytes t -serialiseCBORBytes = CBORBytes . CBOR.serialise - data DownloadEntitiesError = DownloadEntitiesNoReadPermission BranchRef | -- | msg, branchRef @@ -100,7 +94,7 @@ data DownloadEntitiesError | -- | project shorthand DownloadEntitiesProjectNotFound Text | DownloadEntitiesEntityValidationFailure SyncV1.EntityValidationError - deriving stock (Eq, Show) + deriving stock (Eq, Show, Ord) data DownloadEntitiesErrorTag = NoReadPermissionTag @@ -108,7 +102,7 @@ data DownloadEntitiesErrorTag | UserNotFoundTag | ProjectNotFoundTag | EntityValidationFailureTag - deriving stock (Eq, Show) + deriving stock (Eq, Show, Ord) instance Serialise DownloadEntitiesErrorTag where encode = \case @@ -144,33 +138,113 @@ instance Serialise DownloadEntitiesError where ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode +data EntitySorting = DependenciesFirst | DependentsFirst | Unsorted + deriving (Show, Eq, Ord) + +instance Serialise EntitySorting where + encode = \case + DependenciesFirst -> CBOR.encodeWord8 0 + DependentsFirst -> CBOR.encodeWord8 1 + Unsorted -> CBOR.encodeWord8 2 + decode = do + tag <- CBOR.decodeWord8 + case tag of + 0 -> pure DependenciesFirst + 1 -> pure DependentsFirst + 2 -> pure Unsorted + _ -> fail "invalid tag" + +newtype Version = Version Word16 + deriving stock (Show) + deriving newtype (Eq, Ord, Serialise) + +data StreamInitInfo + = StreamInitInfo + { version :: Version, + entitySorting :: EntitySorting, + numEntities :: Maybe Word64 + } + deriving (Show, Eq, Ord) + +decodeMapKey :: (Serialise r) => Text -> Map Text UnknownCBORBytes -> CBOR.Decoder s r +decodeMapKey k m = + optionalDecodeMapKey k m >>= \case + Nothing -> fail $ "Expected key: " <> Text.unpack k + Just x -> pure x + +optionalDecodeMapKey :: (Serialise r) => Text -> Map Text UnknownCBORBytes -> CBOR.Decoder s (Maybe r) +optionalDecodeMapKey k m = + case Map.lookup k m of + Nothing -> pure Nothing + Just bs -> Just <$> decodeUnknownCBORBytes bs + +-- | Serialised as a map to allow for future expansion +instance Serialise StreamInitInfo where + encode (StreamInitInfo {version, entitySorting}) = + CBOR.encode + ( Map.fromList + [ ("v" :: Text, serialiseUnknownCBORBytes version), + ("es", serialiseUnknownCBORBytes entitySorting) + ] + ) + decode = do + m <- CBOR.decode + version <- decodeMapKey "v" m + entitySorting <- decodeMapKey "es" m + numEntities <- (optionalDecodeMapKey "ne" m) + pure StreamInitInfo {version, entitySorting, numEntities} + +data EntityChunk = EntityChunk + { hash :: Hash32, + entityCBOR :: CBORBytes TempEntity + } + deriving (Show, Eq, Ord) + +instance Serialise EntityChunk where + encode (EntityChunk {hash, entityCBOR}) = CBOR.encode hash <> CBOR.encode entityCBOR + decode = EntityChunk <$> CBOR.decode <*> CBOR.decode + +data ErrorChunk = ErrorChunk + { err :: DownloadEntitiesError + } + deriving (Show, Eq, Ord) + +instance Serialise ErrorChunk where + encode (ErrorChunk {err}) = CBOR.encode err + decode = ErrorChunk <$> CBOR.decode + -- | A chunk of the download entities response stream. data DownloadEntitiesChunk - = EntityChunk {hash :: Hash32, entityCBOR :: CBORBytes TempEntity} - | ErrorChunk {err :: DownloadEntitiesError} + = InitialC StreamInitInfo + | EntityC EntityChunk + | ErrorC ErrorChunk + deriving (Show, Eq, Ord) -data DownloadEntitiesChunkTag = EntityChunkTag | ErrorChunkTag +data DownloadEntitiesChunkTag = InitialChunkTag | EntityChunkTag | ErrorChunkTag + deriving (Show, Eq, Ord) instance Serialise DownloadEntitiesChunkTag where - encode EntityChunkTag = CBOR.encodeWord8 0 - encode ErrorChunkTag = CBOR.encodeWord8 1 + encode InitialChunkTag = CBOR.encodeWord8 0 + encode EntityChunkTag = CBOR.encodeWord8 1 + encode ErrorChunkTag = CBOR.encodeWord8 2 decode = do tag <- CBOR.decodeWord8 case tag of - 0 -> pure EntityChunkTag - 1 -> pure ErrorChunkTag + 0 -> pure InitialChunkTag + 1 -> pure EntityChunkTag + 2 -> pure ErrorChunkTag _ -> fail "invalid tag" instance Serialise DownloadEntitiesChunk where - encode (EntityChunk {hash, entityCBOR}) = - encode EntityChunkTag <> encode hash <> encode entityCBOR - encode (ErrorChunk {err}) = - encode ErrorChunkTag <> encode err + encode (EntityC ec) = encode EntityChunkTag <> CBOR.encode ec + encode (ErrorC ec) = encode ErrorChunkTag <> CBOR.encode ec + encode (InitialC ic) = encode InitialChunkTag <> encode ic decode = do tag <- decode case tag of - EntityChunkTag -> EntityChunk <$> decode <*> decode - ErrorChunkTag -> ErrorChunk <$> decode + InitialChunkTag -> InitialC <$> decode + EntityChunkTag -> EntityC <$> decode + ErrorChunkTag -> ErrorC <$> decode -- TODO data UploadEntitiesRequest = UploadEntitiesRequest @@ -183,13 +257,13 @@ instance Serialise UploadEntitiesRequest where data PullError = PullError'DownloadEntities DownloadEntitiesError | PullError'Sync SyncError - deriving stock (Show) + deriving stock (Show, Eq, Ord) deriving anyclass (Exception) data SyncError = SyncErrorExpectedResultNotInMain CausalHash | SyncErrorDeserializationFailure CBOR.DeserialiseFailure - deriving stock (Show) + deriving stock (Show, Eq, Ord) data EntityKind = CausalEntity diff --git a/unison-share-api/src/Unison/Util/Servant/CBOR.hs b/unison-share-api/src/Unison/Util/Servant/CBOR.hs index 481cbc20b3..18fd94904c 100644 --- a/unison-share-api/src/Unison/Util/Servant/CBOR.hs +++ b/unison-share-api/src/Unison/Util/Servant/CBOR.hs @@ -1,10 +1,23 @@ -- | Servant configuration for the CBOR media type -- -- Adapted from https://hackage.haskell.org/package/servant-serialization-0.3/docs/Servant-API-ContentTypes-SerialiseCBOR.html via MIT license -module Unison.Util.Servant.CBOR (CBOR) where +module Unison.Util.Servant.CBOR + ( CBOR, + UnknownCBORBytes, + CBORBytes (..), + deserialiseOrFailCBORBytes, + serialiseCBORBytes, + decodeCBORBytes, + decodeUnknownCBORBytes, + serialiseUnknownCBORBytes, + ) +where import Codec.CBOR.Read (DeserialiseFailure (..)) import Codec.Serialise (Serialise, deserialiseOrFail, serialise) +import Codec.Serialise qualified as CBOR +import Codec.Serialise.Decoding qualified as CBOR +import Data.ByteString.Lazy qualified as BL import Data.List.NonEmpty qualified as NonEmpty import Network.HTTP.Media.MediaType qualified as MediaType import Servant @@ -42,3 +55,34 @@ instance (Serialise a) => MimeUnrender CBOR a where mapLeft f = either (Left . f) Right prettyErr (DeserialiseFailure offset err) = "Codec.Serialise.deserialiseOrFail: " ++ err ++ " at byte-offset " ++ show offset + +-- | Wrapper for CBOR data that has already been serialized. +-- In our case, we use this because we may load pre-serialized CBOR directly from the database, +-- but it's also useful in allowing us to more quickly seek through a CBOR stream, since we only need to decode the CBOR when/if we actually need to use it, and can skip past it using a byte offset otherwise. +-- +-- The 't' phantom type is the type of the data encoded in the bytestring. +newtype CBORBytes t = CBORBytes BL.ByteString + deriving (Serialise) via (BL.ByteString) + deriving (Eq, Show, Ord) + +-- | Deserialize a 'CBORBytes' value into its tagged type, throwing an error if the deserialization fails. +deserialiseOrFailCBORBytes :: (Serialise t) => CBORBytes t -> Either CBOR.DeserialiseFailure t +deserialiseOrFailCBORBytes (CBORBytes bs) = CBOR.deserialiseOrFail bs + +decodeCBORBytes :: (Serialise t) => CBORBytes t -> CBOR.Decoder s t +decodeCBORBytes (CBORBytes bs) = decodeUnknownCBORBytes (CBORBytes bs) + +decodeUnknownCBORBytes :: (Serialise t) => UnknownCBORBytes -> CBOR.Decoder s t +decodeUnknownCBORBytes (CBORBytes bs) = case deserialiseOrFailCBORBytes (CBORBytes bs) of + Left err -> fail (show err) + Right t -> pure t + +serialiseCBORBytes :: (Serialise t) => t -> CBORBytes t +serialiseCBORBytes = CBORBytes . CBOR.serialise + +serialiseUnknownCBORBytes :: (Serialise t) => t -> UnknownCBORBytes +serialiseUnknownCBORBytes = CBORBytes . CBOR.serialise + +data Unknown + +type UnknownCBORBytes = CBORBytes Unknown From 9cbc7d3e739f3ff7837e62138fbf536742abe382 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Fri, 13 Dec 2024 19:10:39 -0800 Subject: [PATCH 26/52] A bunch of sync refactors --- unison-cli/package.yaml | 1 + unison-cli/src/Unison/Share/SyncV2.hs | 303 +++++++++++--------- unison-share-api/src/Unison/SyncV2/Types.hs | 15 +- 3 files changed, 183 insertions(+), 136 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 585419ea3c..ccc8a9f5c1 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -52,6 +52,7 @@ library: - lsp >= 2.2.0.0 - lsp-types >= 2.0.2.0 - memory + - mmorph - mtl - network-simple - network-uri diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index d90ba9010f..093ec0cdcb 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -3,13 +3,13 @@ {-# OPTIONS_GHC -Wno-unused-top-binds #-} module Unison.Share.SyncV2 - ( downloadEntitiesAllAtOnce, - streamDownloadEntities, + ( streamDownloadEntities, ) where import Control.Lens import Control.Monad.Except +import Control.Monad.Morph (hoist) import Control.Monad.Reader (ReaderT (..), ask) import Data.Graph qualified as Graph import Data.Map qualified as Map @@ -55,82 +55,149 @@ import UnliftIO qualified ------------------------------------------------------------------------------------------------------------------------ -- Download entities -downloadEntitiesAllAtOnce :: - -- | The Unison Share URL. - Servant.BaseUrl -> - -- | The branch to download from. - SyncV2.BranchRef -> - -- | The hash to download. - Share.HashJWT -> - Set Hash32 -> - -- | Callback that's given a number of entities we just downloaded. - (Int -> IO ()) -> - Cli (Either (SyncError SyncV2.PullError) ()) -downloadEntitiesAllAtOnce unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do - Cli.Env {authHTTPClient, codebase} <- ask - Cli.label \done -> do - let failed :: SyncError SyncV2.PullError -> Cli void - failed = done . Left +-- | Syncs a stream which could send entities in any order. +syncUnsortedStream :: + (Codebase.Codebase IO v a) -> + Servant.SourceIO (SyncV2.DownloadEntitiesChunk) -> + IO (Either (SyncError SyncV2.PullError) ()) +syncUnsortedStream codebase stream = runExceptT do + sourceTM <- liftIO $ Servant.fromSourceIO stream + results <- mapLeft (StreamingError . Text.pack) <$> (runExceptT $ SourceT.runSourceT sourceTM) + allResults <- either (throwError . TransportError) pure results + allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) + -- TODO: do we want to save the entities somewhere? + let sortedEntities = topSortEntities allEntities + liftIO $ Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> + for_ sortedEntities \(hash, entity) -> do + validateEntity rollback hash entity + void $ Q.saveTempEntityInMain v2HashHandle hash entity + pure () - let hash = Share.hashJWTHash hashJwt +-- TODO: doublecheck this... +chunksOfSourceT :: forall m a. (Monad m) => Int -> SourceT.SourceT m a -> SourceT.SourceT m [a] +chunksOfSourceT chunkSize (SourceT.SourceT k) = SourceT.SourceT \f -> do + chunked <- k (go 0 mempty) + f chunked + where + go :: Int -> ([a] -> [a]) -> SourceT.StepT m a -> m (SourceT.StepT m [a]) + go i dlist s + | i == chunkSize = do + SourceT.Yield (dlist []) <$> (go 0 mempty s) + | otherwise = case s of + SourceT.Stop -> + pure $ SourceT.Yield (dlist []) (SourceT.Stop) + SourceT.Error err -> pure $ SourceT.Error err + SourceT.Skip s' -> SourceT.Skip <$> (go i dlist s') + SourceT.Yield a s' -> + (go (i + 1) ((a :) . dlist) s') + SourceT.Effect m -> do + m >>= go i dlist - Cli.runTransaction (Q.entityLocation hash) >>= \case +batchSize :: Int +batchSize = 1000 + +syncSortedStream :: + (Codebase.Codebase IO v a) -> + Servant.SourceT IO (SyncV2.DownloadEntitiesChunk) -> + IO (Either (SyncError SyncV2.PullError) ()) +syncSortedStream codebase stream = runExceptT do + hoist liftIO (chunksOfSourceT batchSize stream) + & ( SourceT.foreach (throwError . TransportError . StreamingError . Text.pack) \chunkBatch -> do + ExceptT $ Codebase.runTransactionWithRollback codebase \rollback -> do + sequenceA <$> for chunkBatch unpackChunk >>= \case + Left err -> do + rollback (Left $ SyncError err) + Right (catMaybes -> entityBatch) -> do + Right <$> for_ entityBatch \(hash, entity) -> do + validateEntity (rollback . Left . SyncError) hash entity + void $ Q.saveTempEntityInMain v2HashHandle hash entity + ) + +handleChunk' :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () +handleChunk' rollback = \case + SyncV2.InitialC {} -> do + Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" + rollback $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" + rollback $ SyncV2.PullError'DownloadEntities err + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + -- Only want entities we don't already have + Q.entityLocation hash >>= \case Just Q.EntityInMainStorage -> pure () - -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do - Debug.debugLogM Debug.Temp $ "Kicking off sync request" - results <- Timing.time "Entity Download" $ do - liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do - httpDownloadEntitiesAsList - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - allResults <- either failed pure results - -- liftIO $ downloadedCallback (length allResults) - ((version, entitySorting, numEntities), chunks) <- peelInitialChunk allResults - allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks chunks) - let sortedEntities = topSortEntities allEntities - liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do - Q.saveTempEntityInMain v2HashHandle hash entity - pure () + tempEntity <- unpackEntity entityBytes + validateEntity rollback hash tempEntity + void $ Q.saveTempEntityInMain v2HashHandle hash tempEntity + where + unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction TempEntity + unpackEntity entityBytes = do + case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> do rollback (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity + +unpackChunk :: SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction (Either SyncV2.PullError (Maybe (Hash32, TempEntity))) +unpackChunk = \case + SyncV2.InitialC {} -> do + Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" + pure . Left $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" + pure . Left $ SyncV2.PullError'DownloadEntities err + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + -- Only want entities we don't already have + Q.entityLocation hash >>= \case + Just Q.EntityInMainStorage -> pure $ Right Nothing + _ -> do + fmap (Just . (hash,)) <$> unpackEntity entityBytes + where + unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction (Either SyncV2.PullError TempEntity) + unpackEntity entityBytes = do + case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> do pure $ Left (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure $ Right entity - didCausalSuccessfullyImport codebase hash >>= \case - False -> do - failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) - True -> pure () - -- we'll try vacuuming again next pull. - _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) - pure (Right ()) - where - -- Verify that the expected hash made it into main storage. - didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> Cli Bool - didCausalSuccessfullyImport codebase hash = do - let expectedHash = hash32ToCausalHash hash - isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) - peelInitialChunk :: [SyncV2.DownloadEntitiesChunk] -> Cli (SyncV2.Version, SyncV2.EntitySorting, Word64) - peelInitialChunk [SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities})] = pure (version, entitySorting, numEntities) - peelInitialChunk _ = error "Expected initial chunk" - unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] - unpackChunks xs = do - xs - & ( traverse \case - SyncV2.InitialC {} -> do - Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" - error "Expected entity chunk" - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - error $ show err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do - -- Only want entities we don't already have in main - ((Q.entityLocation hash)) >>= \case - Just Q.EntityInMainStorage -> pure Nothing - _ -> do - tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure entity - pure $ Just (hash, tempEntity) - ) - <&> catMaybes +validateEntity :: (SyncV2.PullError -> Sqlite.Transaction ()) -> Hash32 -> TempEntity -> Sqlite.Transaction () +validateEntity rollback hash entity = do + Debug.debugLogM Debug.Temp $ "Validating entity" + -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. + case EV.validateTempEntity hash entity of + Nothing -> pure () + Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> + let expectedMismatches = case et of + Share.TermComponentType -> expectedComponentHashMismatches + Share.DeclComponentType -> expectedComponentHashMismatches + Share.CausalType -> expectedCausalHashMismatches + _ -> mempty + in case Map.lookup supplied expectedMismatches of + Just expected + | expected == computed -> pure () + _ -> do + rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + Just err -> do + rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + +unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] +unpackChunks xs = do + xs + & ( traverse \case + SyncV2.InitialC {} -> do + Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" + error "Expected entity chunk" + SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do + Debug.debugLogM Debug.Temp $ "Got error chunk" + error $ show err + SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + -- Only want entities we don't already have in main + ((Q.entityLocation hash)) >>= \case + Just Q.EntityInMainStorage -> pure Nothing + _ -> do + tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of + Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity + pure $ Just (hash, tempEntity) + ) + <&> catMaybes streamDownloadEntities :: -- | The Unison Share URL. @@ -149,28 +216,23 @@ streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedC let failed :: SyncError SyncV2.PullError -> Cli void failed = done . Left let hash = Share.hashJWTHash hashJwt - Cli.runTransaction (Q.entityLocation hash) >>= \case - Just Q.EntityInMainStorage -> pure () - -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" - _ -> do - Debug.debugLogM Debug.Temp $ "Kicking off sync request" - results <- Timing.time "Entity Download" $ do - liftIO . UnliftIO.handle @_ @SyncV2.PullError (pure . Left . SyncError) . fmap (mapLeft TransportError) $ do - httpStreamEntities - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - \chunk -> do - _ - allResults <- either failed pure results - -- liftIO $ downloadedCallback (length allResults) - ((version, entitySorting, numEntities), chunks) <- peelInitialChunk allResults - allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks chunks) - let sortedEntities = topSortEntities allEntities - liftIO $ Timing.time "Inserting entities" $ Codebase.runTransaction codebase $ for_ sortedEntities \(hash, entity) -> do - Q.saveTempEntityInMain v2HashHandle hash entity - pure () - + _r <- + Cli.runTransaction (Q.entityLocation hash) >>= \case + Just Q.EntityInMainStorage -> pure $ Right () + -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" + _ -> do + Debug.debugLogM Debug.Temp $ "Kicking off sync request" + Timing.time "Entity Download" $ do + fmap unifyErrs $ liftIO $ do + httpStreamEntities + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} + \stream -> do + SyncV2.StreamInitInfo {version, entitySorting, numEntities} <- ask + case entitySorting of + SyncV2.DependenciesFirst -> syncSortedStream stream + SyncV2.Unsorted -> lift $ syncUnsortedStream codebase stream didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -184,29 +246,13 @@ streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedC didCausalSuccessfullyImport codebase hash = do let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) - peelInitialChunk :: [SyncV2.DownloadEntitiesChunk] -> Cli (SyncV2.Version, SyncV2.EntitySorting, Word64) - peelInitialChunk [SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities})] = pure (version, entitySorting, numEntities) - peelInitialChunk _ = error "Expected initial chunk" - unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] - unpackChunks xs = do - xs - & ( traverse \case - SyncV2.InitialC (SyncV2.StreamInitInfo {version, entitySorting, numEntities}) -> do - _ - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - error $ show err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do - -- Only want entities we don't already have in main - ((Q.entityLocation hash)) >>= \case - Just Q.EntityInMainStorage -> pure Nothing - _ -> do - tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure entity - pure $ Just (hash, tempEntity) - ) - <&> catMaybes + + unifyErrs :: Either CodeserverTransportError (Maybe (Either SyncV2.SyncError ())) -> Either (SyncError SyncV2.PullError) () + unifyErrs = \case + Left err -> Left (TransportError err) + Right Nothing -> Right () + Right (Just (Left err)) -> Left (SyncError $ SyncV2.PullError'Sync err) + Right (Just (Right ())) -> Right () topSortEntities :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] topSortEntities entities = do @@ -214,8 +260,8 @@ topSortEntities entities = do (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) -handleChunk :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () -handleChunk rollback = \case +handleChunkUsingTemp :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () +handleChunkUsingTemp rollback = \case SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do Debug.debugLogM Debug.Temp $ "Got error chunk" rollback $ SyncV2.PullError'DownloadEntities err @@ -326,7 +372,7 @@ peelFirstSourceT (SourceT.SourceT handleStepT) = -- You MUST consume the stream within the callback, it will be closed when the callback returns. handleStream :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> (o -> m ()) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError ()) handleStream clientEnv callback clientM = do - handleSourceT clientEnv (SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) callback) clientM + handleSourceT clientEnv (SourceT.foreach (throwError . StreamingError . Text.pack) callback) clientM handleSourceT :: forall m o r. (MonadUnliftIO m) => Servant.ClientEnv -> (SourceT.SourceT m o -> m r) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError r) handleSourceT clientEnv callback clientM = do @@ -357,19 +403,14 @@ handleClientError clientEnv err = Servant.InvalidContentTypeHeader resp -> UnexpectedResponse resp Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) -data StreamError - = ShareError SyncV2.DownloadEntitiesError - | MissingInitialChunk - | StreamFailure Text - httpStreamEntities :: - forall m. + forall m r. (MonadUnliftIO m) => Auth.AuthenticatedHttpClient -> Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (SyncV2.DownloadEntitiesChunk -> ReaderT SyncV2.StreamInitInfo m ()) -> - m (Either CodeserverTransportError (Maybe (Either StreamError ()))) + (SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> ReaderT SyncV2.StreamInitInfo m r) -> + m (Either CodeserverTransportError (Maybe (Either SyncV2.SyncError ()))) httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do let clientEnv = (Servant.mkClientEnv httpClient unisonShareUrl) @@ -383,7 +424,7 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req } handleSourceT clientEnv go (downloadEntitiesStreamClientM req) where - go :: SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> m (Maybe (Either StreamError ())) + go :: SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> m (Maybe (Either SyncV2.SyncError ())) go st = do peelFirstSourceT st >>= \case Nothing -> pure Nothing @@ -391,7 +432,7 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req Just (Right (chunk, k)) -> do case chunk of SyncV2.InitialC info -> do - SourceT.foreach (UnliftIO.throwIO . StreamingError . Text.pack) (flip runReaderT info . callback) k + flip runReaderT info $ callback k pure $ Just $ Right () SyncV2.EntityC _ -> pure . Just . Left $ MissingInitialChunk SyncV2.ErrorC (SyncV2.ErrorChunk err) -> pure . Just . Left . ShareError $ err diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 5f29ffacef..76151620e8 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -138,20 +138,22 @@ instance Serialise DownloadEntitiesError where ProjectNotFoundTag -> DownloadEntitiesProjectNotFound <$> CBOR.decode EntityValidationFailureTag -> DownloadEntitiesEntityValidationFailure <$> CBOR.decode -data EntitySorting = DependenciesFirst | DependentsFirst | Unsorted +data EntitySorting + = -- all dependencies of an entity are guaranteed to be sent before the entity itself + DependenciesFirst + | -- no guarantees. + Unsorted deriving (Show, Eq, Ord) instance Serialise EntitySorting where encode = \case DependenciesFirst -> CBOR.encodeWord8 0 - DependentsFirst -> CBOR.encodeWord8 1 - Unsorted -> CBOR.encodeWord8 2 + Unsorted -> CBOR.encodeWord8 1 decode = do tag <- CBOR.decodeWord8 case tag of 0 -> pure DependenciesFirst - 1 -> pure DependentsFirst - 2 -> pure Unsorted + 1 -> pure Unsorted _ -> fail "invalid tag" newtype Version = Version Word16 @@ -263,6 +265,9 @@ data PullError data SyncError = SyncErrorExpectedResultNotInMain CausalHash | SyncErrorDeserializationFailure CBOR.DeserialiseFailure + | SyncErrorMissingInitialChunk + | SyncErrorMisplacedInitialChunk + | SyncErrorStreamFailure Text deriving stock (Show, Eq, Ord) data EntityKind From 439360b37d568afb521c6a497599b5d9c672b773 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 11:52:32 -0800 Subject: [PATCH 27/52] Port syncing code to use conduit instead --- unison-cli/package.yaml | 2 +- unison-cli/src/Unison/Cli/DownloadUtils.hs | 2 +- unison-cli/src/Unison/Share/SyncV2.hs | 268 +++++++------------- unison-cli/unison-cli.cabal | 2 +- unison-share-api/src/Unison/SyncV2/Types.hs | 1 + 5 files changed, 90 insertions(+), 185 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index ccc8a9f5c1..7222487059 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -33,6 +33,7 @@ library: - code-page - concurrent-output - containers >= 0.6.3 + - conduit - cryptonite - either - errors @@ -52,7 +53,6 @@ library: - lsp >= 2.2.0.0 - lsp-types >= 2.0.2.0 - memory - - mmorph - mtl - network-simple - network-uri diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index e0ad96970c..0ea4da184b 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -57,7 +57,7 @@ downloadProjectBranchFromShare useSquashed branch = -- TODO: Fill this in. let knownHashes = Set.empty let downloadedCallback = \_ -> pure () - result <- SyncV2.downloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback + result <- SyncV2.streamDownloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback -- numDownloaded <- liftIO getNumDownloaded let numDownloaded = 0 pure (result, numDownloaded) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 093ec0cdcb..db1659159b 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -1,22 +1,22 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeOperators #-} -{-# OPTIONS_GHC -Wno-unused-top-binds #-} module Unison.Share.SyncV2 ( streamDownloadEntities, ) where +import Conduit (ConduitT) +import Conduit qualified as C import Control.Lens import Control.Monad.Except -import Control.Monad.Morph (hoist) -import Control.Monad.Reader (ReaderT (..), ask) +import Control.Monad.Reader (ask) +import Data.Conduit.List qualified as C import Data.Graph qualified as Graph import Data.Map qualified as Map import Data.Proxy import Data.Set qualified as Set import Data.Set.NonEmpty qualified as NESet -import Data.Text qualified as Text import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy import Network.HTTP.Client qualified as Http.Client @@ -25,7 +25,6 @@ import Servant.API qualified as Servant import Servant.Client.Streaming qualified as Servant import Servant.Conduit () import Servant.Types.SourceT qualified as Servant -import Servant.Types.SourceT qualified as SourceT import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) @@ -50,7 +49,12 @@ import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) import Unison.Util.Timing qualified as Timing -import UnliftIO qualified + +type Stream i o = ConduitT i o SyncM () + +type PullErr = SyncError SyncV2.PullError + +type SyncM = ExceptT PullErr IO ------------------------------------------------------------------------------------------------------------------------ -- Download entities @@ -58,93 +62,42 @@ import UnliftIO qualified -- | Syncs a stream which could send entities in any order. syncUnsortedStream :: (Codebase.Codebase IO v a) -> - Servant.SourceIO (SyncV2.DownloadEntitiesChunk) -> - IO (Either (SyncError SyncV2.PullError) ()) -syncUnsortedStream codebase stream = runExceptT do - sourceTM <- liftIO $ Servant.fromSourceIO stream - results <- mapLeft (StreamingError . Text.pack) <$> (runExceptT $ SourceT.runSourceT sourceTM) - allResults <- either (throwError . TransportError) pure results + Stream () SyncV2.EntityChunk -> + SyncM () +syncUnsortedStream codebase stream = do + allResults <- C.runConduit $ stream C..| C.sinkList allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) -- TODO: do we want to save the entities somewhere? let sortedEntities = topSortEntities allEntities - liftIO $ Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> + ExceptT $ liftIO $ Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> do for_ sortedEntities \(hash, entity) -> do - validateEntity rollback hash entity + validateEntity (rollback . Left) hash entity void $ Q.saveTempEntityInMain v2HashHandle hash entity - pure () - --- TODO: doublecheck this... -chunksOfSourceT :: forall m a. (Monad m) => Int -> SourceT.SourceT m a -> SourceT.SourceT m [a] -chunksOfSourceT chunkSize (SourceT.SourceT k) = SourceT.SourceT \f -> do - chunked <- k (go 0 mempty) - f chunked - where - go :: Int -> ([a] -> [a]) -> SourceT.StepT m a -> m (SourceT.StepT m [a]) - go i dlist s - | i == chunkSize = do - SourceT.Yield (dlist []) <$> (go 0 mempty s) - | otherwise = case s of - SourceT.Stop -> - pure $ SourceT.Yield (dlist []) (SourceT.Stop) - SourceT.Error err -> pure $ SourceT.Error err - SourceT.Skip s' -> SourceT.Skip <$> (go i dlist s') - SourceT.Yield a s' -> - (go (i + 1) ((a :) . dlist) s') - SourceT.Effect m -> do - m >>= go i dlist + pure $ Right () batchSize :: Int -batchSize = 1000 +batchSize = 5000 syncSortedStream :: (Codebase.Codebase IO v a) -> - Servant.SourceT IO (SyncV2.DownloadEntitiesChunk) -> - IO (Either (SyncError SyncV2.PullError) ()) -syncSortedStream codebase stream = runExceptT do - hoist liftIO (chunksOfSourceT batchSize stream) - & ( SourceT.foreach (throwError . TransportError . StreamingError . Text.pack) \chunkBatch -> do - ExceptT $ Codebase.runTransactionWithRollback codebase \rollback -> do - sequenceA <$> for chunkBatch unpackChunk >>= \case - Left err -> do - rollback (Left $ SyncError err) - Right (catMaybes -> entityBatch) -> do - Right <$> for_ entityBatch \(hash, entity) -> do - validateEntity (rollback . Left . SyncError) hash entity - void $ Q.saveTempEntityInMain v2HashHandle hash entity - ) - -handleChunk' :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () -handleChunk' rollback = \case - SyncV2.InitialC {} -> do - Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" - rollback $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - rollback $ SyncV2.PullError'DownloadEntities err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do - -- Only want entities we don't already have - Q.entityLocation hash >>= \case - Just Q.EntityInMainStorage -> pure () - _ -> do - tempEntity <- unpackEntity entityBytes - validateEntity rollback hash tempEntity - void $ Q.saveTempEntityInMain v2HashHandle hash tempEntity - where - unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction TempEntity - unpackEntity entityBytes = do - case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> do rollback (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure entity + Stream () SyncV2.EntityChunk -> + SyncM () +syncSortedStream codebase stream = do + let handler :: Stream [SyncV2.EntityChunk] o + handler = C.mapM_C \chunkBatch -> do + ExceptT $ Codebase.runTransactionWithRollback codebase \rollback -> do + sequenceA <$> for chunkBatch unpackChunk >>= \case + Left err -> do + rollback (Left $ SyncError err) + Right (catMaybes -> entityBatch) -> do + Right <$> for_ entityBatch \(hash, entity) -> do + validateEntity (rollback . Left) hash entity + void $ Q.saveTempEntityInMain v2HashHandle hash entity + C.runConduit $ stream C..| C.chunksOf batchSize C..| handler -unpackChunk :: SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction (Either SyncV2.PullError (Maybe (Hash32, TempEntity))) +unpackChunk :: SyncV2.EntityChunk -> Sqlite.Transaction (Either SyncV2.PullError (Maybe (Hash32, TempEntity))) unpackChunk = \case - SyncV2.InitialC {} -> do - Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" - pure . Left $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - pure . Left $ SyncV2.PullError'DownloadEntities err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do -- Only want entities we don't already have Q.entityLocation hash >>= \case Just Q.EntityInMainStorage -> pure $ Right Nothing @@ -157,7 +110,7 @@ unpackChunk = \case Left err -> do pure $ Left (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) Right entity -> pure $ Right entity -validateEntity :: (SyncV2.PullError -> Sqlite.Transaction ()) -> Hash32 -> TempEntity -> Sqlite.Transaction () +validateEntity :: (PullErr -> Sqlite.Transaction ()) -> Hash32 -> TempEntity -> Sqlite.Transaction () validateEntity rollback hash entity = do Debug.debugLogM Debug.Temp $ "Validating entity" -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. @@ -173,21 +126,15 @@ validateEntity rollback hash entity = do Just expected | expected == computed -> pure () _ -> do - rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + rollback . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err Just err -> do - rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + rollback . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err -unpackChunks :: [SyncV2.DownloadEntitiesChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] +unpackChunks :: [SyncV2.EntityChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] unpackChunks xs = do xs & ( traverse \case - SyncV2.InitialC {} -> do - Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" - error "Expected entity chunk" - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - error $ show err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do + (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do -- Only want entities we don't already have in main ((Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure Nothing @@ -223,16 +170,18 @@ streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedC _ -> do Debug.debugLogM Debug.Temp $ "Kicking off sync request" Timing.time "Entity Download" $ do - fmap unifyErrs $ liftIO $ do - httpStreamEntities - authHTTPClient - unisonShareUrl - SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - \stream -> do - SyncV2.StreamInitInfo {version, entitySorting, numEntities} <- ask - case entitySorting of - SyncV2.DependenciesFirst -> syncSortedStream stream - SyncV2.Unsorted -> lift $ syncUnsortedStream codebase stream + liftIO . runExceptT $ httpStreamEntities + authHTTPClient + unisonShareUrl + SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} + \(SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo}) stream -> do + case version of + (SyncV2.Version 1) -> pure () + v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v + + case entitySorting of + SyncV2.DependenciesFirst -> syncSortedStream codebase stream + SyncV2.Unsorted -> syncUnsortedStream codebase stream didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -247,21 +196,17 @@ streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedC let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) - unifyErrs :: Either CodeserverTransportError (Maybe (Either SyncV2.SyncError ())) -> Either (SyncError SyncV2.PullError) () - unifyErrs = \case - Left err -> Left (TransportError err) - Right Nothing -> Right () - Right (Just (Left err)) -> Left (SyncError $ SyncV2.PullError'Sync err) - Right (Just (Right ())) -> Right () - topSortEntities :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] topSortEntities entities = do let adjList = entities <&> \(hash32, entity) -> ((hash32, entity), hash32, Set.toList $ Share.entityDependencies (tempEntityToEntity entity)) (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) -handleChunkUsingTemp :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () -handleChunkUsingTemp rollback = \case +_handleChunkUsingTemp :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () +_handleChunkUsingTemp rollback = \case + SyncV2.InitialC {} -> do + Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" + rollback $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do Debug.debugLogM Debug.Temp $ "Got error chunk" rollback $ SyncV2.PullError'DownloadEntities err @@ -353,37 +298,21 @@ SyncV2.Routes { downloadEntitiesStream = downloadEntitiesStreamClientM } = Servant.client syncAPI -nextStep :: SourceT.StepT m a -> m (Maybe (Either Text (a, SourceT.StepT m a))) -nextStep = \case - SourceT.Stop -> pure Nothing - SourceT.Error err -> pure (Just (Left (Text.pack (show err)))) - SourceT.Skip s -> nextStep s - SourceT.Yield a s -> pure (Just $ Right (a, s)) - SourceT.Effect m -> m >>= nextStep - -peelFirstSourceT :: (Monad m) => SourceT.SourceT m a -> m (Maybe (Either Text (a, SourceT.SourceT m a))) -peelFirstSourceT (SourceT.SourceT handleStepT) = - handleStepT nextStep >>= \case - Nothing -> pure Nothing - Just (Left err) -> pure (Just (Left err)) - Just (Right (a, s)) -> pure (Just (Right (a, SourceT.SourceT ($ s)))) +-- -- | Helper for running clientM that returns a stream of entities. +-- -- You MUST consume the stream within the callback, it will be closed when the callback returns. +-- handleStream :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> (o -> m ()) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError ()) +-- handleStream clientEnv callback clientM = do +-- handleSourceT clientEnv (SourceT.foreach (throwError . StreamingError . Text.pack) callback) clientM -- | Helper for running clientM that returns a stream of entities. -- You MUST consume the stream within the callback, it will be closed when the callback returns. -handleStream :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> (o -> m ()) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError ()) -handleStream clientEnv callback clientM = do - handleSourceT clientEnv (SourceT.foreach (throwError . StreamingError . Text.pack) callback) clientM - -handleSourceT :: forall m o r. (MonadUnliftIO m) => Servant.ClientEnv -> (SourceT.SourceT m o -> m r) -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError r) -handleSourceT clientEnv callback clientM = do - toIO <- UnliftIO.askRunInIO - liftIO $ Servant.withClientM clientM clientEnv $ \case - Left err -> pure (Left $ handleClientError clientEnv err) +withConduit :: forall r. Servant.ClientEnv -> (Stream () SyncV2.DownloadEntitiesChunk -> SyncM r) -> Servant.ClientM (Servant.SourceIO SyncV2.DownloadEntitiesChunk) -> SyncM r +withConduit clientEnv callback clientM = do + ExceptT . liftIO $ Servant.withClientM clientM clientEnv $ \case + Left err -> pure . Left . TransportError $ (handleClientError clientEnv err) Right sourceT -> do - sourceTM <- liftIO $ Servant.fromSourceIO @o @(SourceT.SourceT m o) sourceT - (Right <$> callback sourceTM) - & toIO - & UnliftIO.handle @_ @CodeserverTransportError (pure . Left) + conduit <- liftIO $ Servant.fromSourceIO sourceT + (runExceptT $ callback conduit) handleClientError :: Servant.ClientEnv -> Servant.ClientError -> CodeserverTransportError handleClientError clientEnv err = @@ -404,13 +333,12 @@ handleClientError clientEnv err = Servant.ConnectionError _ -> UnreachableCodeserver (Servant.baseUrl clientEnv) httpStreamEntities :: - forall m r. - (MonadUnliftIO m) => + forall. Auth.AuthenticatedHttpClient -> Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> ReaderT SyncV2.StreamInitInfo m r) -> - m (Either CodeserverTransportError (Maybe (Either SyncV2.SyncError ()))) + (SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> SyncM ()) -> + SyncM () httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do let clientEnv = (Servant.mkClientEnv httpClient unisonShareUrl) @@ -422,46 +350,22 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req { Http.Client.responseTimeout = Http.Client.responseTimeoutNone } } - handleSourceT clientEnv go (downloadEntitiesStreamClientM req) + withConduit clientEnv go (downloadEntitiesStreamClientM req) where - go :: SourceT.SourceT m SyncV2.DownloadEntitiesChunk -> m (Maybe (Either SyncV2.SyncError ())) - go st = do - peelFirstSourceT st >>= \case - Nothing -> pure Nothing - Just (Left err) -> pure (Just (Left $ StreamFailure err)) - Just (Right (chunk, k)) -> do + go :: Stream () SyncV2.DownloadEntitiesChunk -> SyncM () + go es = do + (streamRemainder, init) <- es C.$$+ C.headC + case init of + Nothing -> pure () + Just chunk -> do case chunk of SyncV2.InitialC info -> do - flip runReaderT info $ callback k - pure $ Just $ Right () - SyncV2.EntityC _ -> pure . Just . Left $ MissingInitialChunk - SyncV2.ErrorC (SyncV2.ErrorChunk err) -> pure . Just . Left . ShareError $ err - -httpDownloadEntitiesAsList :: - (MonadUnliftIO m) => - Auth.AuthenticatedHttpClient -> - Servant.BaseUrl -> - SyncV2.DownloadEntitiesRequest -> - m (Either CodeserverTransportError [SyncV2.DownloadEntitiesChunk]) -httpDownloadEntitiesAsList (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req = do - let clientEnv = - (Servant.mkClientEnv httpClient unisonShareUrl) - { Servant.makeClientRequest = \url request -> - -- Disable client-side timeouts - (Servant.defaultMakeClientRequest url request) - <&> \r -> - r - { Http.Client.responseTimeout = Http.Client.responseTimeoutNone - } - } - handleStreamAsList clientEnv (downloadEntitiesStreamClientM req) - --- | Helper for running clientM that collects all entities in a stream into a list. -handleStreamAsList :: forall m o. (MonadUnliftIO m) => Servant.ClientEnv -> Servant.ClientM (Servant.SourceIO o) -> m (Either CodeserverTransportError [o]) -handleStreamAsList clientEnv clientM = do - toIO <- UnliftIO.askRunInIO - liftIO $ Servant.withClientM clientM clientEnv $ \case - Left err -> pure (Left $ handleClientError clientEnv err) - Right sourceT -> do - sourceTM <- liftIO $ Servant.fromSourceIO @o @(SourceT.SourceT m o) sourceT - mapLeft (StreamingError . Text.pack) <$> toIO (runExceptT $ SourceT.runSourceT sourceTM) + let entityStream = C.unsealConduitT streamRemainder C..| C.mapM parseEntity + callback info entityStream + SyncV2.EntityC _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk + SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError . SyncV2.PullError'DownloadEntities $ err + parseEntity :: SyncV2.DownloadEntitiesChunk -> SyncM SyncV2.EntityChunk + parseEntity = \case + SyncV2.EntityC chunk -> pure chunk + SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError $ SyncV2.PullError'DownloadEntities err + SyncV2.InitialC {} -> throwError . SyncError $ SyncV2.PullError'Sync SyncV2.SyncErrorMisplacedInitialChunk diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 6c62358a69..79ecbe7022 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -205,6 +205,7 @@ library , co-log-core , code-page , concurrent-output + , conduit , containers >=0.6.3 , cryptonite , directory @@ -227,7 +228,6 @@ library , lsp-types >=2.0.2.0 , megaparsec , memory - , mmorph , mtl , network-simple , network-uri diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 76151620e8..8b2e8cbc77 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -268,6 +268,7 @@ data SyncError | SyncErrorMissingInitialChunk | SyncErrorMisplacedInitialChunk | SyncErrorStreamFailure Text + | SyncErrorUnsupportedVersion Version deriving stock (Show, Eq, Ord) data EntityKind From e19038bb321238e8f390ff33b647fa1552898cbf Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 13:06:04 -0800 Subject: [PATCH 28/52] Better error handling --- unison-cli/src/Unison/Cli/DownloadUtils.hs | 2 +- unison-cli/src/Unison/Share/SyncV2.hs | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index 0ea4da184b..461252ba82 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -57,7 +57,7 @@ downloadProjectBranchFromShare useSquashed branch = -- TODO: Fill this in. let knownHashes = Set.empty let downloadedCallback = \_ -> pure () - result <- SyncV2.streamDownloadEntities Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback + result <- SyncV2.streamDownloadEntitiesFromCodeserver Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback -- numDownloaded <- liftIO getNumDownloaded let numDownloaded = 0 pure (result, numDownloaded) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index db1659159b..aaea50e699 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -2,7 +2,7 @@ {-# LANGUAGE TypeOperators #-} module Unison.Share.SyncV2 - ( streamDownloadEntities, + ( streamDownloadEntitiesFromCodeserver, ) where @@ -65,6 +65,7 @@ syncUnsortedStream :: Stream () SyncV2.EntityChunk -> SyncM () syncUnsortedStream codebase stream = do + Debug.debugLogM Debug.Temp $ "Syncing unsorted stream" allResults <- C.runConduit $ stream C..| C.sinkList allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) -- TODO: do we want to save the entities somewhere? @@ -83,6 +84,7 @@ syncSortedStream :: Stream () SyncV2.EntityChunk -> SyncM () syncSortedStream codebase stream = do + Debug.debugLogM Debug.Temp $ "Syncing sorted stream" let handler :: Stream [SyncV2.EntityChunk] o handler = C.mapM_C \chunkBatch -> do ExceptT $ Codebase.runTransactionWithRollback codebase \rollback -> do @@ -90,8 +92,10 @@ syncSortedStream codebase stream = do Left err -> do rollback (Left $ SyncError err) Right (catMaybes -> entityBatch) -> do + Debug.debugLogM Debug.Temp $ "Got a batch of " <> show (length entityBatch) <> " unsynced entities" Right <$> for_ entityBatch \(hash, entity) -> do validateEntity (rollback . Left) hash entity + Debug.debugLogM Debug.Temp $ "Saving entity" void $ Q.saveTempEntityInMain v2HashHandle hash entity C.runConduit $ stream C..| C.chunksOf batchSize C..| handler @@ -146,7 +150,7 @@ unpackChunks xs = do ) <&> catMaybes -streamDownloadEntities :: +streamDownloadEntitiesFromCodeserver :: -- | The Unison Share URL. Servant.BaseUrl -> -- | The branch to download from. @@ -157,13 +161,13 @@ streamDownloadEntities :: -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do +streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask Cli.label \done -> do let failed :: SyncError SyncV2.PullError -> Cli void failed = done . Left let hash = Share.hashJWTHash hashJwt - _r <- + r <- Cli.runTransaction (Q.entityLocation hash) >>= \case Just Q.EntityInMainStorage -> pure $ Right () -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" @@ -182,6 +186,7 @@ streamDownloadEntities unisonShareUrl branchRef hashJwt knownHashes _downloadedC case entitySorting of SyncV2.DependenciesFirst -> syncSortedStream codebase stream SyncV2.Unsorted -> syncUnsortedStream codebase stream + either (done . Left) pure r didCausalSuccessfullyImport codebase hash >>= \case False -> do failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) @@ -308,9 +313,11 @@ SyncV2.Routes -- You MUST consume the stream within the callback, it will be closed when the callback returns. withConduit :: forall r. Servant.ClientEnv -> (Stream () SyncV2.DownloadEntitiesChunk -> SyncM r) -> Servant.ClientM (Servant.SourceIO SyncV2.DownloadEntitiesChunk) -> SyncM r withConduit clientEnv callback clientM = do + Debug.debugLogM Debug.Temp $ "Running clientM" ExceptT . liftIO $ Servant.withClientM clientM clientEnv $ \case Left err -> pure . Left . TransportError $ (handleClientError clientEnv err) Right sourceT -> do + Debug.debugLogM Debug.Temp $ "Converting sourceIO to conduit" conduit <- liftIO $ Servant.fromSourceIO sourceT (runExceptT $ callback conduit) @@ -354,7 +361,9 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req where go :: Stream () SyncV2.DownloadEntitiesChunk -> SyncM () go es = do + Debug.debugLogM Debug.Temp $ "Peeling options off stream" (streamRemainder, init) <- es C.$$+ C.headC + Debug.debugM Debug.Temp "Got initial chunk: " init case init of Nothing -> pure () Just chunk -> do @@ -362,7 +371,9 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req SyncV2.InitialC info -> do let entityStream = C.unsealConduitT streamRemainder C..| C.mapM parseEntity callback info entityStream - SyncV2.EntityC _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk + SyncV2.EntityC _ -> do + Debug.debugLogM Debug.Temp $ "Got unexpected entity chunk" + throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError . SyncV2.PullError'DownloadEntities $ err parseEntity :: SyncV2.DownloadEntitiesChunk -> SyncM SyncV2.EntityChunk parseEntity = \case From f1e5851aa103476d6e5cf932ca6564ceb85823ef Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 16:11:42 -0800 Subject: [PATCH 29/52] Parallelize validation on unsorted stream --- unison-cli/src/Unison/Share/SyncV2.hs | 37 +++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index aaea50e699..6110fead44 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -49,6 +49,7 @@ import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Monoid (foldMapM) import Unison.Util.Timing qualified as Timing +import UnliftIO qualified as IO type Stream i o = ConduitT i o SyncM () @@ -70,11 +71,18 @@ syncUnsortedStream codebase stream = do allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) -- TODO: do we want to save the entities somewhere? let sortedEntities = topSortEntities allEntities - ExceptT $ liftIO $ Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> do - for_ sortedEntities \(hash, entity) -> do - validateEntity (rollback . Left) hash entity - void $ Q.saveTempEntityInMain v2HashHandle hash entity - pure $ Right () + let validateEntities = do + runExceptT (batchValidateEntities sortedEntities) + -- Validation is slow, run it in parallel with insertion, but don't commit the transaction until we're done + -- validation. + ExceptT . liftIO $ IO.withAsync validateEntities \validationTask -> do + Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> do + for_ sortedEntities \(hash, entity) -> do + -- validateEntity (rollback . Left) hash entity + void $ Q.saveTempEntityInMain v2HashHandle hash entity + Sqlite.unsafeIO (IO.wait validationTask) >>= \case + Left err -> rollback (Left err) + Right _ -> pure $ Right () batchSize :: Int batchSize = 5000 @@ -134,6 +142,25 @@ validateEntity rollback hash entity = do Just err -> do rollback . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err +batchValidateEntities :: [(Hash32, TempEntity)] -> SyncM () +batchValidateEntities entities = do + mismatches <- fmap catMaybes $ liftIO $ IO.pooledForConcurrently entities \(hash, entity) -> do + IO.evaluate $ EV.validateTempEntity hash entity + for_ mismatches \case + err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> + let expectedMismatches = case et of + Share.TermComponentType -> expectedComponentHashMismatches + Share.DeclComponentType -> expectedComponentHashMismatches + Share.CausalType -> expectedCausalHashMismatches + _ -> mempty + in case Map.lookup supplied expectedMismatches of + Just expected + | expected == computed -> pure () + _ -> do + throwError . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + err -> do + throwError . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err + unpackChunks :: [SyncV2.EntityChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] unpackChunks xs = do xs From 4ca76adb94d8db18c73f0de1de5dd6304474c174 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 16:35:34 -0800 Subject: [PATCH 30/52] Add runTransactionExceptT --- lib/unison-sqlite/src/Unison/Sqlite.hs | 1 + lib/unison-sqlite/src/Unison/Sqlite/Transaction.hs | 9 +++++++++ parser-typechecker/src/Unison/Codebase.hs | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/lib/unison-sqlite/src/Unison/Sqlite.hs b/lib/unison-sqlite/src/Unison/Sqlite.hs index eec974d6ed..a94fceae40 100644 --- a/lib/unison-sqlite/src/Unison/Sqlite.hs +++ b/lib/unison-sqlite/src/Unison/Sqlite.hs @@ -19,6 +19,7 @@ module Unison.Sqlite Transaction, runTransaction, runTransactionWithRollback, + runTransactionExceptT, runReadOnlyTransaction, runWriteTransaction, cacheTransaction, diff --git a/lib/unison-sqlite/src/Unison/Sqlite/Transaction.hs b/lib/unison-sqlite/src/Unison/Sqlite/Transaction.hs index b44a04b0fa..5bf735b917 100644 --- a/lib/unison-sqlite/src/Unison/Sqlite/Transaction.hs +++ b/lib/unison-sqlite/src/Unison/Sqlite/Transaction.hs @@ -3,6 +3,7 @@ module Unison.Sqlite.Transaction Transaction, runTransaction, runTransactionWithRollback, + runTransactionExceptT, runReadOnlyTransaction, runWriteTransaction, cacheTransaction, @@ -44,6 +45,7 @@ where import Control.Concurrent (threadDelay) import Control.Exception (Exception (fromException), onException, throwIO) +import Control.Monad.Trans.Except (ExceptT, runExceptT) import Control.Monad.Trans.Reader (ReaderT (..)) import Data.Text qualified as Text import Data.Unique (Unique, newUnique) @@ -130,6 +132,13 @@ runTransactionWithRollback conn transaction = liftIO do Right x -> pure x {-# SPECIALIZE runTransactionWithRollback :: Connection -> ((forall void. a -> Transaction void) -> Transaction a) -> IO a #-} +-- | Run a transaction wrapped in an 'ExceptT'. If the ExceptT fails, the transaction is rolled back. +runTransactionExceptT :: (MonadIO m, HasCallStack) => Connection -> ExceptT e Transaction a -> m (Either e a) +runTransactionExceptT conn transaction = runTransactionWithRollback conn \rollback -> do + runExceptT transaction >>= \case + Left e -> rollback (Left e) + Right a -> pure (Right a) + -- | Run a transaction that is known to only perform reads. -- -- The action is provided a function that peels off the 'Transaction' newtype without sending the corresponding diff --git a/parser-typechecker/src/Unison/Codebase.hs b/parser-typechecker/src/Unison/Codebase.hs index 1fcb0e5c7c..e8cb24e84e 100644 --- a/parser-typechecker/src/Unison/Codebase.hs +++ b/parser-typechecker/src/Unison/Codebase.hs @@ -99,6 +99,7 @@ module Unison.Codebase -- * Direct codebase access runTransaction, runTransactionWithRollback, + runTransactionExceptT, withConnection, withConnectionIO, @@ -112,6 +113,7 @@ module Unison.Codebase ) where +import Control.Monad.Except (ExceptT) import Data.Map qualified as Map import Data.Set qualified as Set import U.Codebase.Branch qualified as V2Branch @@ -174,6 +176,14 @@ runTransactionWithRollback :: runTransactionWithRollback Codebase {withConnection} action = withConnection \conn -> Sqlite.runTransactionWithRollback conn action +runTransactionExceptT :: + (MonadIO m) => + Codebase m v a -> + ExceptT e Sqlite.Transaction b -> + m (Either e b) +runTransactionExceptT Codebase {withConnection} action = + withConnection \conn -> Sqlite.runTransactionExceptT conn action + getShallowCausalAtPathFromRootHash :: -- Causal to start at, if Nothing use the codebase's root branch. CausalHash -> From 7b96cda46b30f40722c5c7702bf7e18c0616baf7 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 16:35:34 -0800 Subject: [PATCH 31/52] Huge code cleanup --- unison-cli/src/Unison/Share/SyncV2.hs | 196 +++++--------------------- 1 file changed, 38 insertions(+), 158 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 6110fead44..26f43e98b1 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -16,7 +16,6 @@ import Data.Graph qualified as Graph import Data.Map qualified as Map import Data.Proxy import Data.Set qualified as Set -import Data.Set.NonEmpty qualified as NESet import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy import Network.HTTP.Client qualified as Http.Client @@ -47,7 +46,6 @@ import Unison.SyncV2.API qualified as SyncV2 import Unison.SyncV2.Types (CBORBytes) import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 -import Unison.Util.Monoid (foldMapM) import Unison.Util.Timing qualified as Timing import UnliftIO qualified as IO @@ -57,9 +55,26 @@ type PullErr = SyncError SyncV2.PullError type SyncM = ExceptT PullErr IO +batchSize :: Int +batchSize = 5000 + ------------------------------------------------------------------------------------------------------------------------ -- Download entities +validateAndSave :: (Codebase.Codebase IO v a) -> [(Hash32, TempEntity)] -> SyncM () +validateAndSave codebase entities = do + let validateEntities = do + runExceptT (batchValidateEntities entities) + -- Validation is slow, run it in parallel with insertion, but don't commit the transaction until we're done + -- validation. + ExceptT . liftIO $ IO.withAsync validateEntities \validationTask -> do + Timing.time "Inserting entities" $ Codebase.runTransactionExceptT codebase do + for_ entities \(hash, entity) -> do + void . lift $ Q.saveTempEntityInMain v2HashHandle hash entity + lift (Sqlite.unsafeIO (IO.wait validationTask)) >>= \case + Left err -> throwError err + Right _ -> pure () + -- | Syncs a stream which could send entities in any order. syncUnsortedStream :: (Codebase.Codebase IO v a) -> @@ -68,25 +83,11 @@ syncUnsortedStream :: syncUnsortedStream codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing unsorted stream" allResults <- C.runConduit $ stream C..| C.sinkList - allEntities <- Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransaction codebase $ do (unpackChunks allResults) - -- TODO: do we want to save the entities somewhere? - let sortedEntities = topSortEntities allEntities - let validateEntities = do - runExceptT (batchValidateEntities sortedEntities) - -- Validation is slow, run it in parallel with insertion, but don't commit the transaction until we're done - -- validation. - ExceptT . liftIO $ IO.withAsync validateEntities \validationTask -> do - Timing.time "Inserting entities" $ Codebase.runTransactionWithRollback codebase $ \rollback -> do - for_ sortedEntities \(hash, entity) -> do - -- validateEntity (rollback . Left) hash entity - void $ Q.saveTempEntityInMain v2HashHandle hash entity - Sqlite.unsafeIO (IO.wait validationTask) >>= \case - Left err -> rollback (Left err) - Right _ -> pure $ Right () - -batchSize :: Int -batchSize = 5000 + allEntities <- ExceptT $ Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransactionExceptT codebase $ do unpackChunks allResults + let sortedEntities = sortDependencyFirst allEntities + validateAndSave codebase sortedEntities +-- | Syncs a stream which sends entities which are already sorted in dependency order. syncSortedStream :: (Codebase.Codebase IO v a) -> Stream () SyncV2.EntityChunk -> @@ -95,52 +96,29 @@ syncSortedStream codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing sorted stream" let handler :: Stream [SyncV2.EntityChunk] o handler = C.mapM_C \chunkBatch -> do - ExceptT $ Codebase.runTransactionWithRollback codebase \rollback -> do - sequenceA <$> for chunkBatch unpackChunk >>= \case - Left err -> do - rollback (Left $ SyncError err) - Right (catMaybes -> entityBatch) -> do - Debug.debugLogM Debug.Temp $ "Got a batch of " <> show (length entityBatch) <> " unsynced entities" - Right <$> for_ entityBatch \(hash, entity) -> do - validateEntity (rollback . Left) hash entity - Debug.debugLogM Debug.Temp $ "Saving entity" - void $ Q.saveTempEntityInMain v2HashHandle hash entity + entityBatch <- ExceptT $ Codebase.runTransactionExceptT codebase do for chunkBatch unpackChunk + validateAndSave codebase (catMaybes entityBatch) C.runConduit $ stream C..| C.chunksOf batchSize C..| handler -unpackChunk :: SyncV2.EntityChunk -> Sqlite.Transaction (Either SyncV2.PullError (Maybe (Hash32, TempEntity))) +unpackChunk :: SyncV2.EntityChunk -> ExceptT PullErr Sqlite.Transaction (Maybe (Hash32, TempEntity)) unpackChunk = \case SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do -- Only want entities we don't already have - Q.entityLocation hash >>= \case - Just Q.EntityInMainStorage -> pure $ Right Nothing + lift (Q.entityLocation hash) >>= \case + Just Q.EntityInMainStorage -> pure Nothing _ -> do - fmap (Just . (hash,)) <$> unpackEntity entityBytes + (Just . (hash,)) <$> unpackEntity entityBytes where - unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction (Either SyncV2.PullError TempEntity) + unpackEntity :: (CBORBytes TempEntity) -> ExceptT PullErr Sqlite.Transaction TempEntity unpackEntity entityBytes = do case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> do pure $ Left (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure $ Right entity + Left err -> do throwError $ (SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) + Right entity -> pure entity -validateEntity :: (PullErr -> Sqlite.Transaction ()) -> Hash32 -> TempEntity -> Sqlite.Transaction () -validateEntity rollback hash entity = do - Debug.debugLogM Debug.Temp $ "Validating entity" - -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. - case EV.validateTempEntity hash entity of - Nothing -> pure () - Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> - let expectedMismatches = case et of - Share.TermComponentType -> expectedComponentHashMismatches - Share.DeclComponentType -> expectedComponentHashMismatches - Share.CausalType -> expectedCausalHashMismatches - _ -> mempty - in case Map.lookup supplied expectedMismatches of - Just expected - | expected == computed -> pure () - _ -> do - rollback . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - Just err -> do - rollback . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err +unpackChunks :: [SyncV2.EntityChunk] -> ExceptT PullErr Sqlite.Transaction [(Hash32, TempEntity)] +unpackChunks xs = do + for xs unpackChunk + <&> catMaybes batchValidateEntities :: [(Hash32, TempEntity)] -> SyncM () batchValidateEntities entities = do @@ -161,22 +139,6 @@ batchValidateEntities entities = do err -> do throwError . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err -unpackChunks :: [SyncV2.EntityChunk] -> Sqlite.Transaction [(Hash32, TempEntity)] -unpackChunks xs = do - xs - & ( traverse \case - (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do - -- Only want entities we don't already have in main - ((Q.entityLocation hash)) >>= \case - Just Q.EntityInMainStorage -> pure Nothing - _ -> do - tempEntity <- case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> error . show $ (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure entity - pure $ Just (hash, tempEntity) - ) - <&> catMaybes - streamDownloadEntitiesFromCodeserver :: -- | The Unison Share URL. Servant.BaseUrl -> @@ -228,97 +190,15 @@ streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashe let expectedHash = hash32ToCausalHash hash isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) -topSortEntities :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] -topSortEntities entities = do +-- | Topologically sort entities based on their dependencies. +sortDependencyFirst :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] +sortDependencyFirst entities = do let adjList = entities <&> \(hash32, entity) -> ((hash32, entity), hash32, Set.toList $ Share.entityDependencies (tempEntityToEntity entity)) (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) -_handleChunkUsingTemp :: (forall x. SyncV2.PullError -> Sqlite.Transaction x) -> SyncV2.DownloadEntitiesChunk -> Sqlite.Transaction () -_handleChunkUsingTemp rollback = \case - SyncV2.InitialC {} -> do - Debug.debugLogM Debug.Temp $ "Got unexpected initial chunk" - rollback $ SyncV2.PullError'Sync $ SyncV2.SyncErrorMisplacedInitialChunk - SyncV2.ErrorC (SyncV2.ErrorChunk {err}) -> do - Debug.debugLogM Debug.Temp $ "Got error chunk" - rollback $ SyncV2.PullError'DownloadEntities err - SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR = entityBytes}) -> do - -- Only want entities we don't already have - Q.entityLocation hash >>= \case - Just {} -> pure () - Nothing -> do - tempEntity <- unpackEntity entityBytes - validateEntity hash tempEntity - insertEntity hash tempEntity - where - unpackEntity :: (CBORBytes TempEntity) -> Sqlite.Transaction TempEntity - unpackEntity entityBytes = do - case CBOR.deserialiseOrFailCBORBytes entityBytes of - Left err -> do rollback (SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) - Right entity -> pure entity - validateEntity :: Hash32 -> TempEntity -> Sqlite.Transaction () - validateEntity hash entity = do - Debug.debugLogM Debug.Temp $ "Validating entity" - -- TODO: We can investigate batching or running this in parallel if it becomes a bottleneck. - case EV.validateTempEntity hash entity of - Nothing -> pure () - Just err@(Share.EntityHashMismatch et (Share.HashMismatchForEntity {supplied, computed})) -> - let expectedMismatches = case et of - Share.TermComponentType -> expectedComponentHashMismatches - Share.DeclComponentType -> expectedComponentHashMismatches - Share.CausalType -> expectedCausalHashMismatches - _ -> mempty - in case Map.lookup supplied expectedMismatches of - Just expected - | expected == computed -> pure () - _ -> do - rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - Just err -> do - rollback . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err - -insertEntity :: Hash32 -> TempEntity -> Sqlite.Transaction () -insertEntity hash entity = do - Debug.debugLogM Debug.Temp $ "Inserting entity" - upsertEntitySomewhere hash entity - pure () - ------------------------------------------------------------------------------------------------------------------------- --- Database operations - --- | Upsert a downloaded entity "somewhere" - --- --- 1. Nowhere if we already had the entity (in main or temp storage). --- 2. In main storage if we already have all of its dependencies in main storage. --- 3. In temp storage otherwise. -upsertEntitySomewhere :: - Hash32 -> - TempEntity -> - Sqlite.Transaction Q.EntityLocation -upsertEntitySomewhere hash entity = - Q.entityLocation hash >>= \case - Just location -> pure location - Nothing -> do - missingDependencies1 :: Set Hash32 <- - Share.entityDependencies (tempEntityToEntity entity) - & foldMapM - ( \depHash -> do - Q.entityExists depHash <&> \case - True -> mempty - False -> Set.singleton depHash - ) - case NESet.nonEmptySet missingDependencies1 of - Nothing -> do - _id <- Q.saveTempEntityInMain v2HashHandle hash entity - pure Q.EntityInMainStorage - Just missingDependencies -> do - Q.insertTempEntityV2 - hash - entity - missingDependencies - pure Q.EntityInTempStorage - ------------------------------------------------------------------------------------------------------------------------ --- HTTP calls +-- Servant stuff type SyncAPI = ("ucm" Servant.:> "v2" Servant.:> "sync" Servant.:> SyncV2.API) From 503e5010fcd45e31a027a40e11f5685438e8a372 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Mon, 16 Dec 2024 13:06:04 -0800 Subject: [PATCH 32/52] Start code for syncing to file --- unison-cli/package.yaml | 1 + unison-cli/src/Unison/Share/SyncV2.hs | 150 +++++++++++++++++++------- unison-cli/unison-cli.cabal | 1 + 3 files changed, 114 insertions(+), 38 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 7222487059..a9ffbb00eb 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -66,6 +66,7 @@ library: - recover-rtti - regex-tdfa - semialign + - serialise - servant - servant-client - servant-conduit diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 26f43e98b1..fc8e8009e4 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -3,14 +3,18 @@ module Unison.Share.SyncV2 ( streamDownloadEntitiesFromCodeserver, + streamDownloadEntitiesFromFile, ) where +import Codec.Serialise qualified as CBOR import Conduit (ConduitT) import Conduit qualified as C import Control.Lens import Control.Monad.Except import Control.Monad.Reader (ask) +import Control.Monad.ST (ST, stToIO) +import Data.ByteString qualified as BS import Data.Conduit.List qualified as C import Data.Graph qualified as Graph import Data.Map qualified as Map @@ -44,8 +48,8 @@ import Unison.Sync.Types qualified as Share import Unison.SyncV2.API (Routes (downloadEntitiesStream)) import Unison.SyncV2.API qualified as SyncV2 import Unison.SyncV2.Types (CBORBytes) -import Unison.SyncV2.Types qualified as CBOR import Unison.SyncV2.Types qualified as SyncV2 +import Unison.Util.Servant.CBOR qualified as CBOR import Unison.Util.Timing qualified as Timing import UnliftIO qualified as IO @@ -152,12 +156,10 @@ streamDownloadEntitiesFromCodeserver :: Cli (Either (SyncError SyncV2.PullError) ()) streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask - Cli.label \done -> do - let failed :: SyncError SyncV2.PullError -> Cli void - failed = done . Left + runExceptT do let hash = Share.hashJWTHash hashJwt - r <- - Cli.runTransaction (Q.entityLocation hash) >>= \case + ExceptT $ + (Cli.runTransaction (Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure $ Right () -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do @@ -167,22 +169,28 @@ streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashe authHTTPClient unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} - \(SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo}) stream -> do - case version of - (SyncV2.Version 1) -> pure () - v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v + \header stream -> do + doSync codebase header stream + afterSyncChecks codebase hash + pure () - case entitySorting of - SyncV2.DependenciesFirst -> syncSortedStream codebase stream - SyncV2.Unsorted -> syncUnsortedStream codebase stream - either (done . Left) pure r - didCausalSuccessfullyImport codebase hash >>= \case - False -> do - failed (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) - True -> pure () - -- we'll try vacuuming again next pull. - _success <- liftIO (Codebase.withConnection codebase Sqlite.vacuum) - pure (Right ()) +doSync :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> ExceptT PullErr IO () +doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do + case version of + (SyncV2.Version 1) -> pure () + v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v + + case entitySorting of + SyncV2.DependenciesFirst -> syncSortedStream codebase stream + SyncV2.Unsorted -> syncUnsortedStream codebase stream + +afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> ExceptT PullErr Cli () +afterSyncChecks codebase hash = do + lift (didCausalSuccessfullyImport codebase hash) >>= \case + False -> do + throwError (SyncError (SyncV2.PullError'Sync . SyncV2.SyncErrorExpectedResultNotInMain . hash32ToCausalHash $ hash)) + True -> pure () + void $ liftIO (Codebase.withConnection codebase Sqlite.vacuum) where -- Verify that the expected hash made it into main storage. didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> Cli Bool @@ -197,6 +205,68 @@ sortDependencyFirst entities = do (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) +streamDownloadEntitiesFromFile :: + -- | Location of the sync-file + FilePath -> + Cli (Either (SyncError SyncV2.PullError) ()) +streamDownloadEntitiesFromFile syncFilePath = do + Cli.Env {codebase} <- ask + runExceptT do + Debug.debugLogM Debug.Temp $ "Kicking off sync" + Timing.time "File Sync" $ liftIO . runExceptT $ do + let stream = C.transPipe C.runResourceT $ C.sourceFile syncFilePath C..| C.transPipe lift entitiesFromBS + (header, rest) <- initializeStream stream + doSync codebase header rest + afterSyncChecks codebase hash + +-- Expects a stream of tightly-packed CBOR entities without any framing/separators. +entitiesFromBS :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () +entitiesFromBS = C.transPipe (mapExceptT stToIO) $ do + C.await >>= \case + Nothing -> pure () + Just bs -> do + d <- newDecoder + loop bs d + where + newDecoder :: ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) + newDecoder = do + (lift . lift) CBOR.deserialiseIncremental >>= \case + CBOR.Done _ _ _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure "Invalid initial decoder" + CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err + CBOR.Partial k -> pure k + loop :: ByteString -> (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) -> ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) () + loop bs k = do + (lift . lift) (k (Just bs)) >>= \case + CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err + CBOR.Partial k' -> do + -- We need more input, try to get some + nextBS <- C.await + case nextBS of + Nothing -> do + -- No more input, try to finish up the decoder. + (lift . lift) (k' Nothing) >>= \case + CBOR.Done _ _ a -> C.yield a + CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err + CBOR.Partial _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure "Unexpected end of input" + Just bs' -> + -- Have some input, keep going. + loop bs' k' + CBOR.Done rem _ a -> do + C.yield a + if BS.null rem + then do + -- If we had no leftovers, we can check if there's any input left. + C.await >>= \case + Nothing -> pure () + Just bs'' -> do + -- If we have input left, start up a new decoder. + k <- newDecoder + loop bs'' k + else do + -- We have leftovers, start a new decoder and use those. + k <- newDecoder + loop rem k + ------------------------------------------------------------------------------------------------------------------------ -- Servant stuff @@ -264,24 +334,28 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req { Http.Client.responseTimeout = Http.Client.responseTimeoutNone } } - withConduit clientEnv go (downloadEntitiesStreamClientM req) + (downloadEntitiesStreamClientM req) & withConduit clientEnv \stream -> do + (init, entityStream) <- initializeStream stream + callback init entityStream + +-- | Peel the header off the stream and parse the remaining entity chunks. +initializeStream :: Stream () SyncV2.DownloadEntitiesChunk -> SyncM (SyncV2.StreamInitInfo, Stream () SyncV2.EntityChunk) +initializeStream stream = do + Debug.debugLogM Debug.Temp $ "Peeling options off stream" + (streamRemainder, init) <- stream C.$$+ C.headC + Debug.debugM Debug.Temp "Got initial chunk: " init + case init of + Nothing -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk + Just chunk -> do + case chunk of + SyncV2.InitialC info -> do + let entityStream = C.unsealConduitT streamRemainder C..| C.mapM parseEntity + pure $ (info, entityStream) + SyncV2.EntityC _ -> do + Debug.debugLogM Debug.Temp $ "Got unexpected entity chunk" + throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk + SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError . SyncV2.PullError'DownloadEntities $ err where - go :: Stream () SyncV2.DownloadEntitiesChunk -> SyncM () - go es = do - Debug.debugLogM Debug.Temp $ "Peeling options off stream" - (streamRemainder, init) <- es C.$$+ C.headC - Debug.debugM Debug.Temp "Got initial chunk: " init - case init of - Nothing -> pure () - Just chunk -> do - case chunk of - SyncV2.InitialC info -> do - let entityStream = C.unsealConduitT streamRemainder C..| C.mapM parseEntity - callback info entityStream - SyncV2.EntityC _ -> do - Debug.debugLogM Debug.Temp $ "Got unexpected entity chunk" - throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk - SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError . SyncV2.PullError'DownloadEntities $ err parseEntity :: SyncV2.DownloadEntitiesChunk -> SyncM SyncV2.EntityChunk parseEntity = \case SyncV2.EntityC chunk -> pure chunk diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 79ecbe7022..b7279d13fd 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -241,6 +241,7 @@ library , recover-rtti , regex-tdfa , semialign + , serialise , servant , servant-client , servant-conduit From e4a7512c0cfb9a50cc0a048a59fa98c6692773b0 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 12:30:30 -0800 Subject: [PATCH 33/52] Add framing support for file decoder --- unison-cli/package.yaml | 2 + unison-cli/src/Unison/Share/SyncV2.hs | 46 +++++++++++++++------ unison-cli/unison-cli.cabal | 2 + unison-share-api/src/Unison/SyncV2/Types.hs | 8 +++- 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index a9ffbb00eb..f29fb4d9dc 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -20,6 +20,7 @@ library: - condition: "!os(windows)" dependencies: unix dependencies: + - attoparsec - Diff - IntervalMap - ListLike @@ -34,6 +35,7 @@ library: - concurrent-output - containers >= 0.6.3 - conduit + - conduit-extra - cryptonite - either - errors diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index fc8e8009e4..f611e358a3 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -14,7 +14,11 @@ import Control.Lens import Control.Monad.Except import Control.Monad.Reader (ask) import Control.Monad.ST (ST, stToIO) +import Data.Attoparsec.ByteString qualified as A +import Data.Attoparsec.ByteString.Char8 qualified as A8 import Data.ByteString qualified as BS +import Data.ByteString.Lazy qualified as BL +import Data.Conduit.Attoparsec qualified as C import Data.Conduit.List qualified as C import Data.Graph qualified as Graph import Data.Map qualified as Map @@ -158,7 +162,7 @@ streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashe Cli.Env {authHTTPClient, codebase} <- ask runExceptT do let hash = Share.hashJWTHash hashJwt - ExceptT $ + ExceptT $ do (Cli.runTransaction (Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure $ Right () -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" @@ -171,8 +175,7 @@ streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashe SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} \header stream -> do doSync codebase header stream - afterSyncChecks codebase hash - pure () + mapExceptT liftIO (afterSyncChecks codebase hash) doSync :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> ExceptT PullErr IO () doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do @@ -184,7 +187,7 @@ doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _to SyncV2.DependenciesFirst -> syncSortedStream codebase stream SyncV2.Unsorted -> syncUnsortedStream codebase stream -afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> ExceptT PullErr Cli () +afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> SyncM () afterSyncChecks codebase hash = do lift (didCausalSuccessfullyImport codebase hash) >>= \case False -> do @@ -193,10 +196,10 @@ afterSyncChecks codebase hash = do void $ liftIO (Codebase.withConnection codebase Sqlite.vacuum) where -- Verify that the expected hash made it into main storage. - didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> Cli Bool + didCausalSuccessfullyImport :: Codebase.Codebase IO v a -> Hash32 -> IO Bool didCausalSuccessfullyImport codebase hash = do let expectedHash = hash32ToCausalHash hash - isJust <$> liftIO (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) + isJust <$> (Codebase.runTransaction codebase $ Q.loadCausalByCausalHash expectedHash) -- | Topologically sort entities based on their dependencies. sortDependencyFirst :: [(Hash32, TempEntity)] -> [(Hash32, TempEntity)] @@ -213,15 +216,32 @@ streamDownloadEntitiesFromFile syncFilePath = do Cli.Env {codebase} <- ask runExceptT do Debug.debugLogM Debug.Temp $ "Kicking off sync" - Timing.time "File Sync" $ liftIO . runExceptT $ do - let stream = C.transPipe C.runResourceT $ C.sourceFile syncFilePath C..| C.transPipe lift entitiesFromBS + mapExceptT liftIO $ Timing.time "File Sync" $ do + let stream = C.transPipe (liftIO . C.runResourceT) (C.sourceFile syncFilePath) C..| unNetString C..| C.mapM decodeFramedEntity (header, rest) <- initializeStream stream doSync codebase header rest - afterSyncChecks codebase hash + afterSyncChecks codebase (SyncV2.rootCausalHash header) + +-- | Gets the framed chunks from a NetString framed stream. +unNetString :: ConduitT ByteString ByteString SyncM () +unNetString = do + bs <- C.sinkParser $ do + len <- A8.decimal + _ <- A8.char ':' + bs <- A.take len + _ <- A8.char ',' + pure bs + C.yield bs + +decodeFramedEntity :: ByteString -> SyncM SyncV2.DownloadEntitiesChunk +decodeFramedEntity bs = do + case CBOR.deserialiseOrFail (BL.fromStrict bs) of + Left err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err + Right chunk -> pure chunk -- Expects a stream of tightly-packed CBOR entities without any framing/separators. -entitiesFromBS :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () -entitiesFromBS = C.transPipe (mapExceptT stToIO) $ do +_decodeUnframedEntities :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () +_decodeUnframedEntities = C.transPipe (mapExceptT stToIO) $ do C.await >>= \case Nothing -> pure () Just bs -> do @@ -231,7 +251,7 @@ entitiesFromBS = C.transPipe (mapExceptT stToIO) $ do newDecoder :: ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) newDecoder = do (lift . lift) CBOR.deserialiseIncremental >>= \case - CBOR.Done _ _ _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure "Invalid initial decoder" + CBOR.Done _ _ _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorStreamFailure "Invalid initial decoder" CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err CBOR.Partial k -> pure k loop :: ByteString -> (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) -> ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) () @@ -247,7 +267,7 @@ entitiesFromBS = C.transPipe (mapExceptT stToIO) $ do (lift . lift) (k' Nothing) >>= \case CBOR.Done _ _ a -> C.yield a CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err - CBOR.Partial _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure "Unexpected end of input" + CBOR.Partial _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorStreamFailure "Unexpected end of input" Just bs' -> -- Have some input, keep going. loop bs' k' diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index b7279d13fd..a077130c6a 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -199,6 +199,7 @@ library , aeson-pretty , ansi-terminal , async + , attoparsec , base , bytestring , cmark @@ -206,6 +207,7 @@ library , code-page , concurrent-output , conduit + , conduit-extra , containers >=0.6.3 , cryptonite , directory diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 8b2e8cbc77..fb1cdf30b7 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -164,7 +164,9 @@ data StreamInitInfo = StreamInitInfo { version :: Version, entitySorting :: EntitySorting, - numEntities :: Maybe Word64 + numEntities :: Maybe Word64, + rootCausalHash :: Hash32, + rootBranchRef :: Maybe BranchRef } deriving (Show, Eq, Ord) @@ -194,7 +196,9 @@ instance Serialise StreamInitInfo where version <- decodeMapKey "v" m entitySorting <- decodeMapKey "es" m numEntities <- (optionalDecodeMapKey "ne" m) - pure StreamInitInfo {version, entitySorting, numEntities} + rootCausalHash <- decodeMapKey "rc" m + rootBranchRef <- optionalDecodeMapKey "br" m + pure StreamInitInfo {version, entitySorting, numEntities, rootCausalHash, rootBranchRef} data EntityChunk = EntityChunk { hash :: Hash32, From 7e836173359650b29a93407de0139b8ff7e64bdf Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 14:26:51 -0800 Subject: [PATCH 34/52] Implement 'createSyncFile' --- unison-cli/src/Unison/Share/SyncV2.hs | 49 ++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index f611e358a3..d743f4341e 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -4,6 +4,7 @@ module Unison.Share.SyncV2 ( streamDownloadEntitiesFromCodeserver, streamDownloadEntitiesFromFile, + createSyncFile, ) where @@ -14,6 +15,7 @@ import Control.Lens import Control.Monad.Except import Control.Monad.Reader (ask) import Control.Monad.ST (ST, stToIO) +import Control.Monad.State import Data.Attoparsec.ByteString qualified as A import Data.Attoparsec.ByteString.Char8 qualified as A8 import Data.ByteString qualified as BS @@ -32,6 +34,7 @@ import Servant.API qualified as Servant import Servant.Client.Streaming qualified as Servant import Servant.Conduit () import Servant.Types.SourceT qualified as Servant +import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) import U.Codebase.Sqlite.V2.HashHandle (v2HashHandle) @@ -46,9 +49,11 @@ import Unison.Share.API.Hash qualified as Share import Unison.Share.ExpectedHashMismatches (expectedCausalHashMismatches, expectedComponentHashMismatches) import Unison.Share.Sync.Types import Unison.Sqlite qualified as Sqlite -import Unison.Sync.Common (hash32ToCausalHash, tempEntityToEntity) +import Unison.Sync.Common (causalHashToHash32, hash32ToCausalHash, tempEntityToEntity) +import Unison.Sync.Common qualified as Sync import Unison.Sync.EntityValidation qualified as EV import Unison.Sync.Types qualified as Share +import Unison.Sync.Types qualified as Sync import Unison.SyncV2.API (Routes (downloadEntitiesStream)) import Unison.SyncV2.API qualified as SyncV2 import Unison.SyncV2.Types (CBORBytes) @@ -56,6 +61,7 @@ import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Servant.CBOR qualified as CBOR import Unison.Util.Timing qualified as Timing import UnliftIO qualified as IO +import UnliftIO.IO.File qualified as IO type Stream i o = ConduitT i o SyncM () @@ -222,6 +228,47 @@ streamDownloadEntitiesFromFile syncFilePath = do doSync codebase header rest afterSyncChecks codebase (SyncV2.rootCausalHash header) +createSyncFile :: + CausalHash -> + Maybe SyncV2.BranchRef -> + FilePath -> + Cli () +createSyncFile rootHash mayBranchRef destFilePath = do + Cli.Env {codebase} <- ask + entities <- liftIO $ Codebase.runTransaction codebase (depsForCausal rootHash) + let initialChunk = + SyncV2.InitialC + ( SyncV2.StreamInitInfo + { rootCausalHash = causalHashToHash32 rootHash, + version = SyncV2.Version 1, + entitySorting = SyncV2.DependenciesFirst, + numEntities = Just . fromIntegral $ Map.size entities, + rootBranchRef = mayBranchRef + } + ) + let contents = + Map.toList entities + & ( fmap \(hash, entity) -> + let entityCBOR = (CBOR.serialiseCBORBytes (Sync.entityToTempEntity id entity)) + in SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR}) + ) + & (initialChunk :) + liftIO $ IO.writeBinaryFile destFilePath . BL.toStrict . foldMap CBOR.serialise $ contents + +-- | Collect all dependencies of a given causal hash. +depsForCausal :: CausalHash -> Sqlite.Transaction (Map Hash32 (Sync.Entity Text Hash32 Hash32)) +depsForCausal causalHash = do + flip execStateT mempty $ expandEntities (causalHashToHash32 causalHash) + where + expandEntities :: Hash32 -> ((StateT (Map Hash32 (Sync.Entity Text Hash32 Hash32)) Sqlite.Transaction)) () + expandEntities hash32 = do + gets (Map.member hash32) >>= \case + True -> pure () + False -> do + entity <- lift $ Sync.expectEntity hash32 + modify (Map.insert hash32 entity) + traverseOf_ Sync.entityHashes_ expandEntities entity + -- | Gets the framed chunks from a NetString framed stream. unNetString :: ConduitT ByteString ByteString SyncM () unNetString = do From 8ab3f2d5cfd87f7b858c33d2a11e859beb492d70 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 14:37:00 -0800 Subject: [PATCH 35/52] Add sync.to-file command --- .../src/Unison/Codebase/Editor/Input.hs | 1 + .../src/Unison/CommandLine/InputPatterns.hs | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/unison-cli/src/Unison/Codebase/Editor/Input.hs b/unison-cli/src/Unison/Codebase/Editor/Input.hs index da06a5fb8e..e3f4ddcf13 100644 --- a/unison-cli/src/Unison/Codebase/Editor/Input.hs +++ b/unison-cli/src/Unison/Codebase/Editor/Input.hs @@ -126,6 +126,7 @@ data Input | DiffNamespaceI BranchId2 BranchId2 -- old new | PullI !PullSourceTarget !PullMode | PushRemoteBranchI PushRemoteBranchInput + | SyncToFileI FilePath (Maybe ProjectAndBranchNames) | ResetI (BranchId2 {- namespace to reset it to -}) (Maybe UnresolvedProjectBranch {- ProjectBranch to reset -}) | -- | used in Welcome module to give directions to user -- diff --git a/unison-cli/src/Unison/CommandLine/InputPatterns.hs b/unison-cli/src/Unison/CommandLine/InputPatterns.hs index 87597a8653..26a411b9ad 100644 --- a/unison-cli/src/Unison/CommandLine/InputPatterns.hs +++ b/unison-cli/src/Unison/CommandLine/InputPatterns.hs @@ -97,6 +97,7 @@ module Unison.CommandLine.InputPatterns pushCreate, pushExhaustive, pushForce, + syncToFile, quit, releaseDraft, renameBranch, @@ -2088,6 +2089,36 @@ pushExhaustive = branchInclusion = AllBranches } +syncToFile :: InputPattern +syncToFile = + InputPattern + { patternName = "sync.to-file", + aliases = [], + visibility = I.Hidden, + args = [("file-path", Required, filePathArg), ("branch", Optional, projectAndBranchNamesArg suggestionsConfig)], + help = + ( P.wrapColumn2 + [ ( makeExample syncToFile ["./branch.usync"], + "saves the current branch to the file `foo.u`." + ), + ( makeExample syncToFile ["./main.usync", "/main"], + "saves the main branch to the file `main.usync`." + ) + ] + ), + parse = \case + [filePath, branch] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> (Just <$> handleProjectAndBranchNamesArg branch) + [filePath] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> pure Nothing + args -> wrongArgsLength "one or two arguments" args + } + where + suggestionsConfig = + ProjectBranchSuggestionsConfig + { showProjectCompletions = True, + projectInclusion = AllProjects, + branchInclusion = AllBranches + } + mergeOldSquashInputPattern :: InputPattern mergeOldSquashInputPattern = InputPattern @@ -3666,6 +3697,7 @@ validInputs = pushCreate, pushExhaustive, pushForce, + syncToFile, quit, releaseDraft, renameBranch, From 7fa3391e577b9fdd07b876171f3fbba77afb2a6c Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 14:39:12 -0800 Subject: [PATCH 36/52] Topologically sort sync files --- unison-cli/src/Unison/Share/SyncV2.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index d743f4341e..16c6060321 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -247,9 +247,12 @@ createSyncFile rootHash mayBranchRef destFilePath = do } ) let contents = - Map.toList entities + entities + & fmap (Sync.entityToTempEntity id) + & Map.toList + & sortDependencyFirst & ( fmap \(hash, entity) -> - let entityCBOR = (CBOR.serialiseCBORBytes (Sync.entityToTempEntity id entity)) + let entityCBOR = (CBOR.serialiseCBORBytes entity) in SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR}) ) & (initialChunk :) From 290a95321264dbbd8e4fbd0c4219f9d9f4d3a5bf Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 15:12:57 -0800 Subject: [PATCH 37/52] Building with new sync.to-file command --- .../src/Unison/Codebase/Editor/HandleInput.hs | 5 ++++- .../Codebase/Editor/HandleInput/SyncFile.hs | 19 +++++++++++++++++++ .../src/Unison/Codebase/Editor/Input.hs | 2 +- .../src/Unison/CommandLine/InputPatterns.hs | 19 +++++++++++++++++-- unison-cli/unison-cli.cabal | 1 + unison-share-api/src/Unison/SyncV2/Types.hs | 5 +++++ 6 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index 4967878424..380354025f 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -59,8 +59,8 @@ import Unison.Codebase.Editor.HandleInput.DeleteBranch (handleDeleteBranch) import Unison.Codebase.Editor.HandleInput.DeleteNamespace (getEndangeredDependents, handleDeleteNamespace) import Unison.Codebase.Editor.HandleInput.DeleteProject (handleDeleteProject) import Unison.Codebase.Editor.HandleInput.Dependents (handleDependents) -import Unison.Codebase.Editor.HandleInput.EditNamespace (handleEditNamespace) import Unison.Codebase.Editor.HandleInput.EditDependents (handleEditDependents) +import Unison.Codebase.Editor.HandleInput.EditNamespace (handleEditNamespace) import Unison.Codebase.Editor.HandleInput.FindAndReplace (handleStructuredFindI, handleStructuredFindReplaceI, handleTextFindI) import Unison.Codebase.Editor.HandleInput.FormatFile qualified as Format import Unison.Codebase.Editor.HandleInput.Global qualified as Global @@ -87,6 +87,7 @@ import Unison.Codebase.Editor.HandleInput.ReleaseDraft (handleReleaseDraft) import Unison.Codebase.Editor.HandleInput.Run (handleRun) import Unison.Codebase.Editor.HandleInput.RuntimeUtils qualified as RuntimeUtils import Unison.Codebase.Editor.HandleInput.ShowDefinition (handleShowDefinition) +import Unison.Codebase.Editor.HandleInput.SyncFile qualified as SyncFile import Unison.Codebase.Editor.HandleInput.TermResolution (resolveMainRef) import Unison.Codebase.Editor.HandleInput.Tests qualified as Tests import Unison.Codebase.Editor.HandleInput.Todo (handleTodo) @@ -688,6 +689,7 @@ loop e = do Cli.respond Success PullI sourceTarget pullMode -> handlePull sourceTarget pullMode PushRemoteBranchI pushRemoteBranchInput -> handlePushRemoteBranch pushRemoteBranchInput + SyncToFileI syncFileDest projectBranchName -> SyncFile.handleSyncToFile syncFileDest projectBranchName ListDependentsI hq -> handleDependents hq ListDependenciesI hq -> handleDependencies hq NamespaceDependenciesI path -> handleNamespaceDependencies path @@ -1012,6 +1014,7 @@ inputDescription input = ProjectsI -> wat PullI {} -> wat PushRemoteBranchI {} -> wat + SyncToFileI {} -> wat QuitI {} -> wat ReleaseDraftI {} -> wat ShowDefinitionI {} -> wat diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs new file mode 100644 index 0000000000..9eb032c51c --- /dev/null +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs @@ -0,0 +1,19 @@ +module Unison.Codebase.Editor.HandleInput.SyncFile (handleSyncToFile) where + +import Control.Lens +import Unison.Cli.Monad (Cli) +import Unison.Cli.Monad qualified as Cli +import Unison.Cli.MonadUtils qualified as Cli +import Unison.Cli.ProjectUtils qualified as Project +import Unison.Prelude +import Unison.Project (ProjectAndBranch (..), ProjectBranchName, ProjectName) +import Unison.Share.SyncV2 qualified as SyncV2 +import Unison.SyncV2.Types (BranchRef) + +handleSyncToFile :: FilePath -> ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName) -> Cli () +handleSyncToFile destSyncFile branchToSync = do + pp <- Cli.getCurrentProjectPath + projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) branchToSync + causalHash <- Cli.runTransaction $ Project.getProjectBranchCausalHash (projectBranch ^. #branch) + let branchRef = into @BranchRef $ ProjectAndBranch (projectBranch ^. #project . #name) (projectBranch ^. #branch . #name) + SyncV2.createSyncFile causalHash (Just branchRef) destSyncFile diff --git a/unison-cli/src/Unison/Codebase/Editor/Input.hs b/unison-cli/src/Unison/Codebase/Editor/Input.hs index e3f4ddcf13..b352e6c44f 100644 --- a/unison-cli/src/Unison/Codebase/Editor/Input.hs +++ b/unison-cli/src/Unison/Codebase/Editor/Input.hs @@ -126,7 +126,7 @@ data Input | DiffNamespaceI BranchId2 BranchId2 -- old new | PullI !PullSourceTarget !PullMode | PushRemoteBranchI PushRemoteBranchInput - | SyncToFileI FilePath (Maybe ProjectAndBranchNames) + | SyncToFileI FilePath (ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName)) | ResetI (BranchId2 {- namespace to reset it to -}) (Maybe UnresolvedProjectBranch {- ProjectBranch to reset -}) | -- | used in Welcome module to give directions to user -- diff --git a/unison-cli/src/Unison/CommandLine/InputPatterns.hs b/unison-cli/src/Unison/CommandLine/InputPatterns.hs index 26a411b9ad..4b51c1e11c 100644 --- a/unison-cli/src/Unison/CommandLine/InputPatterns.hs +++ b/unison-cli/src/Unison/CommandLine/InputPatterns.hs @@ -739,6 +739,21 @@ handleProjectAndBranchNamesArg = SA.ProjectBranch (ProjectAndBranch mproj branch) -> pure $ maybe That These mproj branch otherNumArg -> Left $ wrongStructuredArgument "a project or branch" otherNumArg +handleOptionalProjectAndBranch :: I.Argument -> Either (P.Pretty CT.ColorText) (ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName)) +handleOptionalProjectAndBranch = + either + (\str -> fmap intoProjectAndBranch . first (const $ expectedButActually' "a project or branch" str) . tryInto @(These ProjectName ProjectBranchName) $ Text.pack str) + $ \case + SA.Project project -> pure $ ProjectAndBranch (Just project) Nothing + SA.ProjectBranch (ProjectAndBranch mproj branch) -> pure $ ProjectAndBranch mproj (Just branch) + otherNumArg -> Left $ wrongStructuredArgument "a project or branch" otherNumArg + where + intoProjectAndBranch :: These ProjectName ProjectBranchName -> ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName) + intoProjectAndBranch = \case + This project -> ProjectAndBranch (Just project) Nothing + That branch -> ProjectAndBranch Nothing (Just branch) + These project branch -> ProjectAndBranch (Just project) (Just branch) + mergeBuiltins :: InputPattern mergeBuiltins = InputPattern @@ -2107,8 +2122,8 @@ syncToFile = ] ), parse = \case - [filePath, branch] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> (Just <$> handleProjectAndBranchNamesArg branch) - [filePath] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> pure Nothing + [filePath, branch] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> handleOptionalProjectAndBranch branch + [filePath] -> Input.SyncToFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> pure (ProjectAndBranch Nothing Nothing) args -> wrongArgsLength "one or two arguments" args } where diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index a077130c6a..f7c46f6f04 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -88,6 +88,7 @@ library Unison.Codebase.Editor.HandleInput.Run Unison.Codebase.Editor.HandleInput.RuntimeUtils Unison.Codebase.Editor.HandleInput.ShowDefinition + Unison.Codebase.Editor.HandleInput.SyncFile Unison.Codebase.Editor.HandleInput.TermResolution Unison.Codebase.Editor.HandleInput.Tests Unison.Codebase.Editor.HandleInput.Todo diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index fb1cdf30b7..c16b2e72da 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -32,7 +32,9 @@ import Data.Text qualified as Text import Data.Word (Word16, Word64) import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.TempEntity (TempEntity) +import Unison.Core.Project (ProjectAndBranch (..), ProjectBranchName, ProjectName) import Unison.Hash32 (Hash32) +import Unison.Prelude (From (..)) import Unison.Server.Orphans () import Unison.Share.API.Hash (HashJWT) import Unison.Sync.Types qualified as SyncV1 @@ -41,6 +43,9 @@ import Unison.Util.Servant.CBOR newtype BranchRef = BranchRef {unBranchRef :: Text} deriving (Serialise, Eq, Show, Ord, ToJSON, FromJSON) via Text +instance From (ProjectAndBranch ProjectName ProjectBranchName) BranchRef where + from pab = BranchRef $ from pab + data GetCausalHashErrorTag = GetCausalHashNoReadPermissionTag | GetCausalHashUserNotFoundTag From 94755c83c9f21a1cf9ecdb4df3a04dc06f21e230 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 15:32:37 -0800 Subject: [PATCH 38/52] Fix missing datain init header --- unison-share-api/src/Unison/SyncV2/Types.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index c16b2e72da..9b8715b879 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -189,11 +189,14 @@ optionalDecodeMapKey k m = -- | Serialised as a map to allow for future expansion instance Serialise StreamInitInfo where - encode (StreamInitInfo {version, entitySorting}) = + encode (StreamInitInfo {version, entitySorting, numEntities, rootCausalHash, rootBranchRef}) = CBOR.encode ( Map.fromList [ ("v" :: Text, serialiseUnknownCBORBytes version), - ("es", serialiseUnknownCBORBytes entitySorting) + ("es", serialiseUnknownCBORBytes entitySorting), + ("ne", serialiseUnknownCBORBytes numEntities), + ("rc", serialiseUnknownCBORBytes rootCausalHash), + ("br", serialiseUnknownCBORBytes rootBranchRef) ] ) decode = do From 320361212a87c5b6d9cb68ca7c403244b97e136e Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 16:15:45 -0800 Subject: [PATCH 39/52] Clean up dep calculation --- unison-share-api/src/Unison/Sync/Types.hs | 24 ++--------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/unison-share-api/src/Unison/Sync/Types.hs b/unison-share-api/src/Unison/Sync/Types.hs index 3e52c6e103..51a1dd4538 100644 --- a/unison-share-api/src/Unison/Sync/Types.hs +++ b/unison-share-api/src/Unison/Sync/Types.hs @@ -60,7 +60,7 @@ import Codec.CBOR.Decoding qualified as CBOR import Codec.CBOR.Encoding qualified as CBOR import Codec.Serialise import Codec.Serialise qualified as CBOR -import Control.Lens (both, traverseOf) +import Control.Lens (both, foldMapOf, traverseOf) import Data.Aeson import Data.Aeson qualified as Aeson import Data.Aeson.Types qualified as Aeson @@ -174,28 +174,8 @@ entityHashes_ f = \case C causal -> C <$> causalHashes_ f causal -- | Get the direct dependencies of an entity (which are actually sync'd). --- --- FIXME use generic-lens here? (typed @hash) entityDependencies :: (Ord hash) => Entity text noSyncHash hash -> Set hash -entityDependencies = \case - TC (TermComponent terms) -> flip foldMap terms \(LocalIds {hashes}, _term) -> Set.fromList hashes - DC (DeclComponent decls) -> flip foldMap decls \(LocalIds {hashes}, _decl) -> Set.fromList hashes - P Patch {newHashLookup} -> Set.fromList newHashLookup - PD PatchDiff {parent, newHashLookup} -> Set.insert parent (Set.fromList newHashLookup) - N Namespace {defnLookup, patchLookup, childLookup} -> - Set.unions - [ Set.fromList defnLookup, - Set.fromList patchLookup, - foldMap (\(namespaceHash, causalHash) -> Set.fromList [namespaceHash, causalHash]) childLookup - ] - ND NamespaceDiff {parent, defnLookup, patchLookup, childLookup} -> - Set.unions - [ Set.singleton parent, - Set.fromList defnLookup, - Set.fromList patchLookup, - foldMap (\(namespaceHash, causalHash) -> Set.fromList [namespaceHash, causalHash]) childLookup - ] - C Causal {namespaceHash, parents} -> Set.insert namespaceHash parents +entityDependencies = foldMapOf entityHashes_ Set.singleton data TermComponent text hash = TermComponent [(LocalIds text hash, ByteString)] deriving stock (Show, Eq, Functor, Ord) From f42c1b4a7d74be665a29b73b08cdda4787f3afcb Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 16:20:04 -0800 Subject: [PATCH 40/52] Rename sync helpers --- unison-cli/src/Unison/Cli/DownloadUtils.hs | 2 +- .../Codebase/Editor/HandleInput/SyncFile.hs | 2 +- unison-cli/src/Unison/Share/SyncV2.hs | 21 ++++++++++--------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index 461252ba82..08142a808e 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -57,7 +57,7 @@ downloadProjectBranchFromShare useSquashed branch = -- TODO: Fill this in. let knownHashes = Set.empty let downloadedCallback = \_ -> pure () - result <- SyncV2.streamDownloadEntitiesFromCodeserver Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback + result <- SyncV2.syncFromCodeserver Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback -- numDownloaded <- liftIO getNumDownloaded let numDownloaded = 0 pure (result, numDownloaded) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs index 9eb032c51c..13a9a51609 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs @@ -16,4 +16,4 @@ handleSyncToFile destSyncFile branchToSync = do projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) branchToSync causalHash <- Cli.runTransaction $ Project.getProjectBranchCausalHash (projectBranch ^. #branch) let branchRef = into @BranchRef $ ProjectAndBranch (projectBranch ^. #project . #name) (projectBranch ^. #branch . #name) - SyncV2.createSyncFile causalHash (Just branchRef) destSyncFile + SyncV2.syncToFile causalHash (Just branchRef) destSyncFile diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 16c6060321..544d45932f 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -2,9 +2,9 @@ {-# LANGUAGE TypeOperators #-} module Unison.Share.SyncV2 - ( streamDownloadEntitiesFromCodeserver, - streamDownloadEntitiesFromFile, - createSyncFile, + ( syncFromCodeserver, + syncFromFile, + syncToFile, ) where @@ -153,7 +153,7 @@ batchValidateEntities entities = do err -> do throwError . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err -streamDownloadEntitiesFromCodeserver :: +syncFromCodeserver :: -- | The Unison Share URL. Servant.BaseUrl -> -- | The branch to download from. @@ -164,7 +164,7 @@ streamDownloadEntitiesFromCodeserver :: -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -streamDownloadEntitiesFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do +syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask runExceptT do let hash = Share.hashJWTHash hashJwt @@ -214,11 +214,11 @@ sortDependencyFirst entities = do (graph, vertexInfo, _vertexForKey) = Graph.graphFromEdges adjList in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) -streamDownloadEntitiesFromFile :: +syncFromFile :: -- | Location of the sync-file FilePath -> - Cli (Either (SyncError SyncV2.PullError) ()) -streamDownloadEntitiesFromFile syncFilePath = do + Cli (Either (SyncError SyncV2.PullError) CausalHash) +syncFromFile syncFilePath = do Cli.Env {codebase} <- ask runExceptT do Debug.debugLogM Debug.Temp $ "Kicking off sync" @@ -227,13 +227,14 @@ streamDownloadEntitiesFromFile syncFilePath = do (header, rest) <- initializeStream stream doSync codebase header rest afterSyncChecks codebase (SyncV2.rootCausalHash header) + pure . hash32ToCausalHash $ SyncV2.rootCausalHash header -createSyncFile :: +syncToFile :: CausalHash -> Maybe SyncV2.BranchRef -> FilePath -> Cli () -createSyncFile rootHash mayBranchRef destFilePath = do +syncToFile rootHash mayBranchRef destFilePath = do Cli.Env {codebase} <- ask entities <- liftIO $ Codebase.runTransaction codebase (depsForCausal rootHash) let initialChunk = From a9d95944a7ae9f58ce3e89d3733aa02a37790000 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 16:46:59 -0800 Subject: [PATCH 41/52] Wire in sync.from-file --- unison-cli/src/Unison/Cli/MonadUtils.hs | 8 ++++ .../src/Unison/Codebase/Editor/HandleInput.hs | 4 ++ .../Codebase/Editor/HandleInput/SyncFile.hs | 16 ++++++- .../src/Unison/Codebase/Editor/Input.hs | 1 + .../src/Unison/Codebase/Editor/Output.hs | 3 ++ .../src/Unison/CommandLine/InputPatterns.hs | 45 +++++++++++++++++++ .../src/Unison/CommandLine/OutputMessages.hs | 3 ++ 7 files changed, 79 insertions(+), 1 deletion(-) diff --git a/unison-cli/src/Unison/Cli/MonadUtils.hs b/unison-cli/src/Unison/Cli/MonadUtils.hs index 242ee77635..94f01b098b 100644 --- a/unison-cli/src/Unison/Cli/MonadUtils.hs +++ b/unison-cli/src/Unison/Cli/MonadUtils.hs @@ -49,6 +49,7 @@ module Unison.Cli.MonadUtils stepManyAtM, updateProjectBranchRoot, updateProjectBranchRoot_, + setProjectBranchRootToCausalHash, updateAtM, updateAt, updateAndStepAt, @@ -447,6 +448,13 @@ updateProjectBranchRoot projectBranch reason f = do Q.setProjectBranchHead reason (projectBranch ^. #projectId) (projectBranch ^. #branchId) causalHashId pure result +setProjectBranchRootToCausalHash :: ProjectBranch -> Text -> CausalHash -> Cli () +setProjectBranchRootToCausalHash projectBranch reason targetCH = do + Cli.time "setProjectBranchRootToCausalHash" do + Cli.runTransaction $ do + targetCHID <- Q.expectCausalHashIdByCausalHash targetCH + Q.setProjectBranchHead reason (projectBranch ^. #projectId) (projectBranch ^. #branchId) targetCHID + updateProjectBranchRoot_ :: ProjectBranch -> Text -> (Branch IO -> Branch IO) -> Cli () updateProjectBranchRoot_ projectBranch reason f = do updateProjectBranchRoot projectBranch reason (\b -> pure (f b, ())) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index 380354025f..87403f028a 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -690,6 +690,9 @@ loop e = do PullI sourceTarget pullMode -> handlePull sourceTarget pullMode PushRemoteBranchI pushRemoteBranchInput -> handlePushRemoteBranch pushRemoteBranchInput SyncToFileI syncFileDest projectBranchName -> SyncFile.handleSyncToFile syncFileDest projectBranchName + SyncFromFileI syncFileSrc projectBranchName -> do + description <- inputDescription input + SyncFile.handleSyncFromFile description syncFileSrc projectBranchName ListDependentsI hq -> handleDependents hq ListDependenciesI hq -> handleDependencies hq NamespaceDependenciesI path -> handleNamespaceDependencies path @@ -1015,6 +1018,7 @@ inputDescription input = PullI {} -> wat PushRemoteBranchI {} -> wat SyncToFileI {} -> wat + SyncFromFileI {} -> wat QuitI {} -> wat ReleaseDraftI {} -> wat ShowDefinitionI {} -> wat diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs index 13a9a51609..03ecbc3907 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs @@ -1,10 +1,15 @@ -module Unison.Codebase.Editor.HandleInput.SyncFile (handleSyncToFile) where +module Unison.Codebase.Editor.HandleInput.SyncFile + ( handleSyncToFile, + handleSyncFromFile, + ) +where import Control.Lens import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli import Unison.Cli.MonadUtils qualified as Cli import Unison.Cli.ProjectUtils qualified as Project +import Unison.Codebase.Editor.Output qualified as Output import Unison.Prelude import Unison.Project (ProjectAndBranch (..), ProjectBranchName, ProjectName) import Unison.Share.SyncV2 qualified as SyncV2 @@ -17,3 +22,12 @@ handleSyncToFile destSyncFile branchToSync = do causalHash <- Cli.runTransaction $ Project.getProjectBranchCausalHash (projectBranch ^. #branch) let branchRef = into @BranchRef $ ProjectAndBranch (projectBranch ^. #project . #name) (projectBranch ^. #branch . #name) SyncV2.syncToFile causalHash (Just branchRef) destSyncFile + +handleSyncFromFile :: Text -> FilePath -> ProjectAndBranch (Maybe ProjectName) ProjectBranchName -> Cli () +handleSyncFromFile description srcSyncFile branchToSync = do + pp <- Cli.getCurrentProjectPath + projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) (over #branch Just branchToSync) + SyncV2.syncFromFile srcSyncFile >>= \case + Left err -> Cli.respond (Output.SyncPullError err) + Right causalHash -> do + Cli.setProjectBranchRootToCausalHash (projectBranch ^. #branch) description causalHash diff --git a/unison-cli/src/Unison/Codebase/Editor/Input.hs b/unison-cli/src/Unison/Codebase/Editor/Input.hs index b352e6c44f..1252fca680 100644 --- a/unison-cli/src/Unison/Codebase/Editor/Input.hs +++ b/unison-cli/src/Unison/Codebase/Editor/Input.hs @@ -127,6 +127,7 @@ data Input | PullI !PullSourceTarget !PullMode | PushRemoteBranchI PushRemoteBranchInput | SyncToFileI FilePath (ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName)) + | SyncFromFileI FilePath UnresolvedProjectBranch | ResetI (BranchId2 {- namespace to reset it to -}) (Maybe UnresolvedProjectBranch {- ProjectBranch to reset -}) | -- | used in Welcome module to give directions to user -- diff --git a/unison-cli/src/Unison/Codebase/Editor/Output.hs b/unison-cli/src/Unison/Codebase/Editor/Output.hs index 7ebf9ad299..13c0a076cc 100644 --- a/unison-cli/src/Unison/Codebase/Editor/Output.hs +++ b/unison-cli/src/Unison/Codebase/Editor/Output.hs @@ -80,6 +80,7 @@ import Unison.Share.Sync.Types qualified as Sync import Unison.ShortHash (ShortHash) import Unison.Symbol (Symbol) import Unison.Sync.Types qualified as Share (DownloadEntitiesError, UploadEntitiesError) +import Unison.SyncV2.Types qualified as SyncV2 import Unison.Syntax.Parser qualified as Parser import Unison.Term (Term) import Unison.Type (Type) @@ -440,6 +441,7 @@ data Output | -- | A literal output message. Use this if it's too cumbersome to create a new Output constructor, e.g. for -- ephemeral progress messages that are just simple strings like "Loading branch..." Literal !(P.Pretty P.ColorText) + | SyncPullError (Sync.SyncError SyncV2.PullError) data MoreEntriesThanShown = MoreEntriesThanShown | AllEntriesShown deriving (Eq, Show) @@ -678,6 +680,7 @@ isFailure o = case o of IncoherentDeclDuringMerge {} -> True IncoherentDeclDuringUpdate {} -> True Literal _ -> False + SyncPullError {} -> True isNumberedFailure :: NumberedOutput -> Bool isNumberedFailure = \case diff --git a/unison-cli/src/Unison/CommandLine/InputPatterns.hs b/unison-cli/src/Unison/CommandLine/InputPatterns.hs index 4b51c1e11c..50f1a85be3 100644 --- a/unison-cli/src/Unison/CommandLine/InputPatterns.hs +++ b/unison-cli/src/Unison/CommandLine/InputPatterns.hs @@ -98,6 +98,7 @@ module Unison.CommandLine.InputPatterns pushExhaustive, pushForce, syncToFile, +syncFromFile, quit, releaseDraft, renameBranch, @@ -754,6 +755,23 @@ handleOptionalProjectAndBranch = That branch -> ProjectAndBranch Nothing (Just branch) These project branch -> ProjectAndBranch (Just project) (Just branch) +handleBranchWithOptionalProject :: I.Argument -> Either (P.Pretty CT.ColorText) (ProjectAndBranch (Maybe ProjectName) ProjectBranchName) +handleBranchWithOptionalProject = + either + ( \str -> + Text.pack str + & tryInto @(These ProjectName ProjectBranchName) + & first (const $ expectedButActually' "a project branch" str) + >>= \case + These project branch -> pure $ ProjectAndBranch (Just project) branch + That branch -> pure $ ProjectAndBranch Nothing branch + This _project -> Left $ expectedButActually' "a project branch" str + ) + ( \case + SA.ProjectBranch (ProjectAndBranch mproj branch) -> pure $ ProjectAndBranch mproj branch + otherNumArg -> Left $ wrongStructuredArgument "a project branch" otherNumArg + ) + mergeBuiltins :: InputPattern mergeBuiltins = InputPattern @@ -2134,6 +2152,32 @@ syncToFile = branchInclusion = AllBranches } +syncFromFile :: InputPattern +syncFromFile = + InputPattern + { patternName = "sync.from-file", + aliases = [], + visibility = I.Hidden, + args = [("file-path", Required, filePathArg), ("destination branch", Required, projectAndBranchNamesArg suggestionsConfig)], + help = + ( P.wrapColumn2 + [ ( makeExample syncFromFile ["./feature.usync", "/feature"], + "Sets the /feature branch to the contents of the file `main.usync`." + ) + ] + ), + parse = \case + [filePath, branch] -> Input.SyncFromFileI <$> unsupportedStructuredArgument makeStandalone "a file name" filePath <*> handleBranchWithOptionalProject branch + args -> wrongArgsLength "one or two arguments" args + } + where + suggestionsConfig = + ProjectBranchSuggestionsConfig + { showProjectCompletions = True, + projectInclusion = AllProjects, + branchInclusion = AllBranches + } + mergeOldSquashInputPattern :: InputPattern mergeOldSquashInputPattern = InputPattern @@ -3713,6 +3757,7 @@ validInputs = pushExhaustive, pushForce, syncToFile, + syncFromFile, quit, releaseDraft, renameBranch, diff --git a/unison-cli/src/Unison/CommandLine/OutputMessages.hs b/unison-cli/src/Unison/CommandLine/OutputMessages.hs index 86b5ed685c..251ef7cf6e 100644 --- a/unison-cli/src/Unison/CommandLine/OutputMessages.hs +++ b/unison-cli/src/Unison/CommandLine/OutputMessages.hs @@ -2258,6 +2258,9 @@ notifyUser dir = \case <> "it. Then try the update again." ] Literal message -> pure message + SyncPullError syncErr -> + -- TODO: Better error message + pure (P.shown syncErr) prettyShareError :: ShareError -> Pretty prettyShareError = From 61393ccd66200b6026fd83f01fb1a2825b01bd82 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Tue, 17 Dec 2024 17:23:27 -0800 Subject: [PATCH 42/52] Break resource handling, but get file sync kinda working. --- .../src/Unison/Codebase/Editor/HandleInput.hs | 3 ++- unison-cli/src/Unison/Share/SyncV2.hs | 18 +++++++++++------- unison-share-api/src/Unison/SyncV2/Types.hs | 16 ++++++++++++---- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index 87403f028a..60b8508cfe 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -1018,7 +1018,8 @@ inputDescription input = PullI {} -> wat PushRemoteBranchI {} -> wat SyncToFileI {} -> wat - SyncFromFileI {} -> wat + SyncFromFileI fp pab -> + pure $ "sync.from-file " <> into @Text fp <> " " <> into @Text pab QuitI {} -> wat ReleaseDraftI {} -> wat ShowDefinitionI {} -> wat diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 544d45932f..1fa8806e8e 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -223,7 +223,11 @@ syncFromFile syncFilePath = do runExceptT do Debug.debugLogM Debug.Temp $ "Kicking off sync" mapExceptT liftIO $ Timing.time "File Sync" $ do - let stream = C.transPipe (liftIO . C.runResourceT) (C.sourceFile syncFilePath) C..| unNetString C..| C.mapM decodeFramedEntity + -- TODO: Add correct resource handling :'( + bl <- liftIO $ BL.readFile syncFilePath + let source = C.sourceLazy bl + -- let source = (C.sourceFile syncFilePath) + let stream = source C..| decodeUnframedEntities (header, rest) <- initializeStream stream doSync codebase header rest afterSyncChecks codebase (SyncV2.rootCausalHash header) @@ -274,8 +278,8 @@ depsForCausal causalHash = do traverseOf_ Sync.entityHashes_ expandEntities entity -- | Gets the framed chunks from a NetString framed stream. -unNetString :: ConduitT ByteString ByteString SyncM () -unNetString = do +_unNetString :: ConduitT ByteString ByteString SyncM () +_unNetString = do bs <- C.sinkParser $ do len <- A8.decimal _ <- A8.char ':' @@ -284,15 +288,15 @@ unNetString = do pure bs C.yield bs -decodeFramedEntity :: ByteString -> SyncM SyncV2.DownloadEntitiesChunk -decodeFramedEntity bs = do +_decodeFramedEntity :: ByteString -> SyncM SyncV2.DownloadEntitiesChunk +_decodeFramedEntity bs = do case CBOR.deserialiseOrFail (BL.fromStrict bs) of Left err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err Right chunk -> pure chunk -- Expects a stream of tightly-packed CBOR entities without any framing/separators. -_decodeUnframedEntities :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () -_decodeUnframedEntities = C.transPipe (mapExceptT stToIO) $ do +decodeUnframedEntities :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () +decodeUnframedEntities = C.transPipe (mapExceptT stToIO) $ do C.await >>= \case Nothing -> pure () Just bs -> do diff --git a/unison-share-api/src/Unison/SyncV2/Types.hs b/unison-share-api/src/Unison/SyncV2/Types.hs index 9b8715b879..04ce112d8f 100644 --- a/unison-share-api/src/Unison/SyncV2/Types.hs +++ b/unison-share-api/src/Unison/SyncV2/Types.hs @@ -33,6 +33,7 @@ import Data.Word (Word16, Word64) import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.TempEntity (TempEntity) import Unison.Core.Project (ProjectAndBranch (..), ProjectBranchName, ProjectName) +import Unison.Debug qualified as Debug import Unison.Hash32 (Hash32) import Unison.Prelude (From (..)) import Unison.Server.Orphans () @@ -191,20 +192,27 @@ optionalDecodeMapKey k m = instance Serialise StreamInitInfo where encode (StreamInitInfo {version, entitySorting, numEntities, rootCausalHash, rootBranchRef}) = CBOR.encode - ( Map.fromList + ( Map.fromList $ [ ("v" :: Text, serialiseUnknownCBORBytes version), ("es", serialiseUnknownCBORBytes entitySorting), - ("ne", serialiseUnknownCBORBytes numEntities), - ("rc", serialiseUnknownCBORBytes rootCausalHash), - ("br", serialiseUnknownCBORBytes rootBranchRef) + ("rc", serialiseUnknownCBORBytes rootCausalHash) ] + <> maybe [] (\ne -> [("ne", serialiseUnknownCBORBytes ne)]) numEntities + <> maybe [] (\br -> [("br", serialiseUnknownCBORBytes br)]) rootBranchRef ) decode = do + Debug.debugLogM Debug.Temp "Decoding StreamInitInfo" + Debug.debugLogM Debug.Temp "Decoding Map" m <- CBOR.decode + Debug.debugLogM Debug.Temp "Decoding Version" version <- decodeMapKey "v" m + Debug.debugLogM Debug.Temp "Decoding Entity Sorting" entitySorting <- decodeMapKey "es" m + Debug.debugLogM Debug.Temp "Decoding Number of Entities" numEntities <- (optionalDecodeMapKey "ne" m) + Debug.debugLogM Debug.Temp "Decoding Root Causal Hash" rootCausalHash <- decodeMapKey "rc" m + Debug.debugLogM Debug.Temp "Decoding Branch Ref" rootBranchRef <- optionalDecodeMapKey "br" m pure StreamInitInfo {version, entitySorting, numEntities, rootCausalHash, rootBranchRef} From 65dd42e4bc99955039048985e7b32a7e885c21ff Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 18 Dec 2024 15:44:47 -0800 Subject: [PATCH 43/52] Write ErrGroupT Monad --- unison-cli/src/Unison/Share/Sync/Util.hs | 34 ++++++++++++++++++++++++ unison-cli/unison-cli.cabal | 1 + 2 files changed, 35 insertions(+) create mode 100644 unison-cli/src/Unison/Share/Sync/Util.hs diff --git a/unison-cli/src/Unison/Share/Sync/Util.hs b/unison-cli/src/Unison/Share/Sync/Util.hs new file mode 100644 index 0000000000..b3aa28ecfe --- /dev/null +++ b/unison-cli/src/Unison/Share/Sync/Util.hs @@ -0,0 +1,34 @@ +module Unison.Share.Sync.Util + ( ErrGroupT (..), + runErrGroupT, + ) +where + +import Control.Monad.Error.Class (MonadError (..)) +import Control.Monad.Reader (MonadReader (..), MonadTrans (..), ReaderT (..)) +import Data.Data (Typeable) +import UnliftIO qualified as IO + +newtype Handler e m = Handler {runHandler :: forall x. e -> m x} + +newtype ErrGroupT e m a = ErrGroupT {unErrGroupT :: ReaderT (Handler e m) m a} + deriving newtype (Functor, Applicative, Monad) + +newtype ExceptionWrapper e = ExceptionWrapper {unException :: e} + +instance Show (ExceptionWrapper e) where + show (ExceptionWrapper _) = "ExceptionWrapper<>" + +instance (Typeable e) => IO.Exception (ExceptionWrapper e) + +instance (IO.MonadUnliftIO m, Typeable e) => MonadError e (ErrGroupT e m) where + throwError e = do + handler <- ErrGroupT ask + ErrGroupT $ lift $ runHandler handler e + catchError (ErrGroupT m) f = + ErrGroupT $ + IO.handle (unErrGroupT . f . unException) m + +runErrGroupT :: (IO.MonadUnliftIO m, Typeable e) => ErrGroupT e m a -> (e -> m a) -> m a +runErrGroupT (ErrGroupT m) handler = do + IO.handle (handler . unException) $ runReaderT m (Handler (IO.throwIO . ExceptionWrapper)) diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index f7c46f6f04..6db8fc87e1 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -152,6 +152,7 @@ library Unison.Share.ExpectedHashMismatches Unison.Share.Sync Unison.Share.Sync.Types + Unison.Share.Sync.Util Unison.Share.SyncV2 Unison.Util.HTTP Unison.Version From 597ff68cbe63fee1ef9b81571b7962e955e0c2a1 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 18 Dec 2024 16:42:49 -0800 Subject: [PATCH 44/52] BailT Monad stuff --- unison-cli/src/Unison/Share/Sync/Util.hs | 40 ++++++++++++++---------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/unison-cli/src/Unison/Share/Sync/Util.hs b/unison-cli/src/Unison/Share/Sync/Util.hs index b3aa28ecfe..39eeb2cede 100644 --- a/unison-cli/src/Unison/Share/Sync/Util.hs +++ b/unison-cli/src/Unison/Share/Sync/Util.hs @@ -1,18 +1,20 @@ module Unison.Share.Sync.Util - ( ErrGroupT (..), - runErrGroupT, + ( BailT (..), + MonadBail (..), + runBailT, + mapBailT, + withError, ) where -import Control.Monad.Error.Class (MonadError (..)) -import Control.Monad.Reader (MonadReader (..), MonadTrans (..), ReaderT (..)) +import Control.Monad.Reader (MonadReader (..), ReaderT (..), mapReaderT, withReaderT) import Data.Data (Typeable) import UnliftIO qualified as IO -newtype Handler e m = Handler {runHandler :: forall x. e -> m x} +newtype Handler e = Handler {runHandler :: forall x. e -> IO x} -newtype ErrGroupT e m a = ErrGroupT {unErrGroupT :: ReaderT (Handler e m) m a} - deriving newtype (Functor, Applicative, Monad) +newtype BailT e m a = BailT {unErrGroupT :: ReaderT (Handler e) m a} + deriving newtype (Functor, Applicative, Monad, IO.MonadUnliftIO, IO.MonadIO) newtype ExceptionWrapper e = ExceptionWrapper {unException :: e} @@ -21,14 +23,20 @@ instance Show (ExceptionWrapper e) where instance (Typeable e) => IO.Exception (ExceptionWrapper e) -instance (IO.MonadUnliftIO m, Typeable e) => MonadError e (ErrGroupT e m) where - throwError e = do - handler <- ErrGroupT ask - ErrGroupT $ lift $ runHandler handler e - catchError (ErrGroupT m) f = - ErrGroupT $ - IO.handle (unErrGroupT . f . unException) m +class MonadBail e m where + bail :: e -> m a -runErrGroupT :: (IO.MonadUnliftIO m, Typeable e) => ErrGroupT e m a -> (e -> m a) -> m a -runErrGroupT (ErrGroupT m) handler = do +mapBailT :: (Monad n) => (m a -> n b) -> BailT e m a -> BailT e n b +mapBailT f (BailT m) = BailT $ mapReaderT f $ m + +withError :: (Monad m) => (e' -> e) -> BailT e' m a -> BailT e m a +withError f (BailT m) = BailT $ withReaderT (\h -> Handler $ runHandler h . f) m + +instance (IO.MonadUnliftIO m, Typeable e) => MonadBail e (BailT e m) where + bail e = do + handler <- BailT ask + BailT $ IO.liftIO $ runHandler handler e + +runBailT :: (IO.MonadUnliftIO m, Typeable e) => BailT e m a -> (e -> m a) -> m a +runBailT (BailT m) handler = do IO.handle (handler . unException) $ runReaderT m (Handler (IO.throwIO . ExceptionWrapper)) From 8edf4c1556c4c561244a84c8c3d46665d1f5fec6 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 18 Dec 2024 16:42:49 -0800 Subject: [PATCH 45/52] Fix resourceT resource handling --- unison-cli/src/Unison/Share/SyncV2.hs | 63 +++++++++++++-------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 1fa8806e8e..882abf5009 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -63,11 +63,11 @@ import Unison.Util.Timing qualified as Timing import UnliftIO qualified as IO import UnliftIO.IO.File qualified as IO -type Stream i o = ConduitT i o SyncM () +type Stream i o = ConduitT i o StreamM () type PullErr = SyncError SyncV2.PullError -type SyncM = ExceptT PullErr IO +type StreamM = (ExceptT PullErr (C.ResourceT IO)) batchSize :: Int batchSize = 5000 @@ -75,7 +75,7 @@ batchSize = 5000 ------------------------------------------------------------------------------------------------------------------------ -- Download entities -validateAndSave :: (Codebase.Codebase IO v a) -> [(Hash32, TempEntity)] -> SyncM () +validateAndSave :: (Codebase.Codebase IO v a) -> [(Hash32, TempEntity)] -> StreamM () validateAndSave codebase entities = do let validateEntities = do runExceptT (batchValidateEntities entities) @@ -93,7 +93,7 @@ validateAndSave codebase entities = do syncUnsortedStream :: (Codebase.Codebase IO v a) -> Stream () SyncV2.EntityChunk -> - SyncM () + StreamM () syncUnsortedStream codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing unsorted stream" allResults <- C.runConduit $ stream C..| C.sinkList @@ -105,12 +105,12 @@ syncUnsortedStream codebase stream = do syncSortedStream :: (Codebase.Codebase IO v a) -> Stream () SyncV2.EntityChunk -> - SyncM () + StreamM () syncSortedStream codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing sorted stream" let handler :: Stream [SyncV2.EntityChunk] o handler = C.mapM_C \chunkBatch -> do - entityBatch <- ExceptT $ Codebase.runTransactionExceptT codebase do for chunkBatch unpackChunk + entityBatch <- mapExceptT lift . ExceptT $ Codebase.runTransactionExceptT codebase do for chunkBatch unpackChunk validateAndSave codebase (catMaybes entityBatch) C.runConduit $ stream C..| C.chunksOf batchSize C..| handler @@ -134,7 +134,7 @@ unpackChunks xs = do for xs unpackChunk <&> catMaybes -batchValidateEntities :: [(Hash32, TempEntity)] -> SyncM () +batchValidateEntities :: [(Hash32, TempEntity)] -> ExceptT PullErr IO () batchValidateEntities entities = do mismatches <- fmap catMaybes $ liftIO $ IO.pooledForConcurrently entities \(hash, entity) -> do IO.evaluate $ EV.validateTempEntity hash entity @@ -175,7 +175,7 @@ syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallb _ -> do Debug.debugLogM Debug.Temp $ "Kicking off sync request" Timing.time "Entity Download" $ do - liftIO . runExceptT $ httpStreamEntities + liftIO . C.runResourceT . runExceptT $ httpStreamEntities authHTTPClient unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} @@ -183,7 +183,7 @@ syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallb doSync codebase header stream mapExceptT liftIO (afterSyncChecks codebase hash) -doSync :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> ExceptT PullErr IO () +doSync :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do case version of (SyncV2.Version 1) -> pure () @@ -193,7 +193,7 @@ doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _to SyncV2.DependenciesFirst -> syncSortedStream codebase stream SyncV2.Unsorted -> syncUnsortedStream codebase stream -afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> SyncM () +afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> ExceptT (SyncError SyncV2.PullError) IO () afterSyncChecks codebase hash = do lift (didCausalSuccessfullyImport codebase hash) >>= \case False -> do @@ -223,13 +223,11 @@ syncFromFile syncFilePath = do runExceptT do Debug.debugLogM Debug.Temp $ "Kicking off sync" mapExceptT liftIO $ Timing.time "File Sync" $ do - -- TODO: Add correct resource handling :'( - bl <- liftIO $ BL.readFile syncFilePath - let source = C.sourceLazy bl - -- let source = (C.sourceFile syncFilePath) - let stream = source C..| decodeUnframedEntities - (header, rest) <- initializeStream stream - doSync codebase header rest + header <- mapExceptT C.runResourceT $ do + let stream = C.sourceFile syncFilePath C..| decodeUnframedEntities + (header, rest) <- initializeStream stream + doSync codebase header rest + pure header afterSyncChecks codebase (SyncV2.rootCausalHash header) pure . hash32ToCausalHash $ SyncV2.rootCausalHash header @@ -278,7 +276,7 @@ depsForCausal causalHash = do traverseOf_ Sync.entityHashes_ expandEntities entity -- | Gets the framed chunks from a NetString framed stream. -_unNetString :: ConduitT ByteString ByteString SyncM () +_unNetString :: ConduitT ByteString ByteString StreamM () _unNetString = do bs <- C.sinkParser $ do len <- A8.decimal @@ -288,15 +286,15 @@ _unNetString = do pure bs C.yield bs -_decodeFramedEntity :: ByteString -> SyncM SyncV2.DownloadEntitiesChunk +_decodeFramedEntity :: ByteString -> StreamM SyncV2.DownloadEntitiesChunk _decodeFramedEntity bs = do case CBOR.deserialiseOrFail (BL.fromStrict bs) of Left err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err Right chunk -> pure chunk -- Expects a stream of tightly-packed CBOR entities without any framing/separators. -decodeUnframedEntities :: ConduitT ByteString SyncV2.DownloadEntitiesChunk SyncM () -decodeUnframedEntities = C.transPipe (mapExceptT stToIO) $ do +decodeUnframedEntities :: ConduitT ByteString SyncV2.DownloadEntitiesChunk StreamM () +decodeUnframedEntities = C.transPipe (mapExceptT (lift . stToIO)) $ do C.await >>= \case Nothing -> pure () Just bs -> do @@ -363,15 +361,16 @@ SyncV2.Routes -- | Helper for running clientM that returns a stream of entities. -- You MUST consume the stream within the callback, it will be closed when the callback returns. -withConduit :: forall r. Servant.ClientEnv -> (Stream () SyncV2.DownloadEntitiesChunk -> SyncM r) -> Servant.ClientM (Servant.SourceIO SyncV2.DownloadEntitiesChunk) -> SyncM r +withConduit :: forall r. Servant.ClientEnv -> (Stream () SyncV2.DownloadEntitiesChunk -> StreamM r) -> Servant.ClientM (Servant.SourceIO SyncV2.DownloadEntitiesChunk) -> StreamM r withConduit clientEnv callback clientM = do Debug.debugLogM Debug.Temp $ "Running clientM" - ExceptT . liftIO $ Servant.withClientM clientM clientEnv $ \case - Left err -> pure . Left . TransportError $ (handleClientError clientEnv err) - Right sourceT -> do - Debug.debugLogM Debug.Temp $ "Converting sourceIO to conduit" - conduit <- liftIO $ Servant.fromSourceIO sourceT - (runExceptT $ callback conduit) + ExceptT $ withRunInIO \runInIO -> do + Servant.withClientM clientM clientEnv $ \case + Left err -> pure . Left . TransportError $ (handleClientError clientEnv err) + Right sourceT -> do + Debug.debugLogM Debug.Temp $ "Converting sourceIO to conduit" + conduit <- liftIO $ Servant.fromSourceIO sourceT + (runInIO . runExceptT $ callback conduit) handleClientError :: Servant.ClientEnv -> Servant.ClientError -> CodeserverTransportError handleClientError clientEnv err = @@ -396,8 +395,8 @@ httpStreamEntities :: Auth.AuthenticatedHttpClient -> Servant.BaseUrl -> SyncV2.DownloadEntitiesRequest -> - (SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> SyncM ()) -> - SyncM () + (SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM ()) -> + StreamM () httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback = do let clientEnv = (Servant.mkClientEnv httpClient unisonShareUrl) @@ -414,7 +413,7 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req callback init entityStream -- | Peel the header off the stream and parse the remaining entity chunks. -initializeStream :: Stream () SyncV2.DownloadEntitiesChunk -> SyncM (SyncV2.StreamInitInfo, Stream () SyncV2.EntityChunk) +initializeStream :: Stream () SyncV2.DownloadEntitiesChunk -> StreamM (SyncV2.StreamInitInfo, Stream () SyncV2.EntityChunk) initializeStream stream = do Debug.debugLogM Debug.Temp $ "Peeling options off stream" (streamRemainder, init) <- stream C.$$+ C.headC @@ -431,7 +430,7 @@ initializeStream stream = do throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorMissingInitialChunk SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError . SyncV2.PullError'DownloadEntities $ err where - parseEntity :: SyncV2.DownloadEntitiesChunk -> SyncM SyncV2.EntityChunk + parseEntity :: SyncV2.DownloadEntitiesChunk -> StreamM SyncV2.EntityChunk parseEntity = \case SyncV2.EntityC chunk -> pure chunk SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError $ SyncV2.PullError'DownloadEntities err From 2c95433b92f5148dc06eac2878431b4f68bb8084 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 18 Dec 2024 16:55:54 -0800 Subject: [PATCH 46/52] Add gzip --- unison-cli/src/Unison/Share/SyncV2.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 882abf5009..6772f238a7 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -22,6 +22,7 @@ import Data.ByteString qualified as BS import Data.ByteString.Lazy qualified as BL import Data.Conduit.Attoparsec qualified as C import Data.Conduit.List qualified as C +import Data.Conduit.Zlib qualified as C import Data.Graph qualified as Graph import Data.Map qualified as Map import Data.Proxy @@ -61,7 +62,6 @@ import Unison.SyncV2.Types qualified as SyncV2 import Unison.Util.Servant.CBOR qualified as CBOR import Unison.Util.Timing qualified as Timing import UnliftIO qualified as IO -import UnliftIO.IO.File qualified as IO type Stream i o = ConduitT i o StreamM () @@ -224,7 +224,7 @@ syncFromFile syncFilePath = do Debug.debugLogM Debug.Temp $ "Kicking off sync" mapExceptT liftIO $ Timing.time "File Sync" $ do header <- mapExceptT C.runResourceT $ do - let stream = C.sourceFile syncFilePath C..| decodeUnframedEntities + let stream = C.sourceFile syncFilePath C..| C.ungzip C..| decodeUnframedEntities (header, rest) <- initializeStream stream doSync codebase header rest pure header @@ -259,7 +259,7 @@ syncToFile rootHash mayBranchRef destFilePath = do in SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR}) ) & (initialChunk :) - liftIO $ IO.writeBinaryFile destFilePath . BL.toStrict . foldMap CBOR.serialise $ contents + liftIO . C.runResourceT $ C.runConduit $ C.yieldMany contents C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| C.sinkFile destFilePath -- | Collect all dependencies of a given causal hash. depsForCausal :: CausalHash -> Sqlite.Transaction (Map Hash32 (Sync.Entity Text Hash32 Hash32)) From 5d8e60ebd5d333d97fb4ac836540f9aeee1b35d1 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Wed, 18 Dec 2024 20:03:43 -0800 Subject: [PATCH 47/52] Implement sync.from-codebase --- .../src/Unison/Codebase/Editor/HandleInput.hs | 9 ++++ .../Codebase/Editor/HandleInput/SyncFile.hs | 36 +++++++++++++- .../src/Unison/Codebase/Editor/Input.hs | 1 + .../src/Unison/CommandLine/InputPatterns.hs | 28 ++++++++++- unison-cli/src/Unison/Share/SyncV2.hs | 48 ++++++++++++++----- 5 files changed, 109 insertions(+), 13 deletions(-) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index 60b8508cfe..43a05c2fde 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -693,6 +693,13 @@ loop e = do SyncFromFileI syncFileSrc projectBranchName -> do description <- inputDescription input SyncFile.handleSyncFromFile description syncFileSrc projectBranchName + SyncFromCodebaseI srcCodebasePath srcBranch destBranch -> do + description <- inputDescription input + let srcBranch' = + srcBranch & over #project \case + Nothing -> error "todo" + Just proj -> proj + SyncFile.handleSyncFromCodebase description srcCodebasePath srcBranch' destBranch ListDependentsI hq -> handleDependents hq ListDependenciesI hq -> handleDependencies hq NamespaceDependenciesI path -> handleNamespaceDependencies path @@ -1020,6 +1027,8 @@ inputDescription input = SyncToFileI {} -> wat SyncFromFileI fp pab -> pure $ "sync.from-file " <> into @Text fp <> " " <> into @Text pab + SyncFromCodebaseI fp srcBranch destBranch -> do + pure $ "sync.from-file " <> into @Text fp <> " " <> into @Text srcBranch <> " " <> into @Text destBranch QuitI {} -> wat ReleaseDraftI {} -> wat ShowDefinitionI {} -> wat diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs index 03ecbc3907..9789657f8e 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs @@ -1,15 +1,22 @@ module Unison.Codebase.Editor.HandleInput.SyncFile ( handleSyncToFile, handleSyncFromFile, + handleSyncFromCodebase, ) where import Control.Lens +import Control.Monad.Reader (MonadReader (..)) +import U.Codebase.Sqlite.Queries qualified as Q import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli import Unison.Cli.MonadUtils qualified as Cli import Unison.Cli.ProjectUtils qualified as Project +import Unison.Codebase (CodebasePath) +import Unison.Codebase qualified as Codebase import Unison.Codebase.Editor.Output qualified as Output +import Unison.Codebase.Init qualified as Init +import Unison.Codebase.SqliteCodebase qualified as SqliteCodebase import Unison.Prelude import Unison.Project (ProjectAndBranch (..), ProjectBranchName, ProjectName) import Unison.Share.SyncV2 qualified as SyncV2 @@ -21,7 +28,10 @@ handleSyncToFile destSyncFile branchToSync = do projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) branchToSync causalHash <- Cli.runTransaction $ Project.getProjectBranchCausalHash (projectBranch ^. #branch) let branchRef = into @BranchRef $ ProjectAndBranch (projectBranch ^. #project . #name) (projectBranch ^. #branch . #name) - SyncV2.syncToFile causalHash (Just branchRef) destSyncFile + Cli.Env {codebase} <- ask + liftIO (SyncV2.syncToFile codebase causalHash (Just branchRef) destSyncFile) >>= \case + Left err -> Cli.respond (Output.SyncPullError err) + Right _ -> pure () handleSyncFromFile :: Text -> FilePath -> ProjectAndBranch (Maybe ProjectName) ProjectBranchName -> Cli () handleSyncFromFile description srcSyncFile branchToSync = do @@ -31,3 +41,27 @@ handleSyncFromFile description srcSyncFile branchToSync = do Left err -> Cli.respond (Output.SyncPullError err) Right causalHash -> do Cli.setProjectBranchRootToCausalHash (projectBranch ^. #branch) description causalHash + +handleSyncFromCodebase :: Text -> CodebasePath -> ProjectAndBranch ProjectName ProjectBranchName -> ProjectAndBranch (Maybe ProjectName) ProjectBranchName -> Cli () +handleSyncFromCodebase description srcCodebasePath srcBranch destBranch = do + Cli.Env {codebase} <- ask + pp <- Cli.getCurrentProjectPath + projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) (over #branch Just destBranch) + r <- liftIO $ Init.withOpenCodebase SqliteCodebase.init "sync-src" srcCodebasePath Init.DontLock (Init.MigrateAfterPrompt Init.Backup Init.Vacuum) \srcCodebase -> do + Codebase.withConnection srcCodebase \srcConn -> do + maySrcCausalHash <- Codebase.runTransaction srcCodebase $ do + let ProjectAndBranch srcProjName srcBranchName = srcBranch + runMaybeT do + project <- MaybeT (Q.loadProjectByName srcProjName) + branch <- MaybeT (Q.loadProjectBranchByName (project ^. #projectId) srcBranchName) + lift $ Project.getProjectBranchCausalHash branch + case maySrcCausalHash of + Nothing -> pure $ Left (error "Todo proper error") + Just srcCausalHash -> do + fmap (const srcCausalHash) <$> liftIO (SyncV2.syncFromCodebase srcConn codebase srcCausalHash) + + case r of + Left _err -> pure $ error "Todo proper error" + Right (Left syncErr) -> Cli.respond (Output.SyncPullError syncErr) + Right (Right causalHash) -> do + Cli.setProjectBranchRootToCausalHash (projectBranch ^. #branch) description causalHash diff --git a/unison-cli/src/Unison/Codebase/Editor/Input.hs b/unison-cli/src/Unison/Codebase/Editor/Input.hs index 1252fca680..684a5ac1ea 100644 --- a/unison-cli/src/Unison/Codebase/Editor/Input.hs +++ b/unison-cli/src/Unison/Codebase/Editor/Input.hs @@ -128,6 +128,7 @@ data Input | PushRemoteBranchI PushRemoteBranchInput | SyncToFileI FilePath (ProjectAndBranch (Maybe ProjectName) (Maybe ProjectBranchName)) | SyncFromFileI FilePath UnresolvedProjectBranch + | SyncFromCodebaseI FilePath UnresolvedProjectBranch UnresolvedProjectBranch | ResetI (BranchId2 {- namespace to reset it to -}) (Maybe UnresolvedProjectBranch {- ProjectBranch to reset -}) | -- | used in Welcome module to give directions to user -- diff --git a/unison-cli/src/Unison/CommandLine/InputPatterns.hs b/unison-cli/src/Unison/CommandLine/InputPatterns.hs index 50f1a85be3..f2fc24cf7d 100644 --- a/unison-cli/src/Unison/CommandLine/InputPatterns.hs +++ b/unison-cli/src/Unison/CommandLine/InputPatterns.hs @@ -98,7 +98,8 @@ module Unison.CommandLine.InputPatterns pushExhaustive, pushForce, syncToFile, -syncFromFile, + syncFromFile, + syncFromCodebase, quit, releaseDraft, renameBranch, @@ -2178,6 +2179,30 @@ syncFromFile = branchInclusion = AllBranches } +syncFromCodebase :: InputPattern +syncFromCodebase = + InputPattern + { patternName = "sync.from-codebase", + aliases = [], + visibility = I.Hidden, + args = [("codebase-location", Required, filePathArg), ("branch-to-sync", Required, projectAndBranchNamesArg suggestionsConfig), ("destination-branch", Optional, projectAndBranchNamesArg suggestionsConfig)], + help = + ( P.wrapColumn2 + [ (makeExample syncFromCodebase ["./codebase", "/feature", "/main"], "Sets the /feature branch to the contents of the codebase at ./codebase.") + ] + ), + parse = \case + [codebaseLocation, branchToSync, destinationBranch] -> Input.SyncFromCodebaseI <$> unsupportedStructuredArgument makeStandalone "a file name" codebaseLocation <*> handleBranchWithOptionalProject branchToSync <*> handleBranchWithOptionalProject destinationBranch + args -> wrongArgsLength "three arguments" args + } + where + suggestionsConfig = + ProjectBranchSuggestionsConfig + { showProjectCompletions = True, + projectInclusion = AllProjects, + branchInclusion = AllBranches + } + mergeOldSquashInputPattern :: InputPattern mergeOldSquashInputPattern = InputPattern @@ -3758,6 +3783,7 @@ validInputs = pushForce, syncToFile, syncFromFile, + syncFromCodebase, quit, releaseDraft, renameBranch, diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 6772f238a7..e9e48c4808 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -5,6 +5,7 @@ module Unison.Share.SyncV2 ( syncFromCodeserver, syncFromFile, syncToFile, + syncFromCodebase, ) where @@ -180,11 +181,11 @@ syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallb unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} \header stream -> do - doSync codebase header stream + streamIntoCodebase codebase header stream mapExceptT liftIO (afterSyncChecks codebase hash) -doSync :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () -doSync codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do +streamIntoCodebase :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () +streamIntoCodebase codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do case version of (SyncV2.Version 1) -> pure () v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v @@ -226,19 +227,32 @@ syncFromFile syncFilePath = do header <- mapExceptT C.runResourceT $ do let stream = C.sourceFile syncFilePath C..| C.ungzip C..| decodeUnframedEntities (header, rest) <- initializeStream stream - doSync codebase header rest + streamIntoCodebase codebase header rest pure header afterSyncChecks codebase (SyncV2.rootCausalHash header) pure . hash32ToCausalHash $ SyncV2.rootCausalHash header -syncToFile :: +syncFromCodebase :: + -- | The codebase to sync from. + Sqlite.Connection -> + (Codebase.Codebase IO v a) -> + -- | The hash to sync. + CausalHash -> + IO (Either (SyncError SyncV2.PullError) ()) +syncFromCodebase srcConn destCodebase causalHash = do + liftIO . C.runResourceT . runExceptT $ withEntityStream srcConn causalHash Nothing \entityStream -> do + (header, rest) <- initializeStream entityStream + streamIntoCodebase destCodebase header rest + mapExceptT liftIO (afterSyncChecks destCodebase (causalHashToHash32 causalHash)) + +withEntityStream :: + Sqlite.Connection -> CausalHash -> Maybe SyncV2.BranchRef -> - FilePath -> - Cli () -syncToFile rootHash mayBranchRef destFilePath = do - Cli.Env {codebase} <- ask - entities <- liftIO $ Codebase.runTransaction codebase (depsForCausal rootHash) + (Stream () SyncV2.DownloadEntitiesChunk -> StreamM ()) -> + StreamM () +withEntityStream conn rootHash mayBranchRef callback = do + entities <- liftIO $ Sqlite.runTransaction conn (depsForCausal rootHash) let initialChunk = SyncV2.InitialC ( SyncV2.StreamInitInfo @@ -259,7 +273,19 @@ syncToFile rootHash mayBranchRef destFilePath = do in SyncV2.EntityC (SyncV2.EntityChunk {hash, entityCBOR}) ) & (initialChunk :) - liftIO . C.runResourceT $ C.runConduit $ C.yieldMany contents C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| C.sinkFile destFilePath + let stream = C.yieldMany contents + callback stream + +syncToFile :: + Codebase.Codebase IO v a -> + CausalHash -> + Maybe SyncV2.BranchRef -> + FilePath -> + IO (Either PullErr ()) +syncToFile codebase rootHash mayBranchRef destFilePath = do + liftIO $ Codebase.withConnection codebase \conn -> do + liftIO . C.runResourceT . runExceptT $ withEntityStream conn rootHash mayBranchRef \stream -> do + C.runConduit $ stream C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| C.sinkFile destFilePath -- | Collect all dependencies of a given causal hash. depsForCausal :: CausalHash -> Sqlite.Transaction (Map Hash32 (Sync.Entity Text Hash32 Hash32)) From 1f5ffa5b42a052662c6f916e6006dba111065130 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 19 Dec 2024 14:58:33 -0600 Subject: [PATCH 48/52] SyncV2 cleanup --- unison-cli/src/Unison/Share/SyncV2.hs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index e9e48c4808..1666ea6c4c 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -66,9 +66,9 @@ import UnliftIO qualified as IO type Stream i o = ConduitT i o StreamM () -type PullErr = SyncError SyncV2.PullError +type SyncErr = SyncError SyncV2.PullError -type StreamM = (ExceptT PullErr (C.ResourceT IO)) +type StreamM = (ExceptT SyncErr (C.ResourceT IO)) batchSize :: Int batchSize = 5000 @@ -115,7 +115,7 @@ syncSortedStream codebase stream = do validateAndSave codebase (catMaybes entityBatch) C.runConduit $ stream C..| C.chunksOf batchSize C..| handler -unpackChunk :: SyncV2.EntityChunk -> ExceptT PullErr Sqlite.Transaction (Maybe (Hash32, TempEntity)) +unpackChunk :: SyncV2.EntityChunk -> ExceptT SyncErr Sqlite.Transaction (Maybe (Hash32, TempEntity)) unpackChunk = \case SyncV2.EntityChunk {hash, entityCBOR = entityBytes} -> do -- Only want entities we don't already have @@ -124,18 +124,18 @@ unpackChunk = \case _ -> do (Just . (hash,)) <$> unpackEntity entityBytes where - unpackEntity :: (CBORBytes TempEntity) -> ExceptT PullErr Sqlite.Transaction TempEntity + unpackEntity :: (CBORBytes TempEntity) -> ExceptT SyncErr Sqlite.Transaction TempEntity unpackEntity entityBytes = do case CBOR.deserialiseOrFailCBORBytes entityBytes of Left err -> do throwError $ (SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err) Right entity -> pure entity -unpackChunks :: [SyncV2.EntityChunk] -> ExceptT PullErr Sqlite.Transaction [(Hash32, TempEntity)] +unpackChunks :: [SyncV2.EntityChunk] -> ExceptT SyncErr Sqlite.Transaction [(Hash32, TempEntity)] unpackChunks xs = do for xs unpackChunk <&> catMaybes -batchValidateEntities :: [(Hash32, TempEntity)] -> ExceptT PullErr IO () +batchValidateEntities :: [(Hash32, TempEntity)] -> ExceptT SyncErr IO () batchValidateEntities entities = do mismatches <- fmap catMaybes $ liftIO $ IO.pooledForConcurrently entities \(hash, entity) -> do IO.evaluate $ EV.validateTempEntity hash entity @@ -281,7 +281,7 @@ syncToFile :: CausalHash -> Maybe SyncV2.BranchRef -> FilePath -> - IO (Either PullErr ()) + IO (Either SyncErr ()) syncToFile codebase rootHash mayBranchRef destFilePath = do liftIO $ Codebase.withConnection codebase \conn -> do liftIO . C.runResourceT . runExceptT $ withEntityStream conn rootHash mayBranchRef \stream -> do @@ -327,13 +327,13 @@ decodeUnframedEntities = C.transPipe (mapExceptT (lift . stToIO)) $ do d <- newDecoder loop bs d where - newDecoder :: ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) + newDecoder :: ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT SyncErr (ST s)) (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) newDecoder = do (lift . lift) CBOR.deserialiseIncremental >>= \case CBOR.Done _ _ _ -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorStreamFailure "Invalid initial decoder" CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err CBOR.Partial k -> pure k - loop :: ByteString -> (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) -> ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT PullErr (ST s)) () + loop :: ByteString -> (Maybe ByteString -> ST s (CBOR.IDecode s (SyncV2.DownloadEntitiesChunk))) -> ConduitT ByteString SyncV2.DownloadEntitiesChunk (ExceptT SyncErr (ST s)) () loop bs k = do (lift . lift) (k (Just bs)) >>= \case CBOR.Fail _ _ err -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorDeserializationFailure err From 0433202258a74da9c46231ac84e772064ea21a51 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 19 Dec 2024 14:58:33 -0600 Subject: [PATCH 49/52] Remove unused temp-table changes --- .../sql/015-syncv2-temp-entity-tables.sql | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql diff --git a/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql b/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql deleted file mode 100644 index e125539afa..0000000000 --- a/codebase2/codebase-sqlite/sql/015-syncv2-temp-entity-tables.sql +++ /dev/null @@ -1,53 +0,0 @@ --- Copy-paste of the original temp entity tables, but without hashjwts since syncv2 doesn't use them. -DROP TABLE temp_entity_missing_dependency; -DROP TABLE temp_entity; - --- A "temp entity" is a term/decl/namespace/patch/causal that we cannot store in the database proper due to missing --- dependencies. --- --- The existence of each `temp_entity` row implies the existence of one or more corresponding --- `temp_entity_missing_dependency` rows: it does not make sense to make a `temp_entity` row for a thing that has no --- missing dependencies! --- --- Similarly, each `temp_entity` row implies we do not have the entity in the database proper. When and if we *do* store --- an entity proper (after storing all of its dependencies), we should always atomically delete the corresponding --- `temp_entity` row, if any. -create table if not exists temp_entity ( - hash text primary key not null, - blob bytes not null, - type_id integer not null references temp_entity_type_description(id) -); - --- A many-to-many relationship between `temp_entity` (entities we can't yet store due to missing dependencies), and the --- non-empty set of hashes of each entity's dependencies. --- --- We store with each missing dependency the JWT that Unison Share provided us to download that dependency. For --- downloading a particular dependency #bar, we only need one JWT, even if it's in the table multiple times. (In fact, --- in this case, there is one "best" JWT - the one with the latest expiry time). --- --- The JWTs are also encoded in the local ids part of entity itself (`temp_entity.blob`), but we don't want to have to --- keep going back there there to decode during a pull. --- --- For example, if we wanted to store term #foo, but couldn't because it depends on term #bar which we don't have yet, --- we would end up with the following rows. --- --- temp_entity --- +------------------------+ --- | hash | blob | type_id | --- |========================| --- | #foo | ... | 0 (term) | --- +------------------------+ --- --- temp_entity_missing_dependency --- +----------------------------------------+ --- | dependent | dependency | dependencyJwt | --- |========================================| --- | #foo | #bar | aT.Eb.cx | --- +----------------------------------------+ -create table if not exists temp_entity_missing_dependency ( - dependent text not null references temp_entity(hash), - dependency text not null, - unique (dependent, dependency) -); -create index if not exists temp_entity_missing_dependency_ix_dependent on temp_entity_missing_dependency (dependent); -create index if not exists temp_entity_missing_dependency_ix_dependency on temp_entity_missing_dependency (dependency); From 7657cfcb1b1c5390be016d9195e29487689772eb Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 19 Dec 2024 15:37:01 -0600 Subject: [PATCH 50/52] Skip validation if syncing from Share Prod --- .../unison-codebase-sqlite.cabal | 3 +- unison-cli/src/Unison/Cli/DownloadUtils.hs | 50 ++++++++++++------- .../src/Unison/Codebase/Editor/HandleInput.hs | 8 +-- .../Codebase/Editor/HandleInput/InstallLib.hs | 2 +- .../Editor/HandleInput/ProjectClone.hs | 4 +- .../Editor/HandleInput/ProjectCreate.hs | 4 +- .../Codebase/Editor/HandleInput/Pull.hs | 1 + .../HandleInput/{SyncFile.hs => SyncV2.hs} | 15 ++++-- unison-cli/src/Unison/Share/SyncV2.hs | 42 +++++++++------- unison-cli/unison-cli.cabal | 2 +- 10 files changed, 78 insertions(+), 53 deletions(-) rename unison-cli/src/Unison/Codebase/Editor/HandleInput/{SyncFile.hs => SyncV2.hs} (84%) diff --git a/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal b/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal index beca90fa76..48431ee573 100644 --- a/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal +++ b/codebase2/codebase-sqlite/unison-codebase-sqlite.cabal @@ -1,6 +1,6 @@ cabal-version: 1.12 --- This file has been generated from package.yaml by hpack version 0.36.0. +-- This file has been generated from package.yaml by hpack version 0.37.0. -- -- see: https://github.com/sol/hpack @@ -24,7 +24,6 @@ extra-source-files: sql/012-add-current-project-path-table.sql sql/013-add-project-branch-reflog-table.sql sql/014-add-project-branch-causal-hash-id.sql - sql/015-syncv2-temp-entity-tables.sql sql/create.sql source-repository head diff --git a/unison-cli/src/Unison/Cli/DownloadUtils.hs b/unison-cli/src/Unison/Cli/DownloadUtils.hs index 08142a808e..fb53a84176 100644 --- a/unison-cli/src/Unison/Cli/DownloadUtils.hs +++ b/unison-cli/src/Unison/Cli/DownloadUtils.hs @@ -4,6 +4,7 @@ module Unison.Cli.DownloadUtils ( downloadProjectBranchFromShare, downloadLooseCodeFromShare, + SyncVersion (..), ) where @@ -35,13 +36,16 @@ import Unison.Sync.Common qualified as Sync.Common import Unison.Sync.Types qualified as Share import Unison.SyncV2.Types qualified as SyncV2 +data SyncVersion = SyncV1 | SyncV2 + -- | Download a project/branch from Share. downloadProjectBranchFromShare :: (HasCallStack) => + SyncVersion -> Share.IncludeSquashedHead -> Share.RemoteProjectBranch -> Cli (Either Output.ShareError CausalHash) -downloadProjectBranchFromShare useSquashed branch = +downloadProjectBranchFromShare syncVersion useSquashed branch = Cli.labelE \done -> do let remoteProjectBranchName = branch.branchName causalHashJwt <- @@ -51,24 +55,32 @@ downloadProjectBranchFromShare useSquashed branch = (Share.NoSquashedHead, _) -> pure branch.branchHead exists <- Cli.runTransaction (Queries.causalExistsByHash32 (Share.hashJWTHash causalHashJwt)) when (not exists) do - (result, numDownloaded) <- do - -- Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do - let branchRef = SyncV2.BranchRef (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) - -- TODO: Fill this in. - let knownHashes = Set.empty - let downloadedCallback = \_ -> pure () - result <- SyncV2.syncFromCodeserver Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback - -- numDownloaded <- liftIO getNumDownloaded - let numDownloaded = 0 - pure (result, numDownloaded) - result & onLeft \err0 -> do - done case err0 of - Share.SyncError err -> - -- TODO: Fix this - error (show err) - -- Output.ShareErrorDownloadEntities err - Share.TransportError err -> Output.ShareErrorTransport err - Cli.respond (Output.DownloadedEntities numDownloaded) + case syncVersion of + SyncV1 -> do + let repoInfo = Share.RepoInfo (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) + Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do + result <- Share.downloadEntities Share.hardCodedBaseUrl repoInfo causalHashJwt downloadedCallback + numDownloaded <- liftIO getNumDownloaded + result & onLeft \err0 -> do + done case err0 of + Share.SyncError err -> Output.ShareErrorDownloadEntities err + Share.TransportError err -> Output.ShareErrorTransport err + Cli.respond (Output.DownloadedEntities numDownloaded) + SyncV2 -> do + -- Cli.with withEntitiesDownloadedProgressCallback \(downloadedCallback, getNumDownloaded) -> do + let branchRef = SyncV2.BranchRef (into @Text (ProjectAndBranch branch.projectName remoteProjectBranchName)) + -- TODO: Fill this in. + let knownHashes = Set.empty + let downloadedCallback = \_ -> pure () + let shouldValidate = not $ Codeserver.isCustomCodeserver Codeserver.defaultCodeserver + result <- SyncV2.syncFromCodeserver shouldValidate Share.hardCodedBaseUrl branchRef causalHashJwt knownHashes downloadedCallback + result & onLeft \err0 -> do + done case err0 of + Share.SyncError err -> + -- TODO: Fix this + error (show err) + -- Output.ShareErrorDownloadEntities err + Share.TransportError err -> Output.ShareErrorTransport err pure (Sync.Common.hash32ToCausalHash (Share.hashJWTHash causalHashJwt)) -- | Download loose code from Share. diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs index 43a05c2fde..e3e78c6575 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput.hs @@ -87,7 +87,7 @@ import Unison.Codebase.Editor.HandleInput.ReleaseDraft (handleReleaseDraft) import Unison.Codebase.Editor.HandleInput.Run (handleRun) import Unison.Codebase.Editor.HandleInput.RuntimeUtils qualified as RuntimeUtils import Unison.Codebase.Editor.HandleInput.ShowDefinition (handleShowDefinition) -import Unison.Codebase.Editor.HandleInput.SyncFile qualified as SyncFile +import Unison.Codebase.Editor.HandleInput.SyncV2 qualified as SyncV2 import Unison.Codebase.Editor.HandleInput.TermResolution (resolveMainRef) import Unison.Codebase.Editor.HandleInput.Tests qualified as Tests import Unison.Codebase.Editor.HandleInput.Todo (handleTodo) @@ -689,17 +689,17 @@ loop e = do Cli.respond Success PullI sourceTarget pullMode -> handlePull sourceTarget pullMode PushRemoteBranchI pushRemoteBranchInput -> handlePushRemoteBranch pushRemoteBranchInput - SyncToFileI syncFileDest projectBranchName -> SyncFile.handleSyncToFile syncFileDest projectBranchName + SyncToFileI syncFileDest projectBranchName -> SyncV2.handleSyncToFile syncFileDest projectBranchName SyncFromFileI syncFileSrc projectBranchName -> do description <- inputDescription input - SyncFile.handleSyncFromFile description syncFileSrc projectBranchName + SyncV2.handleSyncFromFile description syncFileSrc projectBranchName SyncFromCodebaseI srcCodebasePath srcBranch destBranch -> do description <- inputDescription input let srcBranch' = srcBranch & over #project \case Nothing -> error "todo" Just proj -> proj - SyncFile.handleSyncFromCodebase description srcCodebasePath srcBranch' destBranch + SyncV2.handleSyncFromCodebase description srcCodebasePath srcBranch' destBranch ListDependentsI hq -> handleDependents hq ListDependenciesI hq -> handleDependencies hq NamespaceDependenciesI path -> handleNamespaceDependencies path diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/InstallLib.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/InstallLib.hs index 52e70188c8..299f30ba47 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/InstallLib.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/InstallLib.hs @@ -60,7 +60,7 @@ handleInstallLib remind (ProjectAndBranch libdepProjectName unresolvedLibdepBran Cli.Env {codebase} <- ask causalHash <- - downloadProjectBranchFromShare Share.IncludeSquashedHead libdepProjectBranch + downloadProjectBranchFromShare SyncV1 Share.IncludeSquashedHead libdepProjectBranch & onLeftM (Cli.returnEarly . Output.ShareError) remoteBranchObject <- liftIO (Codebase.expectBranchForHash codebase causalHash) diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectClone.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectClone.hs index 8a872d18b8..670a730b5e 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectClone.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectClone.hs @@ -13,7 +13,7 @@ import U.Codebase.Sqlite.Project qualified as Sqlite (Project (..)) import U.Codebase.Sqlite.ProjectBranch qualified as Sqlite import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.Queries qualified as Queries -import Unison.Cli.DownloadUtils (downloadProjectBranchFromShare) +import Unison.Cli.DownloadUtils (SyncVersion (..), downloadProjectBranchFromShare) import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli import Unison.Cli.MonadUtils qualified as Cli (getCurrentProjectAndBranch) @@ -225,7 +225,7 @@ cloneInto localProjectBranch remoteProjectBranch = do let remoteProjectBranchNames = ProjectAndBranch remoteProjectName remoteBranchName branchHead <- - downloadProjectBranchFromShare Share.NoSquashedHead remoteProjectBranch + downloadProjectBranchFromShare SyncV1 Share.NoSquashedHead remoteProjectBranch & onLeftM (Cli.returnEarly . Output.ShareError) localProjectAndBranch <- diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectCreate.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectCreate.hs index e9f6e99e95..0096a91d8d 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectCreate.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/ProjectCreate.hs @@ -13,7 +13,7 @@ import U.Codebase.Sqlite.Project (Project (..)) import U.Codebase.Sqlite.ProjectBranch (ProjectBranch (..)) import U.Codebase.Sqlite.Queries (expectCausalHashIdByCausalHash) import U.Codebase.Sqlite.Queries qualified as Queries -import Unison.Cli.DownloadUtils (downloadProjectBranchFromShare) +import Unison.Cli.DownloadUtils (SyncVersion (..), downloadProjectBranchFromShare) import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli import Unison.Cli.Share.Projects qualified as Share @@ -108,7 +108,7 @@ projectCreate tryDownloadingBase maybeProjectName = do Share.GetProjectBranchResponseBranchNotFound -> done Nothing Share.GetProjectBranchResponseProjectNotFound -> done Nothing Share.GetProjectBranchResponseSuccess branch -> pure branch - downloadProjectBranchFromShare Share.NoSquashedHead baseLatestReleaseBranch + downloadProjectBranchFromShare SyncV1 Share.NoSquashedHead baseLatestReleaseBranch & onLeftM (Cli.returnEarly . Output.ShareError) Cli.Env {codebase} <- ask baseLatestReleaseBranchObject <- diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/Pull.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/Pull.hs index 3ff7012220..42aebf0299 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/Pull.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/Pull.hs @@ -59,6 +59,7 @@ handlePull unresolvedSourceAndTarget pullMode = do ReadShare'LooseCode repo -> downloadLooseCodeFromShare repo & onLeftM (Cli.returnEarly . Output.ShareError) ReadShare'ProjectBranch remoteBranch -> downloadProjectBranchFromShare + SyncV1 ( case pullMode of Input.PullWithHistory -> Share.NoSquashedHead Input.PullWithoutHistory -> Share.IncludeSquashedHead diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncV2.hs similarity index 84% rename from unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs rename to unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncV2.hs index 9789657f8e..6bcf1b7aa1 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncFile.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/SyncV2.hs @@ -1,17 +1,21 @@ -module Unison.Codebase.Editor.HandleInput.SyncFile +module Unison.Codebase.Editor.HandleInput.SyncV2 ( handleSyncToFile, handleSyncFromFile, handleSyncFromCodebase, + handleSyncFromCodeserver, ) where import Control.Lens import Control.Monad.Reader (MonadReader (..)) +import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q +import Unison.Cli.DownloadUtils (SyncVersion, downloadProjectBranchFromShare) import Unison.Cli.Monad (Cli) import Unison.Cli.Monad qualified as Cli import Unison.Cli.MonadUtils qualified as Cli import Unison.Cli.ProjectUtils qualified as Project +import Unison.Cli.Share.Projects qualified as Projects import Unison.Codebase (CodebasePath) import Unison.Codebase qualified as Codebase import Unison.Codebase.Editor.Output qualified as Output @@ -37,7 +41,8 @@ handleSyncFromFile :: Text -> FilePath -> ProjectAndBranch (Maybe ProjectName) P handleSyncFromFile description srcSyncFile branchToSync = do pp <- Cli.getCurrentProjectPath projectBranch <- Project.resolveProjectBranchInProject (pp ^. #project) (over #branch Just branchToSync) - SyncV2.syncFromFile srcSyncFile >>= \case + let shouldValidate = True + SyncV2.syncFromFile shouldValidate srcSyncFile >>= \case Left err -> Cli.respond (Output.SyncPullError err) Right causalHash -> do Cli.setProjectBranchRootToCausalHash (projectBranch ^. #branch) description causalHash @@ -58,10 +63,14 @@ handleSyncFromCodebase description srcCodebasePath srcBranch destBranch = do case maySrcCausalHash of Nothing -> pure $ Left (error "Todo proper error") Just srcCausalHash -> do - fmap (const srcCausalHash) <$> liftIO (SyncV2.syncFromCodebase srcConn codebase srcCausalHash) + let shouldValidate = True + fmap (const srcCausalHash) <$> liftIO (SyncV2.syncFromCodebase shouldValidate srcConn codebase srcCausalHash) case r of Left _err -> pure $ error "Todo proper error" Right (Left syncErr) -> Cli.respond (Output.SyncPullError syncErr) Right (Right causalHash) -> do Cli.setProjectBranchRootToCausalHash (projectBranch ^. #branch) description causalHash + +handleSyncFromCodeserver :: SyncVersion -> Projects.IncludeSquashedHead -> Projects.RemoteProjectBranch -> Cli (Either Output.ShareError CausalHash) +handleSyncFromCodeserver = downloadProjectBranchFromShare diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index 1666ea6c4c..cb31b47ca4 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -76,10 +76,10 @@ batchSize = 5000 ------------------------------------------------------------------------------------------------------------------------ -- Download entities -validateAndSave :: (Codebase.Codebase IO v a) -> [(Hash32, TempEntity)] -> StreamM () -validateAndSave codebase entities = do - let validateEntities = do - runExceptT (batchValidateEntities entities) +validateAndSave :: Bool -> (Codebase.Codebase IO v a) -> [(Hash32, TempEntity)] -> StreamM () +validateAndSave shouldValidate codebase entities = do + let validateEntities = + runExceptT $ when shouldValidate (batchValidateEntities entities) -- Validation is slow, run it in parallel with insertion, but don't commit the transaction until we're done -- validation. ExceptT . liftIO $ IO.withAsync validateEntities \validationTask -> do @@ -92,27 +92,29 @@ validateAndSave codebase entities = do -- | Syncs a stream which could send entities in any order. syncUnsortedStream :: + Bool -> (Codebase.Codebase IO v a) -> Stream () SyncV2.EntityChunk -> StreamM () -syncUnsortedStream codebase stream = do +syncUnsortedStream shouldValidate codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing unsorted stream" allResults <- C.runConduit $ stream C..| C.sinkList allEntities <- ExceptT $ Timing.time "Unpacking chunks" $ liftIO $ Codebase.runTransactionExceptT codebase $ do unpackChunks allResults let sortedEntities = sortDependencyFirst allEntities - validateAndSave codebase sortedEntities + validateAndSave shouldValidate codebase sortedEntities -- | Syncs a stream which sends entities which are already sorted in dependency order. syncSortedStream :: + Bool -> (Codebase.Codebase IO v a) -> Stream () SyncV2.EntityChunk -> StreamM () -syncSortedStream codebase stream = do +syncSortedStream shouldValidate codebase stream = do Debug.debugLogM Debug.Temp $ "Syncing sorted stream" let handler :: Stream [SyncV2.EntityChunk] o handler = C.mapM_C \chunkBatch -> do entityBatch <- mapExceptT lift . ExceptT $ Codebase.runTransactionExceptT codebase do for chunkBatch unpackChunk - validateAndSave codebase (catMaybes entityBatch) + validateAndSave shouldValidate codebase (catMaybes entityBatch) C.runConduit $ stream C..| C.chunksOf batchSize C..| handler unpackChunk :: SyncV2.EntityChunk -> ExceptT SyncErr Sqlite.Transaction (Maybe (Hash32, TempEntity)) @@ -155,6 +157,7 @@ batchValidateEntities entities = do throwError . SyncError . SyncV2.PullError'DownloadEntities . SyncV2.DownloadEntitiesEntityValidationFailure $ err syncFromCodeserver :: + Bool -> -- | The Unison Share URL. Servant.BaseUrl -> -- | The branch to download from. @@ -165,7 +168,7 @@ syncFromCodeserver :: -- | Callback that's given a number of entities we just downloaded. (Int -> IO ()) -> Cli (Either (SyncError SyncV2.PullError) ()) -syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do +syncFromCodeserver shouldValidate unisonShareUrl branchRef hashJwt knownHashes _downloadedCallback = do Cli.Env {authHTTPClient, codebase} <- ask runExceptT do let hash = Share.hashJWTHash hashJwt @@ -181,18 +184,18 @@ syncFromCodeserver unisonShareUrl branchRef hashJwt knownHashes _downloadedCallb unisonShareUrl SyncV2.DownloadEntitiesRequest {branchRef, causalHash = hashJwt, knownHashes} \header stream -> do - streamIntoCodebase codebase header stream + streamIntoCodebase shouldValidate codebase header stream mapExceptT liftIO (afterSyncChecks codebase hash) -streamIntoCodebase :: Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () -streamIntoCodebase codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do +streamIntoCodebase :: Bool -> Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () +streamIntoCodebase shouldValidate codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do case version of (SyncV2.Version 1) -> pure () v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v case entitySorting of - SyncV2.DependenciesFirst -> syncSortedStream codebase stream - SyncV2.Unsorted -> syncUnsortedStream codebase stream + SyncV2.DependenciesFirst -> syncSortedStream shouldValidate codebase stream + SyncV2.Unsorted -> syncUnsortedStream shouldValidate codebase stream afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> ExceptT (SyncError SyncV2.PullError) IO () afterSyncChecks codebase hash = do @@ -216,10 +219,11 @@ sortDependencyFirst entities = do in Graph.reverseTopSort graph <&> \v -> (view _1 $ vertexInfo v) syncFromFile :: + Bool -> -- | Location of the sync-file FilePath -> Cli (Either (SyncError SyncV2.PullError) CausalHash) -syncFromFile syncFilePath = do +syncFromFile shouldValidate syncFilePath = do Cli.Env {codebase} <- ask runExceptT do Debug.debugLogM Debug.Temp $ "Kicking off sync" @@ -227,22 +231,23 @@ syncFromFile syncFilePath = do header <- mapExceptT C.runResourceT $ do let stream = C.sourceFile syncFilePath C..| C.ungzip C..| decodeUnframedEntities (header, rest) <- initializeStream stream - streamIntoCodebase codebase header rest + streamIntoCodebase shouldValidate codebase header rest pure header afterSyncChecks codebase (SyncV2.rootCausalHash header) pure . hash32ToCausalHash $ SyncV2.rootCausalHash header syncFromCodebase :: + Bool -> -- | The codebase to sync from. Sqlite.Connection -> (Codebase.Codebase IO v a) -> -- | The hash to sync. CausalHash -> IO (Either (SyncError SyncV2.PullError) ()) -syncFromCodebase srcConn destCodebase causalHash = do +syncFromCodebase shouldValidate srcConn destCodebase causalHash = do liftIO . C.runResourceT . runExceptT $ withEntityStream srcConn causalHash Nothing \entityStream -> do (header, rest) <- initializeStream entityStream - streamIntoCodebase destCodebase header rest + streamIntoCodebase shouldValidate destCodebase header rest mapExceptT liftIO (afterSyncChecks destCodebase (causalHashToHash32 causalHash)) withEntityStream :: @@ -441,7 +446,6 @@ httpStreamEntities (Auth.AuthenticatedHttpClient httpClient) unisonShareUrl req -- | Peel the header off the stream and parse the remaining entity chunks. initializeStream :: Stream () SyncV2.DownloadEntitiesChunk -> StreamM (SyncV2.StreamInitInfo, Stream () SyncV2.EntityChunk) initializeStream stream = do - Debug.debugLogM Debug.Temp $ "Peeling options off stream" (streamRemainder, init) <- stream C.$$+ C.headC Debug.debugM Debug.Temp "Got initial chunk: " init case init of diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index 6db8fc87e1..dbdc009a7a 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -88,7 +88,7 @@ library Unison.Codebase.Editor.HandleInput.Run Unison.Codebase.Editor.HandleInput.RuntimeUtils Unison.Codebase.Editor.HandleInput.ShowDefinition - Unison.Codebase.Editor.HandleInput.SyncFile + Unison.Codebase.Editor.HandleInput.SyncV2 Unison.Codebase.Editor.HandleInput.TermResolution Unison.Codebase.Editor.HandleInput.Tests Unison.Codebase.Editor.HandleInput.Todo From 6019d147c8eea667260195ceb6181ac7b10f90f6 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 19 Dec 2024 17:06:27 -0600 Subject: [PATCH 51/52] Wire in progress tracking --- unison-cli/src/Unison/Share/SyncV2.hs | 54 +++++++++++++++++++-------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index cb31b47ca4..f7280b48ca 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -36,6 +36,7 @@ import Servant.API qualified as Servant import Servant.Client.Streaming qualified as Servant import Servant.Conduit () import Servant.Types.SourceT qualified as Servant +import System.Console.Regions qualified as Console.Regions import U.Codebase.HashTags (CausalHash) import U.Codebase.Sqlite.Queries qualified as Q import U.Codebase.Sqlite.TempEntity (TempEntity) @@ -175,7 +176,6 @@ syncFromCodeserver shouldValidate unisonShareUrl branchRef hashJwt knownHashes _ ExceptT $ do (Cli.runTransaction (Q.entityLocation hash)) >>= \case Just Q.EntityInMainStorage -> pure $ Right () - -- Just Q.EntityInTempStorage -> error "TODO: implement temp storage handler" _ -> do Debug.debugLogM Debug.Temp $ "Kicking off sync request" Timing.time "Entity Download" $ do @@ -188,14 +188,16 @@ syncFromCodeserver shouldValidate unisonShareUrl branchRef hashJwt knownHashes _ mapExceptT liftIO (afterSyncChecks codebase hash) streamIntoCodebase :: Bool -> Codebase.Codebase IO v a -> SyncV2.StreamInitInfo -> Stream () SyncV2.EntityChunk -> StreamM () -streamIntoCodebase shouldValidate codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = _todo} stream = do - case version of - (SyncV2.Version 1) -> pure () - v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v +streamIntoCodebase shouldValidate codebase SyncV2.StreamInitInfo {version, entitySorting, numEntities = numEntities} stream = ExceptT do + withStreamProgressCallback (fromIntegral <$> numEntities) \countC -> runExceptT do + let stream' = stream C..| countC + case version of + (SyncV2.Version 1) -> pure () + v -> throwError . SyncError . SyncV2.PullError'Sync $ SyncV2.SyncErrorUnsupportedVersion v - case entitySorting of - SyncV2.DependenciesFirst -> syncSortedStream shouldValidate codebase stream - SyncV2.Unsorted -> syncUnsortedStream shouldValidate codebase stream + case entitySorting of + SyncV2.DependenciesFirst -> syncSortedStream shouldValidate codebase stream' + SyncV2.Unsorted -> syncUnsortedStream shouldValidate codebase stream' afterSyncChecks :: Codebase.Codebase IO v a -> Hash32 -> ExceptT (SyncError SyncV2.PullError) IO () afterSyncChecks codebase hash = do @@ -245,26 +247,28 @@ syncFromCodebase :: CausalHash -> IO (Either (SyncError SyncV2.PullError) ()) syncFromCodebase shouldValidate srcConn destCodebase causalHash = do - liftIO . C.runResourceT . runExceptT $ withEntityStream srcConn causalHash Nothing \entityStream -> do + liftIO . C.runResourceT . runExceptT $ withEntityStream srcConn causalHash Nothing \_total entityStream -> do (header, rest) <- initializeStream entityStream streamIntoCodebase shouldValidate destCodebase header rest mapExceptT liftIO (afterSyncChecks destCodebase (causalHashToHash32 causalHash)) withEntityStream :: + (MonadIO m) => Sqlite.Connection -> CausalHash -> Maybe SyncV2.BranchRef -> - (Stream () SyncV2.DownloadEntitiesChunk -> StreamM ()) -> - StreamM () + (Int -> Stream () SyncV2.DownloadEntitiesChunk -> m r) -> + m r withEntityStream conn rootHash mayBranchRef callback = do entities <- liftIO $ Sqlite.runTransaction conn (depsForCausal rootHash) + let totalEntities = fromIntegral $ Map.size entities let initialChunk = SyncV2.InitialC ( SyncV2.StreamInitInfo { rootCausalHash = causalHashToHash32 rootHash, version = SyncV2.Version 1, entitySorting = SyncV2.DependenciesFirst, - numEntities = Just . fromIntegral $ Map.size entities, + numEntities = Just $ fromIntegral totalEntities, rootBranchRef = mayBranchRef } ) @@ -279,7 +283,7 @@ withEntityStream conn rootHash mayBranchRef callback = do ) & (initialChunk :) let stream = C.yieldMany contents - callback stream + callback totalEntities stream syncToFile :: Codebase.Codebase IO v a -> @@ -289,8 +293,10 @@ syncToFile :: IO (Either SyncErr ()) syncToFile codebase rootHash mayBranchRef destFilePath = do liftIO $ Codebase.withConnection codebase \conn -> do - liftIO . C.runResourceT . runExceptT $ withEntityStream conn rootHash mayBranchRef \stream -> do - C.runConduit $ stream C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| C.sinkFile destFilePath + C.runResourceT $ + withEntityStream conn rootHash mayBranchRef \mayTotal stream -> do + withStreamProgressCallback (Just mayTotal) \countC -> runExceptT do + C.runConduit $ stream C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| countC C..| C.sinkFile destFilePath -- | Collect all dependencies of a given causal hash. depsForCausal :: CausalHash -> Sqlite.Transaction (Map Hash32 (Sync.Entity Text Hash32 Hash32)) @@ -465,3 +471,21 @@ initializeStream stream = do SyncV2.EntityC chunk -> pure chunk SyncV2.ErrorC (SyncV2.ErrorChunk err) -> throwError . SyncError $ SyncV2.PullError'DownloadEntities err SyncV2.InitialC {} -> throwError . SyncError $ SyncV2.PullError'Sync SyncV2.SyncErrorMisplacedInitialChunk + +-- Provide the given action a callback that display to the terminal. +withStreamProgressCallback :: (MonadIO m, MonadUnliftIO n) => Maybe Int -> (ConduitT i i m () -> n a) -> n a +withStreamProgressCallback total action = do + entitiesDownloadedVar <- IO.newTVarIO (0 :: Int) + IO.withRunInIO \toIO -> do + Console.Regions.displayConsoleRegions do + Console.Regions.withConsoleRegion Console.Regions.Linear \region -> do + Console.Regions.setConsoleRegion region do + entitiesDownloaded <- IO.readTVar entitiesDownloadedVar + pure $ + "\n Processed " + <> tShow entitiesDownloaded + <> maybe "" (\total -> " / " <> tShow total) total + <> " entities...\n\n" + toIO $ action $ C.awaitForever \i -> do + liftIO $ IO.atomically (IO.modifyTVar' entitiesDownloadedVar (+ 1)) + C.yield i From a40bdc049d452b2f5e1ebcdefb60b800905ff3a7 Mon Sep 17 00:00:00 2001 From: Chris Penner Date: Thu, 19 Dec 2024 17:12:04 -0600 Subject: [PATCH 52/52] Fix progress tracking --- unison-cli/src/Unison/Share/SyncV2.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unison-cli/src/Unison/Share/SyncV2.hs b/unison-cli/src/Unison/Share/SyncV2.hs index f7280b48ca..d0a9c7483a 100644 --- a/unison-cli/src/Unison/Share/SyncV2.hs +++ b/unison-cli/src/Unison/Share/SyncV2.hs @@ -296,7 +296,7 @@ syncToFile codebase rootHash mayBranchRef destFilePath = do C.runResourceT $ withEntityStream conn rootHash mayBranchRef \mayTotal stream -> do withStreamProgressCallback (Just mayTotal) \countC -> runExceptT do - C.runConduit $ stream C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| countC C..| C.sinkFile destFilePath + C.runConduit $ stream C..| countC C..| C.map (BL.toStrict . CBOR.serialise) C..| C.transPipe liftIO C.gzip C..| C.sinkFile destFilePath -- | Collect all dependencies of a given causal hash. depsForCausal :: CausalHash -> Sqlite.Transaction (Map Hash32 (Sync.Entity Text Hash32 Hash32))