Skip to content

Commit

Permalink
indexText: added allElementsOf
Browse files Browse the repository at this point in the history
  • Loading branch information
agocorona committed Apr 24, 2013
1 parent 80d4f04 commit 304eb8d
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 12 deletions.
50 changes: 46 additions & 4 deletions Data/TCache.hs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,49 @@ gives:
,addTrigger

-- * Cache control
{-- |
The mechanism for dropping elements from the cache is too lazy. `flushDBRef`, for example
just delete the data element from the TVar, but the TVar node
remains attached to the table so there is no decrement on the number of elements.
The element is garbage collected unless you have a direct reference to the element, not the DBRef
Note that you can still have a valid reference to this element, but this element is no longer
in the cache. The usual thing is that you do not have it, and the element will be garbage
collected (but still there will be a NotRead entry for this key!!!). If the DBRef is read again, the
TCache will go to permanent storage to retrieve it.
clear opertions such `clearsyncCache` does something similar: it does not delete the
element from the cache. It just inform the garbage collector that there is no longer necessary to maintain
the element in the cache. So if the element has no other references (maybe you keep a
variable that point to that DBRef) it will be GCollected.
If this is not possible, it will remain in the cache and will be treated as such,
until the DBRef is no longer referenced by the program. This is done by means of a weak pointer
All these complications are necessary because the programmer can handle DBRefs directly,
so the cache has no complete control of the DBRef life cycle, short to speak.
a DBRef can be in the states:
- `Exist`: it is in the cache
- `DoesNotExist`: neither is in the cache neither in storage: it is like a cached "notfound" to
speed up repeated failed requests
- `NotRead`: may exist or not in permanent storage, but not in the cache
In terms of Garbage collection it may be:
1 - pending garbage collection: attached to the hashtable by means of a weak pointer: delete it asap
2 - cached: attached by a direct pointer and a weak pointer: It is being cached
clearsyncCache just pass elements from 2 to 1
--}
,flushDBRef
,flushKey
,invalidateKey
Expand Down Expand Up @@ -530,7 +573,7 @@ onNothing io onerr= do
flushDBRef :: (IResource a, Typeable a) =>DBRef a -> STM()
flushDBRef (DBRef _ tv)= writeTVar tv NotRead

-- flush the element with the given key
-- | flush the element with the given key
flushKey key= do
(cache,time) <- unsafeIOToSTM $ readIORef refcache
c <- unsafeIOToSTM $ H.lookup cache key
Expand All @@ -542,7 +585,7 @@ flushKey key= do
Nothing -> unsafeIOToSTM (finalize w) >> flushKey key
Nothing -> return ()

-- label the object as not existent in database
-- | label the object as not existent in database
invalidateKey key= do
(cache,time) <- unsafeIOToSTM $ readIORef refcache
c <- unsafeIOToSTM $ H.lookup cache key
Expand Down Expand Up @@ -758,6 +801,7 @@ updateListToHash hash kv= mapM (update1 hash) kv where


-- | Start the thread that periodically call `clearSyncCache` to clean and writes on the persistent storage.
-- it is indirecly set by means of `syncWrite`, since it is more higuer level. I recommend to use the latter
-- Otherwise, 'syncCache' or `clearSyncCache` or `atomicallySync` must be invoked explicitly or no persistence will exist.
-- Cache writes allways save a coherent state
clearSyncCacheProc ::
Expand Down Expand Up @@ -802,8 +846,6 @@ data SyncMode= Synchronous -- ^ sync state to permanent storage when `atomical
tvSyncWrite= unsafePerformIO $ newIORef (Synchronous, Nothing)

-- | Specify the cache synchronization policy with permanent storage. See `SyncMode` for details
--

syncWrite:: SyncMode -> IO()
syncWrite mode= do
(_,thread) <- readIORef tvSyncWrite
Expand Down
28 changes: 20 additions & 8 deletions Data/TCache/IndexText.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ main= do
-}

module Data.TCache.IndexText(indexText, indexList, contains, containsElem) where
module Data.TCache.IndexText(indexText, indexList, contains, containsElem, allElemsOf) where
import Data.TCache
import Data.TCache.IndexQuery
import Data.TCache.Defs
Expand Down Expand Up @@ -141,15 +141,15 @@ indexText
=> (a -> b) -- ^ field to index
-> (b -> T.Text) -- ^ method to convert the field content to lazy Text (for example `pack` in case of String fields). This permits to index non Textual fields
-> IO ()
indexText sel convert= addTrigger (indext sel (words1 . convert)) where
indexText sel convert= addTrigger (indext sel (words1 . convert))

-- | trigger the indexation of list fields with elements convertible to Text
indexList
:: (IResource a, Typeable a, Typeable b)
=> (a -> b) -- ^ field to index
-> (b -> [T.Text]) -- ^ method to convert a field element to Text (for example `pack . show` in case of elemets with Show instances)
-> IO ()
indexList sel convert= addTrigger (indext sel convert) where
indexList sel convert= addTrigger (indext sel convert)


indext :: (IResource a, Typeable a,Typeable b)
Expand Down Expand Up @@ -200,9 +200,21 @@ containsElem sel wstr = do
let wordsr = catMaybes $ map (\n -> M.lookup n mmapIntString) $ catMaybes mns
return $ map getDBRef wordsr

words1= filter ( (<) 2 . T.length) . T.split (\c -> isSeparator c || c=='\n' || isPunctuation c )
allElemsOf :: (IResource a, Typeable a, Typeable b) => (a -> b) -> STM [T.Text]
allElemsOf sel = do
let [t1, t2]= typeRepArgs $! typeOf sel
let t= show t1 ++ show t2
let u= undefined
mr <- withSTMResources [IndexText t u u u u]
$ \[r] -> resources{toReturn= r}
case mr of
Nothing -> return []
Just (IndexText t n _ _ map) -> return $ M.keys map

words1= filter filterWordt {-( (<) 2 . T.length)-} . T.split (\c -> isSeparator c || c=='\n' || isPunctuation c )

-- | return the DBRefs whose fields include all the words of length three or more in the requested text contents
-- | return the DBRefs whose fields include all the words in the requested text contents.Except the
-- words with less than three characters that are not digits or uppercase, that are filtered out before making the query
contains
:: (IResource a, Typeable a, Typeable b)
=>( a -> b) -- ^ field to search in
Expand All @@ -212,8 +224,8 @@ contains sel str= case words str of
[] -> return []
[w] -> containsElem sel w
ws -> do
let rs = map (containsElem sel) $ filter (\t -> length t >2) ws
let rs = map (containsElem sel) $ filter filterWord ws
foldl (.&&.) (head rs) (tail rs)



filterWordt w= T.length w >2 || or (map (\c -> isUpper c || isDigit c) (T.unpack w))
filterWord w= length w >2 || or (map (\c -> isUpper c || isDigit c) w)

0 comments on commit 304eb8d

Please sign in to comment.