#ifndef __NVM_QUEUE_H__
#define __NVM_QUEUE_H__

#ifndef __device__
#define __device__
#endif

#ifndef __host__
#define __host__
#endif

#include <nvm_util.h>
#include <nvm_types.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

/*
 * Clear queue descriptor.
 *
 * Initialize an empty queue descriptor.
 * The user must clear the queue memory manually before using the handle.
 *
 * Note: vaddr must be page-aligned and point to at least one page of memory.
 *
 * Queue memory must be physically contiguous.
 */
__host__
int nvm_queue_clear(nvm_queue_t* q,         // NVM queue descriptor
                    const nvm_ctrl_t* ctrl, // NVM controller handle
                    bool cq,                // Is this a completion queue or submission queue?
                    uint16_t no,            // Queue number
                    uint32_t qs,            // Queue size (number of entries)
                    bool local,             // Is this local or remote memory
                    volatile void* vaddr,   // Virtual address to queue memory
                    uint64_t ioaddr);       // Bus address to queue memory (as seen from the controller)
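
/*
 * Example (sketch): setting up a descriptor for submission queue number 1.
 * This assumes the caller has already allocated physically contiguous,
 * DMA-able queue memory; queue_vaddr, queue_ioaddr, qs and page_size are
 * hypothetical caller-side names, not part of this API.
 *
 *     nvm_queue_t sq;
 *     memset((void*) queue_vaddr, 0, page_size); // caller clears queue memory
 *     int err = nvm_queue_clear(&sq, ctrl, false, 1, qs, true, queue_vaddr, queue_ioaddr);
 *     if (err != 0)
 *     {
 *         // handle invalid arguments
 *     }
 */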

/*
 * Reset queue descriptor and set all members to initial state.
 *
 * Note: this function should not be used if the queue has been created but
 *       not yet deleted, as it will lead to inconsistent state for the
 *       controller.
 */
__host__
void nvm_queue_reset(nvm_queue_t* q);

/*
 * Enqueue a submission command.
 *
 * Enqueue a submission command in the specified SQ and return a pointer to
 * the queue slot in order to build the command inline in queue memory.
 *
 * Returns a pointer to the queue entry, or NULL if the queue is full.
 */
__host__ __device__ static inline
nvm_cmd_t* nvm_sq_enqueue(nvm_queue_t* sq)
{
    // Check if the queue is full
    if (((uint16_t) (sq->tail - sq->head) % sq->qs) == sq->qs - 1)
    {
        return NULL;
    }

    // Take the slot at the tail of the queue
    nvm_cmd_t* cmd = (nvm_cmd_t*) (((unsigned char*) sq->vaddr) + sq->es * sq->tail);

    // Increase the tail pointer and invert the phase tag if necessary
    if (++sq->tail == sq->qs)
    {
        sq->phase = !sq->phase;
        sq->tail = 0;
    }

    return cmd;
}
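
/*
 * Example (sketch): reserving a slot and building a command in place. The
 * command fields are written directly into queue memory; the helpers used
 * to fill them in are left out here, as they belong to other parts of the API.
 *
 *     nvm_cmd_t* cmd = nvm_sq_enqueue(&sq);
 *     if (cmd == NULL)
 *     {
 *         // queue is full; consume completions first
 *     }
 *     else
 *     {
 *         // fill in the command dwords here, then ring the doorbell
 *         nvm_sq_submit(&sq);
 *     }
 */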

/*
 * Enqueue a command as the i'th of n cooperating threads.
 *
 * This function does not check the actual queue state; instead, the caller
 * should store the pointer it last received and pass it to the next call in
 * order to simplify the position calculation. The reason for this is to
 * avoid unnecessary thread synchronisation/barriers.
 *
 * It is therefore important that all completions are consumed before calling
 * this function.
 *
 * Note: n must be less than the queue size.
 *
 * Note: The returned pointer should be stored and passed as the last
 *       parameter to the succeeding call.
 */
#ifdef __CUDACC__
__device__ static inline
nvm_cmd_t* nvm_sq_enqueue_n(nvm_queue_t* sq, nvm_cmd_t* last, uint16_t n, uint16_t i)
{
    unsigned char* start = (unsigned char*) sq->vaddr;
    unsigned char* end = start + (sq->qs * sq->es);
    nvm_cmd_t* cmd = NULL;

    if (n >= sq->qs)
    {
        return NULL;
    }

    if (last == NULL)
    {
        cmd = (nvm_cmd_t*) (start + sq->es * i);
    }
    else
    {
        cmd = (nvm_cmd_t*) (((unsigned char*) last) + n * sq->es);

        // Wrap around if the slot lies past the end of the queue memory
        if (((nvm_cmd_t*) end) <= cmd)
        {
            cmd = (nvm_cmd_t*) (start + (((unsigned char*) cmd) - end));
        }
    }

    // The 0'th thread updates the queue state on behalf of all n threads
    if (i == 0)
    {
        sq->tail = (((uint32_t) sq->tail) + ((uint32_t) n)) % sq->qs;
    }

    // Wait here until the state update is visible to all threads
    __syncthreads();

    return cmd;
}
#endif
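
/*
 * Example (sketch): n cooperating GPU threads each taking a distinct slot.
 * Each thread keeps the pointer from its previous call in last (NULL on the
 * first call); n and the surrounding kernel are hypothetical.
 *
 *     nvm_cmd_t* cmd = nvm_sq_enqueue_n(sq, last, n, threadIdx.x);
 *     // build the command in *cmd, then let one thread ring the doorbell
 *     last = cmd; // remember for the succeeding call
 */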

/*
 * Poll completion queue.
 *
 * Check the head of a completion queue for a new entry. The caller must
 * call dequeue manually.
 *
 * Returns a pointer to an unprocessed completion entry, or NULL if the queue
 * is empty.
 */
__host__ __device__ static inline
nvm_cpl_t* nvm_cq_poll(const nvm_queue_t* cq)
{
    nvm_cpl_t* cpl = (nvm_cpl_t*) (((unsigned char*) cq->vaddr) + cq->es * cq->head);

#ifndef __CUDA_ARCH__
    if (cq->local)
    {
        nvm_cache_invalidate((void*) cpl, sizeof(nvm_cpl_t));
    }
#endif

    // Check if a new completion is ready by comparing the phase tag
    if (!_RB(*NVM_CPL_STATUS(cpl), 0, 0) != !cq->phase)
    {
        return NULL;
    }

    return cpl;
}
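
/*
 * Example (sketch): peeking at the next completion without consuming it.
 *
 *     nvm_cpl_t* cpl = nvm_cq_poll(&cq);
 *     if (cpl != NULL)
 *     {
 *         // inspect *cpl here, then consume it with nvm_cq_dequeue(&cq)
 *     }
 */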

/*
 * Dequeue completion queue entry.
 *
 * Dequeue a completion entry from the completion queue. If no completions
 * are ready, this function returns NULL.
 *
 * The caller must update the corresponding SQ manually.
 *
 * Returns a pointer to the completion entry, or NULL if the queue is empty.
 */
__host__ __device__ static inline
nvm_cpl_t* nvm_cq_dequeue(nvm_queue_t* cq)
{
    nvm_cpl_t* cpl = nvm_cq_poll(cq);

    if (cpl != NULL)
    {
        // Increase head pointer and invert phase tag
        if (++cq->head == cq->qs)
        {
            cq->head = 0;
            cq->phase = !cq->phase;
        }
    }

    return cpl;
}

/*
 * Dequeue completion queue entry (blocking).
 *
 * Dequeue a completion entry from the completion queue. If none are ready
 * at the time, this function blocks until either a completion becomes ready
 * or the controller timeout interval has elapsed.
 *
 * Returns a pointer to the completion entry, or NULL on timeout.
 */
__host__
nvm_cpl_t* nvm_cq_dequeue_block(nvm_queue_t* cq, uint64_t timeout);
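
/*
 * Example (sketch): waiting for a completion with a 5 second timeout. The
 * timeout is assumed here to be in milliseconds; check the implementation
 * for the actual unit.
 *
 *     nvm_cpl_t* cpl = nvm_cq_dequeue_block(&cq, 5000);
 *     if (cpl == NULL)
 *     {
 *         // timed out without a ready completion
 *     }
 */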

/*
 * Update SQ tail pointer.
 *
 * Submit all enqueued commands by ringing the doorbell.
 * The caller must make sure that all commands are prepared before calling
 * this.
 */
__host__ __device__ static inline
void nvm_sq_submit(nvm_queue_t* sq)
{
    if (sq->last != sq->tail && sq->db != NULL)
    {
#ifndef __CUDA_ARCH__
        if (sq->local)
        {
            // TODO: only flush the actual entries
            nvm_cache_flush((void*) sq->vaddr, sq->es * sq->qs);
        }
        else
        {
            nvm_wcb_flush();
        }
#endif

        *((volatile uint32_t*) sq->db) = sq->tail;
        sq->last = sq->tail;
    }
}
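
/*
 * Example (sketch): batching several commands before a single doorbell
 * write; n_cmds is a hypothetical count.
 *
 *     for (size_t i = 0; i < n_cmds; ++i)
 *     {
 *         nvm_cmd_t* cmd = nvm_sq_enqueue(&sq);
 *         if (cmd == NULL)
 *         {
 *             break; // queue full
 *         }
 *         // build command i in place
 *     }
 *     nvm_sq_submit(&sq); // one doorbell write covers all new entries
 */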

/*
 * Update SQ head pointer.
 */
__host__ __device__ static inline
void nvm_sq_update(nvm_queue_t* sq)
{
    // Update head pointer of submission queue
    if (sq->db != NULL && ++sq->head == sq->qs)
    {
        sq->head = 0;
    }
}

/*
 * Update controller's CQ head pointer.
 *
 * Indicate that all completions have been processed by ringing the doorbell.
 * Any completion pointers acquired before this call must be discarded
 * afterwards.
 */
__host__ __device__ static inline
void nvm_cq_update(nvm_queue_t* cq)
{
    if (cq->last != cq->head && cq->db != NULL)
    {
        *((volatile uint32_t*) cq->db) = cq->head;
        cq->tail = cq->last = cq->head;
    }
}
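
/*
 * Example (sketch): a typical completion-handling cycle, draining all ready
 * completions, updating the matching SQ once per completion, and ringing the
 * CQ doorbell once at the end.
 *
 *     nvm_cpl_t* cpl;
 *     while ((cpl = nvm_cq_dequeue(&cq)) != NULL)
 *     {
 *         // read any needed fields from *cpl before ringing the CQ doorbell
 *         nvm_sq_update(&sq); // one SQ entry consumed per completion
 *     }
 *     nvm_cq_update(&cq); // earlier cpl pointers must not be used after this
 */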

//#ifndef __CUDACC__
//#undef __device__
//#undef __host__
//#endif

#endif /* __NVM_QUEUE_H__ */