-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomputer-system.tex
391 lines (322 loc) · 21.9 KB
/
computer-system.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% University/School Laboratory Report
% LaTeX Template
% Version 4.0 (March 21, 2022)
%
% This template originates from:
% https://www.LaTeXTemplates.com
%
% Authors:
% Vel ([email protected])
% Linux and Unix Users Group at Virginia Tech Wiki
%
% License:
% CC BY-NC-SA 4.0 (https://creativecommons.org/licenses/by-nc-sa/4.0/)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%----------------------------------------------------------------------------------------
% PACKAGES AND DOCUMENT CONFIGURATIONS
%----------------------------------------------------------------------------------------
\documentclass[
a4paper, % Paper size, specify a4paper (A4) or letterpaper (US letter)
11pt, % Default font size, specify 10pt, 11pt or 12pt
]{CSUniSchoolLabReport}
\usepackage{caption, hyperref, listings, graphics, geometry}
\usepackage{xcolor} % Package for coloring code listings
% \addbibresource{sample.bib} % Bibliography file (located in the same folder as the template)
\geometry{left = 2.5cm, right = 2.5cm, top = 2.5cm, bottom = 2.5cm}
\renewcommand{\abstractname}{\large Abstract\\}
%----------------------------------------------------------------------------------------
% REPORT INFORMATION
%----------------------------------------------------------------------------------------
\title{A RISC-V computer system design\thanks{\textbf{The latest version can be found at} \url{https://github.com/dream-tentacle/digital-logic-paper}}}
\author{Jun-Cheng Xiong} % Author name(s), add additional authors like: '\& James \textsc{Smith}'
\date{\today} % Date of the report
%----------------------------------------------------------------------------------------
\newdimen\Lmargin
\newdimen\Rmargin
\Lmargin=1in \advance\Lmargin by\hoffset \advance\Lmargin by\oddsidemargin
\Rmargin=\paperwidth \advance\Rmargin by-\Lmargin \advance\Rmargin by-\textwidth
\ifdim\Lmargin>\Rmargin \Rmargin=\Lmargin \fi
\def\Scentering{\leftskip=0pt plus 1fil minus \Rmargin
\rightskip=\leftskip}
%----------------------------------------------------------------------------------------
\lstset{
basicstyle=\tt,
numbers=left,
rulesepcolor=\color{red!20!green!20!blue!20},
escapeinside=``,
xleftmargin=2em,xrightmargin=2em, aboveskip=1em,
%background
framexleftmargin=1.5mm,
frame=shadowbox,
backgroundcolor=\color[RGB]{245,245,244},
%style
keywordstyle=\color{blue}\bfseries,
identifierstyle=\bf,
numberstyle=\color[RGB]{0,192,192},
commentstyle=\it\color[RGB]{96,96,96},
stringstyle=\rmfamily\slshape\color[RGB]{128,0,0},
%space
showstringspaces=false
}
%----------------------------------------------------------------------------------------
\begin{document}
\maketitle % Insert the title, author and date using the information specified above
\begin{abstract}
\large{
In this project, a simple RISC-V computer system is implemented on the Digilent Nexys A7-100T FPGA board. The computer system is composed of a CPU, a data memory, an instruction memory, a VGA screen, a keyboard, and a PWM audio output. The CPU is a 5-stage pipeline processor, which can execute 32-bit RISC-V instructions. The system interface is a terminal, which can execute basic commands and run three programs.
\textbf{Keywords}: RISC-V, FPGA, 5-stage pipeline, computer system
}
\end{abstract}
\thispagestyle{empty}
\newpage
\thispagestyle{empty}
\tableofcontents
\quad
\begin{figure}[h]
\centering
\includegraphics[width=12cm]{image7.png}
\caption{Digilent Nexys A7-100T FPGA board}
\label{fig:image7}
\end{figure}
\newpage
\setcounter{page}{1}
\section{Introduction}
\subsection{Overall introduction}
In the realm of digital systems design, the creation of a fully functional computer system on a Field-Programmable Gate Array (FPGA) stands as a testament to the designer's knowledge of digital systems. This project centers around the development of a simple yet robust RISC-V computer system, implemented on the Digilent Nexys A7-100T FPGA board. At the heart of this project lies the CPU, which is a 5-stage pipeline processor that can execute 32-bit RISC-V instructions. The computer system designed for this project includes essential components such as a CPU, a data memory, an instruction memory, a VGA screen, a keyboard, and a PWM audio output. The user interacts with the system through a terminal, which supports several commands and can run three programs: Snake, Piano, and Conway's Game of Life.
This report is divided into two parts. The first segment delves into the hardware design, concentrating on the intricacies of the 5-stage pipeline CPU, memory management, and all other devices. The second part shifts focus to the software design, encompassing the implementation of the terminal, the engaging Snake game, the simple yet functional Piano game, and the Conway's Game of Life.
\subsection{FPGA}
The FPGA board for this project is Digilent Nexys A7-100T, a circuit design and implementation platform for classroom use. For more information, please refer to \url{https://digilent.com/reference/programmable-logic/nexys-a7/start}.
\subsection{RISC-V}
RISC-V is an open-source instruction set architecture (ISA) based on reduced instruction set computer (RISC) principles. It is a standard ISA designed to be simple, extensible, and easy to implement. In this project, I implemented a 32-bit RISC-V CPU, which can execute all 37 base instructions. For more information about RISC-V itself, please refer to \url{https://riscv.org/}.
\section{Hardware}
\subsection{5-stage pipeline CPU}
The 5-stage pipeline divides every instruction into 5 stages, and executes them in parallel, which can greatly improve the speed of the CPU. The 5 stages are: instruction fetch (IF), instruction decode (ID), execute (EX), memory access (MEM), and write back (WB). The pipeline is shown in Figure \ref{fig:image2}.
\begin{figure}[htbp]
\Scentering
\includegraphics[width=.9\paperwidth]{image2.png}
\caption{The 5-stage pipeline of the CPU}
\label{fig:image2}
\end{figure}
Each clock cycle begins at a falling edge and ends at the next falling edge. The CPU clock is used on both the rising edge and the falling edge as follows. The register file is written on the rising edge but read through combinational logic (thus its output can change at any time). The instruction memory is read on the rising edge. The data memory (including other I/O devices) is read on the rising edge and written on the falling edge. The PC is updated on the falling edge. The pipeline registers are written and read on the falling edge.
\subsection*{Instruction fetch (IF)}
This stage fetches the instruction from the instruction memory. The PC is the program counter, which stores the address of the next instruction. Usually, the PC is updated by adding 4 to itself, and this is calculated by a specialized add unit. At the end of the IF stage, the current PC and the instruction are stored in the pipeline register IF/ID.
\subsection*{Instruction decode (ID)}
This stage decodes the instruction and reads the register file. The register file has two read ports, which can read two registers at the same time. Other control signals are also generated in this stage. At the end of the ID stage, the PC, the register information and the control signals are stored in the pipeline register ID/EX.
\subsection*{Execute (EX)}
This stage executes the calculation. According to the result of the ALU and the control signals, a signal for branching is generated. If a branch is taken, the IF and ID stages will be flushed, and the PC will be changed. At the end of the EX stage, the PC, the result of the ALU, the register information and the control signals are stored in the pipeline register EX/MEM.
\subsection*{Memory access (MEM)}
This stage not only reads and writes the memory, but also checks if there is a hazard, which will be introduced later. At the end of the MEM stage, the PC, the result of the ALU, the register information and the control signals are stored in the pipeline register MEM/WB.
\subsection*{Write back (WB)}
This stage writes the result of ALU or the memory to the register file. Because the register file is written in the rising edge, the written value can be used in the ID stage of the same clock cycle, which is called forwarding.
\subsection*{Hazard solution}
There are three types of hazards: data hazard, control hazard and structural hazard. The structural hazard doesn't exist in this project because we don't use any hardware resource in two stages at the same time. The control hazard is solved by flushing the IF and ID stages, which we have already mentioned. The data hazard is solved by forwarding.
There are three types of data hazard: RAW (read after write), WAR (write after read) and WAW (write after write). Only the RAW appears in this design. Again, there are three types of RAW: WB-ID, WB-EX, and MEM-EX. The WB-ID is mentioned in the WB stage introduction. To solve the WB-EX hazard, we use a forwarding unit to forward the result of ALU or the memory to the EX stage. The forwarding unit checks whether one of the register data in the EX comes from the writing register. If so, the forwarding unit will change the data to the writing data. The MEM-EX hazard is also solved by the forwarding unit. The forwarding unit checks whether one of the register data in the EX comes from the writing register and the instruction is a load instruction that reads the memory and writes the register. If so, the forwarding unit will change the data to the writing data.
\subsection{Memory management}
The CPU uses byte addressing, and accesses all other devices through memory-mapped I/O. The memory address is 32 bits wide, and the upper 12 bits are used to select the device. The address map is shown in Table \ref{tab:address-map}.
\begin{table}[h]
\Scentering
\begin{tabular}{|c|c|}
\hline
Address range & Device \\
\hline
0x00000000 - 0x000FFFFF & Instruction memory \\
\hline
0x00100000 - 0x001FFFFF & Data memory \\
\hline
0x00200000 - 0x002FFFFF & VGA screen \\
\hline
0x00300000, 0x00300004 & Keyboard \\
\hline
0x00400000, 0x00400004 & LED \\
\hline
0x00500000, 0x00500004 & timer \\
\hline
0x00600000 & deprecated \\
\hline
0x00700000 & deprecated \\
\hline
0x00800000, 0x00800004 & PWM audio output \\
\hline
\end{tabular}
\captionof{table}{Address Map}
\label{tab:address-map}
\end{table}
\subsection{Instruction and data memory}
The instruction and data memory are both implemented using block RAM (BRAM). The instruction memory is read-only, and the data memory is read-write. Both of them are 1MB in size. The instruction address must be aligned to 4 bytes, while the data address can be any byte address.
\subsection{VGA screen (Write-only)}
The resolution is 640x480, and the color depth is 12 bits. Because the system is entirely terminal-based and the games use characters as their basic unit, the screen is divided into 80x30 characters of 8x16 pixels. This allows us to store only the ASCII codes of the 80x30 characters.
To accelerate screen access, the screen is implemented with a one-dimensional array instead of a two-dimensional array. Considering the line and column size, bits 11-7 of the address are used to represent the line number, and bits 6-0 are used to represent the column number ($2^5=32$, $2^7=128$). The screen has a base address of 0x00200000, and is accessed with 0x00200000 + offset.
\subsection{Keyboard (Read-only)}
The input signals of the keyboard are PS2\_CLK and PS2\_DATA. With a keyboard signal processor, every byte of scan code can be generated from a stream of PS2\_DATA. The scan code is then stored in a circular buffer of 16 bytes. The buffer provides the CPU with a ``new\_key'' signal, which is high when the buffer is not empty. The CPU can read ``new\_key'' through the address 0x00300000. When the CPU reads the address 0x00300004, the buffer will pop a byte of scan code.
The driver of the keyboard is implemented in the software part. Basically, the driver is a finite state machine (Figure \ref{fig:image1}), which can be divided into 4 states: KEY\_DOWN, KEY\_UP, LONG\_KEY\_DOWN, and LONG\_KEY\_UP. The ``LONG\_'' prefix in the state names refers to the two-byte scan codes, which mainly start with 0xE0. The break code of a key is 0xF0 followed by its make code; thus the KEY\_UP and LONG\_KEY\_UP states are used to detect the break code.
The driver is implemented with a C function, which has only one state change every time it is called. There are three global values, ``key\_ready'', ``two\_byte\_code'' and ``key\_up'', which are signals for applications and are set according to the state. When the state machine finds that it ends a key press, it sets ``key\_ready'' to 1, and returns (the lowest byte of) the scan code. Otherwise, it returns 0. The other two-byte scan codes are not used in this project, so we just ignore them.
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image1.png}
\captionof{figure}{Keyboard driver state machine}
\par 1: KEY\_DOWN, 2: LONG\_KEY\_DOWN, \\ 3: KEY\_UP, 4: LONG\_KEY\_UP
\label{fig:image1}
\end{figure}
When a key code is obtained, the software looks up the corresponding ASCII code in an array. If the key is a signal key, it is processed accordingly.
\subsection{PWM audio output (Write-only)}
The audio pitch is based on the frequency of the PWM signal. There is a count value and a max value. The PWM signal is high when the count value is less than half of the max value, and low otherwise. The count value is increased by 1 every clock cycle and set back to 0 when it exceeds the max value. Thus, the pitch can be adjusted by changing the max value.
The audio has two memory addresses, 0x00800000 and 0x00800004. The first address is used to set the max value, and the second address is used to turn on/off the audio.
\subsection{Timer (Read-only)}
There are two memory addresses, 0x00500000 and 0x00500004, corresponding to the millisecond counter and the second counter. Four counters are used in the timer component: clock counter, millisecond counter 1, millisecond counter 2, and second counter. The clock frequency is 100MHz. When the clock counter reaches $10^5$, both the millisecond counters are increased by 1, and the clock counter is set back to 0. When the millisecond counter 1 reaches 1000, the second counter is increased by 1, and the millisecond counter 1 is set back to 0. The address 0x00500000 is used to read the millisecond counter 2, and the address 0x00500004 is used to read the second counter.
\section{Software}
\subsection{Terminal}
The terminal is the interface between the user and the system. The function of the terminal calls the keyboard driver function only once every time it is called. If the keyboard driver returns 0, the terminal will return immediately. Otherwise, the terminal will process the scan code and execute the corresponding command. When the system is turned on, it enters a while loop, which calls the terminal function without stopping.
The terminal has different types of responses to different key presses:
\begin{itemize}
\item \textbf{key up}: When the global variable ``key\_up'' is set to 1, the terminal checks if the key is ``shift'', ``ctrl'', or ``alt''. If so, it sets the corresponding global flag variable to 0. Otherwise, it does nothing.
\item \textbf{key down}: When the global variable ``key\_up'' is set to 0:
\begin{itemize}
\item \textbf{signal keys}: If the key is ``shift'', ``ctrl'', or ``alt'', the terminal sets the corresponding global flag variable to 1.
\item \textbf{backspace}: If the key is ``backspace'', the terminal deletes the last character in the buffer and puts a space on the screen.
\item \textbf{enter}: If the key is ``enter'', the terminal executes the command in the buffer. If the command is not recognized, the terminal does nothing.
\item \textbf{caps lock}: If the key is ``caps lock'', the terminal toggles the ``caps\_lock'' global flag variable.
\end{itemize}
\end{itemize}
\subsection{Help}
Typing ``help'' and enter gives you the usage of different commands (Figure \ref{fig:image8}).
\begin{figure}[!htb]
\centering
\includegraphics[width=12cm]{image8.png}
\captionof{figure}{Help}
\label{fig:image8}
\end{figure}
Simple commands such as ``fib'', ``hello'' and ``sort'' are shown in Figure \ref{fig:image10}.
\begin{figure}[!htb]
\centering
\includegraphics[width=12cm]{image10.png}
\captionof{figure}{Simple commands}
\label{fig:image10}
\end{figure}
\subsection{Speed Test}
A benchmark called dhrystone (version 2.1) is used to test the speed of the CPU. The test command runs a program testing the speed of the CPU. The result is shown in Figure \ref{fig:image14}. More information about dhrystone benchmark can be found at \url{https://en.wikipedia.org/wiki/Dhrystone} and \url{https://kreier.github.io/benchmark/dhrystone/}.
\begin{figure}[!htb]
\centering
\includegraphics[width=12cm]{image14.png}
\captionof{figure}{Test result}
\label{fig:image14}
\end{figure}
\subsection{Control Hazard Test}
The test command runs a program testing whether the control hazard is solved. There are 5 types of testing (code in Appendix). The result is shown in Figure \ref{fig:image9}.
\begin{figure}[!htb]
\centering
\includegraphics[width=12cm]{image9.png}
\captionof{figure}{Test result}
\label{fig:image9}
\end{figure}
\subsection{Snake}
The snake game looks like the classic snake game on Nokia phones. The snake can move in four directions, and it will die if it hits the wall or itself. The snake will grow longer when it eats food.
There are 3 different levels in the game. The speed of the snake increases as the level increases. When the ``snake'' command is executed in the terminal, a pop-up window will appear (Figure \ref{fig:image3}), and the player can choose the level by pressing 1, 2, or 3. The game will start after the player chooses the level. The player can press ``q'' to quit the game and return to the terminal.
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image3.png}
\captionof{figure}{Snake game level selection}
\label{fig:image3}
\end{figure}
The game looks like Figure \ref{fig:image4}. The snake is composed of squares, and the food is a circle. The score is shown in the top left corner. The player can press ``w'', ``a'', ``s'', ``d'' to control the snake. The player can press ``q'' to quit the game and return to the terminal. When the game ends, a pop-up window will appear, showing ``YOU LOSE'' in ASCII art together with the score (Figure \ref{fig:image5}).
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image4.png}
\captionof{figure}{Snake game}
\label{fig:image4}
\end{figure}
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image5.png}
\captionof{figure}{Snake game over}
\label{fig:image5}
\end{figure}
When using the command ``gensnake'', a loading animation will appear (Figure \ref{fig:image11}).
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image11.png}
\captionof{figure}{Loading animation}
\label{fig:image11}
\end{figure}
\subsection{Piano}
The piano game is just a simple piano. The player can press the keys \textasciigrave{}, 0-9, -, = and backspace to play. The shift (ctrl) key can increase (decrease) the pitch by one octave. The player can press ``q'' to quit the game and return to the terminal.
The game looks like Figure \ref{fig:image6}. The numbers under the piano keys tell the player how to play the piano. The player can press ``q'' to quit the game and return to the terminal.
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image6.png}
\captionof{figure}{Piano game}
\label{fig:image6}
\end{figure}
\subsection{Conway}
The ``conway'' program is the well-known experiment of Conway's Game of Life. The game implements the rules of Conway's Game of Life. The player can press ``q'' to quit the game and return to the terminal.
Figure \ref{fig:image12} shows the game when it starts, and Figure \ref{fig:image13} shows the stable state of the game.
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image12.png}
\captionof{figure}{Conway's Game of Life}
\label{fig:image12}
\end{figure}
\begin{figure}[!htb]
\centering
\includegraphics[width=8cm]{image13.png}
\captionof{figure}{Conway's Game of Life}
\label{fig:image13}
\end{figure}
\newpage
\newpage
\section{Appendix}
\subsection{Control hazard test code}
\begin{lstlisting}[language=C]
/*
 * fact1: baseline product loop.
 * Multiplies one volatile accumulator by every i in
 * [1, FACT_N), one multiply per loop iteration.
 * The result is discarded; only the work matters.
 * noinline keeps the loop in its own function.
 */
static __attribute__((noinline)) void fact1() {
/* volatile: each multiply is a real read and write */
volatile unsigned fact __attribute__((unused)) = 1;
for (unsigned i = 1; i < FACT_N; i++)
fact *= i;
}
/*
 * fact2: loop unrolled by 4 with four accumulators.
 * Each iteration multiplies fact0..fact3 by i, i + 1,
 * i + 2 and i + 3, so there is one loop branch per
 * four multiplies. Results are discarded.
 * NOTE(review): the last iteration can use factors
 * >= FACT_N unless FACT_N - 1 is a multiple of 4.
 */
__attribute__((noinline)) void fact2() {
volatile unsigned fact0 __attribute__((unused)) = 1;
volatile unsigned fact1 __attribute__((unused)) = 1;
volatile unsigned fact2 __attribute__((unused)) = 1;
volatile unsigned fact3 __attribute__((unused)) = 1;
/* i advances by 4: 1, 5, 9, ... */
for (unsigned i = 1; i < FACT_N; i += 4) {
fact0 *= i;
fact1 *= i + 1;
fact2 *= i + 2;
fact3 *= i + 3;
}
}
/*
 * fact3: loop unrolled by 4 with one accumulator.
 * Each iteration applies four multiplies to the same
 * volatile variable, so every multiply reads the value
 * written by the previous one. Result is discarded.
 * NOTE(review): the last iteration can use factors
 * >= FACT_N unless FACT_N - 1 is a multiple of 4.
 */
__attribute__((noinline)) void fact3() {
volatile unsigned fact __attribute__((unused)) = 1;
/* i advances by 4: 1, 5, 9, ... */
for (unsigned i = 1; i < FACT_N; i += 4) {
fact *= i;
fact *= i + 1;
fact *= i + 2;
fact *= i + 3;
}
}
/*
 * count1: threshold count over rand() data.
 * Repeats TIMES times: fill value[0..SIZE) with rand(),
 * then count the entries greater than 500. The count
 * is discarded.
 * NOTE(review): presumably the data-dependent branch is
 * meant to be irregular here - confirm with the author.
 */
static __attribute__((noinline)) void count1() {
int T = TIMES;
while (T--) {
/* refill the array with new rand() values */
for (int i = 0; i < SIZE; i++) {
value[i] = rand();
}
/* volatile: each increment is a real read and write */
volatile int cnt __attribute__((unused)) = 0;
for (int i = 0; i < SIZE; i++)
if (value[i] > 500)
cnt++;
}
}
/*
 * count2: threshold count over ascending data.
 * Repeats TIMES times: fill value[i] with i for i in
 * [0, SIZE), then count the entries greater than 500.
 * The comparison is false for i <= 500 and true after.
 * The count is discarded.
 * NOTE(review): presumably the regular counterpart of
 * count1 - confirm with the author.
 */
static __attribute__((noinline)) void count2() {
int T = TIMES;
while (T--) {
/* refill the array with 0, 1, 2, ... */
for (int i = 0; i < SIZE; i++) {
value[i] = i;
}
/* volatile: each increment is a real read and write */
volatile int cnt __attribute__((unused)) = 0;
for (int i = 0; i < SIZE; i++)
if (value[i] > 500)
cnt++;
}
}
\end{lstlisting}
\end{document}