-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathx11_x64.html
2012 lines (1675 loc) · 82.8 KB
/
x11_x64.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html>
<head>
<title>Learn x86-64 assembly by writing a GUI from scratch</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link type="application/atom+xml" href="/blog/feed.xml" rel="self">
<link rel="shortcut icon" type="image/ico" href="/blog/favicon.ico">
<link rel="stylesheet" type="text/css" href="main.css">
<link rel="stylesheet" href="https://unpkg.com/@highlightjs/[email protected]/styles/default.min.css">
<script src="highlight.min.js"></script>
<!-- From https://github.com/odin-lang/odin-lang.org/blob/6f48c2cfb094a42dffd34143884fa958bd9c0ba2/themes/odin/layouts/partials/head.html#L71 -->
<script src="x86asm.min.js"></script>
<script src="odin_syntax.js"></script>
<script type="module" src="search_index.js"></script>
<script type="module" src="search.js"></script>
</head>
<body>
<div id="banner">
<div id="name">
<img id="me" src="me.jpeg">
<span>Philippe Gaultier</span>
</div>
<input id="search" placeholder="🔎 Search" autocomplete=off>
<ul>
<li> <a href="/blog/body_of_work.html">Body of work</a> </li>
<li> <a href="/blog/articles-by-tag.html">Tags</a> </li>
<li> <a href="https://github.com/gaultier/resume/raw/master/Philippe_Gaultier_resume_en.pdf">
Resume
</a> </li>
<li> <a href="/blog/feed.xml">
<svg viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M5.5 3.5C4.39543 3.5 3.5 4.39543 3.5 5.5V18.5C3.5 19.6046 4.39543 20.5 5.5 20.5H18.5C19.6046 20.5 20.5 19.6046 20.5 18.5V5.5C20.5 4.39543 19.6046 3.5 18.5 3.5H5.5ZM7 19C8.10457 19 9 18.1046 9 17C9 15.8954 8.10457 15 7 15C5.89543 15 5 15.8954 5 17C5 18.1046 5.89543 19 7 19ZM6.14863 10.5052C6.14863 10.0379 6.52746 9.65906 6.99478 9.65906C7.95949 9.65906 8.91476 9.84908 9.80603 10.2183C10.6973 10.5874 11.5071 11.1285 12.1893 11.8107C12.8715 12.4929 13.4126 13.3027 13.7817 14.194C14.1509 15.0852 14.3409 16.0405 14.3409 17.0052C14.3409 17.4725 13.9621 17.8514 13.4948 17.8514C13.0275 17.8514 12.6486 17.4725 12.6486 17.0052C12.6486 16.2627 12.5024 15.5275 12.2183 14.8416C11.9341 14.1556 11.5177 13.5324 10.9927 13.0073C10.4676 12.4823 9.84437 12.0659 9.15842 11.7817C8.47246 11.4976 7.73726 11.3514 6.99478 11.3514C6.52746 11.3514 6.14863 10.9725 6.14863 10.5052ZM7 5.15385C6.53268 5.15385 6.15385 5.53268 6.15385 6C6.15385 6.46732 6.53268 6.84615 7 6.84615C8.33342 6.84615 9.65379 7.10879 10.8857 7.61907C12.1176 8.12935 13.237 8.87728 14.1799 9.82015C15.1227 10.763 15.8707 11.8824 16.3809 13.1143C16.8912 14.3462 17.1538 15.6666 17.1538 17C17.1538 17.4673 17.5327 17.8462 18 17.8462C18.4673 17.8462 18.8462 17.4673 18.8462 17C18.8462 15.4443 18.5397 13.9039 17.9444 12.4667C17.3491 11.0294 16.4765 9.72352 15.3765 8.6235C14.2765 7.52349 12.9706 6.65091 11.5333 6.05558C10.0961 5.46026 8.55566 5.15385 7 5.15385Z" fill="#000000"/>
</svg>
</a> </li>
<li> <a href="https://www.linkedin.com/in/philippegaultier/">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" data-supported-dps="24x24" fill="currentColor" class="mercado-match" width="24" height="24" focusable="false">
<path d="M20.5 2h-17A1.5 1.5 0 002 3.5v17A1.5 1.5 0 003.5 22h17a1.5 1.5 0 001.5-1.5v-17A1.5 1.5 0 0020.5 2zM8 19H5v-9h3zM6.5 8.25A1.75 1.75 0 118.3 6.5a1.78 1.78 0 01-1.8 1.75zM19 19h-3v-4.74c0-1.42-.6-1.93-1.38-1.93A1.74 1.74 0 0013 14.19a.66.66 0 000 .14V19h-3v-9h2.9v1.3a3.11 3.11 0 012.7-1.4c1.55 0 3.36.86 3.36 3.66z"/>
</svg>
</a> </li>
<li> <a href="https://github.com/gaultier">
<svg height="32" aria-hidden="true" viewBox="0 0 24 24" version="1.1" width="32" data-view-component="true" class="octicon octicon-mark-github v-align-middle">
<path d="M12.5.75C6.146.75 1 5.896 1 12.25c0 5.089 3.292 9.387 7.863 10.91.575.101.79-.244.79-.546 0-.273-.014-1.178-.014-2.142-2.889.532-3.636-.704-3.866-1.35-.13-.331-.69-1.352-1.18-1.625-.402-.216-.977-.748-.014-.762.906-.014 1.553.834 1.769 1.179 1.035 1.74 2.688 1.25 3.349.948.1-.747.402-1.25.733-1.538-2.559-.287-5.232-1.279-5.232-5.678 0-1.25.445-2.285 1.178-3.09-.115-.288-.517-1.467.115-3.048 0 0 .963-.302 3.163 1.179.92-.259 1.897-.388 2.875-.388.977 0 1.955.13 2.875.388 2.2-1.495 3.162-1.179 3.162-1.179.633 1.581.23 2.76.115 3.048.733.805 1.179 1.825 1.179 3.09 0 4.413-2.688 5.39-5.247 5.678.417.36.776 1.05.776 2.128 0 1.538-.014 2.774-.014 3.162 0 .302.216.662.79.547C20.709 21.637 24 17.324 24 12.25 24 5.896 18.854.75 12.5.75Z"/>
</svg>
</a> </li>
<li> <a href="https://hachyderm.io/@pg">
<svg width="75" height="79" viewBox="0 0 75 79" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M73.8393 17.4898C72.6973 9.00165 65.2994 2.31235 56.5296 1.01614C55.05 0.797115 49.4441 0 36.4582 0H36.3612C23.3717 0 20.585 0.797115 19.1054 1.01614C10.5798 2.27644 2.79399 8.28712 0.904997 16.8758C-0.00358524 21.1056 -0.100549 25.7949 0.0682394 30.0965C0.308852 36.2651 0.355538 42.423 0.91577 48.5665C1.30307 52.6474 1.97872 56.6957 2.93763 60.6812C4.73325 68.042 12.0019 74.1676 19.1233 76.6666C26.7478 79.2728 34.9474 79.7055 42.8039 77.9162C43.6682 77.7151 44.5217 77.4817 45.3645 77.216C47.275 76.6092 49.5123 75.9305 51.1571 74.7385C51.1797 74.7217 51.1982 74.7001 51.2112 74.6753C51.2243 74.6504 51.2316 74.6229 51.2325 74.5948V68.6416C51.2321 68.6154 51.2259 68.5896 51.2142 68.5661C51.2025 68.5426 51.1858 68.522 51.1651 68.5058C51.1444 68.4896 51.1204 68.4783 51.0948 68.4726C51.0692 68.4669 51.0426 68.467 51.0171 68.4729C45.9835 69.675 40.8254 70.2777 35.6502 70.2682C26.7439 70.2682 24.3486 66.042 23.6626 64.2826C23.1113 62.762 22.7612 61.1759 22.6212 59.5646C22.6197 59.5375 22.6247 59.5105 22.6357 59.4857C22.6466 59.4609 22.6633 59.4391 22.6843 59.422C22.7053 59.4048 22.73 59.3929 22.7565 59.3871C22.783 59.3813 22.8104 59.3818 22.8367 59.3886C27.7864 60.5826 32.8604 61.1853 37.9522 61.1839C39.1768 61.1839 40.3978 61.1839 41.6224 61.1516C46.7435 61.008 52.1411 60.7459 57.1796 59.7621C57.3053 59.7369 57.431 59.7154 57.5387 59.6831C65.4861 58.157 73.0493 53.3672 73.8178 41.2381C73.8465 40.7606 73.9184 36.2364 73.9184 35.7409C73.9219 34.0569 74.4606 23.7949 73.8393 17.4898Z" fill="url(#paint0_linear_549_34)"/>
<path d="M61.2484 27.0263V48.114H52.8916V27.6475C52.8916 23.3388 51.096 21.1413 47.4437 21.1413C43.4287 21.1413 41.4177 23.7409 41.4177 28.8755V40.0782H33.1111V28.8755C33.1111 23.7409 31.0965 21.1413 27.0815 21.1413C23.4507 21.1413 21.6371 23.3388 21.6371 27.6475V48.114H13.2839V27.0263C13.2839 22.7176 14.384 19.2946 16.5843 16.7572C18.8539 14.2258 21.8311 12.926 25.5264 12.926C29.8036 12.926 33.0357 14.5705 35.1905 17.8559L37.2698 21.346L39.3527 17.8559C41.5074 14.5705 44.7395 12.926 49.0095 12.926C52.7013 12.926 55.6784 14.2258 57.9553 16.7572C60.1531 19.2922 61.2508 22.7152 61.2484 27.0263Z" fill="white"/>
<defs>
<linearGradient id="paint0_linear_549_34" x1="37.0692" y1="0" x2="37.0692" y2="79" gradientUnits="userSpaceOnUse">
<stop stop-color="#6364FF"/>
<stop offset="1" stop-color="#563ACC"/>
</linearGradient>
</defs>
</svg>
</a> </li>
<li> <a href="https://bsky.app/profile/pgaultier.bsky.social">
<svg fill="none" viewBox="0 0 64 57" width="32" style="width: 32px; height: 28.5px;"><path fill="#0085ff" d="M13.873 3.805C21.21 9.332 29.103 20.537 32 26.55v15.882c0-.338-.13.044-.41.867-1.512 4.456-7.418 21.847-20.923 7.944-7.111-7.32-3.819-14.64 9.125-16.85-7.405 1.264-15.73-.825-18.014-9.015C1.12 23.022 0 8.51 0 6.55 0-3.268 8.579-.182 13.873 3.805ZM50.127 3.805C42.79 9.332 34.897 20.537 32 26.55v15.882c0-.338.13.044.41.867 1.512 4.456 7.418 21.847 20.923 7.944 7.111-7.32 3.819-14.64-9.125-16.85 7.405 1.264 15.73-.825 18.014-9.015C62.88 23.022 64 8.51 64 6.55c0-9.818-8.578-6.732-13.873-2.745Z"/></svg>
</a> </li>
</ul>
</div>
<div id="search-matches" hidden>
</div>
<div id="pseudo-body">
<div class="article-prelude">
<p><a href="/blog"> ⏴ Back to all articles</a></p>
<p class="publication-date">Published on 2023-05-31</p>
</div>
<div class="article-title">
<h1>Learn x86-64 assembly by writing a GUI from scratch</h1>
<div class="tags"> <a href="/blog/articles-by-tag.html#gui" class="tag">GUI</a> <a href="/blog/articles-by-tag.html#x86-64" class="tag">x86_64</a> <a href="/blog/articles-by-tag.html#x11" class="tag">X11</a> <a href="/blog/articles-by-tag.html#optimization" class="tag">Optimization</a></div>
</div>
<strong>Table of contents</strong>
<ul>
<li>
<a href="#3018859686-what-do-we-need">What do we need?</a>
</li>
<li>
<a href="#2049729589-x11-basics">X11 basics</a>
</li>
<li>
<a href="#1992549332-main-in-x64-assembly">Main in x64 assembly</a>
</li>
<li>
<a href="#2732446636-a-stack-primer">A stack primer</a>
<ul>
<li>
<a href="#657479577-a-small-stack-example">A small stack example</a>
</li>
</ul>
</li>
<li>
<a href="#4163415294-opening-a-socket">Opening a socket</a>
</li>
<li>
<a href="#2750592591-connecting-to-the-server">Connecting to the server</a>
</li>
<li>
<a href="#484246098-sending-data-over-the-socket">Sending data over the socket</a>
</li>
<li>
<a href="#1309822244-generating-ids">Generating ids</a>
</li>
<li>
<a href="#4134081642-opening-a-font">Opening a font</a>
</li>
<li>
<a href="#3515439192-creating-a-graphical-context">Creating a graphical context</a>
</li>
<li>
<a href="#2863200396-creating-the-window">Creating the window</a>
</li>
<li>
<a href="#577694983-mapping-the-window">Mapping the window</a>
</li>
<li>
<a href="#677275119-polling-for-server-messages">Polling for server messages</a>
</li>
<li>
<a href="#3433791877-drawing-text">Drawing text</a>
</li>
<li>
<a href="#1770781618-the-end">The end</a>
</li>
<li>
<a href="#1512890027-addendum-the-full-code">Addendum: the full code</a>
</li>
</ul>
<p><em>Discussions: <a href="https://news.ycombinator.com/item?id=36153237">Hacker News</a>, <a href="https://old.reddit.com/r/programming/comments/13xgbk6/learn_x8664_assembly_by_writing_a_gui_from_scratch/">r/programming</a>, <a href="https://lobste.rs/s/dvtzfl/learn_x86_64_assembly_by_writing_gui_from">Lobsters</a>.</em></p>
<p>Most people think assembly is only to be used to write toy programs for learning purposes, or to write a highly optimized version of a specific function inside a codebase written in a high-level language.</p>
<p>Well, what if we wrote a whole program in assembly that opens a GUI window? It will be the hello world of the GUI world, but that still counts. Here is what we are working towards:</p>
<p><img src="x11_x64_final.png" alt="Result" /></p>
<p>I wanted to expand my knowledge of assembly by doing something fun and motivating. It all originated from the observation that so many program binaries today are very big, often over 30 MiB (!), and I asked myself: how small can a binary be for a (very simplistic) GUI? Well, it turns out, very small. Spoiler alert: around 1 KiB!</p>
<blockquote>
<p>I am by no means an expert in assembly or in X11. I just hope to provide an entertaining, approachable article, something a beginner can understand. Something I wished I had found when I was learning those topics. If you spot an error, please open a <a href="https://github.com/gaultier/blog">Github issue</a>!</p>
</blockquote>
<p><em>Note: Authentication is optional in the X11 protocol, but some X11 servers e.g. XWayland require it. Authentication is skipped here and is handled in a separate <a href="/blog/write_a_video_game_from_scratch_like_1987.html#authentication">article</a>.</em></p>
<h2 id="3018859686-what-do-we-need">
<a class="title" href="#3018859686-what-do-we-need">What do we need?</a>
<a class="hash-anchor" href="#3018859686-what-do-we-need" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>I will be using the <code>nasm</code> assembler which is simple, cross-platform, fast, and has quite a readable syntax.</p>
<p>For the GUI, I will be using X11 since I am based on Linux and it has some interesting properties that make it easy to do without external libraries. If you are running Wayland, it should work with XWayland out of the box (<em>EDIT: After testing it, I can confirm it does work</em>), and perhaps also on macOS with XQuartz, but I have not tested those (for macOS, remember to tell <code>nasm</code> to use the <code>macho64</code> format, since macOS does not use the ELF format! Also, the stock linker on macOS does not support <code>-static</code>.).</p>
<p>Note that the only difference between *nix operating systems in the context of this program is the system call values. Since I am based on Linux I will be using the Linux system call values, but 'porting' this program to, say, FreeBSD, would only require changing those values, possibly using the <code>nasm</code> macros:</p>
<pre><code class="language-x86asm">%ifdef linux
%define SYSCALL_EXIT 60
%elifdef freebsd
%define SYSCALL_EXIT 1
%endif
</code></pre>
<blockquote>
<p><code>%define</code> and its variants are part of the macro system in <code>nasm</code>, which is powerful but we will only use it here to define constants, just like in C: <code>#define FOO 3</code>.</p>
</blockquote>
<p>No need for additional tooling to cross-compile, issues with dynamic libraries, libc differences, etc. Just compile on Linux by defining the right variable on the command line, send the binary to your friend on FreeBSD, and it just works(tm). That's refreshing.</p>
<blockquote>
<p>Some readers have rightfully pointed out that Linux is the only mainstream operating system that officially provides a stable userland ABI; other OSes often break their ABI from (major) version to version and recommend all programs to link to a library (e.g. <code>libSystem</code> in the case of macOS). That layer guarantees API stability, and acts as an insulation layer from breaking changes in the ABI. In practice, for common system calls such as the ones we use here, they very rarely break, but doing more exotic things may break in the future. That actually happened to the Go project in the past on macOS! The solution if that happens is to simply recompile the program on the new version of the OS.</p>
</blockquote>
<p>So let's dive in!</p>
<h2 id="2049729589-x11-basics">
<a class="title" href="#2049729589-x11-basics">X11 basics</a>
<a class="hash-anchor" href="#2049729589-x11-basics" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>X11 is a server accessible over the network that handles windowing and rendering inside those windows. A client opens a socket, connects to the server, and sends commands in a specific format to open a window, draw shapes, text, etc. The server sends messages about errors or events to the client.</p>
<p>Most applications will want to use <code>libX11</code> or <code>libxcb</code> which offer a C API, but we want to do that ourselves.</p>
<p>Where the server lives is actually not relevant for a client, it might run on the same machine or in a data center far far away. Of course, in the context of a desktop computer in 2023, it will be running on the same machine, but that's a detail.</p>
<p>The <a href="https://www.x.org/releases/X11R7.7/doc/xproto/x11protocol.html">official documentation</a> is pretty good, so when in doubt we can refer to it.</p>
<h2 id="1992549332-main-in-x64-assembly">
<a class="title" href="#1992549332-main-in-x64-assembly">Main in x64 assembly</a>
<a class="hash-anchor" href="#1992549332-main-in-x64-assembly" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>Let's start slowly with a minimal program that simply exits with 0, and build from there.</p>
<p>First, we tell nasm we are writing a 64 bit program and that we target x86_64. Then, we need a main function, which we call <code>_start</code> and needs to be visible since this is the entry point of our program (hence the <code>global</code> keyword):</p>
<pre><code class="language-x86asm">; Comments start with a semicolon!
BITS 64 ; 64 bits.
CPU X64 ; Target the x86_64 family of CPUs.
section .text
global _start
_start:
xor rax, rax ; Set rax to 0. Not actually needed, it's just to avoid having an empty body.
</code></pre>
<p><code>section .text</code> is telling <code>nasm</code> and the linker, that what follows is code that should be placed in the text section of the executable.</p>
<p>We will soon have a <code>section .data</code> for our global variables.</p>
<p>Note that those sections usually get mapped by the OS to different pages in memory with different permissions (visible with <code>readelf -l</code>) so that the text section is not writable and the data section is not executable, but that varies from OS to OS.</p>
<p>The <code>_start</code> function has a body that does nothing for now, but not for long. The name of the main function is actually up to us, it's just that <code>start</code> or <code>_start</code> is usual.</p>
<p>We build and run our little program like this:</p>
<pre><code class="language-sh">$ nasm -f elf64 -g main.nasm && ld main.o -static -o main
</code></pre>
<p><code>nasm</code> actually only produces an object file, so to get an executable out of it, we need to invoke the linker <code>ld</code>. The flag <code>-g</code> is telling <code>nasm</code> to produce debugging information which is immensely useful when writing raw assembly, since firing up the debugger is often our only recourse in the face of a bug.</p>
<p><em>To remove the debugging information, we can pass <code>-s</code> to the linker, for example when we are about to ship our program and want to save a few KiB.</em></p>
<p>We finally have an executable:</p>
<pre><code class="language-sh">$ file ./main
main: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, with debug_info, not stripped
</code></pre>
<p>We can see the different sections with <code>readelf -a ./main</code>, and it tells us that the <code>.text</code> section, which contains our code, is only 3 bytes long.</p>
<p>Now, if we try to run our program, it will segfault. That's because we are expected by the operating system to exit (using the exit system call) ourselves (otherwise the CPU will keep executing whatever comes after our entry point until it hits an unmapped page, triggering a segfault). That's what libc does for us in C programs, so let's handle that:</p>
<pre><code class="language-x86asm">%define SYSCALL_EXIT 60
global _start:
_start:
mov rax, SYSCALL_EXIT
mov rdi, 0
syscall
</code></pre>
<blockquote>
<p><code>nasm</code> uses the Intel syntax: <code>&lt;instruction&gt; &lt;destination&gt;, &lt;source&gt;</code>, so <code>mov rdi, 0</code> puts 0 into the register <code>rdi</code>. Other assemblers use the AT&amp;T syntax which swaps the source and destination. My advice: pick one syntax and one assembler and stick to it, both syntaxes are fine and most tools have some support for both.</p>
</blockquote>
<p>Following the System V ABI, which is required on Linux and other Unices for system calls, invoking a system call requires us to put the system call code in the register <code>rax</code>, the parameters to the syscall (up to 6) in the registers <code>rdi</code>, <code>rsi</code>, <code>rdx</code>, <code>rcx</code>, <code>r8</code>, <code>r9</code>, and additional parameters, if any, on the stack (which will not happen in this program so we can forget about it).
We then use the instruction <code>syscall</code> and check <code>rax</code> for the return value, <code>0</code> usually meaning: no error.</p>
<p><em>Note that Linux (and perhaps other Unices?) has a 'fun' difference, which is that the fourth parameter of a system call is actually passed using the register <code>r10</code>.</em></p>
<blockquote>
<p>Astute readers have pointed out that this is the case across all OSes and documented in the x86_64 architecture supplement of the System V ABI. The more you know! That's only for system calls, though, regular functions still use <code>rcx</code> for the fourth parameter.</p>
</blockquote>
<blockquote>
<p>Note that the System V ABI is required when making system calls and when interfacing with C but we are free to use whatever conventions we want in our own assembly code. For a long time, Go was using a different calling convention than the System V ABI, for example, when calling functions (passing arguments on the stack). Most tools (debuggers, profilers) expect the System V ABI though, so I recommend sticking to it.</p>
</blockquote>
<p>Back to our program: when we run it, we see...nothing. That's because everything went well, true to the UNIX philosophy!</p>
<p>We can check the exit code:</p>
<pre><code class="language-sh">$ ./main; echo $?
0
</code></pre>
<p>Changing <code>mov rdi, 0</code> to <code>mov rdi, 8</code> will now result in:</p>
<pre><code class="language-sh">$ ./main; echo $?
8
</code></pre>
<p>Another way to observe system calls made by a program is with <code>strace</code>, which will also prove very useful when troubleshooting. On some BSD, its equivalent is <code>truss</code> or <code>dtruss</code>.</p>
<pre><code class="language-sh">$ strace ./main
execve("./main", ["./main"], 0x7ffc60e6bf10 /* 60 vars */) = 0
exit(8) = ?
+++ exited with 8 +++
</code></pre>
<p>Let's change it back to 0 and continue.</p>
<h2 id="2732446636-a-stack-primer">
<a class="title" href="#2732446636-a-stack-primer">A stack primer</a>
<a class="hash-anchor" href="#2732446636-a-stack-primer" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>Before we can continue, we need to know the basics of how the stack works in assembly since we have no friendly compiler to do that for us.</p>
<p><strong>The three most important things about the stack are:</strong></p>
<ul>
<li>It grows downwards: to reserve more space on the stack, we decrease the value of <code>rsp</code></li>
<li>A function must restore the stack pointer to its original value before the function returns, meaning, either remember the original value and set <code>rsp</code> to this, or, match every decrement by an increment of the same value.</li>
<li>Before a function call, the stack pointer needs to be 16 bytes aligned, according to the System V ABI. Also, at the very beginning of a function, the stack pointer value is: <code>16*N + 8</code>. That's because before the function call, its value was 16 byte aligned, i.e. <code>16*N</code>, and the <code>call</code> instruction pushes on the stack the current location (the register <code>rip</code>, which is 8 bytes long), to know where to jump when the called function returns.</li>
</ul>
<p>Not abiding by those rules will result in nasty crashes, so be warned. That's because the location of where to jump when the function returns will likely be overwritten and the program will jump to the wrong location. That, or the stack content will be overwritten and the program will operate on wrong values. Bad either way.</p>
<h3 id="657479577-a-small-stack-example">
<a class="title" href="#657479577-a-small-stack-example">A small stack example</a>
<a class="hash-anchor" href="#657479577-a-small-stack-example" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h3>
<p>Let's write a function that prints <code>hello</code> to the standard out, using the stack, to learn the ropes. An easier way would be to store this static string in the <code>.rodata</code> section, but that would not teach us anything about the stack.</p>
<p>We need to reserve (at least) 5 bytes on the stack, since that's the length in bytes of <code>hello</code>.</p>
<p>The stack looks like this:</p>
<table>
<tr> <td align="left">...</td> </tr>
<tr> <td align="left" >rbp</td> </tr>
<tr> <td align="left" >o</td> </tr>
<tr> <td align="left" >l</td> </tr>
<tr> <td align="left" >l</td> </tr>
<tr> <td align="left" >e</td> </tr>
<tr> <td align="left" >h</td> </tr>
</table>
<p>And <code>rsp</code> points to the bottom of it.</p>
<p>Here's how we access each element:</p>
<table>
<thead>
<tr> <th>Memory location (example)</th> <th>Assembly code</th> <th align="left">Stack element</th> </tr>
</thead>
<tbody>
<tr> <td>0x1016</td> <td></td> <td align="left">...</td> </tr>
<tr> <td>0x1015</td> <td>rsp + 5</td> <td align="left" >rbp</td> </tr>
<tr> <td>0x1014</td> <td>rsp + 4</td> <td align="left" >o</td> </tr>
<tr> <td>0x1013</td> <td>rsp + 3</td> <td align="left" >l</td> </tr>
<tr> <td>0x1012</td> <td>rsp + 2</td> <td align="left" >l</td> </tr>
<tr> <td>0x1011</td> <td>rsp + 1</td> <td align="left" >e</td> </tr>
<tr> <td>0x1010</td> <td>rsp + 0</td> <td align="left" >h</td> </tr>
</tbody>
</table>
<p>We then pass the address on the stack of the beginning of the string to the <code>write</code> syscall, as well as its length:</p>
<pre><code class="language-x86asm">%define SYSCALL_WRITE 1
%define STDOUT 1
print_hello:
push rbp ; Save rbp on the stack to be able to restore it at the end of the function.
mov rbp, rsp ; Set rbp to rsp
sub rsp, 5 ; Reserve 5 bytes of space on the stack.
mov BYTE [rsp + 0], 'h' ; Set each byte on the stack to a string character.
mov BYTE [rsp + 1], 'e'
mov BYTE [rsp + 2], 'l'
mov BYTE [rsp + 3], 'l'
mov BYTE [rsp + 4], 'o'
; Make the write syscall
mov rax, SYSCALL_WRITE
mov rdi, STDOUT ; Write to stdout.
lea rsi, [rsp] ; Address on the stack of the string.
mov rdx, 5 ; Pass the length of the string which is 5.
syscall
add rsp, 5 ; Restore the stack to its original value.
pop rbp ; Restore rbp
ret
</code></pre>
<blockquote>
<p><code>lea destination, source</code> loads the effective address of the source into the destination, which is how C pointers are implemented. To dereference a memory location we use square brackets. So, assuming we just have loaded an address into <code>rdi</code> with <code>lea</code>, e.g. <code>lea rdi, [hello_world]</code>, and we want to store the value at the address into <code>rax</code>, we do: <code>mov rax, [rdi]</code>. We usually have to tell <code>nasm</code> how many bytes to dereference with <code>BYTE</code>, <code>WORD</code>, <code>DWORD</code>, <code>QWORD</code> so: <code>mov eax, DWORD [rdi]</code> (the size must match the destination register, here the 32-bit <code>eax</code>), because <code>nasm</code> does not keep track of the sizes of each variable. That's also what the C compiler does when we dereference an <code>int8_t</code>, <code>int16_t</code>, <code>int32_t</code>, and <code>int64_t</code> pointer, respectively.</p>
</blockquote>
<p>There is a lot to unpack here.</p>
<p>First, what is <code>rbp</code>? That's a register like any other. But, you can choose to follow the convention of not using this register like the other registers, to store arbitrary values, and instead, use it to store a linked list of call frames. That's a lot of words.</p>
<p>Basically, at the very beginning of a function, the value of <code>rbp</code> is stored on the stack (that's <code>push rbp</code>). Since <code>rbp</code> stores an address (the address of the frame that's called us), we are storing on the stack the address of the caller in a known location.</p>
<p>Immediately after that, we set <code>rbp</code> to <code>rsp</code>, that is, to the stack pointer at the beginning of the function. <code>push rbp</code> and <code>mov rbp, rsp</code> are thus usually referred to as the function prolog.</p>
<p>For the rest of the function body, we treat <code>rbp</code> as a constant and only decrease <code>rsp</code> if we need to reserve space on the stack.</p>
<p>So if function A calls function B which in turn calls function C, and each function stores on the stack the address of the caller frame, we know where to find on the stack the address of each. Thus, we can print a stack trace in any location of our program simply by inspecting the stack. Pretty nifty. That's already very useful to profilers and other similar tools.</p>
<p>We must not forget of course, just before we exit the function, to restore <code>rbp</code> to its original value (which is still on the stack at that point): that's <code>pop rbp</code>. This is also known as the function epilog. Another way to look at it is that we remove the last element of the linked list of call frames, since we are exiting the leaf function.</p>
<p>Don't worry if you have not fully understood everything, just remember to always have the function epilogs and prologs and you'll be fine:</p>
<pre><code class="language-x86asm">my_function:
push rbp
mov rbp, rsp
sub rsp, N
[...]
add rsp, N
pop rbp
ret
</code></pre>
<p><strong>Note</strong>: There is an optimization method that uses <code>rbp</code> as a standard register (with a C compiler, that's the flag <code>-fomit-frame-pointer</code>), which means we lose the information about the call stack. My advice is: never do this, it is not worth it.</p>
<blockquote>
<p>Wait, but didn't you say the stack needs to be 16 byte aligned (that is, a multiple of 16)? Last time I checked, 5 is not really a multiple of 16!</p>
</blockquote>
<p>Good catch! The only reason why this program works, is that <code>print_hello</code> is a leaf function, meaning it does not call another function. Remember, the stack needs to be 16 bytes aligned when we do a <code>call</code>!</p>
<p>So the correct way would be:</p>
<pre><code class="language-asm">print_hello:
push rbp
mov rbp, rsp
sub rsp, 16
mov BYTE [rsp + 0], 'h'
mov BYTE [rsp + 1], 'e'
mov BYTE [rsp + 2], 'l'
mov BYTE [rsp + 3], 'l'
mov BYTE [rsp + 4], 'o'
mov rax, SYSCALL_WRITE
mov rdi, STDOUT
lea rsi, [rsp]
mov rdx, 5
syscall
call print_world
add rsp, 16
pop rbp
ret
</code></pre>
<p>Since when we enter the function, the value of <code>rsp</code> is <code>16*N+8</code>, and pushing <code>rbp</code> decreases it by 8, the stack pointer is 16 bytes aligned at the point of <code>sub rsp, 16</code>. Decrementing it by 16 (or a multiple of 16) keeps it 16 bytes aligned.</p>
<p>We now can safely call another function from within <code>print_hello</code>:</p>
<pre><code class="language-x86asm">print_world:
push rbp
mov rbp, rsp
sub rsp, 16
mov BYTE [rsp + 0], ' '
mov BYTE [rsp + 1], 'w'
mov BYTE [rsp + 2], 'o'
mov BYTE [rsp + 3], 'r'
mov BYTE [rsp + 4], 'l'
mov BYTE [rsp + 5], 'd'
mov rax, SYSCALL_WRITE
mov rdi, STDOUT
lea rsi, [rsp]
mov rdx, 6
syscall
add rsp, 16
pop rbp
ret
print_hello:
push rbp
mov rbp, rsp
sub rsp, 16
mov BYTE [rsp + 0], 'h'
mov BYTE [rsp + 1], 'e'
mov BYTE [rsp + 2], 'l'
mov BYTE [rsp + 3], 'l'
mov BYTE [rsp + 4], 'o'
mov rax, SYSCALL_WRITE
mov rdi, STDOUT
lea rsi, [rsp]
mov rdx, 5
syscall
call print_world
add rsp, 16
pop rbp
ret
</code></pre>
<p>And we get <code>hello world</code> as an output.</p>
<p>Now, try to do <code>sub rsp, 5</code> in <code>print_hello</code>, and your program <em>may</em> crash. There is no guarantee, that's what makes it hard to track down.</p>
<p>My advice is:</p>
<ul>
<li>Always use the standard function prologs and epilogs</li>
<li>Always increment/decrement <code>rsp</code> by (a multiple of) 16</li>
<li>Address items on the stack relative to <code>rsp</code>, i.e. <code>mov BYTE [rsp + 4], 'o'</code></li>
<li>If you have to decrement <code>rsp</code> by a value that's unknown at compile time (similar to how <code>alloca()</code> works in C), you can <code>and rsp, -16</code> to 16 bytes align it.</li>
</ul>
<p>And you'll be safe.</p>
<p>The last point is interesting, see for yourself:</p>
<pre><code class="language-shell">(gdb) p -100 & -16
$1 = -112
(gdb) p -112 & -16
$2 = -112
</code></pre>
<p>Which translates in assembly to:</p>
<pre><code class="language-asm">sub rsp, 100
and rsp, -16
</code></pre>
<p>Finally, following those conventions means that our assembly functions can be safely called from C or other languages following the <a href="https://wiki.osdev.org/System_V_ABI">System V ABI</a>, without any modification, which is great.</p>
<p><em>I have not talked about the red zone which is a 128 byte region at the bottom of the stack which our program is free to use as it pleases without having to change the stack pointer. In my opinion, it is not helpful and creates hard to track bugs, so I do not recommend to use it. To disable it entirely, run: <code>nasm -f elf64 -g main.nasm && cc main.o -static -o main -mno-red-zone -nostdlib</code></em>.</p>
<h2 id="4163415294-opening-a-socket">
<a class="title" href="#4163415294-opening-a-socket">Opening a socket</a>
<a class="hash-anchor" href="#4163415294-opening-a-socket" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>We now are ready to open a socket with the <code>socket(2)</code> syscall, so we add a few constants, taken from the libc headers (<em>note that those values might actually be different on a different Unix, I have not checked. Again, a few <code>%ifdef</code> can easily remedy this discrepancy</em>):</p>
<pre><code class="language-x86asm">%define AF_UNIX 1
%define SOCK_STREAM 1
%define SYSCALL_SOCKET 41
</code></pre>
<p>The <code>AF_UNIX</code> constant means we want a Unix domain socket, and <code>SOCK_STREAM</code> means <a href="https://en.wikipedia.org/wiki/Unix_domain_socket">stream-oriented</a>. We use a domain socket since we know that our server is running on the same machine and it should be faster, but we could change it to <code>AF_INET</code> to connect to a remote IPv4 address for example.</p>
<p>We then fill the relevant registers with those values and invoke the system call:</p>
<pre><code class="language-x86asm"> mov rax, SYSCALL_SOCKET
mov rdi, AF_UNIX ; Unix socket.
mov rsi, SOCK_STREAM ; Stream oriented.
mov rdx, 0 ; Automatic protocol.
syscall
</code></pre>
<p>The C equivalent would be: <code>socket(AF_UNIX, SOCK_STREAM, 0);</code>. So you see that if we fill the registers in the same order as the C function parameters, we stay close to what C code would do.</p>
<p>The whole program now looks like this:</p>
<pre><code class="language-x86asm">BITS 64 ; 64 bits.
CPU X64 ; Target the x86_64 family of CPUs.
section .text
%define AF_UNIX 1
%define SOCK_STREAM 1
%define SYSCALL_SOCKET 41
%define SYSCALL_EXIT 60
global _start:
_start:
; open a unix socket.
mov rax, SYSCALL_SOCKET
mov rdi, AF_UNIX ; Unix socket.
mov rsi, SOCK_STREAM ; Stream oriented.
mov rdx, 0 ; automatic protocol.
syscall
; The end.
mov rax, SYSCALL_EXIT
mov rdi, 0
syscall
</code></pre>
<p>Building and running it under <code>strace</code> shows that it works and we get a socket with the file descriptor <code>3</code> (in this case, it might be different for you if you are following at home):</p>
<pre><code class="language-sh">$ nasm -f elf64 -g main.nasm && ld main.o -static -o main
$ strace ./main
execve("./main", ["./main"], 0x7ffe54dfe550 /* 60 vars */) = 0
socket(AF_UNIX, SOCK_STREAM, 0) = 3
exit(0) = ?
+++ exited with 0 +++
</code></pre>
<h2 id="2750592591-connecting-to-the-server">
<a class="title" href="#2750592591-connecting-to-the-server">Connecting to the server</a>
<a class="hash-anchor" href="#2750592591-connecting-to-the-server" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>Now that we have created a socket, we can connect to the server with the <code>connect(2)</code> system call.</p>
<p>It's a good time to extract that logic in its own little function, just like in any other high-level language.</p>
<pre><code class="language-x86asm">x11_connect_to_server:
; TODO
</code></pre>
<p>In assembly, a function is simply a label we can jump to. But for clarity, both for readers of the code and tools, we can add a hint that this is a real function we can call, like this: <code>call x11_connect_to_server</code>. This will improve the call stack for example when using <code>strace -k</code>. This hint has the form (in <code>nasm</code>): <code>static <name of the function>:function</code>.</p>
<p>Of course, we also need to add our standard function prolog and epilog:</p>
<pre><code class="language-x86asm">x11_connect_to_server:
static x11_connect_to_server:function
push rbp
mov rbp, rsp
pop rbp
ret
</code></pre>
<p>An additional help when reading functions in assembly code is adding comments describing what parameters they accept and what is the return value, if any. Since there is no language level feature for this, we resort to comments:</p>
<pre><code class="language-x86asm">; Create a UNIX domain socket and connect to the X11 server.
; @returns The socket file descriptor.
x11_connect_to_server:
static x11_connect_to_server:function
push rbp
mov rbp, rsp
pop rbp
ret
</code></pre>
<p>First, let's move the socket creation logic to our function and call it in the program:</p>
<pre><code class="language-x86asm">; Create a UNIX domain socket and connect to the X11 server.
; @returns The socket file descriptor.
x11_connect_to_server:
static x11_connect_to_server:function
push rbp
mov rbp, rsp
; Open a Unix socket: socket(2).
mov rax, SYSCALL_SOCKET
mov rdi, AF_UNIX ; Unix socket.
mov rsi, SOCK_STREAM ; Stream oriented.
mov rdx, 0 ; Automatic protocol.
syscall
cmp rax, 0
jle die
mov rdi, rax ; Store socket fd in `rdi` for the remainder of the function.
pop rbp
ret
die:
mov rax, SYSCALL_EXIT
mov rdi, 1
syscall
_start:
global _start:function
call x11_connect_to_server
; The end.
mov rax, SYSCALL_EXIT
mov rdi, 0
syscall
</code></pre>
<p>The error checking is very simplistic: we only check that the return value of the system call (in <code>rax</code>) is what we expect, otherwise we exit the program with a non-zero code by jumping to the <code>die</code> label.</p>
<blockquote>
<p><code>jle</code> is a conditional jump, which inspects global flags, hopefully set just before with <code>cmp</code> or <code>test</code>, and jumps to a label if the condition is true. Here, we compare the returned value with 0, and if it is lower or equal to 0, we jump to the error label. That's how we implement conditionals and loops.</p>
</blockquote>
<hr />
<p>Ok, we can finally connect to the server now. The <code>connect(2)</code> system call takes the address of a <code>sockaddr_un</code> structure as the second argument. This structure is too big to fit in a register.</p>
<p>This is the first syscall we encounter that needs to be passed a pointer, in other words, the address of a region in memory. That region can be on the stack or on the heap, or even be our own executable mapped in memory. That's assembly, we get to do whatever we want.</p>
<p>Since we want to keep things simple and fast, we will store everything in this program on the stack. And since we have 8 MiB of it (according to <code>limit</code>, on my machine, that is), it'll be plenty enough. Actually, the most space we will need on the stack in this program will be 32 KiB.</p>
<p>The size of the <code>sockaddr_un</code> structure is 110 bytes, so we reserve 112 to align <code>rsp</code> to 16 bytes.</p>
<blockquote>
<p>Nasm does have structs, but they are rather a way to define offsets with a name, than structures like in C with a specific syntax to address a specific field. For the sake of simplicity, I'll use the manual way, without <code>nasm</code> structs.</p>
</blockquote>
<p>We set the first 2 bytes of this structure to <code>AF_UNIX</code> since this is a domain socket. Then comes the path of the Unix domain socket which X11 expects to be in a certain format. We want to display our window on the first monitor starting at 0, so the string is: <code>/tmp/.X11-unix/X0</code>.</p>
<p>In C, we would do:</p>
<pre><code class="language-c"> const sockaddr_un addr = {.sun_family = AF_UNIX,
.sun_path = "/tmp/.X11-unix/X0"};
const int res =
connect(x11_socket_fd, (const struct sockaddr *)&addr, sizeof(addr));
</code></pre>
<p>How do we translate that to assembly, especially the string part?</p>
<p>We could set each byte to each character of the string in the structure, on the stack, manually, one by one. Another <a href="https://en.wikibooks.org/wiki/X86_Assembly/Data_Transfer#Move_String">way</a> to do it is to use the <code>rep movsb</code> idiom, which instructs the CPU to copy a character from a string A to another string B, N times. This is exactly what we need!</p>
<p>The way it works is:</p>
<ul>
<li>We put the string in the <code>.rodata</code> section (same as the data section but read-only)</li>
<li>We load its address in <code>rsi</code> (it's the source)</li>
<li>We load the address of the string in the structure on the stack in <code>rdi</code> (it's the destination)</li>
<li>We set <code>rcx</code> to the number of bytes to be copied</li>
<li>We use <code>cld</code> to clear the <code>DF</code> flag to ensure the copy is done forwards (since it can also be done backwards)</li>
<li>We execute <code>rep movsb</code> and voila</li>
</ul>
<p>It's basically <code>memcpy</code> from C.</p>
<blockquote>
<p>This is an interesting case: we can see that some instructions expect some of their operands to be in certain registers and there is no way around it. So, we have to plan ahead and expect those registers to be overwritten. If we need to keep their original values around, we have to store those values elsewhere, for example on the stack (that's called spilling) or in other registers. This is a broader topic of register allocation which is NP-hard! In small functions, it's manageable though.</p>
</blockquote>
<p>First, the <code>.rodata</code> section:</p>
<pre><code class="language-x86asm">section .rodata
sun_path: db "/tmp/.X11-unix/X0", 0
static sun_path:data
</code></pre>
<p>Then we copy the string:</p>
<pre><code class="language-x86asm"> mov WORD [rsp], AF_UNIX ; Set sockaddr_un.sun_family to AF_UNIX
; Fill sockaddr_un.sun_path with: "/tmp/.X11-unix/X0".
lea rsi, sun_path
mov r12, rdi ; Save the socket file descriptor in `rdi` in `r12`.
lea rdi, [rsp + 2]
cld ; Move forward
mov ecx, 19 ; Length is 19 with the null terminator.
rep movsb ; Copy.
</code></pre>
<blockquote>
<p><code>ecx</code> is the 32 bit form of the register <code>rcx</code>. <a href="https://wiki.osdev.org/CPU_Registers_x86-64">This handy table</a> lists all of the forms for all of the registers. On x86_64, writing to the 32 bit form of a register automatically sets the upper 32 bits of the 64 bit register to 0, so <code>mov ecx, 19</code> is safe here. But be cautious of the pitfall case of only setting a value in the 8 or 16 bit form of a register (e.g. <code>cl</code> or <code>cx</code>), and then using the whole register later. The rest of the bits that have not been set will contain some past value, which is hard to troubleshoot. The solution is to use <code>movzx</code> to zero extend, meaning setting the rest of the bits to 0. A good way to visualize this is to use <code>info registers</code> within gdb, and that will display for each register the value for each of its forms, e.g. for <code>rcx</code>, it will display the value for <code>rcx</code>, <code>ecx</code>, <code>cx</code>, <code>ch</code>, <code>cl</code>.</p>
</blockquote>
<p>Then, we do the syscall, check the returned value, exit the program if the value is not 0, and finally return the socket file descriptor, which will be used every time in the rest of the program when talking to the X11 server.</p>
<p>Everything together, it looks like:</p>
<pre><code class="language-x86asm">; Create a UNIX domain socket and connect to the X11 server.
; @returns The socket file descriptor.
x11_connect_to_server:
static x11_connect_to_server:function
push rbp
mov rbp, rsp
; Open a Unix socket: socket(2).
mov rax, SYSCALL_SOCKET
mov rdi, AF_UNIX ; Unix socket.
mov rsi, SOCK_STREAM ; Stream oriented.
mov rdx, 0 ; Automatic protocol.
syscall
cmp rax, 0
jle die
mov rdi, rax ; Store socket fd in `rdi` for the remainder of the function.
sub rsp, 112 ; Store struct sockaddr_un on the stack.
mov WORD [rsp], AF_UNIX ; Set sockaddr_un.sun_family to AF_UNIX
; Fill sockaddr_un.sun_path with: "/tmp/.X11-unix/X0".
lea rsi, sun_path
mov r12, rdi ; Save the socket file descriptor in `rdi` in `r12`.
lea rdi, [rsp + 2]
cld ; Move forward
mov ecx, 19 ; Length is 19 with the null terminator.
rep movsb ; Copy.
; Connect to the server: connect(2).
mov rax, SYSCALL_CONNECT
mov rdi, r12
lea rsi, [rsp]
%define SIZEOF_SOCKADDR_UN 2+108
mov rdx, SIZEOF_SOCKADDR_UN
syscall
cmp rax, 0
jne die
mov rax, rdi ; Return the socket fd.
add rsp, 112
pop rbp
ret
</code></pre>
<p>We are ready to talk to the X11 server!</p>
<h2 id="484246098-sending-data-over-the-socket">
<a class="title" href="#484246098-sending-data-over-the-socket">Sending data over the socket</a>
<a class="hash-anchor" href="#484246098-sending-data-over-the-socket" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>There is the <code>send(2)</code> syscall to do this, but we can keep it simple and use the generic <code>write(2)</code> syscall instead. Either way works.</p>
<pre><code class="language-x86asm">%define SYSCALL_WRITE 1
</code></pre>
<p>The C structure for the handshake in the case of success looks like this:</p>
<pre><code class="language-c">typedef struct {
u8 order;
u8 pad1;
u16 major, minor;
u16 auth_proto_len, auth_data_len;
u16 pad2;
// Optionally, authorization information follow, if `auth_proto_len` and `auth_data_len` are not 0.
} x11_connection_req_t;
</code></pre>
<p><code>pad*</code> fields can be ignored since they are padding and their value is not read by the server.</p>
<p>For our handshake, we need to set the <code>order</code> to be <code>l</code>, that is, little-endian, since X11 can be told to interpret messages as big or little endian. Since x64 is little-endian, we do not want to have an endianness translation layer and so we stick to little-endian.</p>
<p>We also need to set the <code>major</code> field, which is the version, to <code>11</code>. I'll leave it to the reader to guess why.</p>
<p>In C, we would do:</p>
<pre><code class="language-c"> x11_connection_req_t req = {.order = 'l', .major = 11};
</code></pre>
<p>This structure is only 12 bytes long, since we do not use authorization (we leave all subsequent fields after the <code>minor</code> field as 0).</p>
<p>But since we will have to read the response from the server which is quite big (around 14 KiB during my testing), we will right away reserve a lot of space on the stack, 32 KiB, to be safe:</p>
<pre><code class="language-x86asm"> sub rsp, 1<<15
mov BYTE [rsp + 0], 'l' ; Set order to 'l'.
mov WORD [rsp + 2], 11 ; Set major version to 11.
</code></pre>
<p>Then we send it to the server:</p>
<pre><code class="language-x86asm"> ; Send the handshake to the server: write(2).
mov rax, SYSCALL_WRITE
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 12
syscall
cmp rax, 12 ; Check that all bytes were written.
jnz die
</code></pre>
<p>After that, we read the server response, which should be at first 8 bytes:</p>
<pre><code class="language-x86asm"> ; Read the server response: read(2).
; Use the stack for the read buffer.
; The X11 server first replies with 8 bytes. Once these are read, it replies with a much bigger message.
mov rax, SYSCALL_READ
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 8
syscall
cmp rax, 8 ; Check that the server replied with 8 bytes.
jnz die
cmp BYTE [rsp], 1 ; Check that the server sent 'success' (first byte is 1).
jnz die
</code></pre>
<p>The first byte in the server response is <code>0</code> for failure and <code>1</code> for success (and <code>2</code> for authentication but we will not need it here).</p>
<p>The server sends a big message with a lot of general information, which we will need for later, so we store certain fields in global variables located in the data section.</p>
<p>First we add those variables, each 4 bytes big:</p>
<pre><code class="language-x86asm">section .data
id: dd 0
static id:data
id_base: dd 0
static id_base:data
id_mask: dd 0
static id_mask:data
root_visual_id: dd 0
static root_visual_id:data
</code></pre>
<p>Then we read the server response, and skip over the parts we are not interested in. This boils down to incrementing a pointer by a dynamic value, a few times. Note that since we do not do any checks here, that would be a great attack vector to trigger a stack overflow or such in our program.</p>
<pre><code class="language-x86asm"> ; Read the rest of the server response: read(2).
; Use the stack for the read buffer.
mov rax, SYSCALL_READ
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 1<<15
syscall
cmp rax, 0 ; Check that the server replied with something.
jle die
; Set id_base globally.
mov edx, DWORD [rsp + 4]
mov DWORD [id_base], edx
; Set id_mask globally.
mov edx, DWORD [rsp + 8]
mov DWORD [id_mask], edx
; Read the information we need, skip over the rest.
lea rdi, [rsp] ; Pointer that will skip over some data.
mov cx, WORD [rsp + 16] ; Vendor length (v).
movzx rcx, cx
mov al, BYTE [rsp + 21]; Number of formats (n).
movzx rax, al ; Fill the rest of the register with zeroes to avoid garbage values.
imul rax, 8 ; sizeof(format) == 8
add rdi, 32 ; Skip the connection setup
add rdi, rcx ; Skip over the vendor information (v).
; Skip over padding.
add rdi, 3
and rdi, -4
add rdi, rax ; Skip over the format information (n*8).
mov eax, DWORD [rdi] ; Store (and return) the window root id.
; Set the root_visual_id globally.
mov edx, DWORD [rdi + 32]
mov DWORD [root_visual_id], edx
</code></pre>
<hr />
<p>A small aside about padding, <a href="https://github.com/gaultier/blog/issues/6">thanks to a perspicacious reader</a>:</p>
<p>How we skip padding is the only bit of smartness we allow ourselves: some fields in the X11 protocol have a variable length. But the X11 protocol counts everything in units of '4 bytes'.</p>
<p>Meaning, if a field is only 5 bytes long, per the protocol, there will be 3 bytes of padding (which should be skipped over by the application), so that the field occupies 2 units of 4 bytes (it is 4 bytes-aligned).</p>
<p>How do we do that then? The specification uses some division and modulo operations, but those are annoying to do in assembly. We can do better.</p>
<p><code>libX11</code> uses this macro:</p>
<pre><code class="language-c">#define ROUNDUP(nbytes, pad) (((nbytes) + ((pad)-1)) & ~(long)((pad)-1))
</code></pre>
<p>And it should be used so:</p>
<pre><code class="language-c">assert(ROUNDUP(0, 4) == 0);
assert(ROUNDUP(1, 4) == 4);
assert(ROUNDUP(2, 4) == 4);
assert(ROUNDUP(3, 4) == 4);
assert(ROUNDUP(4, 4) == 4);
assert(ROUNDUP(5, 4) == 8);
// etc
</code></pre>
<p>This works, but is kind of complex. If we look at the output when compiling this code, we see that <code>gcc</code> smartly optimizes this macro down to:</p>
<pre><code class="language-x86asm"> add eax, 3
and eax, -4
</code></pre>
<p>So we use this form.</p>
<hr />
<p>All together:</p>
<pre><code class="language-x86asm">; Send the handshake to the X11 server and read the returned system information.
; @param rdi The socket file descriptor
; @returns The window root id (uint32_t) in rax.
x11_send_handshake:
static x11_send_handshake:function
push rbp
mov rbp, rsp
sub rsp, 1<<15
mov BYTE [rsp + 0], 'l' ; Set order to 'l'.
mov WORD [rsp + 2], 11 ; Set major version to 11.
; Send the handshake to the server: write(2).
mov rax, SYSCALL_WRITE
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 12
syscall
cmp rax, 12 ; Check that all bytes were written.
jnz die
; Read the server response: read(2).
; Use the stack for the read buffer.
; The X11 server first replies with 8 bytes. Once these are read, it replies with a much bigger message.
mov rax, SYSCALL_READ
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 8
syscall
cmp rax, 8 ; Check that the server replied with 8 bytes.
jnz die
cmp BYTE [rsp], 1 ; Check that the server sent 'success' (first byte is 1).
jnz die
; Read the rest of the server response: read(2).
; Use the stack for the read buffer.
mov rax, SYSCALL_READ
mov rdi, rdi
lea rsi, [rsp]
mov rdx, 1<<15
syscall
cmp rax, 0 ; Check that the server replied with something.
jle die
; Set id_base globally.
mov edx, DWORD [rsp + 4]
mov DWORD [id_base], edx
; Set id_mask globally.
mov edx, DWORD [rsp + 8]
mov DWORD [id_mask], edx
; Read the information we need, skip over the rest.
lea rdi, [rsp] ; Pointer that will skip over some data.
mov cx, WORD [rsp + 16] ; Vendor length (v).
movzx rcx, cx
mov al, BYTE [rsp + 21]; Number of formats (n).
movzx rax, al ; Fill the rest of the register with zeroes to avoid garbage values.
imul rax, 8 ; sizeof(format) == 8
add rdi, 32 ; Skip the connection setup
add rdi, rcx ; Skip over the vendor information (v).
; Skip over padding.
add rdi, 3
and rdi, -4
add rdi, rax ; Skip over the format information (n*8).
mov eax, DWORD [rdi] ; Store (and return) the window root id.
; Set the root_visual_id globally.
mov edx, DWORD [rdi + 32]
mov DWORD [root_visual_id], edx
add rsp, 1<<15
pop rbp
ret
</code></pre>
<blockquote>
<p>From this point on, I will assume you are familiar with the basics of assembly and X11 and will not go as much into details.</p>
</blockquote>
<h2 id="1309822244-generating-ids">
<a class="title" href="#1309822244-generating-ids">Generating ids</a>
<a class="hash-anchor" href="#1309822244-generating-ids" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>When creating resources on the server-side, we usually first generate an id on the client side, and send that id to the server when creating the resource.</p>
<p>We store the current id in a global variable and increment it each time a new id is generated.</p>
<p>This is how we do it:</p>
<pre><code class="language-x86asm">; Increment the global id.
; @return The new id.
x11_next_id:
static x11_next_id:function
push rbp
mov rbp, rsp
mov eax, DWORD [id] ; Load global id.
mov edi, DWORD [id_base] ; Load global id_base.
mov edx, DWORD [id_mask] ; Load global id_mask.
; Return: id_mask & (id) | id_base
and eax, edx
or eax, edi
add DWORD [id], 1 ; Increment id.
pop rbp
ret
</code></pre>
<h2 id="4134081642-opening-a-font">
<a class="title" href="#4134081642-opening-a-font">Opening a font</a>
<a class="hash-anchor" href="#4134081642-opening-a-font" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>To open a font, which is a prerequisite to draw text, we send a message to the server specifying (part of) the name of the font we want, and the server will select a matching font.</p>
<p>To play with another font, you can use <code>xfontsel</code> which displays all the font names that the X11 server knows about.</p>
<p>First, we generate an id for the font locally, and then we send it alongside the font name.</p>
<pre><code class="language-x86asm">; Open the font on the server side.
; @param rdi The socket file descriptor.
; @param esi The font id.
x11_open_font:
static x11_open_font:function
push rbp
mov rbp, rsp
%define OPEN_FONT_NAME_BYTE_COUNT 5
%define OPEN_FONT_PADDING ((4 - (OPEN_FONT_NAME_BYTE_COUNT % 4)) % 4)
%define OPEN_FONT_PACKET_U32_COUNT (3 + (OPEN_FONT_NAME_BYTE_COUNT + OPEN_FONT_PADDING) / 4)
%define X11_OP_REQ_OPEN_FONT 0x2d
sub rsp, 6*8
mov DWORD [rsp + 0*4], X11_OP_REQ_OPEN_FONT | (OPEN_FONT_NAME_BYTE_COUNT << 16)
mov DWORD [rsp + 1*4], esi
mov DWORD [rsp + 2*4], OPEN_FONT_NAME_BYTE_COUNT
mov BYTE [rsp + 3*4 + 0], 'f'
mov BYTE [rsp + 3*4 + 1], 'i'
mov BYTE [rsp + 3*4 + 2], 'x'
mov BYTE [rsp + 3*4 + 3], 'e'
mov BYTE [rsp + 3*4 + 4], 'd'
mov rax, SYSCALL_WRITE
mov rdi, rdi
lea rsi, [rsp]
mov rdx, OPEN_FONT_PACKET_U32_COUNT*4
syscall
cmp rax, OPEN_FONT_PACKET_U32_COUNT*4
jnz die
add rsp, 6*8
pop rbp
ret
</code></pre>
<h2 id="3515439192-creating-a-graphical-context">
<a class="title" href="#3515439192-creating-a-graphical-context">Creating a graphical context</a>
<a class="hash-anchor" href="#3515439192-creating-a-graphical-context" aria-hidden="true" onclick="navigator.clipboard.writeText(this.href);"></a>
</h2>
<p>Since an application in X11 can have multiple windows, we first need to create a graphical context containing the general information. When we create a window, we refer to this graphical context by id.</p>
<p>Again, we need to generate an id for the graphical context to be.</p>
<p>X11 stores a hierarchy of windows, so when creating the graphical context, we also need to give it the root window id (i.e. the parent id).</p>
<pre><code class="language-x86asm">; Create a X11 graphical context.
; @param rdi The socket file descriptor.
; @param esi The graphical context id.
; @param edx The window root id.
; @param ecx The font id.
x11_create_gc:
static x11_create_gc:function
push rbp
mov rbp, rsp
sub rsp, 8*8
%define X11_OP_REQ_CREATE_GC 0x37
%define X11_FLAG_GC_BG 0x00000004