forked from synopse/SynPDF
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SynCrypto.pas
5893 lines (5537 loc) · 195 KB
/
SynCrypto.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/// fast cryptographic routines (hashing and cypher)
// - implements AES, XOR, ADLER32, MD5, RC4, SHA1, SHA256 algorithms
// - optimized for speed (tuned assembler and AES-NI / PADLOCK support)
// - this unit is a part of the freeware Synopse mORMot framework,
// licensed under a MPL/GPL/LGPL tri-license; version 1.18
unit SynCrypto;
(*
This file is part of Synopse framework.
Synopse framework. Copyright (C) 2015 Arnaud Bouchez
Synopse Informatique - http://synopse.info
*** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the License.
The Original Code is Synopse mORMot framework.
The Initial Developer of the Original Code is Arnaud Bouchez.
Portions created by the Initial Developer are Copyright (C) 2015
the Initial Developer. All Rights Reserved.
Contributor(s):
- Wolfgang Ehrhardt under zlib license for AES "pure pascal" versions
- Intel's sha256_sse4.asm under a three-clause Open Software license
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
Synopse Cryptographic routines
==============================
- fastest ever 100% Delphi (and asm ;) code
- AES Crypto(128,192,256 bits key) with optimized asm version
and multi-threaded code for multi-core CPU for blocks > 512 KB
- XOR Crypto (32 bits key) - very fast with variable or fixed key
- RC4 Crypto - weak, but simple and standard (used e.g. by SynPdf)
- ADLER32 - 32 bits fast Hash with optimized asm version
- MD5 - standard fast 128 bits Hash
- SHA1 - 160 bits Secure Hash
- SHA256 - 256 bits Secure Hash with optimized asm version
- VIA PADLOCK optional support - native .o code on linux or .dll (Win32)
(tested on a Dedibox C7 (rev1) linux server - need validation for Win32)
- Microsoft AES Cryptographic Provider optional support via CryptoAPI
Source code licensed under the MPL:
see http://www.mozilla.org/MPL/MPL-1.1.html
Benchmark on my AMD-64 TL-56 dualcore-CPU:
==========================================
Testing with blocks of 16KB each
crc32 624 MB/s
adler32 pas 571 MB/s asm 1304 MB/s
MD5 176 MB/s
SHA1 101 MB/s
SHA256 63 MB/s
AES128 cypher 84 MB/s uncypher 81 MB/s asm version
AES128 cypher 57 MB/s uncypher 57 MB/s pascal version
AES192 cypher 72 MB/s uncypher 70 MB/s asm version
AES192 cypher 48 MB/s uncypher 48 MB/s pascal version
AES256 cypher 62 MB/s uncypher 61 MB/s asm version
AES256 cypher 42 MB/s uncypher 42 MB/s pascal version
XorBlock 3463 MB/s (very fast, since with 16KB data remain in L2 cache)
XorOffset 3425 MB/s
XorConst 5940 MB/s (even faster, since no table used -> all in L1 cache)
Testing with blocks of 1024KB each (for AES: block >512KB -> uses dualcore)
crc32 577 MB/s
adler32 pas 529 MB/s asm 1003 MB/s
MD5 176 MB/s
SHA1 100 MB/s
SHA256 63 MB/s
AES128 cypher 129 MB/s uncypher 130 MB/s asm version
AES128 cypher 96 MB/s uncypher 95 MB/s pascal version
AES192 cypher 107 MB/s uncypher 114 MB/s asm version
AES192 cypher 83 MB/s uncypher 85 MB/s pascal version
AES256 cypher 98 MB/s uncypher 105 MB/s asm version
AES256 cypher 76 MB/s uncypher 76 MB/s pascal version
XorBlock 1423 MB/s (we reach the memory control bandwidth)
XorOffset 1325 MB/s
XorConst 1506 MB/s
Testing with blocks of 4096KB each (for AES: block >512KB -> uses dualcore)
crc32 578 MB/s
adler32 pas 525 MB/s asm 984 MB/s
MD5 175 MB/s
SHA1 100 MB/s
SHA256 63 MB/s
AES128 cypher 159 MB/s uncypher 147 MB/s asm version
AES128 cypher 107 MB/s uncypher 109 MB/s pascal version
AES192 cypher 134 MB/s uncypher 128 MB/s asm version
AES192 cypher 90 MB/s uncypher 92 MB/s pascal version
AES256 cypher 118 MB/s uncypher 113 MB/s asm version
AES256 cypher 80 MB/s uncypher 81 MB/s pascal version
XorBlock 1385 MB/s
XorOffset 1292 MB/s
XorConst 1479 MB/s
Benchmark on a C7 Dedibox (USEPADLOCK version):
===============================================
Testing with blocks of 16KB each
crc32 402 MB/s
adler32 pas 274 MB/s asm 542 MB/s libz.so 414 MB/s
MD5 126 MB/s
SHA1 480 MB/s
SHA256 458 MB/s
AES128 cypher 1566 MB/s uncypher 1560 MB/s
AES192 cypher 1421 MB/s uncypher 1422 MB/s
AES256 cypher 1237 MB/s uncypher 1247 MB/s
XorBlock 2336 MB/s
XorOffset 1807 MB/s
XorConst 3154 MB/s
Testing with blocks of 1024KB each
crc32 352 MB/s
adler32 pas 256 MB/s asm 395 MB/s libz.so 361 MB/s
MD5 123 MB/s
SHA1 324 MB/s
SHA256 324 MB/s
AES128 cypher 552 MB/s uncypher 552 MB/s
AES192 cypher 552 MB/s uncypher 552 MB/s
AES256 cypher 552 MB/s uncypher 552 MB/s
XorBlock 354 MB/s
XorOffset 373 MB/s
XorConst 511 MB/s
Testing with blocks of 4096KB each
crc32 352 MB/s
adler32 pas 255 MB/s asm 395 MB/s libz.so 361 MB/s
MD5 124 MB/s
SHA1 324 MB/s
SHA256 326 MB/s
AES128 cypher 552 MB/s uncypher 552 MB/s
AES192 cypher 552 MB/s uncypher 552 MB/s
AES256 cypher 552 MB/s uncypher 552 MB/s
XorBlock 352 MB/s
XorOffset 368 MB/s
XorConst 510 MB/s
Conclusion:
- USETHREADSFORBIGAESBLOCKS will help on modern multi-threaded CPU
- AES speed: W.Ehrhardt's pascal is 55MB/s, A.Bouchez's asm is 84MB/s
- AES-256 is faster than a simple XOR() on a dedibox with a C7 cpu ;)
- see below for benchmarks using AES-NI or SHA-256-SSE4, which induce
a huge performance boost
Initial version (C) 2008-2009 Arnaud Bouchez http://bouchez.info
Revision History:
Version 1.0
- initial release on Internet, with MyCrypto unit name
Version 1.1
- updated release, with new optimized AES i386 assembler implementation
and no FastCode dependency (CpuCount is taken from Windows API)
Version 1.4 - February 8, 2010
- whole Synopse SQLite3 database framework released under the GNU Lesser
General Public License version 3, instead of generic "Public Domain"
Version 1.8
- mostly code review for Delphi 2009/2010 integration (unit uses now
SynCommons string types definitions)
Version 1.9
- now use direct Windows threads, since we don't need any exception handling
nor memory usage inside the AES encryption Thread handler
-> avoid classes.TThread and system.BeginThread() use
-> application is still "officially" mono-threaded (i.e. IsMultiThread=false),
for faster System.pas and FastMM4 (prevent CPU locking - see
http://synopse.info/forum/viewtopic.php?id=57 about Delphi & multi-core)
- some other minor fixes and enhancements
Version 1.10
- code modifications to compile with Delphi 6 compiler
Version 1.13
- code modifications to compile with Delphi 5 compiler
Version 1.15
- unit now tested with Delphi XE2 (32 Bit)
Version 1.16
- added TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to handle AES
encryption of memory buffers in ECB, CBC, CFB, OFB and CTR mode (including
PKCS7 padding)
- added pure pascal version (for XE2 64 compilation) of all algorithms
Version 1.18
- added AES-NI hardware support on newer CPUs, for huge performance boost
and enhanced security
- AES encryption will compute its own tables, to get rid of 4KB of const
- tested compilation for Win64 platform
- run with FPC under Win32 and Linux (including AES-NI support), and Kylix
- added Intel's SSE4 x64 optimized asm for SHA-256 on Win64
- added overloaded procedure TMD5.Final() and function SHA256()
- introduce ESynCrypto exception class dedicated to this unit
- added AES encryption using official Microsoft AES Cryptographic Provider
(CryptoAPI) via TAESECB_API, TAESCBC_API, TAESCFB_API and TAESOFB_API -
our optimized asm version is faster, so is still our default/preferred
- added CompressShaAes() and global CompressShaAesKey, CompressShaAesIV and
CompressShaAesClass variables to be used by THttpSocket.RegisterCompress
- introduce new TRC4 object for RC4 encryption algorithm
- removed several compilation hints when assertions are set to off
*)
interface
{$I Synopse.inc} // define HASINLINE USETYPEINFO CPU32 CPU64 OWNNORMTOUPPER
{.$define USEPADLOCK}
{.$define PUREPASCAL} // for debug
{$ifdef Linux}
{$undef USETHREADSFORBIGAESBLOCKS} // uses low-level WinAPI threading
{.$define USEPADLOCK} // dedibox linux tested only, but may be OK on Windows
{$else}
{$ifdef CONDITIONALEXPRESSIONS}
// on Windows: enable Microsoft AES Cryptographic Provider (XP SP3 and up)
{$define USE_PROV_RSA_AES}
{$endif}
// on Windows: will use Threads for very big blocks (>512KB) if multi-CPU
{$define USETHREADSFORBIGAESBLOCKS}
{$endif}
{$ifdef USEPADLOCK}
{$ifdef MSWINDOWS}
{$define USEPADLOCKDLL} // Win32: we can use LibPadlock.dll
{$else}
{.$define PADLOCKDEBUG} // display message before using padlock
{.$define USEPADLOCKDLL} // Linux: use fast .o linked code
{$endif}
{$endif}
uses
{$ifdef MSWINDOWS}
Windows,
{$endif}
SysUtils,
{$ifndef LVCL}
{$ifdef CONDITIONALEXPRESSIONS}
RTLConsts,
{$endif}
{$endif}
Classes,
SynLZ, // already included in SynCommons, and used by CompressShaAes()
SynCommons;
const
/// size (in bytes) of the opaque Context buffer embedded in TAES
// - hides all the complex AES context layout from the interface section
// - one additional pointer is reserved when USEPADLOCK is defined
AESContextSize = 275 {$ifdef USEPADLOCK}+sizeof(pointer){$endif};
/// size (in bytes) of the opaque Context buffer embedded in TSHA1 and TSHA256
// - hides all the complex SHA context layout from the interface section
SHAContextSize = 108;
/// standard AES block size (in bytes) during cypher/uncypher
AESBlockSize = 16;
/// maximum AES key size (in bytes), i.e. 32 bytes for a 256 bits key
AESKeySize = 256 div 8;
type
/// class of Exceptions raised by this unit
ESynCrypto = class(Exception);
/// pointer to a 128 bits AES data block
PAESBlock = ^TAESBlock;
/// 128 bits memory block for AES data cypher/uncypher
// - its size is AESBlockSize bytes
TAESBlock = packed array[0..AESBlockSize-1] of byte;
/// 256 bits memory block for maximum AES key storage
// - its size is AESKeySize bytes
TAESKey = packed array[0..AESKeySize-1] of byte;
/// pointer to a TAES context
PAES = ^TAES;
/// handle AES cypher/uncypher
// - this is the default Electronic codebook (ECB) mode
// - this type will use AES-NI hardware instructions, if available
{$ifdef USEPADLOCK}
// - this type will use VIA PadLock instructions, if available
{$endif}
// - declared as an object, or as a record when the UNICODE conditional is set
TAES = {$ifndef UNICODE}object{$else}record{$endif}
private
// opaque storage of AESContextSize bytes: the real layout is hidden from
// the interface section
Context: packed array[1..AESContextSize] of byte;
{$ifdef USEPADLOCK}
// Padlock-specific initialization, only compiled if USEPADLOCK is defined
// - NOTE(review): the meaning of the boolean result is not visible here
function DoPadlockInit(const Key; KeySize: cardinal): boolean;
{$endif}
public
/// true if the context was initialized
Initialized: boolean;
/// Initialize AES contexts for cypher
// - first method to call before using this type
// - KeySize is in bits, i.e. 128,192,256
// - NOTE(review): presumably returns false on invalid KeySize - confirm
function EncryptInit(const Key; KeySize: cardinal): boolean;
/// encrypt an AES data block, in place
procedure Encrypt(var B: TAESBlock); overload;
/// encrypt an AES data block (BI) into another data block (BO)
procedure Encrypt(const BI: TAESBlock; var BO: TAESBlock); overload;
/// Initialize AES contexts for uncypher
// - KeySize is in bits, as for EncryptInit()
function DecryptInit(const Key; KeySize: cardinal): boolean;
/// decrypt an AES data block, in place
procedure Decrypt(var B: TAESBlock); overload;
/// decrypt an AES data block (BI) into another data block (BO)
procedure Decrypt(const BI: TAESBlock; var BO: TAESBlock); overload;
/// Finalize AES contexts for both cypher and uncypher
// - only used with Padlock
procedure Done;
/// generic initialization method for AES contexts
// - call either the EncryptInit() or the DecryptInit() method, depending
// on the doEncrypt parameter
function DoInit(const Key; KeySize: cardinal; doEncrypt: boolean): boolean;
/// perform the AES cypher or uncypher to continuous memory blocks
// - call either the Encrypt() or the Decrypt() method, depending on doEncrypt
// - Count is a number of TAESBlock items (pIn/pOut are PAESBlock pointers)
// - oIn/oOut are output parameters - NOTE(review): presumably the positions
// just after the processed blocks; confirm against the implementation
procedure DoBlocks(pIn, pOut: PAESBlock; out oIn, oOut: PAESBLock; Count: integer; doEncrypt: boolean); overload;
/// perform the AES cypher or uncypher to continuous memory blocks
// - call either the Encrypt() or the Decrypt() method, depending on doEncrypt
// - same as the other overload, without any output position
procedure DoBlocks(pIn, pOut: PAESBlock; Count: integer; doEncrypt: boolean); overload;
{$ifdef USETHREADSFORBIGAESBLOCKS}
/// perform the AES cypher or uncypher to continuous memory blocks
// - this special method will use Threads for big blocks (>512KB) if multi-CPU
// - call either the Encrypt() or the Decrypt() method, depending on doEncrypt
// - bIn/bOut are passed by reference (var), so may be modified by the call
// - only available if USETHREADSFORBIGAESBLOCKS is defined
procedure DoBlocksThread(var bIn, bOut: PAESBlock; Count: integer; doEncrypt: boolean);
{$endif}
/// return TRUE if the AES-NI instruction sets are available on this CPU
function UsesAESNI: boolean;
end;
/// class reference type, able to store any TAESAbstract descendant class
TAESAbstractClass = class of TAESAbstract;
/// handle AES cypher/uncypher with chaining
// - use any of the inherited implementation, corresponding to the chaining
// mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to
// handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7 padding)
// - Encrypt() and Decrypt() are abstract here, so this class should not be
// instantiated directly
TAESAbstract = class
protected
// key size in bits, as published by the KeySize property
fKeySize: cardinal;
// NOTE(review): presumably the same key size expressed in bytes - confirm
fKeySizeBytes: cardinal;
public
/// Initialize AES contexts for cypher
// - first method to call before using this class
// - KeySize is in bits, i.e. 128,192,256
// - IV is the Initialization Vector
// - is virtual, so that it can be called via a TAESAbstractClass reference
constructor Create(const aKey; aKeySize: cardinal; const aIV: TAESBlock); virtual;
/// perform the AES cypher in the corresponding mode
// - abstract method, to be overridden by the inherited classes
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); virtual; abstract;
/// perform the AES un-cypher in the corresponding mode
// - abstract method, to be overridden by the inherited classes
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); virtual; abstract;
/// encrypt a memory buffer using a PKCS7 padding pattern
// - PKCS7 is described in RFC 5652 - it will add up to 16 bytes to
// the input buffer
// - returns the encrypted content as a new RawByteString
function EncryptPKCS7(const Input: RawByteString): RawByteString;
/// decrypt a memory buffer using a PKCS7 padding pattern
// - PKCS7 is described in RFC 5652 - it will trim up to 16 bytes from
// the input buffer
// - returns the decrypted content as a new RawByteString
function DecryptPKCS7(const Input: RawByteString): RawByteString;
/// associated Key Size, in bits (i.e. 128,192,256)
property KeySize: cardinal read fKeySize;
end;
/// handle AES cypher/uncypher with chaining, using the TAES engine of this unit
// - use any of the inherited implementation, corresponding to the chaining
// mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to
// handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7 padding)
// - this class will use AES-NI hardware instructions, if available
TAESAbstractSyn = class(TAESAbstract)
protected
// current input and output block positions, set from BufIn/BufOut
fIn, fOut: PAESBlock;
// the low-level AES engine
AES: TAES;
// storage of the key supplied to the constructor
fKey: TAESKey;
// NOTE(review): presumably the Count of the current Encrypt/Decrypt call
fCount: Cardinal;
// current chaining vector ("CV" in the method comments below)
fCV: TAESBlock;
// NOTE(review): the three helpers below are presumably shared by the
// inherited Encrypt/Decrypt implementations - their exact behavior is
// not visible in the interface section
procedure EncryptInit;
procedure DecryptInit;
procedure EncryptTrailer;
public
/// Initialize AES context for cypher
// - first method to call before using this class
// - KeySize is in bits, i.e. 128,192,256
// - IV is the Initialization Vector
constructor Create(const aKey; aKeySize: cardinal; const aIV: TAESBlock); override;
/// perform the AES cypher in the corresponding mode
// - this base method will set CV from AES.Context, and fIn/fOut
// from BufIn/BufOut
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the corresponding mode
// - this base method will set CV from AES.Context, and fIn/fOut
// from BufIn/BufOut
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher without chaining (ECB)
// - this mode is known to be less secure than the others
// - IV value set on constructor is used to code the trailing bytes
// of the buffer (by a simple XOR)
// - this class will use AES-NI hardware instructions, if available, e.g.
// ! ECB128: 19.70ms in x86 optimized code, 6.97ms with AES-NI
TAESECB = class(TAESAbstractSyn)
public
/// perform the AES cypher in the ECB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the ECB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher with Cipher-block chaining (CBC)
// - this class will use AES-NI hardware instructions, if available, e.g.
// ! CBC192: 24.91ms in x86 optimized code, 9.75ms with AES-NI
TAESCBC = class(TAESAbstractSyn)
public
/// perform the AES cypher in the CBC mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the CBC mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher with Cipher feedback (CFB)
// - this class will use AES-NI hardware instructions, if available, e.g.
// ! CFB128: 22.25ms in x86 optimized code, 9.29ms with AES-NI
TAESCFB = class(TAESAbstractSyn)
public
/// perform the AES cypher in the CFB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the CFB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher with Output feedback (OFB)
// - this class will use AES-NI hardware instructions, if available, e.g.
// ! OFB256: 27.69ms in x86 optimized code, 9.94ms with AES-NI
TAESOFB = class(TAESAbstractSyn)
public
/// perform the AES cypher in the OFB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the OFB mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher with Counter mode (CTR)
// - this class will use AES-NI hardware instructions, if available, e.g.
// ! CTR256: 28.13ms in x86 optimized code, 10.63ms with AES-NI
TAESCTR = class(TAESAbstractSyn)
public
/// perform the AES cypher in the CTR mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the CTR mode
// - BufIn/BufOut are the source and destination memory buffers
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
{$ifdef USE_PROV_RSA_AES}
/// handle AES cypher/uncypher using Windows CryptoAPI and the
// official Microsoft AES Cryptographic Provider (PROV_RSA_AES)
// - see @http://msdn.microsoft.com/en-us/library/windows/desktop/aa386979
// - timing of our optimized asm versions, for small (<=8KB) block processing
// (similar to standard web pages or most typical JSON/XML content),
// benchmarked on a Core i7 notebook and compiled as Win32 platform:
// ! AES128 - ECB:79.33ms CBC:83.37ms CFB:80.75ms OFB:78.98ms CTR:80.45ms
// ! AES192 - ECB:91.16ms CBC:96.06ms CFB:96.45ms OFB:92.12ms CTR:93.38ms
// ! AES256 - ECB:103.22ms CBC:119.14ms CFB:111.59ms OFB:107.00ms CTR:110.13ms
// - timing of the same process, using CryptoAPI official PROV_RSA_AES provider:
// ! AES128 - ECB_API:102.88ms CBC_API:124.91ms
// ! AES192 - ECB_API:115.75ms CBC_API:129.95ms
// ! AES256 - ECB_API:139.50ms CBC_API:154.02ms
// - but the CryptoAPI does not support AES-NI, whereas our classes do on Win32,
// with a huge speed benefit
// - under Win64, the official CryptoAPI is faster than our PUREPASCAL version,
// and the Win32 version of CryptoAPI itself:
// ! AES128 - ECB:107.95ms CBC:112.65ms CFB:109.62ms OFB:107.23ms CTR:109.42ms
// ! AES192 - ECB:130.30ms CBC:133.04ms CFB:128.78ms OFB:127.25ms CTR:130.22ms
// ! AES256 - ECB:145.33ms CBC:147.01ms CFB:148.36ms OFB:145.96ms CTR:149.67ms
// ! AES128 - ECB_API:89.64ms CBC_API:100.84ms
// ! AES192 - ECB_API:99.05ms CBC_API:105.85ms
// ! AES256 - ECB_API:107.11ms CBC_API:118.04ms
// - this class is abstract: InternalSetMode has to be overridden by the
// inherited classes (TAESECB_API, TAESCBC_API, TAESCFB_API, TAESOFB_API)
TAESAbstract_API = class(TAESAbstract)
protected
// key header, directly followed in memory by fKey
// - NOTE(review): field names match the Windows BLOBHEADER structure plus
// a key length, presumably forming a key BLOB for the CryptoAPI import
// function - confirm against the implementation
fKeyHeader: packed record
bType: byte;
bVersion: byte;
reserved: word;
aiKeyAlg: cardinal;
dwKeyLength: cardinal;
end;
// raw key bytes - must stay declared just after fKeyHeader
fKey: TAESKey;
// storage of the Initialization Vector
fIV: TAESBlock;
// NOTE(review): presumably the CryptoAPI key handle - confirm
fKeyCryptoAPI: pointer;
// chaining mode, as set by the InternalSetMode overridden methods
fInternalMode: cardinal;
// to be overridden to set fInternalMode to the expected CRYPT_MODE_* value
procedure InternalSetMode; virtual; abstract;
// shared process for both Encrypt() and Decrypt() public methods
procedure EncryptDecrypt(BufIn, BufOut: pointer; Count: cardinal; DoEncrypt: boolean);
public
/// Initialize AES context for cypher
// - first method to call before using this class
// - KeySize is in bits, i.e. 128,192,256
// - IV is the Initialization Vector
constructor Create(const aKey; aKeySize: cardinal; const aIV: TAESBlock); override;
/// release the AES execution context
destructor Destroy; override;
/// perform the AES cypher in the mode of the inherited class
// - NOTE(review): the original comment stated "ECB mode", which looks
// like a copy/paste since the mode comes from InternalSetMode - confirm
procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override;
/// perform the AES un-cypher in the mode of the inherited class
// - NOTE(review): the original comment stated "ECB mode", which looks
// like a copy/paste since the mode comes from InternalSetMode - confirm
procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override;
end;
/// handle AES cypher/uncypher without chaining (ECB) using Windows CryptoAPI
// - only compiled if USE_PROV_RSA_AES is defined
TAESECB_API = class(TAESAbstract_API)
protected
/// will set fInternalMode := CRYPT_MODE_ECB
procedure InternalSetMode; override;
end;
/// handle AES cypher/uncypher with Cipher-block chaining (CBC) using
// Windows CryptoAPI
// - only compiled if USE_PROV_RSA_AES is defined
TAESCBC_API = class(TAESAbstract_API)
protected
/// will set fInternalMode := CRYPT_MODE_CBC
procedure InternalSetMode; override;
end;
/// handle AES cypher/uncypher with Cipher feedback (CFB) using Windows CryptoAPI
// - NOT TO BE USED: the current PROV_RSA_AES provider does not return
// expected values for CFB
// - only compiled if USE_PROV_RSA_AES is defined
TAESCFB_API = class(TAESAbstract_API)
protected
/// will set fInternalMode := CRYPT_MODE_CFB
procedure InternalSetMode; override;
end;
/// handle AES cypher/uncypher with Output feedback (OFB) using Windows CryptoAPI
// - NOT TO BE USED: the current PROV_RSA_AES provider does not implement
// this mode, and returns a NTE_BAD_ALGID error
// - only compiled if USE_PROV_RSA_AES is defined
TAESOFB_API = class(TAESAbstract_API)
protected
/// will set fInternalMode := CRYPT_MODE_OFB
procedure InternalSetMode; override;
end;
{$endif USE_PROV_RSA_AES}
/// pointer to a SHA1 hash digest
PSHA1Digest = ^TSHA1Digest;
/// 160 bits memory block for SHA1 hash digest storage
TSHA1Digest = packed array[0..19] of byte;
/// pointer to a TSHA1 context
PSHA1 = ^TSHA1;
/// handle SHA1 hashing
// - declared as an object, or as a record when the UNICODE conditional is set
TSHA1 = {$ifndef UNICODE}object{$else}record{$endif}
private
// opaque storage of SHAContextSize bytes, accessed as cardinal items
Context: packed array[1..SHAContextSize div 4] of cardinal;
procedure Compress; // used by Update and Final
public
/// initialize SHA1 context for hashing
procedure Init;
/// update the SHA1 context with some data
// - Buffer points to Len bytes of data to be hashed
procedure Update(Buffer: pointer; Len: integer);
/// finalize and compute the resulting SHA1 hash Digest of all data
// supplied to the Update() method
procedure Final(out Digest: TSHA1Digest);
/// one method to rule them all
// - call Init, then Update(), then Final()
// - only Full() is Padlock-implemented - use this rather than Update()
procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA1Digest);
end;
/// pointer to a SHA256 hash digest
PSHA256Digest = ^TSHA256Digest;
/// 256 bits memory block for SHA256 hash digest storage
TSHA256Digest = packed array[0..31] of byte;
/// pointer to a TSHA256 context
PSHA256 = ^TSHA256;
/// handle SHA256 hashing
// - declared as an object, or as a record when the UNICODE conditional is set
TSHA256 = {$ifndef UNICODE}object{$else}record{$endif}
private
// opaque storage of SHAContextSize bytes
Context: packed array[1..SHAContextSize] of byte;
procedure Compress; // used by Update and Final
public
/// initialize SHA256 context for hashing
procedure Init;
/// update the SHA256 context with some data
// - Buffer points to Len bytes of data to be hashed
procedure Update(Buffer: pointer; Len: integer);
/// finalize and compute the resulting SHA256 hash Digest of all data
// supplied to the Update() method
procedure Final(out Digest: TSHA256Digest);
/// one method to rule them all
// - call Init, then Update(), then Final()
// - only Full() is Padlock-implemented - use this rather than Update()
procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA256Digest);
end;
/// internal input buffer of 16 cardinal values, as used by TMD5
TMD5In = array[0..15] of cardinal;
/// 128 bits memory block for MD5 hash digest storage
TMD5Digest = array[0..15] of Byte;
/// pointer to a TMD5 context
PMD5 = ^TMD5;
/// internal state of 4 cardinal values, as used by TMD5
TMD5Buf = array[0..3] of cardinal;
/// handle MD5 hashing
// - declared as an object, or as a record when the UNICODE conditional is set
TMD5 = {$ifndef UNICODE}object{$else}record{$endif}
private
// NOTE(review): buf is presumably the running hash state, bytes the
// processed length counter, and in_ the pending input block - confirm
buf: TMD5Buf;
bytes: array[0..1] of cardinal;
in_: TMD5In;
// internal process, presumably shared by both Final overloads - confirm
procedure Finalize;
public
/// initialize MD5 context for hashing
procedure Init;
/// update the MD5 context with some data
// - buffer is an untyped parameter referencing Len bytes of data
procedure Update(const buffer; Len: cardinal);
/// finalize and compute the resulting MD5 hash Digest of all data
// supplied to the Update() method
// - the digest is returned in the result output parameter
procedure Final(out result: TMD5Digest); overload;
/// finalize and compute the resulting MD5 hash Digest of all data
// supplied to the Update() method
// - the digest is returned as function result
function Final: TMD5Digest; overload;
/// one method to rule them all
// - call Init, then Update(), then Final()
procedure Full(Buffer: pointer; Len: integer; out Digest: TMD5Digest);
end;
/// internal key permutation buffer, as used by TRC4
// - stores one byte per possible byte value, i.e. 256 bytes
TRC4InternalKey = array[byte] of byte;
/// handle RC4 encryption/decryption
// - declared as an object, or as a record when the UNICODE conditional is set
TRC4 = {$ifndef UNICODE}object{$else}record{$endif}
private
// internal key permutation state, modified by each Encrypt() call
key: TRC4InternalKey;
public
/// initialize the RC4 encryption/decryption
// - aKeyLen is in bytes, and should be within 1..255 range
procedure Init(const aKey; aKeyLen: integer);
/// perform the RC4 cypher encryption/decryption on a buffer
// - each call to this method shall be preceded with an Init() call,
// or a RestoreKey() from a previous SaveKey(), since it will change
// the internal key[] during its process
// - RC4 is a symmetrical algorithm: use this Encrypt() method for both
// encryption and decryption of any buffer
// - Count is the number of bytes to process from BufIn into BufOut
procedure Encrypt(const BufIn; var BufOut; Count: cardinal);
/// save the internal key computed by Init()
procedure SaveKey(out Backup: TRC4InternalKey);
/// restore the internal key as computed by Init()
procedure RestoreKey(const Backup: TRC4InternalKey);
end;
{$A-} { packed memory structure }
/// internal header for storing our AES data with salt and CRC
// - consists of four cardinal fields, with no alignment padding ({$A-})
// - declared as an object, or as a record when the UNICODE conditional is set
TAESFullHeader = {$ifndef UNICODE}object{$else}record{$endif}
public
/// Len before compression (if any)
OriginalLen,
/// Len before AES encoding
SourceLen,
/// Random Salt for better encryption
SomeSalt,
/// CRC from header
HeaderCheck: cardinal;
/// compute a check value from the supplied key
// - NOTE(review): presumably the value stored in / compared with the
// HeaderCheck field, for fast key validation - confirm
function Calc(const Key; KeySize: cardinal): cardinal;
end;
{$A+}
/// pointer to a TAESFull context
PAESFull = ^TAESFull;
/// AES and XOR encryption object for easy direct memory or stream access
// - calls internally TAES object methods, and handles memory and streams for
// best speed
// - a TAESFullHeader is encrypted at the beginning, allowing fast Key
// validation, but the resulting stream is not compatible with raw TAES object
// - declared as an object, or as a record when the UNICODE conditional is set
TAESFull = {$ifndef UNICODE}object{$else}record{$endif}
public
/// header, stored at the beginning of struct -> 16-byte aligned
Head: TAESFullHeader;
/// this memory stream is used in case of EncodeDecode(outStream=bOut=nil)
// method call
// - NOTE(review): ownership/release of this instance is not visible from
// the interface section - check the implementation before relying on it
outStreamCreated: TMemoryStream;
/// main method of AES or XOR cypher/uncypher
// - return out size, -1 if error on decoding (Key not correct)
// - valid KeySize: 0=nothing, 32=xor, 128,192,256=AES
// - if outStream is TMemoryStream -> auto-reserve space (no Realloc)
// - for normal usage, you just have to Assign one In and one Out
// - if outStream AND bOut are both nil, an outStream is created via
// THeapMemoryStream.Create
// - if Padlock is used, 16-byte alignment is forced (via tmp buffer if necessary)
// - if Encrypt -> OriginalLen can be used to store unCompressed Len
function EncodeDecode(const Key; KeySize, inLen: cardinal; Encrypt: boolean;
inStream, outStream: TStream; bIn, bOut: pointer; OriginalLen: cardinal=0): integer;
end;
/// AES encryption stream
// - encrypt the Data on the fly, in a compatible way with AES() - last bytes
// are coded with XOR (not compatible with TAESFull format)
// - not optimized for small blocks -> ok if used AFTER TBZCompressor/TZipCompressor
// - warning: Write() will crypt Buffer memory in place -> use AFTER T*Compressor
TAESWriteStream = class(TStream)
public
Adler, // CRC from uncrypted compressed data - for Key check
// NOTE(review): presumably the number of bytes written to the destination
// stream - confirm against the implementation
DestSize: cardinal;
private
// the destination stream, as supplied to the constructor
Dest: TStream;
Buf: TAESBlock; // very small buffer for remaining 0..15 bytes
BufCount: integer; // number of pending bytes (0..15) in Buf
// the low-level AES engine
AES: TAES;
NoCrypt: boolean; // if KeySize=0
public
/// initialize the AES encryption stream for an output stream (e.g.
// a TMemoryStream or a TFileStream)
// - if KeySize is 0, no encryption will take place
constructor Create(outStream: TStream; const Key; KeySize: cardinal);
/// finalize the AES encryption stream
// - internally call the Finish method
destructor Destroy; override;
/// read some data is not allowed -> this method will raise an exception on call
function Read(var Buffer; Count: Longint): Longint; override;
/// append some data to the outStream, after encryption
function Write(const Buffer; Count: Longint): Longint; override;
/// seek within the stream
// - NOTE(review): the original comment was a copy of the Read() one, so
// seeking is presumably not allowed and will raise an exception - confirm
function Seek(Offset: Longint; Origin: Word): Longint; override;
/// write pending data
// - should always be called before closing the outStream (some data may
// still be in the internal buffers)
procedure Finish;
end;
/// direct MD5 hash calculation of some data
function MD5Buf(const Buffer; Len: Cardinal): TMD5Digest;
/// direct MD5 hash calculation of some data (string-encoded)
// - result is returned in hexadecimal format
function MD5(const s: RawByteString): RawUTF8;
/// direct SHA1 hash calculation of some data (string-encoded)
// - result is returned in hexadecimal format
function SHA1(const s: RawByteString): RawUTF8;
/// direct SHA256 hash calculation of some data (string-encoded)
// - result is returned in hexadecimal format
function SHA256(const s: RawByteString): RawUTF8; overload;
/// direct SHA256 hash calculation of some binary data
// - result is returned in hexadecimal format
function SHA256(Data: pointer; Len: integer): RawUTF8; overload;
/// direct SHA256 hash calculation of some data (string-encoded)
// - result is returned as a binary digest in the Digest output parameter
// (use SHA256DigestToString() to get its hexadecimal representation)
// - this procedure has a weak password protection: small incoming data
// is appended to some salt, in order to have at least a 256 bytes long hash:
// such a feature improves security for small passwords
procedure SHA256Weak(const s: RawByteString; out Digest: TSHA256Digest); overload;
/// direct Encrypt/Decrypt of data using the TAES class
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
procedure AES(const Key; KeySize: cardinal; buffer: pointer; Len: Integer; Encrypt: boolean); overload;
/// direct Encrypt/Decrypt of data using the TAES class
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
procedure AES(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: Integer; Encrypt: boolean); overload;
/// direct Encrypt/Decrypt of data using the TAES class
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
function AES(const Key; KeySize: cardinal; const s: RawByteString; Encrypt: boolean): RawByteString; overload;
/// direct Encrypt/Decrypt of data using the TAES class
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
function AES(const Key; KeySize: cardinal; buffer: pointer; Len: cardinal; Stream: TStream; Encrypt: boolean): boolean; overload;
/// AES and XOR encryption using the TAESFull format
// - outStream will be larger/smaller than Len (full AES encrypted)
// - returns true if OK
function AESFull(const Key; KeySize: cardinal; bIn: pointer; Len: Integer;
outStream: TStream; Encrypt: boolean; OriginalLen: Cardinal=0): boolean; overload;
/// AES and XOR encryption using the TAESFull format
// - bOut must be at least bIn+32/Encrypt bIn-16/Decrypt
// - returns outLength, -1 if error
function AESFull(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: Integer;
Encrypt: boolean; OriginalLen: Cardinal=0): integer; overload;
/// AES and XOR decryption check using the TAESFull format
// - return true if beginning of buff contains true AESFull encrypted data with this Key
// - if not KeySize in [128,192,256] -> use fast and efficient Xor Cypher
function AESFullKeyOK(const Key; KeySize: cardinal; buff: pointer): boolean;
/// AES encryption using the TAES format with a supplied SHA256 password
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
procedure AESSHA256(Buffer: pointer; Len: integer; const Password: RawByteString; Encrypt: boolean); overload;
/// AES encryption using the TAES format with a supplied SHA256 password
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
procedure AESSHA256(bIn, bOut: pointer; Len: integer; const Password: RawByteString; Encrypt: boolean); overload;
/// AES encryption using the TAES format with a supplied SHA256 password
// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR
function AESSHA256(const s, Password: RawByteString; Encrypt: boolean): RawByteString; overload;
/// AES encryption using the TAESFull format with a supplied SHA256 password
// - outStream will be larger/smaller than Len: this is a full AES version with
// a triming TAESFullHeader at the beginning
procedure AESSHA256Full(bIn: pointer; Len: Integer; outStream: TStream; const Password: RawByteString; Encrypt: boolean); overload;
const
SHA1DIGESTSTRLEN = sizeof(TSHA1Digest)*2;
SHA256DIGESTSTRLEN = sizeof(TSHA256Digest)*2;
MD5DIGESTSTRLEN = sizeof(TMD5Digest)*2;
/// compute the hexadecimal representation of a SHA1 digest
function SHA1DigestToString(const D: TSHA1Digest): RawUTF8;
/// compute the hexadecimal representation of a SHA256 digest
function SHA256DigestToString(const D: TSHA256Digest): RawUTF8;
/// compare two supplied MD5 digests
function MD5DigestsEqual(const A, B: TMD5Digest): Boolean;
/// compute the hexadecimal representation of a MD5 digest
function MD5DigestToString(const D: TMD5Digest): RawUTF8;
/// compute the HTDigest for a user and a realm, according to a supplied password
// - apache-compatible: 'agent007:download area:8364d0044ef57b3defcfa141e8f77b65'
function htdigest(const user, realm, pass: RawByteString): RawUTF8;
/// self test of Adler32 routines
function Adler32SelfTest: boolean;
/// self test of MD5 routines
function MD5SelfTest: boolean;
/// self test of SHA1 routines
function SHA1SelfTest: boolean;
/// self test of SHA256 routines
function SHA256SelfTest: boolean;
/// self test of AES routines
function AESSelfTest(onlytables: Boolean): boolean;
/// self test of RC4 routines
function RC4SelfTest: boolean;
// little endian fast conversion
// - 160 bits = 5 integers
// - use fast bswap asm in x86/x64 mode
procedure bswap160(s,d: PIntegerArray);
// little endian fast conversion
// - 256 bits = 8 integers
// - use fast bswap asm in x86/x64 mode
procedure bswap256(s,d: PIntegerArray);
/// simple Adler32 implementation
// - a bit slower than Adler32Asm() version below, but shorter code size
function Adler32Pas(Adler: cardinal; p: pointer; Count: Integer): cardinal;
/// fast Adler32 implementation
// - 16-byte chunk unrolled asm version
function Adler32Asm(Adler: cardinal; p: pointer; Count: Integer): cardinal;
{$ifdef PUREPASCAL}{$ifdef HASINLINE}inline;{$endif}{$endif}
/// very fast XOR according to Cod - not Compression or Stream compatible
// - used in AESFull() for KeySize=32
procedure XorBlock(p: PIntegerArray; Count, Cod: integer);
/// fast and simple XOR Cypher using Index (=Position in Dest Stream)
// - Compression not compatible with this function: should be applied after
// compress (e.g. as outStream for TAESWriteStream)
// - Stream compatible (with updated Index)
// - used in AES() and TAESWriteStream
procedure XorOffset(p: pByte; Index,Count: integer);
/// fast XOR Cypher changing by Count value
// - Compression compatible, since the XOR value is always the same, the
// compression rate will not change a lot
procedure XorConst(p: PIntegerArray; Count: integer);
var
/// the encryption key used by CompressShaAes() global function
// - the key is global to the whole process
// - use CompressShaAesSetKey() procedure to set this Key and associated IV
CompressShaAesKey: TSHA256Digest;
/// the Initialization Vector used by CompressShaAes() global function
// - this vector is global to the whole process
// - use CompressShaAesSetKey() procedure to set this IV and associated Key
CompressShaAesIV: TAESBlock;
/// the AES-256 encoding class used by CompressShaAes() global function
// - use any of the implementation classes, corresponding to the chaining
// mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to
// handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7 padding)
// - set to the secure and efficient CTR mode by default
CompressShaAesClass: TAESAbstractClass = TAESCTR;
/// set a text-based encryption key/IV for CompressShaAes() global function
// - will compute the key/IV via SHA256Weak() and set global CompressShaAesKey var
// - the key and Initialization Vector are global to the whole process
procedure CompressShaAesSetKey(const Key: RawByteString; const IV: RawByteString='');
/// encrypt data content using the AES-256/CTR algorithm, after SynLZ compression
// - as expected by THttpSocket.RegisterCompress()
// - will return 'synshaaes' as ACCEPT-ENCODING: header parameter
// - will use global CompressShaAesKey and CompressShaAesIV variables to be set
// according to the expected compression Key and Initialization Vector, e.g.
// via a call to the CompressShaAesSetKey() global procedure
// - if you want to change the chaining mode, you can customize the global
// CompressShaAesClass variable to the expected TAES* class name
// - will store a hash of both cyphered and clear stream: if the
// data is corrupted during transmission, will instantly return ''
function CompressShaAes(var DataRawByteString; Compress: boolean): AnsiString;
{$ifdef USEPADLOCK}
var
/// if dll/so and VIA padlock compatible CPU are present
padlock_available: boolean = false;
{$endif}
implementation
{$ifdef USEPADLOCK}
const
// NOTE(review): the values below appear to mirror constants of the external
// VIA PadLock library API - confirm against the library headers before
// changing any of them
// presumably the "success" result code returned by the padlock_aes_* calls
AES_SUCCEEDED = 0;
// presumably key size selectors for the key_len parameter of
// padlock_aes_setkey() - TODO confirm
KEY_128BITS = 0;
KEY_192BITS = 1;
KEY_256BITS = 2;
// presumably chaining mode selectors for the mode parameter of
// padlock_aes_setmodeiv() - TODO confirm
ACE_AES_ECB = 0;
ACE_AES_CBC = 1;
{$ifdef USEPADLOCKDLL}
// cdecl function types describing entry points of the external PadLock
// library, resolved at runtime with GetProcAddress() in PadlockInit
// - each type is used by the padlock_* function pointer variable(s) of the
// matching name declared in the var section below
// - signatures must stay in sync with the library exports
type
// availability check, returning a boolean, no parameter
tpadlock_phe_available = function: boolean; cdecl;
// hash function prototype: nbytes of buffer are processed into the untyped
// Digest - shared by the padlock_phe_sha1 and padlock_phe_sha256 variables
tpadlock_phe_sha = function(
buffer: pointer; nbytes: integer; var Digest): integer; cdecl;
// availability check, returning a boolean, no parameter
tpadlock_ace_available = function: boolean; cdecl;
// returns a pointer, which is presumably the ctx expected by the other
// padlock_aes_* calls - TODO confirm
tpadlock_aes_begin = function: pointer; cdecl;
// supplies the key to a context; key_len presumably is one of the
// KEY_*BITS constants - TODO confirm
tpadlock_aes_setkey = function(
ctx: pointer; const key; key_len: integer): integer; cdecl;
// supplies the mode and the IV to a context; mode presumably is one of the
// ACE_AES_* constants - TODO confirm
tpadlock_aes_setmodeiv = function(
ctx: pointer; mode: integer; var iv): integer; cdecl;
// processes nbytes from bIn into bOut using the supplied context
tpadlock_aes_encrypt = function(
ctx, bIn, bOut: pointer; nbytes: integer): integer; cdecl;
// same parameters as tpadlock_aes_encrypt, for the reverse operation
tpadlock_aes_decrypt = function(
ctx, bIn, bOut: pointer; nbytes: integer): integer; cdecl;
// releases a context - presumably one returned by padlock_aes_begin
tpadlock_aes_close = function(
ctx: pointer): integer; cdecl;
// function pointers to the external PadLock library entry points
// - all of them are nil until PadlockInit has loaded the library
// - PadlockInit assigns padlock_phe_available via GetProcAddress(); the other
// pointers are presumably resolved the same way - confirm in PadlockInit
var
padlock_phe_available: tpadlock_phe_available = nil;
padlock_phe_sha1: tpadlock_phe_sha = nil;
padlock_phe_sha256: tpadlock_phe_sha = nil;
padlock_ace_available: tpadlock_ace_available = nil;
padlock_aes_begin: tpadlock_aes_begin = nil;
padlock_aes_setkey: tpadlock_aes_setkey = nil;
padlock_aes_setmodeiv: tpadlock_aes_setmodeiv = nil;
padlock_aes_encrypt: tpadlock_aes_encrypt = nil;
padlock_aes_decrypt: tpadlock_aes_decrypt = nil;
padlock_aes_close: tpadlock_aes_close = nil;
// handle of the loaded library, as returned by LoadLibrary() in PadlockInit
// - stays 0 if the library could not be loaded
// - the handle type differs between the Windows and the other target
{$ifdef MSWINDOWS}
PadLockLibHandle: THandle = 0;
{$else} // Linux:
PadLockLibHandle: HMODULE = 0;
{$endif}
procedure PadlockInit;
begin
{$ifdef MSWINDOWS}
PadLockLibHandle := LoadLibrary('LibPadlock');
{$else} // Linux:
PadLockLibHandle := LoadLibrary('libvia_padlock.so');
if PadLockLibHandle=0 then
PadLockLibHandle := LoadLibrary('libvia_padlock.so.1.0.0');
{$endif}
if PadLockLibHandle=0 then
exit;
padlock_phe_available := GetProcAddress(PadLockLibHandle,'padlock_phe_available');