1 |
jpp |
1.1 |
Upstream-Status: Backport [https://github.com/openssl/openssl/commit/44a563dde1584cd9284e80b6e45ee5019be8d36c, https://github.com/openssl/openssl/commit/345c99b6654b8313c792d54f829943068911ddbd] |
2 |
|
|
diff --git a/crypto/modes/asm/aes-gcm-ppc.pl b/crypto/modes/asm/aes-gcm-ppc.pl |
3 |
|
|
new file mode 100644 |
4 |
|
|
index 0000000..6624e6c |
5 |
|
|
--- /dev/null |
6 |
|
|
+++ b/crypto/modes/asm/aes-gcm-ppc.pl |
7 |
|
|
@@ -0,0 +1,1438 @@ |
8 |
|
|
+#! /usr/bin/env perl |
9 |
|
|
+# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. |
10 |
|
|
+# Copyright 2021- IBM Inc. All rights reserved |
11 |
|
|
+# |
12 |
|
|
+# Licensed under the Apache License 2.0 (the "License"). You may not use |
13 |
|
|
+# this file except in compliance with the License. You can obtain a copy |
14 |
|
|
+# in the file LICENSE in the source distribution or at |
15 |
|
|
+# https://www.openssl.org/source/license.html |
16 |
|
|
+# |
17 |
|
|
+#=================================================================================== |
18 |
|
|
+# Written by Danny Tsen <dtsen@us.ibm.com> for OpenSSL Project, |
19 |
|
|
+# |
20 |
|
|
+# GHASH is based on the Karatsuba multiplication method. |
21 |
|
|
+# |
22 |
|
|
+# Xi xor X1 |
23 |
|
|
+# |
24 |
|
|
+# X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H = |
25 |
|
|
+# (X1.h * H4.h + X1.l * H4.l + X1 * H4) + |
26 |
|
|
+# (X2.h * H3.h + X2.l * H3.l + X2 * H3) + |
27 |
|
|
+# (X3.h * H2.h + X3.l * H2.l + X3 * H2) + |
28 |
|
|
+# (X4.h * H.h + X4.l * H.l + X4 * H) |
29 |
|
|
+# |
30 |
|
|
+# Xi = v0 |
31 |
|
|
+# H Poly = v2 |
32 |
|
|
+# Hash keys = v3 - v14 |
33 |
|
|
+# ( H.l, H, H.h) |
34 |
|
|
+# ( H^2.l, H^2, H^2.h) |
35 |
|
|
+# ( H^3.l, H^3, H^3.h) |
36 |
|
|
+# ( H^4.l, H^4, H^4.h) |
37 |
|
|
+# |
38 |
|
|
+# v30 is IV |
39 |
|
|
+# v31 - counter 1 |
40 |
|
|
+# |
41 |
|
|
+# AES used, |
42 |
|
|
+# vs0 - vs14 for round keys |
43 |
|
|
+# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) |
44 |
|
|
+# |
45 |
|
|
+# This implementation uses stitched AES-GCM approach to improve overall performance. |
46 |
|
|
+# AES is processed 8 blocks at a time, and GHASH is computed as two 4-block passes. |
47 |
|
|
+# |
48 |
|
|
+# Current large block (16384 bytes) performance per second with 128 bit key -- |
49 |
|
|
+# |
50 |
|
|
+# Encrypt Decrypt |
51 |
|
|
+# Power10[le] (3.5GHz) 5.32G 5.26G |
52 |
|
|
+# |
53 |
|
|
+# =================================================================================== |
54 |
|
|
+# |
55 |
|
|
+# $output is the last argument if it looks like a file (it has an extension) |
56 |
|
|
+# $flavour is the first argument if it doesn't look like a file |
57 |
|
|
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; |
58 |
|
|
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; |
59 |
|
|
+ |
60 |
|
|
+if ($flavour =~ /64/) { |
61 |
|
|
+ $SIZE_T=8; |
62 |
|
|
+ $LRSAVE=2*$SIZE_T; |
63 |
|
|
+ $STU="stdu"; |
64 |
|
|
+ $POP="ld"; |
65 |
|
|
+ $PUSH="std"; |
66 |
|
|
+ $UCMP="cmpld"; |
67 |
|
|
+ $SHRI="srdi"; |
68 |
|
|
+} elsif ($flavour =~ /32/) { |
69 |
|
|
+ $SIZE_T=4; |
70 |
|
|
+ $LRSAVE=$SIZE_T; |
71 |
|
|
+ $STU="stwu"; |
72 |
|
|
+ $POP="lwz"; |
73 |
|
|
+ $PUSH="stw"; |
74 |
|
|
+ $UCMP="cmplw"; |
75 |
|
|
+ $SHRI="srwi"; |
76 |
|
|
+} else { die "nonsense $flavour"; } |
77 |
|
|
+ |
78 |
|
|
+$sp="r1"; |
79 |
|
|
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload |
80 |
|
|
+ |
81 |
|
|
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
82 |
|
|
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or |
83 |
|
|
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or |
84 |
|
|
+die "can't locate ppc-xlate.pl"; |
85 |
|
|
+ |
86 |
|
|
+open STDOUT,"| $^X $xlate $flavour \"$output\"" |
87 |
|
|
+ or die "can't call $xlate: $!"; |
88 |
|
|
+ |
89 |
|
|
+$code=<<___; |
90 |
|
|
+.machine "any" |
91 |
|
|
+.text |
92 |
|
|
+ |
93 |
|
|
+# 4x loops |
94 |
|
|
+# v15 - v18 - input states |
95 |
|
|
+# vs1 - vs9 - round keys |
96 |
|
|
+# |
97 |
|
|
+.macro Loop_aes_middle4x |
98 |
|
|
+ xxlor 19+32, 1, 1 |
99 |
|
|
+ xxlor 20+32, 2, 2 |
100 |
|
|
+ xxlor 21+32, 3, 3 |
101 |
|
|
+ xxlor 22+32, 4, 4 |
102 |
|
|
+ |
103 |
|
|
+ vcipher 15, 15, 19 |
104 |
|
|
+ vcipher 16, 16, 19 |
105 |
|
|
+ vcipher 17, 17, 19 |
106 |
|
|
+ vcipher 18, 18, 19 |
107 |
|
|
+ |
108 |
|
|
+ vcipher 15, 15, 20 |
109 |
|
|
+ vcipher 16, 16, 20 |
110 |
|
|
+ vcipher 17, 17, 20 |
111 |
|
|
+ vcipher 18, 18, 20 |
112 |
|
|
+ |
113 |
|
|
+ vcipher 15, 15, 21 |
114 |
|
|
+ vcipher 16, 16, 21 |
115 |
|
|
+ vcipher 17, 17, 21 |
116 |
|
|
+ vcipher 18, 18, 21 |
117 |
|
|
+ |
118 |
|
|
+ vcipher 15, 15, 22 |
119 |
|
|
+ vcipher 16, 16, 22 |
120 |
|
|
+ vcipher 17, 17, 22 |
121 |
|
|
+ vcipher 18, 18, 22 |
122 |
|
|
+ |
123 |
|
|
+ xxlor 19+32, 5, 5 |
124 |
|
|
+ xxlor 20+32, 6, 6 |
125 |
|
|
+ xxlor 21+32, 7, 7 |
126 |
|
|
+ xxlor 22+32, 8, 8 |
127 |
|
|
+ |
128 |
|
|
+ vcipher 15, 15, 19 |
129 |
|
|
+ vcipher 16, 16, 19 |
130 |
|
|
+ vcipher 17, 17, 19 |
131 |
|
|
+ vcipher 18, 18, 19 |
132 |
|
|
+ |
133 |
|
|
+ vcipher 15, 15, 20 |
134 |
|
|
+ vcipher 16, 16, 20 |
135 |
|
|
+ vcipher 17, 17, 20 |
136 |
|
|
+ vcipher 18, 18, 20 |
137 |
|
|
+ |
138 |
|
|
+ vcipher 15, 15, 21 |
139 |
|
|
+ vcipher 16, 16, 21 |
140 |
|
|
+ vcipher 17, 17, 21 |
141 |
|
|
+ vcipher 18, 18, 21 |
142 |
|
|
+ |
143 |
|
|
+ vcipher 15, 15, 22 |
144 |
|
|
+ vcipher 16, 16, 22 |
145 |
|
|
+ vcipher 17, 17, 22 |
146 |
|
|
+ vcipher 18, 18, 22 |
147 |
|
|
+ |
148 |
|
|
+ xxlor 23+32, 9, 9 |
149 |
|
|
+ vcipher 15, 15, 23 |
150 |
|
|
+ vcipher 16, 16, 23 |
151 |
|
|
+ vcipher 17, 17, 23 |
152 |
|
|
+ vcipher 18, 18, 23 |
153 |
|
|
+.endm |
154 |
|
|
+ |
155 |
|
|
+# 8x loops |
156 |
|
|
+# v15 - v22 - input states |
157 |
|
|
+# vs1 - vs9 - round keys |
158 |
|
|
+# |
159 |
|
|
+.macro Loop_aes_middle8x |
160 |
|
|
+ xxlor 23+32, 1, 1 |
161 |
|
|
+ xxlor 24+32, 2, 2 |
162 |
|
|
+ xxlor 25+32, 3, 3 |
163 |
|
|
+ xxlor 26+32, 4, 4 |
164 |
|
|
+ |
165 |
|
|
+ vcipher 15, 15, 23 |
166 |
|
|
+ vcipher 16, 16, 23 |
167 |
|
|
+ vcipher 17, 17, 23 |
168 |
|
|
+ vcipher 18, 18, 23 |
169 |
|
|
+ vcipher 19, 19, 23 |
170 |
|
|
+ vcipher 20, 20, 23 |
171 |
|
|
+ vcipher 21, 21, 23 |
172 |
|
|
+ vcipher 22, 22, 23 |
173 |
|
|
+ |
174 |
|
|
+ vcipher 15, 15, 24 |
175 |
|
|
+ vcipher 16, 16, 24 |
176 |
|
|
+ vcipher 17, 17, 24 |
177 |
|
|
+ vcipher 18, 18, 24 |
178 |
|
|
+ vcipher 19, 19, 24 |
179 |
|
|
+ vcipher 20, 20, 24 |
180 |
|
|
+ vcipher 21, 21, 24 |
181 |
|
|
+ vcipher 22, 22, 24 |
182 |
|
|
+ |
183 |
|
|
+ vcipher 15, 15, 25 |
184 |
|
|
+ vcipher 16, 16, 25 |
185 |
|
|
+ vcipher 17, 17, 25 |
186 |
|
|
+ vcipher 18, 18, 25 |
187 |
|
|
+ vcipher 19, 19, 25 |
188 |
|
|
+ vcipher 20, 20, 25 |
189 |
|
|
+ vcipher 21, 21, 25 |
190 |
|
|
+ vcipher 22, 22, 25 |
191 |
|
|
+ |
192 |
|
|
+ vcipher 15, 15, 26 |
193 |
|
|
+ vcipher 16, 16, 26 |
194 |
|
|
+ vcipher 17, 17, 26 |
195 |
|
|
+ vcipher 18, 18, 26 |
196 |
|
|
+ vcipher 19, 19, 26 |
197 |
|
|
+ vcipher 20, 20, 26 |
198 |
|
|
+ vcipher 21, 21, 26 |
199 |
|
|
+ vcipher 22, 22, 26 |
200 |
|
|
+ |
201 |
|
|
+ xxlor 23+32, 5, 5 |
202 |
|
|
+ xxlor 24+32, 6, 6 |
203 |
|
|
+ xxlor 25+32, 7, 7 |
204 |
|
|
+ xxlor 26+32, 8, 8 |
205 |
|
|
+ |
206 |
|
|
+ vcipher 15, 15, 23 |
207 |
|
|
+ vcipher 16, 16, 23 |
208 |
|
|
+ vcipher 17, 17, 23 |
209 |
|
|
+ vcipher 18, 18, 23 |
210 |
|
|
+ vcipher 19, 19, 23 |
211 |
|
|
+ vcipher 20, 20, 23 |
212 |
|
|
+ vcipher 21, 21, 23 |
213 |
|
|
+ vcipher 22, 22, 23 |
214 |
|
|
+ |
215 |
|
|
+ vcipher 15, 15, 24 |
216 |
|
|
+ vcipher 16, 16, 24 |
217 |
|
|
+ vcipher 17, 17, 24 |
218 |
|
|
+ vcipher 18, 18, 24 |
219 |
|
|
+ vcipher 19, 19, 24 |
220 |
|
|
+ vcipher 20, 20, 24 |
221 |
|
|
+ vcipher 21, 21, 24 |
222 |
|
|
+ vcipher 22, 22, 24 |
223 |
|
|
+ |
224 |
|
|
+ vcipher 15, 15, 25 |
225 |
|
|
+ vcipher 16, 16, 25 |
226 |
|
|
+ vcipher 17, 17, 25 |
227 |
|
|
+ vcipher 18, 18, 25 |
228 |
|
|
+ vcipher 19, 19, 25 |
229 |
|
|
+ vcipher 20, 20, 25 |
230 |
|
|
+ vcipher 21, 21, 25 |
231 |
|
|
+ vcipher 22, 22, 25 |
232 |
|
|
+ |
233 |
|
|
+ vcipher 15, 15, 26 |
234 |
|
|
+ vcipher 16, 16, 26 |
235 |
|
|
+ vcipher 17, 17, 26 |
236 |
|
|
+ vcipher 18, 18, 26 |
237 |
|
|
+ vcipher 19, 19, 26 |
238 |
|
|
+ vcipher 20, 20, 26 |
239 |
|
|
+ vcipher 21, 21, 26 |
240 |
|
|
+ vcipher 22, 22, 26 |
241 |
|
|
+ |
242 |
|
|
+ xxlor 23+32, 9, 9 |
243 |
|
|
+ vcipher 15, 15, 23 |
244 |
|
|
+ vcipher 16, 16, 23 |
245 |
|
|
+ vcipher 17, 17, 23 |
246 |
|
|
+ vcipher 18, 18, 23 |
247 |
|
|
+ vcipher 19, 19, 23 |
248 |
|
|
+ vcipher 20, 20, 23 |
249 |
|
|
+ vcipher 21, 21, 23 |
250 |
|
|
+ vcipher 22, 22, 23 |
251 |
|
|
+.endm |
252 |
|
|
+ |
253 |
|
|
+# |
254 |
|
|
+# Compute 4x hash values based on Karatsuba method. |
255 |
|
|
+# |
256 |
|
|
+ppc_aes_gcm_ghash: |
257 |
|
|
+ vxor 15, 15, 0 |
258 |
|
|
+ |
259 |
|
|
+ xxlxor 29, 29, 29 |
260 |
|
|
+ |
261 |
|
|
+ vpmsumd 23, 12, 15 # H4.L * X.L |
262 |
|
|
+ vpmsumd 24, 9, 16 |
263 |
|
|
+ vpmsumd 25, 6, 17 |
264 |
|
|
+ vpmsumd 26, 3, 18 |
265 |
|
|
+ |
266 |
|
|
+ vxor 23, 23, 24 |
267 |
|
|
+ vxor 23, 23, 25 |
268 |
|
|
+ vxor 23, 23, 26 # L |
269 |
|
|
+ |
270 |
|
|
+ vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L |
271 |
|
|
+ vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L |
272 |
|
|
+ vpmsumd 26, 7, 17 |
273 |
|
|
+ vpmsumd 27, 4, 18 |
274 |
|
|
+ |
275 |
|
|
+ vxor 24, 24, 25 |
276 |
|
|
+ vxor 24, 24, 26 |
277 |
|
|
+ vxor 24, 24, 27 # M |
278 |
|
|
+ |
279 |
|
|
+ # sum hash and reduction with H Poly |
280 |
|
|
+ vpmsumd 28, 23, 2 # reduction |
281 |
|
|
+ |
282 |
|
|
+ xxlor 29+32, 29, 29 |
283 |
|
|
+ vsldoi 26, 24, 29, 8 # mL |
284 |
|
|
+ vsldoi 29, 29, 24, 8 # mH |
285 |
|
|
+ vxor 23, 23, 26 # mL + L |
286 |
|
|
+ |
287 |
|
|
+ vsldoi 23, 23, 23, 8 # swap |
288 |
|
|
+ vxor 23, 23, 28 |
289 |
|
|
+ |
290 |
|
|
+ vpmsumd 24, 14, 15 # H4.H * X.H |
291 |
|
|
+ vpmsumd 25, 11, 16 |
292 |
|
|
+ vpmsumd 26, 8, 17 |
293 |
|
|
+ vpmsumd 27, 5, 18 |
294 |
|
|
+ |
295 |
|
|
+ vxor 24, 24, 25 |
296 |
|
|
+ vxor 24, 24, 26 |
297 |
|
|
+ vxor 24, 24, 27 |
298 |
|
|
+ |
299 |
|
|
+ vxor 24, 24, 29 |
300 |
|
|
+ |
301 |
|
|
+ # sum hash and reduction with H Poly |
302 |
|
|
+ vsldoi 27, 23, 23, 8 # swap |
303 |
|
|
+ vpmsumd 23, 23, 2 |
304 |
|
|
+ vxor 27, 27, 24 |
305 |
|
|
+ vxor 23, 23, 27 |
306 |
|
|
+ |
307 |
|
|
+ xxlor 32, 23+32, 23+32 # update hash |
308 |
|
|
+ |
309 |
|
|
+ blr |
310 |
|
|
+ |
311 |
|
|
+# |
312 |
|
|
+# Combine two 4x ghash |
313 |
|
|
+# v15 - v22 - input blocks |
314 |
|
|
+# |
315 |
|
|
+.macro ppc_aes_gcm_ghash2_4x |
316 |
|
|
+ # first 4x hash |
317 |
|
|
+ vxor 15, 15, 0 # Xi + X |
318 |
|
|
+ |
319 |
|
|
+ xxlxor 29, 29, 29 |
320 |
|
|
+ |
321 |
|
|
+ vpmsumd 23, 12, 15 # H4.L * X.L |
322 |
|
|
+ vpmsumd 24, 9, 16 |
323 |
|
|
+ vpmsumd 25, 6, 17 |
324 |
|
|
+ vpmsumd 26, 3, 18 |
325 |
|
|
+ |
326 |
|
|
+ vxor 23, 23, 24 |
327 |
|
|
+ vxor 23, 23, 25 |
328 |
|
|
+ vxor 23, 23, 26 # L |
329 |
|
|
+ |
330 |
|
|
+ vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L |
331 |
|
|
+ vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L |
332 |
|
|
+ vpmsumd 26, 7, 17 |
333 |
|
|
+ vpmsumd 27, 4, 18 |
334 |
|
|
+ |
335 |
|
|
+ vxor 24, 24, 25 |
336 |
|
|
+ vxor 24, 24, 26 |
337 |
|
|
+ |
338 |
|
|
+ # sum hash and reduction with H Poly |
339 |
|
|
+ vpmsumd 28, 23, 2 # reduction |
340 |
|
|
+ |
341 |
|
|
+ xxlor 29+32, 29, 29 |
342 |
|
|
+ |
343 |
|
|
+ vxor 24, 24, 27 # M |
344 |
|
|
+ vsldoi 26, 24, 29, 8 # mL |
345 |
|
|
+ vsldoi 29, 29, 24, 8 # mH |
346 |
|
|
+ vxor 23, 23, 26 # mL + L |
347 |
|
|
+ |
348 |
|
|
+ vsldoi 23, 23, 23, 8 # swap |
349 |
|
|
+ vxor 23, 23, 28 |
350 |
|
|
+ |
351 |
|
|
+ vpmsumd 24, 14, 15 # H4.H * X.H |
352 |
|
|
+ vpmsumd 25, 11, 16 |
353 |
|
|
+ vpmsumd 26, 8, 17 |
354 |
|
|
+ vpmsumd 27, 5, 18 |
355 |
|
|
+ |
356 |
|
|
+ vxor 24, 24, 25 |
357 |
|
|
+ vxor 24, 24, 26 |
358 |
|
|
+ vxor 24, 24, 27 # H |
359 |
|
|
+ |
360 |
|
|
+ vxor 24, 24, 29 # H + mH |
361 |
|
|
+ |
362 |
|
|
+ # sum hash and reduction with H Poly |
363 |
|
|
+ vsldoi 27, 23, 23, 8 # swap |
364 |
|
|
+ vpmsumd 23, 23, 2 |
365 |
|
|
+ vxor 27, 27, 24 |
366 |
|
|
+ vxor 27, 23, 27 # 1st Xi |
367 |
|
|
+ |
368 |
|
|
+ # 2nd 4x hash |
369 |
|
|
+ vpmsumd 24, 9, 20 |
370 |
|
|
+ vpmsumd 25, 6, 21 |
371 |
|
|
+ vpmsumd 26, 3, 22 |
372 |
|
|
+ vxor 19, 19, 27 # Xi + X |
373 |
|
|
+ vpmsumd 23, 12, 19 # H4.L * X.L |
374 |
|
|
+ |
375 |
|
|
+ vxor 23, 23, 24 |
376 |
|
|
+ vxor 23, 23, 25 |
377 |
|
|
+ vxor 23, 23, 26 # L |
378 |
|
|
+ |
379 |
|
|
+ vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L |
380 |
|
|
+ vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L |
381 |
|
|
+ vpmsumd 26, 7, 21 |
382 |
|
|
+ vpmsumd 27, 4, 22 |
383 |
|
|
+ |
384 |
|
|
+ vxor 24, 24, 25 |
385 |
|
|
+ vxor 24, 24, 26 |
386 |
|
|
+ |
387 |
|
|
+ # sum hash and reduction with H Poly |
388 |
|
|
+ vpmsumd 28, 23, 2 # reduction |
389 |
|
|
+ |
390 |
|
|
+ xxlor 29+32, 29, 29 |
391 |
|
|
+ |
392 |
|
|
+ vxor 24, 24, 27 # M |
393 |
|
|
+ vsldoi 26, 24, 29, 8 # mL |
394 |
|
|
+ vsldoi 29, 29, 24, 8 # mH |
395 |
|
|
+ vxor 23, 23, 26 # mL + L |
396 |
|
|
+ |
397 |
|
|
+ vsldoi 23, 23, 23, 8 # swap |
398 |
|
|
+ vxor 23, 23, 28 |
399 |
|
|
+ |
400 |
|
|
+ vpmsumd 24, 14, 19 # H4.H * X.H |
401 |
|
|
+ vpmsumd 25, 11, 20 |
402 |
|
|
+ vpmsumd 26, 8, 21 |
403 |
|
|
+ vpmsumd 27, 5, 22 |
404 |
|
|
+ |
405 |
|
|
+ vxor 24, 24, 25 |
406 |
|
|
+ vxor 24, 24, 26 |
407 |
|
|
+ vxor 24, 24, 27 # H |
408 |
|
|
+ |
409 |
|
|
+ vxor 24, 24, 29 # H + mH |
410 |
|
|
+ |
411 |
|
|
+ # sum hash and reduction with H Poly |
412 |
|
|
+ vsldoi 27, 23, 23, 8 # swap |
413 |
|
|
+ vpmsumd 23, 23, 2 |
414 |
|
|
+ vxor 27, 27, 24 |
415 |
|
|
+ vxor 23, 23, 27 |
416 |
|
|
+ |
417 |
|
|
+ xxlor 32, 23+32, 23+32 # update hash |
418 |
|
|
+ |
419 |
|
|
+.endm |
420 |
|
|
+ |
421 |
|
|
+# |
422 |
|
|
+# Compute update single hash |
423 |
|
|
+# |
424 |
|
|
+.macro ppc_update_hash_1x |
425 |
|
|
+ vxor 28, 28, 0 |
426 |
|
|
+ |
427 |
|
|
+ vxor 19, 19, 19 |
428 |
|
|
+ |
429 |
|
|
+ vpmsumd 22, 3, 28 # L |
430 |
|
|
+ vpmsumd 23, 4, 28 # M |
431 |
|
|
+ vpmsumd 24, 5, 28 # H |
432 |
|
|
+ |
433 |
|
|
+ vpmsumd 27, 22, 2 # reduction |
434 |
|
|
+ |
435 |
|
|
+ vsldoi 25, 23, 19, 8 # mL |
436 |
|
|
+ vsldoi 26, 19, 23, 8 # mH |
437 |
|
|
+ vxor 22, 22, 25 # L + mL |
438 |
|
|
+ vxor 24, 24, 26 # H + mH |
439 |
|
|
+ |
440 |
|
|
+ vsldoi 22, 22, 22, 8 # swap |
441 |
|
|
+ vxor 22, 22, 27 |
442 |
|
|
+ |
443 |
|
|
+ vsldoi 20, 22, 22, 8 # swap |
444 |
|
|
+ vpmsumd 22, 22, 2 # reduction |
445 |
|
|
+ vxor 20, 20, 24 |
446 |
|
|
+ vxor 22, 22, 20 |
447 |
|
|
+ |
448 |
|
|
+ vmr 0, 22 # update hash |
449 |
|
|
+ |
450 |
|
|
+.endm |
451 |
|
|
+ |
452 |
|
|
+# |
453 |
|
|
+# ppc_aes_gcm_encrypt (const void *inp, void *out, size_t len, |
454 |
|
|
+# const AES_KEY *key, unsigned char iv[16], |
455 |
|
|
+# void *Xip); |
456 |
|
|
+# |
457 |
|
|
+# r3 - inp |
458 |
|
|
+# r4 - out |
459 |
|
|
+# r5 - len |
460 |
|
|
+# r6 - AES round keys |
461 |
|
|
+# r7 - iv |
462 |
|
|
+# r8 - Xi (offset 0), H Poly (offset 32), hash keys H..H^4 (offsets 48-224) |
463 |
|
|
+# |
464 |
|
|
+.global ppc_aes_gcm_encrypt |
465 |
|
|
+.align 5 |
466 |
|
|
+ppc_aes_gcm_encrypt: |
467 |
|
|
+_ppc_aes_gcm_encrypt: |
468 |
|
|
+ |
469 |
|
|
+ stdu 1,-512(1) |
470 |
|
|
+ mflr 0 |
471 |
|
|
+ |
472 |
|
|
+ std 14,112(1) |
473 |
|
|
+ std 15,120(1) |
474 |
|
|
+ std 16,128(1) |
475 |
|
|
+ std 17,136(1) |
476 |
|
|
+ std 18,144(1) |
477 |
|
|
+ std 19,152(1) |
478 |
|
|
+ std 20,160(1) |
479 |
|
|
+ std 21,168(1) |
480 |
|
|
+ li 9, 256 |
481 |
|
|
+ stvx 20, 9, 1 |
482 |
|
|
+ addi 9, 9, 16 |
483 |
|
|
+ stvx 21, 9, 1 |
484 |
|
|
+ addi 9, 9, 16 |
485 |
|
|
+ stvx 22, 9, 1 |
486 |
|
|
+ addi 9, 9, 16 |
487 |
|
|
+ stvx 23, 9, 1 |
488 |
|
|
+ addi 9, 9, 16 |
489 |
|
|
+ stvx 24, 9, 1 |
490 |
|
|
+ addi 9, 9, 16 |
491 |
|
|
+ stvx 25, 9, 1 |
492 |
|
|
+ addi 9, 9, 16 |
493 |
|
|
+ stvx 26, 9, 1 |
494 |
|
|
+ addi 9, 9, 16 |
495 |
|
|
+ stvx 27, 9, 1 |
496 |
|
|
+ addi 9, 9, 16 |
497 |
|
|
+ stvx 28, 9, 1 |
498 |
|
|
+ addi 9, 9, 16 |
499 |
|
|
+ stvx 29, 9, 1 |
500 |
|
|
+ addi 9, 9, 16 |
501 |
|
|
+ stvx 30, 9, 1 |
502 |
|
|
+ addi 9, 9, 16 |
503 |
|
|
+ stvx 31, 9, 1 |
504 |
|
|
+ std 0, 528(1) |
505 |
|
|
+ |
506 |
|
|
+ # Load Xi |
507 |
|
|
+ lxvb16x 32, 0, 8 # load Xi |
508 |
|
|
+ |
509 |
|
|
+ # load Hash - h^4, h^3, h^2, h |
510 |
|
|
+ li 10, 32 |
511 |
|
|
+ lxvd2x 2+32, 10, 8 # H Poli |
512 |
|
|
+ li 10, 48 |
513 |
|
|
+ lxvd2x 3+32, 10, 8 # Hl |
514 |
|
|
+ li 10, 64 |
515 |
|
|
+ lxvd2x 4+32, 10, 8 # H |
516 |
|
|
+ li 10, 80 |
517 |
|
|
+ lxvd2x 5+32, 10, 8 # Hh |
518 |
|
|
+ |
519 |
|
|
+ li 10, 96 |
520 |
|
|
+ lxvd2x 6+32, 10, 8 # H^2l |
521 |
|
|
+ li 10, 112 |
522 |
|
|
+ lxvd2x 7+32, 10, 8 # H^2 |
523 |
|
|
+ li 10, 128 |
524 |
|
|
+ lxvd2x 8+32, 10, 8 # H^2h |
525 |
|
|
+ |
526 |
|
|
+ li 10, 144 |
527 |
|
|
+ lxvd2x 9+32, 10, 8 # H^3l |
528 |
|
|
+ li 10, 160 |
529 |
|
|
+ lxvd2x 10+32, 10, 8 # H^3 |
530 |
|
|
+ li 10, 176 |
531 |
|
|
+ lxvd2x 11+32, 10, 8 # H^3h |
532 |
|
|
+ |
533 |
|
|
+ li 10, 192 |
534 |
|
|
+ lxvd2x 12+32, 10, 8 # H^4l |
535 |
|
|
+ li 10, 208 |
536 |
|
|
+ lxvd2x 13+32, 10, 8 # H^4 |
537 |
|
|
+ li 10, 224 |
538 |
|
|
+ lxvd2x 14+32, 10, 8 # H^4h |
539 |
|
|
+ |
540 |
|
|
+ # load the initial counter block (ICB) from iv (r7); it is used as supplied |
541 |
|
|
+ lxvb16x 30+32, 0, 7 # load IV - v30 |
542 |
|
|
+ |
543 |
|
|
+ mr 12, 5 # length |
544 |
|
|
+ li 11, 0 # bytes processed so far |
545 |
|
|
+ |
546 |
|
|
+ # counter 1 |
547 |
|
|
+ vxor 31, 31, 31 |
548 |
|
|
+ vspltisb 22, 1 |
549 |
|
|
+ vsldoi 31, 31, 22,1 # counter 1 |
550 |
|
|
+ |
551 |
|
|
+ # load round key to VSR |
552 |
|
|
+ lxv 0, 0(6) |
553 |
|
|
+ lxv 1, 0x10(6) |
554 |
|
|
+ lxv 2, 0x20(6) |
555 |
|
|
+ lxv 3, 0x30(6) |
556 |
|
|
+ lxv 4, 0x40(6) |
557 |
|
|
+ lxv 5, 0x50(6) |
558 |
|
|
+ lxv 6, 0x60(6) |
559 |
|
|
+ lxv 7, 0x70(6) |
560 |
|
|
+ lxv 8, 0x80(6) |
561 |
|
|
+ lxv 9, 0x90(6) |
562 |
|
|
+ lxv 10, 0xa0(6) |
563 |
|
|
+ |
564 |
|
|
+ # load rounds - 10 (128), 12 (192), 14 (256) |
565 |
|
|
+ lwz 9,240(6) |
566 |
|
|
+ |
567 |
|
|
+ # |
568 |
|
|
+ # vxor state, state, w # addroundkey |
569 |
|
|
+ xxlor 32+29, 0, 0 |
570 |
|
|
+ vxor 15, 30, 29 # IV + round key - add round key 0 |
571 |
|
|
+ |
572 |
|
|
+ cmpdi 9, 10 |
573 |
|
|
+ beq Loop_aes_gcm_8x |
574 |
|
|
+ |
575 |
|
|
+ # load 2 more round keys (vs11, vs12) |
576 |
|
|
+ lxv 11, 0xb0(6) |
577 |
|
|
+ lxv 12, 0xc0(6) |
578 |
|
|
+ |
579 |
|
|
+ cmpdi 9, 12 |
580 |
|
|
+ beq Loop_aes_gcm_8x |
581 |
|
|
+ |
582 |
|
|
+ # load 2 more round keys (vs13, vs14) |
583 |
|
|
+ lxv 13, 0xd0(6) |
584 |
|
|
+ lxv 14, 0xe0(6) |
585 |
|
|
+ cmpdi 9, 14 |
586 |
|
|
+ beq Loop_aes_gcm_8x |
587 |
|
|
+ |
588 |
|
|
+ b aes_gcm_out |
589 |
|
|
+ |
590 |
|
|
+.align 5 |
591 |
|
|
+Loop_aes_gcm_8x: |
592 |
|
|
+ mr 14, 3 |
593 |
|
|
+ mr 9, 4 |
594 |
|
|
+ |
595 |
|
|
+ # n blocks |
596 |
|
|
+ li 10, 128 |
597 |
|
|
+ divdu 10, 5, 10 # n 128 bytes-blocks |
598 |
|
|
+ cmpdi 10, 0 |
599 |
|
|
+ beq Loop_last_block |
600 |
|
|
+ |
601 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
602 |
|
|
+ vxor 16, 30, 29 |
603 |
|
|
+ vaddudm 30, 30, 31 |
604 |
|
|
+ vxor 17, 30, 29 |
605 |
|
|
+ vaddudm 30, 30, 31 |
606 |
|
|
+ vxor 18, 30, 29 |
607 |
|
|
+ vaddudm 30, 30, 31 |
608 |
|
|
+ vxor 19, 30, 29 |
609 |
|
|
+ vaddudm 30, 30, 31 |
610 |
|
|
+ vxor 20, 30, 29 |
611 |
|
|
+ vaddudm 30, 30, 31 |
612 |
|
|
+ vxor 21, 30, 29 |
613 |
|
|
+ vaddudm 30, 30, 31 |
614 |
|
|
+ vxor 22, 30, 29 |
615 |
|
|
+ |
616 |
|
|
+ mtctr 10 |
617 |
|
|
+ |
618 |
|
|
+ li 15, 16 |
619 |
|
|
+ li 16, 32 |
620 |
|
|
+ li 17, 48 |
621 |
|
|
+ li 18, 64 |
622 |
|
|
+ li 19, 80 |
623 |
|
|
+ li 20, 96 |
624 |
|
|
+ li 21, 112 |
625 |
|
|
+ |
626 |
|
|
+ lwz 10, 240(6) |
627 |
|
|
+ |
628 |
|
|
+Loop_8x_block: |
629 |
|
|
+ |
630 |
|
|
+ lxvb16x 15, 0, 14 # load block |
631 |
|
|
+ lxvb16x 16, 15, 14 # load block |
632 |
|
|
+ lxvb16x 17, 16, 14 # load block |
633 |
|
|
+ lxvb16x 18, 17, 14 # load block |
634 |
|
|
+ lxvb16x 19, 18, 14 # load block |
635 |
|
|
+ lxvb16x 20, 19, 14 # load block |
636 |
|
|
+ lxvb16x 21, 20, 14 # load block |
637 |
|
|
+ lxvb16x 22, 21, 14 # load block |
638 |
|
|
+ addi 14, 14, 128 |
639 |
|
|
+ |
640 |
|
|
+ Loop_aes_middle8x |
641 |
|
|
+ |
642 |
|
|
+ xxlor 23+32, 10, 10 |
643 |
|
|
+ |
644 |
|
|
+ cmpdi 10, 10 |
645 |
|
|
+ beq Do_next_ghash |
646 |
|
|
+ |
647 |
|
|
+ # 192 bits |
648 |
|
|
+ xxlor 24+32, 11, 11 |
649 |
|
|
+ |
650 |
|
|
+ vcipher 15, 15, 23 |
651 |
|
|
+ vcipher 16, 16, 23 |
652 |
|
|
+ vcipher 17, 17, 23 |
653 |
|
|
+ vcipher 18, 18, 23 |
654 |
|
|
+ vcipher 19, 19, 23 |
655 |
|
|
+ vcipher 20, 20, 23 |
656 |
|
|
+ vcipher 21, 21, 23 |
657 |
|
|
+ vcipher 22, 22, 23 |
658 |
|
|
+ |
659 |
|
|
+ vcipher 15, 15, 24 |
660 |
|
|
+ vcipher 16, 16, 24 |
661 |
|
|
+ vcipher 17, 17, 24 |
662 |
|
|
+ vcipher 18, 18, 24 |
663 |
|
|
+ vcipher 19, 19, 24 |
664 |
|
|
+ vcipher 20, 20, 24 |
665 |
|
|
+ vcipher 21, 21, 24 |
666 |
|
|
+ vcipher 22, 22, 24 |
667 |
|
|
+ |
668 |
|
|
+ xxlor 23+32, 12, 12 |
669 |
|
|
+ |
670 |
|
|
+ cmpdi 10, 12 |
671 |
|
|
+ beq Do_next_ghash |
672 |
|
|
+ |
673 |
|
|
+ # 256 bits |
674 |
|
|
+ xxlor 24+32, 13, 13 |
675 |
|
|
+ |
676 |
|
|
+ vcipher 15, 15, 23 |
677 |
|
|
+ vcipher 16, 16, 23 |
678 |
|
|
+ vcipher 17, 17, 23 |
679 |
|
|
+ vcipher 18, 18, 23 |
680 |
|
|
+ vcipher 19, 19, 23 |
681 |
|
|
+ vcipher 20, 20, 23 |
682 |
|
|
+ vcipher 21, 21, 23 |
683 |
|
|
+ vcipher 22, 22, 23 |
684 |
|
|
+ |
685 |
|
|
+ vcipher 15, 15, 24 |
686 |
|
|
+ vcipher 16, 16, 24 |
687 |
|
|
+ vcipher 17, 17, 24 |
688 |
|
|
+ vcipher 18, 18, 24 |
689 |
|
|
+ vcipher 19, 19, 24 |
690 |
|
|
+ vcipher 20, 20, 24 |
691 |
|
|
+ vcipher 21, 21, 24 |
692 |
|
|
+ vcipher 22, 22, 24 |
693 |
|
|
+ |
694 |
|
|
+ xxlor 23+32, 14, 14 |
695 |
|
|
+ |
696 |
|
|
+ cmpdi 10, 14 |
697 |
|
|
+ beq Do_next_ghash |
698 |
|
|
+ b aes_gcm_out |
699 |
|
|
+ |
700 |
|
|
+Do_next_ghash: |
701 |
|
|
+ |
702 |
|
|
+ # |
703 |
|
|
+ # last round |
704 |
|
|
+ vcipherlast 15, 15, 23 |
705 |
|
|
+ vcipherlast 16, 16, 23 |
706 |
|
|
+ |
707 |
|
|
+ xxlxor 47, 47, 15 |
708 |
|
|
+ stxvb16x 47, 0, 9 # store output |
709 |
|
|
+ xxlxor 48, 48, 16 |
710 |
|
|
+ stxvb16x 48, 15, 9 # store output |
711 |
|
|
+ |
712 |
|
|
+ vcipherlast 17, 17, 23 |
713 |
|
|
+ vcipherlast 18, 18, 23 |
714 |
|
|
+ |
715 |
|
|
+ xxlxor 49, 49, 17 |
716 |
|
|
+ stxvb16x 49, 16, 9 # store output |
717 |
|
|
+ xxlxor 50, 50, 18 |
718 |
|
|
+ stxvb16x 50, 17, 9 # store output |
719 |
|
|
+ |
720 |
|
|
+ vcipherlast 19, 19, 23 |
721 |
|
|
+ vcipherlast 20, 20, 23 |
722 |
|
|
+ |
723 |
|
|
+ xxlxor 51, 51, 19 |
724 |
|
|
+ stxvb16x 51, 18, 9 # store output |
725 |
|
|
+ xxlxor 52, 52, 20 |
726 |
|
|
+ stxvb16x 52, 19, 9 # store output |
727 |
|
|
+ |
728 |
|
|
+ vcipherlast 21, 21, 23 |
729 |
|
|
+ vcipherlast 22, 22, 23 |
730 |
|
|
+ |
731 |
|
|
+ xxlxor 53, 53, 21 |
732 |
|
|
+ stxvb16x 53, 20, 9 # store output |
733 |
|
|
+ xxlxor 54, 54, 22 |
734 |
|
|
+ stxvb16x 54, 21, 9 # store output |
735 |
|
|
+ |
736 |
|
|
+ addi 9, 9, 128 |
737 |
|
|
+ |
738 |
|
|
+ # ghash here |
739 |
|
|
+ ppc_aes_gcm_ghash2_4x |
740 |
|
|
+ |
741 |
|
|
+ xxlor 27+32, 0, 0 |
742 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
743 |
|
|
+ vmr 29, 30 |
744 |
|
|
+ vxor 15, 30, 27 # add round key |
745 |
|
|
+ vaddudm 30, 30, 31 |
746 |
|
|
+ vxor 16, 30, 27 |
747 |
|
|
+ vaddudm 30, 30, 31 |
748 |
|
|
+ vxor 17, 30, 27 |
749 |
|
|
+ vaddudm 30, 30, 31 |
750 |
|
|
+ vxor 18, 30, 27 |
751 |
|
|
+ vaddudm 30, 30, 31 |
752 |
|
|
+ vxor 19, 30, 27 |
753 |
|
|
+ vaddudm 30, 30, 31 |
754 |
|
|
+ vxor 20, 30, 27 |
755 |
|
|
+ vaddudm 30, 30, 31 |
756 |
|
|
+ vxor 21, 30, 27 |
757 |
|
|
+ vaddudm 30, 30, 31 |
758 |
|
|
+ vxor 22, 30, 27 |
759 |
|
|
+ |
760 |
|
|
+ addi 12, 12, -128 |
761 |
|
|
+ addi 11, 11, 128 |
762 |
|
|
+ |
763 |
|
|
+ bdnz Loop_8x_block |
764 |
|
|
+ |
765 |
|
|
+ vmr 30, 29 |
766 |
|
|
+ |
767 |
|
|
+Loop_last_block: |
768 |
|
|
+ cmpdi 12, 0 |
769 |
|
|
+ beq aes_gcm_out |
770 |
|
|
+ |
771 |
|
|
+ # loop last few blocks |
772 |
|
|
+ li 10, 16 |
773 |
|
|
+ divdu 10, 12, 10 |
774 |
|
|
+ |
775 |
|
|
+ mtctr 10 |
776 |
|
|
+ |
777 |
|
|
+ lwz 10, 240(6) |
778 |
|
|
+ |
779 |
|
|
+ cmpdi 12, 16 |
780 |
|
|
+ blt Final_block |
781 |
|
|
+ |
782 |
|
|
+.macro Loop_aes_middle_1x |
783 |
|
|
+ xxlor 19+32, 1, 1 |
784 |
|
|
+ xxlor 20+32, 2, 2 |
785 |
|
|
+ xxlor 21+32, 3, 3 |
786 |
|
|
+ xxlor 22+32, 4, 4 |
787 |
|
|
+ |
788 |
|
|
+ vcipher 15, 15, 19 |
789 |
|
|
+ vcipher 15, 15, 20 |
790 |
|
|
+ vcipher 15, 15, 21 |
791 |
|
|
+ vcipher 15, 15, 22 |
792 |
|
|
+ |
793 |
|
|
+ xxlor 19+32, 5, 5 |
794 |
|
|
+ xxlor 20+32, 6, 6 |
795 |
|
|
+ xxlor 21+32, 7, 7 |
796 |
|
|
+ xxlor 22+32, 8, 8 |
797 |
|
|
+ |
798 |
|
|
+ vcipher 15, 15, 19 |
799 |
|
|
+ vcipher 15, 15, 20 |
800 |
|
|
+ vcipher 15, 15, 21 |
801 |
|
|
+ vcipher 15, 15, 22 |
802 |
|
|
+ |
803 |
|
|
+ xxlor 19+32, 9, 9 |
804 |
|
|
+ vcipher 15, 15, 19 |
805 |
|
|
+.endm |
806 |
|
|
+ |
807 |
|
|
+Next_rem_block: |
808 |
|
|
+ lxvb16x 15, 0, 14 # load block |
809 |
|
|
+ |
810 |
|
|
+ Loop_aes_middle_1x |
811 |
|
|
+ |
812 |
|
|
+ xxlor 23+32, 10, 10 |
813 |
|
|
+ |
814 |
|
|
+ cmpdi 10, 10 |
815 |
|
|
+ beq Do_next_1x |
816 |
|
|
+ |
817 |
|
|
+ # 192 bits |
818 |
|
|
+ xxlor 24+32, 11, 11 |
819 |
|
|
+ |
820 |
|
|
+ vcipher 15, 15, 23 |
821 |
|
|
+ vcipher 15, 15, 24 |
822 |
|
|
+ |
823 |
|
|
+ xxlor 23+32, 12, 12 |
824 |
|
|
+ |
825 |
|
|
+ cmpdi 10, 12 |
826 |
|
|
+ beq Do_next_1x |
827 |
|
|
+ |
828 |
|
|
+ # 256 bits |
829 |
|
|
+ xxlor 24+32, 13, 13 |
830 |
|
|
+ |
831 |
|
|
+ vcipher 15, 15, 23 |
832 |
|
|
+ vcipher 15, 15, 24 |
833 |
|
|
+ |
834 |
|
|
+ xxlor 23+32, 14, 14 |
835 |
|
|
+ |
836 |
|
|
+ cmpdi 10, 14 |
837 |
|
|
+ beq Do_next_1x |
838 |
|
|
+ |
839 |
|
|
+Do_next_1x: |
840 |
|
|
+ vcipherlast 15, 15, 23 |
841 |
|
|
+ |
842 |
|
|
+ xxlxor 47, 47, 15 |
843 |
|
|
+ stxvb16x 47, 0, 9 # store output |
844 |
|
|
+ addi 14, 14, 16 |
845 |
|
|
+ addi 9, 9, 16 |
846 |
|
|
+ |
847 |
|
|
+ vmr 28, 15 |
848 |
|
|
+ ppc_update_hash_1x |
849 |
|
|
+ |
850 |
|
|
+ addi 12, 12, -16 |
851 |
|
|
+ addi 11, 11, 16 |
852 |
|
|
+ xxlor 19+32, 0, 0 |
853 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
854 |
|
|
+ vxor 15, 30, 19 # add round key |
855 |
|
|
+ |
856 |
|
|
+ bdnz Next_rem_block |
857 |
|
|
+ |
858 |
|
|
+ cmpdi 12, 0 |
859 |
|
|
+ beq aes_gcm_out |
860 |
|
|
+ |
861 |
|
|
+Final_block: |
862 |
|
|
+ Loop_aes_middle_1x |
863 |
|
|
+ |
864 |
|
|
+ xxlor 23+32, 10, 10 |
865 |
|
|
+ |
866 |
|
|
+ cmpdi 10, 10 |
867 |
|
|
+ beq Do_final_1x |
868 |
|
|
+ |
869 |
|
|
+ # 192 bits |
870 |
|
|
+ xxlor 24+32, 11, 11 |
871 |
|
|
+ |
872 |
|
|
+ vcipher 15, 15, 23 |
873 |
|
|
+ vcipher 15, 15, 24 |
874 |
|
|
+ |
875 |
|
|
+ xxlor 23+32, 12, 12 |
876 |
|
|
+ |
877 |
|
|
+ cmpdi 10, 12 |
878 |
|
|
+ beq Do_final_1x |
879 |
|
|
+ |
880 |
|
|
+ # 256 bits |
881 |
|
|
+ xxlor 24+32, 13, 13 |
882 |
|
|
+ |
883 |
|
|
+ vcipher 15, 15, 23 |
884 |
|
|
+ vcipher 15, 15, 24 |
885 |
|
|
+ |
886 |
|
|
+ xxlor 23+32, 14, 14 |
887 |
|
|
+ |
888 |
|
|
+ cmpdi 10, 14 |
889 |
|
|
+ beq Do_final_1x |
890 |
|
|
+ |
891 |
|
|
+Do_final_1x: |
892 |
|
|
+ vcipherlast 15, 15, 23 |
893 |
|
|
+ |
894 |
|
|
+ lxvb16x 15, 0, 14 # load last block |
895 |
|
|
+ xxlxor 47, 47, 15 |
896 |
|
|
+ |
897 |
|
|
+ # create partial block mask |
898 |
|
|
+ li 15, 16 |
899 |
|
|
+ sub 15, 15, 12 # index to the mask |
900 |
|
|
+ |
901 |
|
|
+ vspltisb 16, -1 # first 16 bytes - 0xffff...ff |
902 |
|
|
+ vspltisb 17, 0 # second 16 bytes - 0x0000...00 |
903 |
|
|
+ li 10, 192 |
904 |
|
|
+ stvx 16, 10, 1 |
905 |
|
|
+ addi 10, 10, 16 |
906 |
|
|
+ stvx 17, 10, 1 |
907 |
|
|
+ |
908 |
|
|
+ addi 10, 1, 192 |
909 |
|
|
+ lxvb16x 16, 15, 10 # load partial block mask |
910 |
|
|
+ xxland 47, 47, 16 |
911 |
|
|
+ |
912 |
|
|
+ vmr 28, 15 |
913 |
|
|
+ ppc_update_hash_1x |
914 |
|
|
+ |
915 |
|
|
+ # * should store only the remaining bytes. |
916 |
|
|
+ bl Write_partial_block |
917 |
|
|
+ |
918 |
|
|
+ b aes_gcm_out |
919 |
|
|
+ |
920 |
|
|
+# |
921 |
|
|
+# Write partial block |
922 |
|
|
+# r9 - output |
923 |
|
|
+# r12 - remaining bytes |
924 |
|
|
+# v15 - partial input data |
925 |
|
|
+# |
926 |
|
|
+Write_partial_block: |
927 |
|
|
+ li 10, 192 |
928 |
|
|
+ stxvb16x 15+32, 10, 1 # last block |
929 |
|
|
+ |
930 |
|
|
+ #add 10, 9, 11 # Output |
931 |
|
|
+ addi 10, 9, -1 |
932 |
|
|
+ addi 16, 1, 191 |
933 |
|
|
+ |
934 |
|
|
+ mtctr 12 # remaining bytes |
935 |
|
|
+ li 15, 0 |
936 |
|
|
+ |
937 |
|
|
+Write_last_byte: |
938 |
|
|
+ lbzu 14, 1(16) |
939 |
|
|
+ stbu 14, 1(10) |
940 |
|
|
+ bdnz Write_last_byte |
941 |
|
|
+ blr |
942 |
|
|
+ |
943 |
|
|
+aes_gcm_out: |
944 |
|
|
+ # out = state |
945 |
|
|
+ stxvb16x 32, 0, 8 # write out Xi |
946 |
|
|
+ add 3, 11, 12 # return count |
947 |
|
|
+ |
948 |
|
|
+ li 9, 256 |
949 |
|
|
+ lvx 20, 9, 1 |
950 |
|
|
+ addi 9, 9, 16 |
951 |
|
|
+ lvx 21, 9, 1 |
952 |
|
|
+ addi 9, 9, 16 |
953 |
|
|
+ lvx 22, 9, 1 |
954 |
|
|
+ addi 9, 9, 16 |
955 |
|
|
+ lvx 23, 9, 1 |
956 |
|
|
+ addi 9, 9, 16 |
957 |
|
|
+ lvx 24, 9, 1 |
958 |
|
|
+ addi 9, 9, 16 |
959 |
|
|
+ lvx 25, 9, 1 |
960 |
|
|
+ addi 9, 9, 16 |
961 |
|
|
+ lvx 26, 9, 1 |
962 |
|
|
+ addi 9, 9, 16 |
963 |
|
|
+ lvx 27, 9, 1 |
964 |
|
|
+ addi 9, 9, 16 |
965 |
|
|
+ lvx 28, 9, 1 |
966 |
|
|
+ addi 9, 9, 16 |
967 |
|
|
+ lvx 29, 9, 1 |
968 |
|
|
+ addi 9, 9, 16 |
969 |
|
|
+ lvx 30, 9, 1 |
970 |
|
|
+ addi 9, 9, 16 |
971 |
|
|
+ lvx 31, 9, 1 |
972 |
|
|
+ |
973 |
|
|
+ ld 0, 528(1) |
974 |
|
|
+ ld 14,112(1) |
975 |
|
|
+ ld 15,120(1) |
976 |
|
|
+ ld 16,128(1) |
977 |
|
|
+ ld 17,136(1) |
978 |
|
|
+ ld 18,144(1) |
979 |
|
|
+ ld 19,152(1) |
980 |
|
|
+ ld 20,160(1) |
981 |
|
|
+ ld 21,168(1) |
982 |
|
|
+ |
983 |
|
|
+ mtlr 0 |
984 |
|
|
+ addi 1, 1, 512 |
985 |
|
|
+ blr |
986 |
|
|
+ |
987 |
|
|
+# |
988 |
|
|
+# 8x Decrypt |
989 |
|
|
+# |
990 |
|
|
+.global ppc_aes_gcm_decrypt |
991 |
|
|
+.align 5 |
992 |
|
|
+ppc_aes_gcm_decrypt: |
993 |
|
|
+_ppc_aes_gcm_decrypt: |
994 |
|
|
+ |
995 |
|
|
+ stdu 1,-512(1) |
996 |
|
|
+ mflr 0 |
997 |
|
|
+ |
998 |
|
|
+ std 14,112(1) |
999 |
|
|
+ std 15,120(1) |
1000 |
|
|
+ std 16,128(1) |
1001 |
|
|
+ std 17,136(1) |
1002 |
|
|
+ std 18,144(1) |
1003 |
|
|
+ std 19,152(1) |
1004 |
|
|
+ std 20,160(1) |
1005 |
|
|
+ std 21,168(1) |
1006 |
|
|
+ li 9, 256 |
1007 |
|
|
+ stvx 20, 9, 1 |
1008 |
|
|
+ addi 9, 9, 16 |
1009 |
|
|
+ stvx 21, 9, 1 |
1010 |
|
|
+ addi 9, 9, 16 |
1011 |
|
|
+ stvx 22, 9, 1 |
1012 |
|
|
+ addi 9, 9, 16 |
1013 |
|
|
+ stvx 23, 9, 1 |
1014 |
|
|
+ addi 9, 9, 16 |
1015 |
|
|
+ stvx 24, 9, 1 |
1016 |
|
|
+ addi 9, 9, 16 |
1017 |
|
|
+ stvx 25, 9, 1 |
1018 |
|
|
+ addi 9, 9, 16 |
1019 |
|
|
+ stvx 26, 9, 1 |
1020 |
|
|
+ addi 9, 9, 16 |
1021 |
|
|
+ stvx 27, 9, 1 |
1022 |
|
|
+ addi 9, 9, 16 |
1023 |
|
|
+ stvx 28, 9, 1 |
1024 |
|
|
+ addi 9, 9, 16 |
1025 |
|
|
+ stvx 29, 9, 1 |
1026 |
|
|
+ addi 9, 9, 16 |
1027 |
|
|
+ stvx 30, 9, 1 |
1028 |
|
|
+ addi 9, 9, 16 |
1029 |
|
|
+ stvx 31, 9, 1 |
1030 |
|
|
+ std 0, 528(1) |
1031 |
|
|
+ |
1032 |
|
|
+ # Load Xi |
1033 |
|
|
+ lxvb16x 32, 0, 8 # load Xi |
1034 |
|
|
+ |
1035 |
|
|
+ # load Hash - h^4, h^3, h^2, h |
1036 |
|
|
+ li 10, 32 |
1037 |
|
|
+ lxvd2x 2+32, 10, 8 # H Poli |
1038 |
|
|
+ li 10, 48 |
1039 |
|
|
+ lxvd2x 3+32, 10, 8 # Hl |
1040 |
|
|
+ li 10, 64 |
1041 |
|
|
+ lxvd2x 4+32, 10, 8 # H |
1042 |
|
|
+ li 10, 80 |
1043 |
|
|
+ lxvd2x 5+32, 10, 8 # Hh |
1044 |
|
|
+ |
1045 |
|
|
+ li 10, 96 |
1046 |
|
|
+ lxvd2x 6+32, 10, 8 # H^2l |
1047 |
|
|
+ li 10, 112 |
1048 |
|
|
+ lxvd2x 7+32, 10, 8 # H^2 |
1049 |
|
|
+ li 10, 128 |
1050 |
|
|
+ lxvd2x 8+32, 10, 8 # H^2h |
1051 |
|
|
+ |
1052 |
|
|
+ li 10, 144 |
1053 |
|
|
+ lxvd2x 9+32, 10, 8 # H^3l |
1054 |
|
|
+ li 10, 160 |
1055 |
|
|
+ lxvd2x 10+32, 10, 8 # H^3 |
1056 |
|
|
+ li 10, 176 |
1057 |
|
|
+ lxvd2x 11+32, 10, 8 # H^3h |
1058 |
|
|
+ |
1059 |
|
|
+ li 10, 192 |
1060 |
|
|
+ lxvd2x 12+32, 10, 8 # H^4l |
1061 |
|
|
+ li 10, 208 |
1062 |
|
|
+ lxvd2x 13+32, 10, 8 # H^4 |
1063 |
|
|
+ li 10, 224 |
1064 |
|
|
+ lxvd2x 14+32, 10, 8 # H^4h |
1065 |
|
|
+ |
1066 |
|
|
+ # initialize ICB: GHASH( IV ), IV - r7 |
1067 |
|
|
+ lxvb16x 30+32, 0, 7 # load IV - v30 |
1068 |
|
|
+ |
1069 |
|
|
+ mr 12, 5 # length |
1070 |
|
|
+ li 11, 0 # block index |
1071 |
|
|
+ |
1072 |
|
|
+ # counter 1 |
1073 |
|
|
+ vxor 31, 31, 31 |
1074 |
|
|
+ vspltisb 22, 1 |
1075 |
|
|
+ vsldoi 31, 31, 22,1 # counter 1 |
1076 |
|
|
+ |
1077 |
|
|
+ # load round key to VSR |
1078 |
|
|
+ lxv 0, 0(6) |
1079 |
|
|
+ lxv 1, 0x10(6) |
1080 |
|
|
+ lxv 2, 0x20(6) |
1081 |
|
|
+ lxv 3, 0x30(6) |
1082 |
|
|
+ lxv 4, 0x40(6) |
1083 |
|
|
+ lxv 5, 0x50(6) |
1084 |
|
|
+ lxv 6, 0x60(6) |
1085 |
|
|
+ lxv 7, 0x70(6) |
1086 |
|
|
+ lxv 8, 0x80(6) |
1087 |
|
|
+ lxv 9, 0x90(6) |
1088 |
|
|
+ lxv 10, 0xa0(6) |
1089 |
|
|
+ |
1090 |
|
|
+ # load rounds - 10 (128), 12 (192), 14 (256) |
1091 |
|
|
+ lwz 9,240(6) |
1092 |
|
|
+ |
1093 |
|
|
+ # |
1094 |
|
|
+ # vxor state, state, w # addroundkey |
1095 |
|
|
+ xxlor 32+29, 0, 0 |
1096 |
|
|
+ vxor 15, 30, 29 # IV + round key - add round key 0 |
1097 |
|
|
+ |
1098 |
|
|
+ cmpdi 9, 10 |
1099 |
|
|
+ beq Loop_aes_gcm_8x_dec |
1100 |
|
|
+ |
1101 |
|
|
+ # load 2 more round keys (v11, v12) |
1102 |
|
|
+ lxv 11, 0xb0(6) |
1103 |
|
|
+ lxv 12, 0xc0(6) |
1104 |
|
|
+ |
1105 |
|
|
+ cmpdi 9, 12 |
1106 |
|
|
+ beq Loop_aes_gcm_8x_dec |
1107 |
|
|
+ |
1108 |
|
|
+ # load 2 more round keys (v11, v12, v13, v14) |
1109 |
|
|
+ lxv 13, 0xd0(6) |
1110 |
|
|
+ lxv 14, 0xe0(6) |
1111 |
|
|
+ cmpdi 9, 14 |
1112 |
|
|
+ beq Loop_aes_gcm_8x_dec |
1113 |
|
|
+ |
1114 |
|
|
+ b aes_gcm_out |
1115 |
|
|
+ |
1116 |
|
|
+.align 5 |
1117 |
|
|
+Loop_aes_gcm_8x_dec: |
1118 |
|
|
+ mr 14, 3 |
1119 |
|
|
+ mr 9, 4 |
1120 |
|
|
+ |
1121 |
|
|
+ # n blocks |
1122 |
|
|
+ li 10, 128 |
1123 |
|
|
+ divdu 10, 5, 10 # n 128 bytes-blocks |
1124 |
|
|
+ cmpdi 10, 0 |
1125 |
|
|
+ beq Loop_last_block_dec |
1126 |
|
|
+ |
1127 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
1128 |
|
|
+ vxor 16, 30, 29 |
1129 |
|
|
+ vaddudm 30, 30, 31 |
1130 |
|
|
+ vxor 17, 30, 29 |
1131 |
|
|
+ vaddudm 30, 30, 31 |
1132 |
|
|
+ vxor 18, 30, 29 |
1133 |
|
|
+ vaddudm 30, 30, 31 |
1134 |
|
|
+ vxor 19, 30, 29 |
1135 |
|
|
+ vaddudm 30, 30, 31 |
1136 |
|
|
+ vxor 20, 30, 29 |
1137 |
|
|
+ vaddudm 30, 30, 31 |
1138 |
|
|
+ vxor 21, 30, 29 |
1139 |
|
|
+ vaddudm 30, 30, 31 |
1140 |
|
|
+ vxor 22, 30, 29 |
1141 |
|
|
+ |
1142 |
|
|
+ mtctr 10 |
1143 |
|
|
+ |
1144 |
|
|
+ li 15, 16 |
1145 |
|
|
+ li 16, 32 |
1146 |
|
|
+ li 17, 48 |
1147 |
|
|
+ li 18, 64 |
1148 |
|
|
+ li 19, 80 |
1149 |
|
|
+ li 20, 96 |
1150 |
|
|
+ li 21, 112 |
1151 |
|
|
+ |
1152 |
|
|
+ lwz 10, 240(6) |
1153 |
|
|
+ |
1154 |
|
|
+Loop_8x_block_dec: |
1155 |
|
|
+ |
1156 |
|
|
+ lxvb16x 15, 0, 14 # load block |
1157 |
|
|
+ lxvb16x 16, 15, 14 # load block |
1158 |
|
|
+ lxvb16x 17, 16, 14 # load block |
1159 |
|
|
+ lxvb16x 18, 17, 14 # load block |
1160 |
|
|
+ lxvb16x 19, 18, 14 # load block |
1161 |
|
|
+ lxvb16x 20, 19, 14 # load block |
1162 |
|
|
+ lxvb16x 21, 20, 14 # load block |
1163 |
|
|
+ lxvb16x 22, 21, 14 # load block |
1164 |
|
|
+ addi 14, 14, 128 |
1165 |
|
|
+ |
1166 |
|
|
+ Loop_aes_middle8x |
1167 |
|
|
+ |
1168 |
|
|
+ xxlor 23+32, 10, 10 |
1169 |
|
|
+ |
1170 |
|
|
+ cmpdi 10, 10 |
1171 |
|
|
+ beq Do_last_aes_dec |
1172 |
|
|
+ |
1173 |
|
|
+ # 192 bits |
1174 |
|
|
+ xxlor 24+32, 11, 11 |
1175 |
|
|
+ |
1176 |
|
|
+ vcipher 15, 15, 23 |
1177 |
|
|
+ vcipher 16, 16, 23 |
1178 |
|
|
+ vcipher 17, 17, 23 |
1179 |
|
|
+ vcipher 18, 18, 23 |
1180 |
|
|
+ vcipher 19, 19, 23 |
1181 |
|
|
+ vcipher 20, 20, 23 |
1182 |
|
|
+ vcipher 21, 21, 23 |
1183 |
|
|
+ vcipher 22, 22, 23 |
1184 |
|
|
+ |
1185 |
|
|
+ vcipher 15, 15, 24 |
1186 |
|
|
+ vcipher 16, 16, 24 |
1187 |
|
|
+ vcipher 17, 17, 24 |
1188 |
|
|
+ vcipher 18, 18, 24 |
1189 |
|
|
+ vcipher 19, 19, 24 |
1190 |
|
|
+ vcipher 20, 20, 24 |
1191 |
|
|
+ vcipher 21, 21, 24 |
1192 |
|
|
+ vcipher 22, 22, 24 |
1193 |
|
|
+ |
1194 |
|
|
+ xxlor 23+32, 12, 12 |
1195 |
|
|
+ |
1196 |
|
|
+ cmpdi 10, 12 |
1197 |
|
|
+ beq Do_last_aes_dec |
1198 |
|
|
+ |
1199 |
|
|
+ # 256 bits |
1200 |
|
|
+ xxlor 24+32, 13, 13 |
1201 |
|
|
+ |
1202 |
|
|
+ vcipher 15, 15, 23 |
1203 |
|
|
+ vcipher 16, 16, 23 |
1204 |
|
|
+ vcipher 17, 17, 23 |
1205 |
|
|
+ vcipher 18, 18, 23 |
1206 |
|
|
+ vcipher 19, 19, 23 |
1207 |
|
|
+ vcipher 20, 20, 23 |
1208 |
|
|
+ vcipher 21, 21, 23 |
1209 |
|
|
+ vcipher 22, 22, 23 |
1210 |
|
|
+ |
1211 |
|
|
+ vcipher 15, 15, 24 |
1212 |
|
|
+ vcipher 16, 16, 24 |
1213 |
|
|
+ vcipher 17, 17, 24 |
1214 |
|
|
+ vcipher 18, 18, 24 |
1215 |
|
|
+ vcipher 19, 19, 24 |
1216 |
|
|
+ vcipher 20, 20, 24 |
1217 |
|
|
+ vcipher 21, 21, 24 |
1218 |
|
|
+ vcipher 22, 22, 24 |
1219 |
|
|
+ |
1220 |
|
|
+ xxlor 23+32, 14, 14 |
1221 |
|
|
+ |
1222 |
|
|
+ cmpdi 10, 14 |
1223 |
|
|
+ beq Do_last_aes_dec |
1224 |
|
|
+ b aes_gcm_out |
1225 |
|
|
+ |
1226 |
|
|
+Do_last_aes_dec: |
1227 |
|
|
+ |
1228 |
|
|
+ # |
1229 |
|
|
+ # last round |
1230 |
|
|
+ vcipherlast 15, 15, 23 |
1231 |
|
|
+ vcipherlast 16, 16, 23 |
1232 |
|
|
+ |
1233 |
|
|
+ xxlxor 47, 47, 15 |
1234 |
|
|
+ stxvb16x 47, 0, 9 # store output |
1235 |
|
|
+ xxlxor 48, 48, 16 |
1236 |
|
|
+ stxvb16x 48, 15, 9 # store output |
1237 |
|
|
+ |
1238 |
|
|
+ vcipherlast 17, 17, 23 |
1239 |
|
|
+ vcipherlast 18, 18, 23 |
1240 |
|
|
+ |
1241 |
|
|
+ xxlxor 49, 49, 17 |
1242 |
|
|
+ stxvb16x 49, 16, 9 # store output |
1243 |
|
|
+ xxlxor 50, 50, 18 |
1244 |
|
|
+ stxvb16x 50, 17, 9 # store output |
1245 |
|
|
+ |
1246 |
|
|
+ vcipherlast 19, 19, 23 |
1247 |
|
|
+ vcipherlast 20, 20, 23 |
1248 |
|
|
+ |
1249 |
|
|
+ xxlxor 51, 51, 19 |
1250 |
|
|
+ stxvb16x 51, 18, 9 # store output |
1251 |
|
|
+ xxlxor 52, 52, 20 |
1252 |
|
|
+ stxvb16x 52, 19, 9 # store output |
1253 |
|
|
+ |
1254 |
|
|
+ vcipherlast 21, 21, 23 |
1255 |
|
|
+ vcipherlast 22, 22, 23 |
1256 |
|
|
+ |
1257 |
|
|
+ xxlxor 53, 53, 21 |
1258 |
|
|
+ stxvb16x 53, 20, 9 # store output |
1259 |
|
|
+ xxlxor 54, 54, 22 |
1260 |
|
|
+ stxvb16x 54, 21, 9 # store output |
1261 |
|
|
+ |
1262 |
|
|
+ addi 9, 9, 128 |
1263 |
|
|
+ |
1264 |
|
|
+ xxlor 15+32, 15, 15 |
1265 |
|
|
+ xxlor 16+32, 16, 16 |
1266 |
|
|
+ xxlor 17+32, 17, 17 |
1267 |
|
|
+ xxlor 18+32, 18, 18 |
1268 |
|
|
+ xxlor 19+32, 19, 19 |
1269 |
|
|
+ xxlor 20+32, 20, 20 |
1270 |
|
|
+ xxlor 21+32, 21, 21 |
1271 |
|
|
+ xxlor 22+32, 22, 22 |
1272 |
|
|
+ |
1273 |
|
|
+ # ghash here |
1274 |
|
|
+ ppc_aes_gcm_ghash2_4x |
1275 |
|
|
+ |
1276 |
|
|
+ xxlor 27+32, 0, 0 |
1277 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
1278 |
|
|
+ vmr 29, 30 |
1279 |
|
|
+ vxor 15, 30, 27 # add round key |
1280 |
|
|
+ vaddudm 30, 30, 31 |
1281 |
|
|
+ vxor 16, 30, 27 |
1282 |
|
|
+ vaddudm 30, 30, 31 |
1283 |
|
|
+ vxor 17, 30, 27 |
1284 |
|
|
+ vaddudm 30, 30, 31 |
1285 |
|
|
+ vxor 18, 30, 27 |
1286 |
|
|
+ vaddudm 30, 30, 31 |
1287 |
|
|
+ vxor 19, 30, 27 |
1288 |
|
|
+ vaddudm 30, 30, 31 |
1289 |
|
|
+ vxor 20, 30, 27 |
1290 |
|
|
+ vaddudm 30, 30, 31 |
1291 |
|
|
+ vxor 21, 30, 27 |
1292 |
|
|
+ vaddudm 30, 30, 31 |
1293 |
|
|
+ vxor 22, 30, 27 |
1294 |
|
|
+ addi 12, 12, -128 |
1295 |
|
|
+ addi 11, 11, 128 |
1296 |
|
|
+ |
1297 |
|
|
+ bdnz Loop_8x_block_dec |
1298 |
|
|
+ |
1299 |
|
|
+ vmr 30, 29 |
1300 |
|
|
+ |
1301 |
|
|
+Loop_last_block_dec: |
1302 |
|
|
+ cmpdi 12, 0 |
1303 |
|
|
+ beq aes_gcm_out |
1304 |
|
|
+ |
1305 |
|
|
+ # loop last few blocks |
1306 |
|
|
+ li 10, 16 |
1307 |
|
|
+ divdu 10, 12, 10 |
1308 |
|
|
+ |
1309 |
|
|
+ mtctr 10 |
1310 |
|
|
+ |
1311 |
|
|
+ lwz 10,240(6) |
1312 |
|
|
+ |
1313 |
|
|
+ cmpdi 12, 16 |
1314 |
|
|
+ blt Final_block_dec |
1315 |
|
|
+ |
1316 |
|
|
+Next_rem_block_dec: |
1317 |
|
|
+ lxvb16x 15, 0, 14 # load block |
1318 |
|
|
+ |
1319 |
|
|
+ Loop_aes_middle_1x |
1320 |
|
|
+ |
1321 |
|
|
+ xxlor 23+32, 10, 10 |
1322 |
|
|
+ |
1323 |
|
|
+ cmpdi 10, 10 |
1324 |
|
|
+ beq Do_next_1x_dec |
1325 |
|
|
+ |
1326 |
|
|
+ # 192 bits |
1327 |
|
|
+ xxlor 24+32, 11, 11 |
1328 |
|
|
+ |
1329 |
|
|
+ vcipher 15, 15, 23 |
1330 |
|
|
+ vcipher 15, 15, 24 |
1331 |
|
|
+ |
1332 |
|
|
+ xxlor 23+32, 12, 12 |
1333 |
|
|
+ |
1334 |
|
|
+ cmpdi 10, 12 |
1335 |
|
|
+ beq Do_next_1x_dec |
1336 |
|
|
+ |
1337 |
|
|
+ # 256 bits |
1338 |
|
|
+ xxlor 24+32, 13, 13 |
1339 |
|
|
+ |
1340 |
|
|
+ vcipher 15, 15, 23 |
1341 |
|
|
+ vcipher 15, 15, 24 |
1342 |
|
|
+ |
1343 |
|
|
+ xxlor 23+32, 14, 14 |
1344 |
|
|
+ |
1345 |
|
|
+ cmpdi 10, 14 |
1346 |
|
|
+ beq Do_next_1x_dec |
1347 |
|
|
+ |
1348 |
|
|
+Do_next_1x_dec: |
1349 |
|
|
+ vcipherlast 15, 15, 23 |
1350 |
|
|
+ |
1351 |
|
|
+ xxlxor 47, 47, 15 |
1352 |
|
|
+ stxvb16x 47, 0, 9 # store output |
1353 |
|
|
+ addi 14, 14, 16 |
1354 |
|
|
+ addi 9, 9, 16 |
1355 |
|
|
+ |
1356 |
|
|
+ xxlor 28+32, 15, 15 |
1357 |
|
|
+ ppc_update_hash_1x |
1358 |
|
|
+ |
1359 |
|
|
+ addi 12, 12, -16 |
1360 |
|
|
+ addi 11, 11, 16 |
1361 |
|
|
+ xxlor 19+32, 0, 0 |
1362 |
|
|
+ vaddudm 30, 30, 31 # IV + counter |
1363 |
|
|
+ vxor 15, 30, 19 # add round key |
1364 |
|
|
+ |
1365 |
|
|
+ bdnz Next_rem_block_dec |
1366 |
|
|
+ |
1367 |
|
|
+ cmpdi 12, 0 |
1368 |
|
|
+ beq aes_gcm_out |
1369 |
|
|
+ |
1370 |
|
|
+Final_block_dec: |
1371 |
|
|
+ Loop_aes_middle_1x |
1372 |
|
|
+ |
1373 |
|
|
+ xxlor 23+32, 10, 10 |
1374 |
|
|
+ |
1375 |
|
|
+ cmpdi 10, 10 |
1376 |
|
|
+ beq Do_final_1x_dec |
1377 |
|
|
+ |
1378 |
|
|
+ # 192 bits |
1379 |
|
|
+ xxlor 24+32, 11, 11 |
1380 |
|
|
+ |
1381 |
|
|
+ vcipher 15, 15, 23 |
1382 |
|
|
+ vcipher 15, 15, 24 |
1383 |
|
|
+ |
1384 |
|
|
+ xxlor 23+32, 12, 12 |
1385 |
|
|
+ |
1386 |
|
|
+ cmpdi 10, 12 |
1387 |
|
|
+ beq Do_final_1x_dec |
1388 |
|
|
+ |
1389 |
|
|
+ # 256 bits |
1390 |
|
|
+ xxlor 24+32, 13, 13 |
1391 |
|
|
+ |
1392 |
|
|
+ vcipher 15, 15, 23 |
1393 |
|
|
+ vcipher 15, 15, 24 |
1394 |
|
|
+ |
1395 |
|
|
+ xxlor 23+32, 14, 14 |
1396 |
|
|
+ |
1397 |
|
|
+ cmpdi 10, 14 |
1398 |
|
|
+ beq Do_final_1x_dec |
1399 |
|
|
+ |
1400 |
|
|
+Do_final_1x_dec: |
1401 |
|
|
+ vcipherlast 15, 15, 23 |
1402 |
|
|
+ |
1403 |
|
|
+ lxvb16x 15, 0, 14 # load block |
1404 |
|
|
+ xxlxor 47, 47, 15 |
1405 |
|
|
+ |
1406 |
|
|
+ # create partial block mask |
1407 |
|
|
+ li 15, 16 |
1408 |
|
|
+ sub 15, 15, 12 # index to the mask |
1409 |
|
|
+ |
1410 |
|
|
+ vspltisb 16, -1 # first 16 bytes - 0xffff...ff |
1411 |
|
|
+ vspltisb 17, 0 # second 16 bytes - 0x0000...00 |
1412 |
|
|
+ li 10, 192 |
1413 |
|
|
+ stvx 16, 10, 1 |
1414 |
|
|
+ addi 10, 10, 16 |
1415 |
|
|
+ stvx 17, 10, 1 |
1416 |
|
|
+ |
1417 |
|
|
+ addi 10, 1, 192 |
1418 |
|
|
+ lxvb16x 16, 15, 10 # load block mask |
1419 |
|
|
+ xxland 47, 47, 16 |
1420 |
|
|
+ |
1421 |
|
|
+ xxlor 28+32, 15, 15 |
1422 |
|
|
+ ppc_update_hash_1x |
1423 |
|
|
+ |
1424 |
|
|
+ # * should store only the remaining bytes. |
1425 |
|
|
+ bl Write_partial_block |
1426 |
|
|
+ |
1427 |
|
|
+ b aes_gcm_out |
1428 |
|
|
+ |
1429 |
|
|
+ |
1430 |
|
|
+___ |
1431 |
|
|
+ |
1432 |
|
|
+foreach (split("\n",$code)) { |
1433 |
|
|
+ s/\`([^\`]*)\`/eval $1/geo; |
1434 |
|
|
+ |
1435 |
|
|
+ if ($flavour =~ /le$/o) { # little-endian flavour: remove the first "le?" marker on the line; if there is none, rewrite the first "be?" marker as "#be#" |
1436 |
|
|
+ s/le\?//o or |
1437 |
|
|
+ s/be\?/#be#/o; |
1438 |
|
|
+ } else { |
1439 |
|
|
+ s/le\?/#le#/o or |
1440 |
|
|
+ s/be\?//o; |
1441 |
|
|
+ } |
1442 |
|
|
+ print $_,"\n"; |
1443 |
|
|
+} |
1444 |
|
|
+ |
1445 |
|
|
+close STDOUT or die "error closing STDOUT: $!"; # closing STDOUT forces buffered output to be flushed; die with the OS error if that fails |
1446 |
|
|
diff --git a/crypto/modes/build.info b/crypto/modes/build.info |
1447 |
|
|
index 687e872..0ea122e 100644 |
1448 |
|
|
--- a/crypto/modes/build.info |
1449 |
|
|
+++ b/crypto/modes/build.info |
1450 |
|
|
@@ -32,7 +32,7 @@ IF[{- !$disabled{asm} -}] |
1451 |
|
|
$MODESASM_parisc20_64=$MODESASM_parisc11 |
1452 |
|
|
$MODESDEF_parisc20_64=$MODESDEF_parisc11 |
1453 |
|
|
|
1454 |
|
|
- $MODESASM_ppc32=ghashp8-ppc.s |
1455 |
|
|
+ $MODESASM_ppc32=ghashp8-ppc.s aes-gcm-ppc.s |
1456 |
|
|
$MODESDEF_ppc32= |
1457 |
|
|
$MODESASM_ppc64=$MODESASM_ppc32 |
1458 |
|
|
$MODESDEF_ppc64=$MODESDEF_ppc32 |
1459 |
|
|
@@ -71,6 +71,7 @@ INCLUDE[ghash-sparcv9.o]=.. |
1460 |
|
|
GENERATE[ghash-alpha.S]=asm/ghash-alpha.pl |
1461 |
|
|
GENERATE[ghash-parisc.s]=asm/ghash-parisc.pl |
1462 |
|
|
GENERATE[ghashp8-ppc.s]=asm/ghashp8-ppc.pl |
1463 |
|
|
+GENERATE[aes-gcm-ppc.s]=asm/aes-gcm-ppc.pl |
1464 |
|
|
GENERATE[ghash-armv4.S]=asm/ghash-armv4.pl |
1465 |
|
|
INCLUDE[ghash-armv4.o]=.. |
1466 |
|
|
GENERATE[ghashv8-armx.S]=asm/ghashv8-armx.pl |
1467 |
|
|
diff --git a/include/crypto/aes_platform.h b/include/crypto/aes_platform.h |
1468 |
|
|
index e95ad5a..0c281a3 100644 |
1469 |
|
|
--- a/include/crypto/aes_platform.h |
1470 |
|
|
+++ b/include/crypto/aes_platform.h |
1471 |
|
|
@@ -74,6 +74,26 @@ void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len, |
1472 |
|
|
# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks |
1473 |
|
|
# define HWAES_xts_encrypt aes_p8_xts_encrypt |
1474 |
|
|
# define HWAES_xts_decrypt aes_p8_xts_decrypt |
1475 |
|
|
+# define PPC_AES_GCM_CAPABLE (OPENSSL_ppccap_P & PPC_MADD300) /* non-zero when the PPC_MADD300 bit is set in OPENSSL_ppccap_P */ |
1476 |
|
|
+# define AES_GCM_ENC_BYTES 128 /* NOTE(review): presumably the smallest input handed to the asm encrypt path - confirm at the use site */ |
1477 |
|
|
+# define AES_GCM_DEC_BYTES 128 /* NOTE(review): decrypt counterpart; the asm decrypt bulk loop consumes 128 bytes per iteration - confirm at the use site */ |
1478 |
|
|
+size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, |
1479 |
|
|
+ size_t len, const void *key, unsigned char ivec[16], |
1480 |
|
|
+ u64 *Xi); /* encrypt primitive; called from aes_p10_gcm_crypt in cipher_aes_gcm_hw_ppc.inc */ |
1481 |
|
|
+size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, |
1482 |
|
|
+ size_t len, const void *key, unsigned char ivec[16], |
1483 |
|
|
+ u64 *Xi); /* decrypt primitive; called from aes_p10_gcm_crypt in cipher_aes_gcm_hw_ppc.inc */ |
1484 |
|
|
+size_t ppc_aes_gcm_encrypt_wrap(const unsigned char *in, unsigned char *out, |
1485 |
|
|
+ size_t len, const void *key, |
1486 |
|
|
+ unsigned char ivec[16], u64 *Xi); /* C wrapper defined in cipher_aes_gcm_hw_ppc.inc */ |
1487 |
|
|
+size_t ppc_aes_gcm_decrypt_wrap(const unsigned char *in, unsigned char *out, |
1488 |
|
|
+ size_t len, const void *key, |
1489 |
|
|
+ unsigned char ivec[16], u64 *Xi); /* C wrapper defined in cipher_aes_gcm_hw_ppc.inc */ |
1490 |
|
|
+# define AES_gcm_encrypt ppc_aes_gcm_encrypt_wrap /* generic name resolves to the PPC encrypt wrapper */ |
1491 |
|
|
+# define AES_gcm_decrypt ppc_aes_gcm_decrypt_wrap /* generic name resolves to the PPC decrypt wrapper */ |
1492 |
|
|
+# define AES_GCM_ASM(gctx) ((gctx)->ctr==aes_p8_ctr32_encrypt_blocks && \ |
1493 |
|
|
+ (gctx)->gcm.ghash==gcm_ghash_p8) /* true only when the context uses both the aes_p8 CTR routine and gcm_ghash_p8 */ |
1494 |
|
|
+void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp, size_t len); /* prototype needed for the comparison in AES_GCM_ASM */ |
1495 |
|
|
# endif /* PPC */ |
1496 |
|
|
|
1497 |
|
|
# if (defined(__arm__) || defined(__arm) || defined(__aarch64__)) |
1498 |
|
|
diff --git a/providers/implementations/ciphers/cipher_aes_gcm_hw.c b/providers/implementations/ciphers/cipher_aes_gcm_hw.c |
1499 |
|
|
index 44fa9d4..789ec12 100644 |
1500 |
|
|
--- a/providers/implementations/ciphers/cipher_aes_gcm_hw.c |
1501 |
|
|
+++ b/providers/implementations/ciphers/cipher_aes_gcm_hw.c |
1502 |
|
|
@@ -141,6 +141,8 @@ static const PROV_GCM_HW aes_gcm = { |
1503 |
|
|
# include "cipher_aes_gcm_hw_t4.inc" |
1504 |
|
|
#elif defined(AES_PMULL_CAPABLE) && defined(AES_GCM_ASM) |
1505 |
|
|
# include "cipher_aes_gcm_hw_armv8.inc" |
1506 |
|
|
+#elif defined(PPC_AES_GCM_CAPABLE) |
1507 |
|
|
+# include "cipher_aes_gcm_hw_ppc.inc" |
1508 |
|
|
#else |
1509 |
|
|
const PROV_GCM_HW *ossl_prov_aes_hw_gcm(size_t keybits) |
1510 |
|
|
{ |
1511 |
|
|
diff --git a/providers/implementations/ciphers/cipher_aes_gcm_hw_ppc.inc b/providers/implementations/ciphers/cipher_aes_gcm_hw_ppc.inc |
1512 |
|
|
new file mode 100644 |
1513 |
|
|
index 0000000..4eed0f4 |
1514 |
|
|
--- /dev/null |
1515 |
|
|
+++ b/providers/implementations/ciphers/cipher_aes_gcm_hw_ppc.inc |
1516 |
|
|
@@ -0,0 +1,119 @@ |
1517 |
|
|
+/* |
1518 |
|
|
+ * Copyright 2001-2021 The OpenSSL Project Authors. All Rights Reserved. |
1519 |
|
|
+ * |
1520 |
|
|
+ * Licensed under the Apache License 2.0 (the "License"). You may not use |
1521 |
|
|
+ * this file except in compliance with the License. You can obtain a copy |
1522 |
|
|
+ * in the file LICENSE in the source distribution or at |
1523 |
|
|
+ * https://www.openssl.org/source/license.html |
1524 |
|
|
+ */ |
1525 |
|
|
+ |
1526 |
|
|
+/*- |
1527 |
|
|
+ * PPC support for AES GCM. |
1528 |
|
|
+ * This file is included by cipher_aes_gcm_hw.c |
1529 |
|
|
+ */ |
1530 |
|
|
+ |
1531 |
|
|
+static int aes_ppc_gcm_initkey(PROV_GCM_CTX *ctx, const unsigned char *key, |
1532 |
|
|
+ size_t keylen) |
1533 |
|
|
+{ /* Key-setup hook for the PPC AES-GCM provider; first entry of the aes_ppc_gcm table. Always returns 1. */ |
1534 |
|
|
+ PROV_AES_GCM_CTX *actx = (PROV_AES_GCM_CTX *)ctx; |
1535 |
|
|
+ AES_KEY *ks = &actx->ks.ks; /* key schedule stored inside the AES-GCM context */ |
1536 |
|
|
+ /* Pass ks and the aes_p8_* key-setup, block-encrypt and CTR32 routines to GCM_HW_SET_KEY_CTR_FN; the macro is defined elsewhere and presumably also consumes ctx, key and keylen - confirm. */ |
1537 |
|
|
+ GCM_HW_SET_KEY_CTR_FN(ks, aes_p8_set_encrypt_key, aes_p8_encrypt, |
1538 |
|
|
+ aes_p8_ctr32_encrypt_blocks); |
1539 |
|
|
+ return 1; |
1540 |
|
|
+} |
1541 |
|
|
+ |
1542 |
|
|
+ |
1543 |
|
|
+extern size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len, |
1544 |
|
|
+ const void *key, unsigned char ivec[16], u64 *Xi); /* encrypt primitive used by aes_p10_gcm_crypt; not defined in this file (presumably the generated aes-gcm-ppc assembly) */ |
1545 |
|
|
+extern size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len, |
1546 |
|
|
+ const void *key, unsigned char ivec[16], u64 *Xi); /* decrypt primitive used by aes_p10_gcm_crypt; not defined in this file (presumably the generated aes-gcm-ppc assembly) */ |
1547 |
|
|
+ |
1548 |
|
|
+static inline u32 UTO32(unsigned char *buf) |
1549 |
|
|
+{ /* Assemble buf[0..3] into a u32 with buf[0] as the most significant byte (big-endian read). */ |
1550 |
|
|
+ return ((u32) buf[0] << 24) | ((u32) buf[1] << 16) | ((u32) buf[2] << 8) | ((u32) buf[3]); |
1551 |
|
|
+} |
1552 |
|
|
+ |
1553 |
|
|
+static inline u32 add32TOU(unsigned char buf[4], u32 n) |
1554 |
|
|
+{ /* Add n to the big-endian value held in buf[0..3], write the sum back in the same byte order, and return the sum. */ |
1555 |
|
|
+ u32 r; |
1556 |
|
|
+ /* Read, add, then store most significant byte first; assumes u32 is an unsigned 32-bit type so the sum wraps - confirm typedef. */ |
1557 |
|
|
+ r = UTO32(buf); |
1558 |
|
|
+ r += n; |
1559 |
|
|
+ buf[0] = (unsigned char) (r >> 24) & 0xFF; /* the cast applies before the mask, so the & 0xFF acts on the already-narrowed byte */ |
1560 |
|
|
+ buf[1] = (unsigned char) (r >> 16) & 0xFF; |
1561 |
|
|
+ buf[2] = (unsigned char) (r >> 8) & 0xFF; |
1562 |
|
|
+ buf[3] = (unsigned char) r & 0xFF; |
1563 |
|
|
+ return r; |
1564 |
|
|
+} |
1565 |
|
|
+ |
1566 |
|
|
+static size_t aes_p10_gcm_crypt(const unsigned char *in, unsigned char *out, size_t len, |
1567 |
|
|
+ const void *key, unsigned char ivec[16], u64 *Xi, int encrypt) |
1568 |
|
|
+{ /* Drive ppc_aes_gcm_encrypt (encrypt != 0) or ppc_aes_gcm_decrypt over the whole 16-byte blocks of in, one call per span that fits before the 32-bit counter in ivec[12..15] wraps. Returns the sum of the values those calls return; any tail shorter than 16 bytes is not processed here. */ |
1569 |
|
|
+ int s = 0; /* value returned by the most recent primitive call */ |
1570 |
|
|
+ int ndone = 0; /* running total returned to the caller; NOTE(review): int although the function and the primitives return size_t - confirm callers stay below INT_MAX */ |
1571 |
|
|
+ int ctr_reset = 0; /* set when the current span stops at the counter wrap and more blocks follow */ |
1572 |
|
|
+ u64 blocks_unused; |
1573 |
|
|
+ u64 nb = len / 16; /* whole blocks still to process */ |
1574 |
|
|
+ u64 next_ctr = 0; /* number of blocks in the current span, added to the counter afterwards */ |
1575 |
|
|
+ unsigned char ctr_saved[12]; |
1576 |
|
|
+ /* Keep a copy of ivec[0..11]; it is written back after every primitive call. */ |
1577 |
|
|
+ memcpy(ctr_saved, ivec, 12); |
1578 |
|
|
+ /* Each iteration handles one span and reuses len as that span's byte length. */ |
1579 |
|
|
+ while (nb) { |
1580 |
|
|
+ blocks_unused = (u64) 0xffffffffU + 1 - (u64) UTO32 (ivec + 12); /* 2^32 minus the current counter value */ |
1581 |
|
|
+ if (nb > blocks_unused) { |
1582 |
|
|
+ len = blocks_unused * 16; |
1583 |
|
|
+ nb -= blocks_unused; |
1584 |
|
|
+ next_ctr = blocks_unused; |
1585 |
|
|
+ ctr_reset = 1; |
1586 |
|
|
+ } else { |
1587 |
|
|
+ len = nb * 16; |
1588 |
|
|
+ next_ctr = nb; |
1589 |
|
|
+ nb = 0; |
1590 |
|
|
+ } |
1591 |
|
|
+ /* Process this span with the selected primitive. */ |
1592 |
|
|
+ s = encrypt ? ppc_aes_gcm_encrypt(in, out, len, key, ivec, Xi) |
1593 |
|
|
+ : ppc_aes_gcm_decrypt(in, out, len, key, ivec, Xi); |
1594 |
|
|
+ /* NOTE(review): the counter is advanced here in C, which suggests the primitive does not leave an updated counter in ivec - confirm against the assembly. */ |
1595 |
|
|
+ /* add counter to ivec */ |
1596 |
|
|
+ add32TOU(ivec + 12, (u32) next_ctr); |
1597 |
|
|
+ if (ctr_reset) { /* only reached when nb is still non-zero, so the buffers move on to the next span */ |
1598 |
|
|
+ ctr_reset = 0; |
1599 |
|
|
+ in += len; |
1600 |
|
|
+ out += len; |
1601 |
|
|
+ } |
1602 |
|
|
+ memcpy(ivec, ctr_saved, 12); /* restore ivec[0..11]; only the counter bytes keep their new value */ |
1603 |
|
|
+ ndone += s; |
1604 |
|
|
+ } |
1605 |
|
|
+ /* ndone is converted from int to size_t on return. */ |
1606 |
|
|
+ return ndone; |
1607 |
|
|
+} |
1608 |
|
|
+ |
1609 |
|
|
+size_t ppc_aes_gcm_encrypt_wrap(const unsigned char *in, unsigned char *out, size_t len, |
1610 |
|
|
+ const void *key, unsigned char ivec[16], u64 *Xi) |
1611 |
|
|
+{ /* Encrypt entry point: forwards every argument to aes_p10_gcm_crypt with encrypt = 1 and returns its result. */ |
1612 |
|
|
+ return aes_p10_gcm_crypt(in, out, len, key, ivec, Xi, 1); |
1613 |
|
|
+} |
1614 |
|
|
+ |
1615 |
|
|
+size_t ppc_aes_gcm_decrypt_wrap(const unsigned char *in, unsigned char *out, size_t len, |
1616 |
|
|
+ const void *key, unsigned char ivec[16], u64 *Xi) |
1617 |
|
|
+{ /* Decrypt entry point: forwards every argument to aes_p10_gcm_crypt with encrypt = 0 and returns its result. */ |
1618 |
|
|
+ return aes_p10_gcm_crypt(in, out, len, key, ivec, Xi, 0); |
1619 |
|
|
+} |
1620 |
|
|
+ |
1621 |
|
|
+ |
1622 |
|
|
+static const PROV_GCM_HW aes_ppc_gcm = { /* handler table returned by ossl_prov_aes_hw_gcm when PPC_AES_GCM_CAPABLE is non-zero; entries are positional, see PROV_GCM_HW for the field order */ |
1623 |
|
|
+ aes_ppc_gcm_initkey, /* the only entry defined in this file */ |
1624 |
|
|
+ ossl_gcm_setiv, |
1625 |
|
|
+ ossl_gcm_aad_update, |
1626 |
|
|
+ generic_aes_gcm_cipher_update, |
1627 |
|
|
+ ossl_gcm_cipher_final, |
1628 |
|
|
+ ossl_gcm_one_shot |
1629 |
|
|
+}; |
1630 |
|
|
+ |
1631 |
|
|
+const PROV_GCM_HW *ossl_prov_aes_hw_gcm(size_t keybits) |
1632 |
|
|
+{ /* Select the GCM handler table: aes_ppc_gcm when PPC_AES_GCM_CAPABLE is non-zero, otherwise aes_gcm from the including file. keybits is not used here. */ |
1633 |
|
|
+ return PPC_AES_GCM_CAPABLE ? &aes_ppc_gcm : &aes_gcm; |
1634 |
|
|
+} |
1635 |
|
|
+ |