From 8e257b86e5812c6e1cfa9e8e5f5660ac7bed899d Mon Sep 17 00:00:00 2001
From: Dmitry Belyavskiy <beldmit@gmail.com>
Date: Fri, 20 Jan 2023 15:03:40 +0000
Subject: [PATCH 03/18] Fix Timing Oracle in RSA decryption

A timing based side channel exists in the OpenSSL RSA Decryption
implementation which could be sufficient to recover a plaintext across
a network in a Bleichenbacher style attack. To achieve a successful
decryption an attacker would have to be able to send a very large number
of trial messages for decryption. The vulnerability affects all RSA
padding modes: PKCS#1 v1.5, RSA-OAEP and RSASVE.
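
The timing signal comes from the post-processing of the decrypted value:
unblinding and conversion of the result back to a byte string took an
amount of time that depended on the numeric value of the plaintext. The
fix adds constant-time limb arithmetic (crypto/bn/rsa_sup_mul.c) so that
ossl_bn_rsa_do_unblind() performs the blinding inversion and the
zero-padded big-endian serialization over a fixed number of limbs. As a
rough sketch of the serialization idea only (illustrative helper, not
code added by this patch; limbs assumed least-significant first):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * Write an n-limb value into a fixed-width big-endian buffer,
     * touching every output byte regardless of how many leading limbs
     * are zero, so the work done does not depend on the value itself.
     */
    static void limbs_to_be_bytes(unsigned char *out, size_t outlen,
                                  const uint64_t *limbs, size_t nlimbs)
    {
        size_t i;

        for (i = 0; i < outlen; i++) {
            /* significance of this byte, counted from the LSB end */
            size_t sig = outlen - 1 - i;
            /* this branch depends only on public lengths, not the value */
            uint64_t v = (sig / 8 < nlimbs) ? limbs[sig / 8] : 0;

            out[i] = (unsigned char)(v >> ((sig % 8) * 8));
        }
    }

For a 2048-bit modulus this would be called with outlen = 256 and
nlimbs = 32, and every output byte is produced by the same sequence of
operations whether or not the plaintext has leading zero bytes.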

Patch written by Dmitry Belyavsky and Hubert Kario

CVE-2022-4304

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
---
 crypto/bn/bn_blind.c    |  14 -
 crypto/bn/bn_local.h    |  14 +
 crypto/bn/build.info    |   2 +-
 crypto/bn/rsa_sup_mul.c | 604 ++++++++++++++++++++++++++++++++++++++++
 crypto/rsa/rsa_ossl.c   |  19 +-
 include/crypto/bn.h     |   6 +
 6 files changed, 638 insertions(+), 21 deletions(-)
 create mode 100644 crypto/bn/rsa_sup_mul.c

diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index 72457b34cf..6061ebb4c0 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -13,20 +13,6 @@
 
 #define BN_BLINDING_COUNTER 32
 
-struct bn_blinding_st {
-    BIGNUM *A;
-    BIGNUM *Ai;
-    BIGNUM *e;
-    BIGNUM *mod; /* just a reference */
-    CRYPTO_THREAD_ID tid;
-    int counter;
-    unsigned long flags;
-    BN_MONT_CTX *m_ctx;
-    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
-                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
-    CRYPTO_RWLOCK *lock;
-};
-
 BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
 {
     BN_BLINDING *ret = NULL;
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index c9a7ecf298..8c428f919d 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -290,6 +290,20 @@ struct bn_gencb_st {
     } cb;
 };
 
+struct bn_blinding_st {
+    BIGNUM *A;
+    BIGNUM *Ai;
+    BIGNUM *e;
+    BIGNUM *mod; /* just a reference */
+    CRYPTO_THREAD_ID tid;
+    int counter;
+    unsigned long flags;
+    BN_MONT_CTX *m_ctx;
+    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
+                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
+    CRYPTO_RWLOCK *lock;
+};
+
 /*-
  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
  *
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index c4ba51b265..f4ff619239 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -105,7 +105,7 @@ $COMMON=bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c \
         bn_mod.c bn_conv.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
         bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_sqr.c \
         bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
-        bn_intern.c bn_dh.c bn_rsa_fips186_4.c bn_const.c
+        bn_intern.c bn_dh.c bn_rsa_fips186_4.c bn_const.c rsa_sup_mul.c
 SOURCE[../../libcrypto]=$COMMON $BNASM bn_print.c bn_err.c bn_srp.c
 DEFINE[../../libcrypto]=$BNDEF
 IF[{- !$disabled{'deprecated-0.9.8'} -}]
diff --git a/crypto/bn/rsa_sup_mul.c b/crypto/bn/rsa_sup_mul.c
new file mode 100644
index 0000000000..0e0d02e194
--- /dev/null
+++ b/crypto/bn/rsa_sup_mul.c
@@ -0,0 +1,604 @@
+#include <openssl/e_os2.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <string.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rsaerr.h>
+#include "internal/endian.h"
+#include "internal/numbers.h"
+#include "internal/constant_time.h"
+#include "bn_local.h"
+
+# if BN_BYTES == 8
+typedef uint64_t limb_t;
+# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+typedef uint128_t limb2_t;
+# define HAVE_LIMB2_T
+# endif
+# define LIMB_BIT_SIZE 64
+# define LIMB_BYTE_SIZE 8
+# elif BN_BYTES == 4
+typedef uint32_t limb_t;
+typedef uint64_t limb2_t;
+# define LIMB_BIT_SIZE 32
+# define LIMB_BYTE_SIZE 4
+# define HAVE_LIMB2_T
+# else
+# error "Not supported"
+# endif
+
+/*
+ * For multiplication we're using schoolbook multiplication,
+ * so if we have two numbers, each with 6 "digits" (words)
+ * the multiplication is calculated as follows:
+ *                      A B C D E F
+ *                   x  I J K L M N
+ *                   --------------
+ *                              N*F
+ *                            N*E
+ *                          N*D
+ *                        N*C
+ *                      N*B
+ *                    N*A
+ *                            M*F
+ *                          M*E
+ *                        M*D
+ *                      M*C
+ *                    M*B
+ *                  M*A
+ *                          L*F
+ *                        L*E
+ *                      L*D
+ *                    L*C
+ *                  L*B
+ *                L*A
+ *                        K*F
+ *                      K*E
+ *                    K*D
+ *                  K*C
+ *                K*B
+ *              K*A
+ *                      J*F
+ *                    J*E
+ *                  J*D
+ *                J*C
+ *              J*B
+ *            J*A
+ *                    I*F
+ *                  I*E
+ *                I*D
+ *              I*C
+ *            I*B
+ *      +   I*A
+ *      ==========================
+ *                          N*B N*D N*F
+ *                      + N*A N*C N*E
+ *                      + M*B M*D M*F
+ *                  + M*A M*C M*E
+ *                  + L*B L*D L*F
+ *              + L*A L*C L*E
+ *              + K*B K*D K*F
+ *          + K*A K*C K*E
+ *          + J*B J*D J*F
+ *      + J*A J*C J*E
+ *      + I*B I*D I*F
+ *  + I*A I*C I*E
+ *
+ *              1+1 1+3 1+5
+ *              1+0 1+2 1+4
+ *              0+1 0+3 0+5
+ *              0+0 0+2 0+4
+ *
+ *      0 1 2 3 4 5 6
+ * which requires n^2 multiplications and 2n full length additions
+ * as we can keep every other result of limb multiplication in two separate
+ * limbs
+ */
+
+#if defined HAVE_LIMB2_T
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    limb2_t t;
+    /*
+     * this is idiomatic code to tell compiler to use the native mul
+     * those three lines will actually compile to single instruction
+     */
+
+    t = (limb2_t)a * b;
+    *hi = t >> LIMB_BIT_SIZE;
+    *lo = (limb_t)t;
+}
+#elif (BN_BYTES == 8) && (defined _MSC_VER)
+/* https://learn.microsoft.com/en-us/cpp/intrinsics/umul128?view=msvc-170 */
+#pragma intrinsic(_umul128)
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    *lo = _umul128(a, b, hi);
+}
+#else
+/*
+ * if the compiler doesn't have either a 128bit data type nor a "return
+ * high 64 bits of multiplication"
+ */
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    limb_t a_low = (limb_t)(uint32_t)a;
+    limb_t a_hi = a >> 32;
+    limb_t b_low = (limb_t)(uint32_t)b;
+    limb_t b_hi = b >> 32;
+
+    limb_t p0 = a_low * b_low;
+    limb_t p1 = a_low * b_hi;
+    limb_t p2 = a_hi * b_low;
+    limb_t p3 = a_hi * b_hi;
+
+    uint32_t cy = (uint32_t)(((p0 >> 32) + (uint32_t)p1 + (uint32_t)p2) >> 32);
+
+    *lo = p0 + (p1 << 32) + (p2 << 32);
+    *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
+}
+#endif
+
+/* add two limbs with carry in, return carry out */
+static ossl_inline limb_t _add_limb(limb_t *ret, limb_t a, limb_t b, limb_t carry)
+{
+    limb_t carry1, carry2, t;
+    /*
+     * `c = a + b; if (c < a)` is idiomatic code that makes compilers
+     * use add with carry on assembly level
+     */
+
+    *ret = a + carry;
+    if (*ret < a)
+        carry1 = 1;
+    else
+        carry1 = 0;
+
+    t = *ret;
+    *ret = t + b;
+    if (*ret < t)
+        carry2 = 1;
+    else
+        carry2 = 0;
+
+    return carry1 + carry2;
+}
+
+/*
+ * add two numbers of the same size, return overflow
+ *
+ * add a to b, place result in ret; all arrays need to be n limbs long
+ * return overflow from addition (0 or 1)
+ */
+static ossl_inline limb_t add(limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    limb_t c = 0;
+    ossl_ssize_t i;
+
+    for(i = n - 1; i > -1; i--)
+        c = _add_limb(&ret[i], a[i], b[i], c);
+
+    return c;
+}
+
+/*
+ * return number of limbs necessary for temporary values
+ * when multiplying numbers n limbs large
+ */
+static ossl_inline size_t mul_limb_numb(size_t n)
+{
+    return 2 * n * 2;
+}
+
+/*
+ * multiply two numbers of the same size
+ *
+ * multiply a by b, place result in ret; a and b need to be n limbs long
+ * ret needs to be 2*n limbs long, tmp needs to be mul_limb_numb(n) limbs
+ * long
+ */
+static void limb_mul(limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp)
+{
+    limb_t *r_odd, *r_even;
+    size_t i, j, k;
+
+    r_odd = tmp;
+    r_even = &tmp[2 * n];
+
+    memset(ret, 0, 2 * n * sizeof(limb_t));
+
+    for (i = 0; i < n; i++) {
+        for (k = 0; k < i + n + 1; k++) {
+            r_even[k] = 0;
+            r_odd[k] = 0;
+        }
+        for (j = 0; j < n; j++) {
+            /*
+             * place results from even and odd limbs in separate arrays so that
+             * we don't have to calculate overflow every time we get individual
+             * limb multiplication result
+             */
+            if (j % 2 == 0)
+                _mul_limb(&r_even[i + j], &r_even[i + j + 1], a[i], b[j]);
+            else
+                _mul_limb(&r_odd[i + j], &r_odd[i + j + 1], a[i], b[j]);
+        }
+        /*
+         * skip the least significant limbs when adding multiples of
+         * more significant limbs (they're zero anyway)
+         */
+        add(ret, ret, r_even, n + i + 1);
+        add(ret, ret, r_odd, n + i + 1);
+    }
+}
+
+/* modifies the value in place by performing a right shift by one bit */
+static ossl_inline void rshift1(limb_t *val, size_t n)
+{
+    limb_t shift_in = 0, shift_out = 0;
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        shift_out = val[i] & 1;
+        val[i] = shift_in << (LIMB_BIT_SIZE - 1) | (val[i] >> 1);
+        shift_in = shift_out;
+    }
+}
+
+/* extend the LSB of flag to all bits of limb */
+static ossl_inline limb_t mk_mask(limb_t flag)
+{
+    flag |= flag << 1;
+    flag |= flag << 2;
+    flag |= flag << 4;
+    flag |= flag << 8;
+    flag |= flag << 16;
+#if (LIMB_BYTE_SIZE == 8)
+    flag |= flag << 32;
+#endif
+    return flag;
+}
+
+/*
+ * copy from either a or b to ret based on flag
+ * when flag == 0, then copies from b
+ * when flag == 1, then copies from a
+ */
+static ossl_inline void cselect(limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    /*
+     * would be more efficient with non volatile mask, but then gcc
+     * generates code with jumps
+     */
+    volatile limb_t mask;
+    size_t i;
+
+    mask = mk_mask(flag);
+    for (i = 0; i < n; i++) {
+#if (LIMB_BYTE_SIZE == 8)
+        ret[i] = constant_time_select_64(mask, a[i], b[i]);
+#else
+        ret[i] = constant_time_select_32(mask, a[i], b[i]);
+#endif
+    }
+}
+
+static limb_t _sub_limb(limb_t *ret, limb_t a, limb_t b, limb_t borrow)
+{
+    limb_t borrow1, borrow2, t;
+    /*
+     * while it doesn't look constant-time, this is idiomatic code
+     * to tell compilers to use the carry bit from subtraction
+     */
+
+    *ret = a - borrow;
+    if (*ret > a)
+        borrow1 = 1;
+    else
+        borrow1 = 0;
+
+    t = *ret;
+    *ret = t - b;
+    if (*ret > t)
+        borrow2 = 1;
+    else
+        borrow2 = 0;
+
+    return borrow1 + borrow2;
+}
+
+/*
+ * place the result of a - b into ret, return the borrow bit.
+ * All arrays need to be n limbs long
+ */
+static limb_t sub(limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    limb_t borrow = 0;
+    ossl_ssize_t i;
+
+    for (i = n - 1; i > -1; i--)
+        borrow = _sub_limb(&ret[i], a[i], b[i], borrow);
+
+    return borrow;
+}
+
+/* return the number of limbs necessary to allocate for the mod() tmp operand */
+static ossl_inline size_t mod_limb_numb(size_t anum, size_t modnum)
+{
+    return (anum + modnum) * 3;
+}
+
+/*
+ * calculate a % mod, place the result in ret
+ * size of a is defined by anum, size of ret and mod is modnum,
+ * size of tmp is returned by mod_limb_numb()
+ */
+static void mod(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
+                size_t modnum, limb_t *tmp)
+{
+    limb_t *atmp, *modtmp, *rettmp;
+    limb_t res;
+    size_t i;
+
+    memset(tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE);
+
+    atmp = tmp;
+    modtmp = &tmp[anum + modnum];
+    rettmp = &tmp[(anum + modnum) * 2];
+
+    for (i = modnum; i <modnum + anum; i++)
+        atmp[i] = a[i-modnum];
+
+    for (i = 0; i < modnum; i++)
+        modtmp[i] = mod[i];
+
+    for (i = 0; i < anum * LIMB_BIT_SIZE; i++) {
+        rshift1(modtmp, anum + modnum);
+        res = sub(rettmp, atmp, modtmp, anum+modnum);
+        cselect(res, atmp, atmp, rettmp, anum+modnum);
+    }
+
+    memcpy(ret, &atmp[anum], sizeof(limb_t) * modnum);
+}
+
+/* necessary size of tmp for a _mul_add_limb() call with provided anum */
+static ossl_inline size_t _mul_add_limb_numb(size_t anum)
+{
+    return 2 * (anum + 1);
+}
+
+/* multiply a by m, add to ret, return carry */
+static limb_t _mul_add_limb(limb_t *ret, limb_t *a, size_t anum,
+                            limb_t m, limb_t *tmp)
+{
+    limb_t carry = 0;
+    limb_t *r_odd, *r_even;
+    size_t i;
+
+    memset(tmp, 0, sizeof(limb_t) * (anum + 1) * 2);
+
+    r_odd = tmp;
+    r_even = &tmp[anum + 1];
+
+    for (i = 0; i < anum; i++) {
+        /*
+         * place the results from even and odd limbs in separate arrays
+         * so that we have to worry about carry just once
+         */
+        if (i % 2 == 0)
+            _mul_limb(&r_even[i], &r_even[i + 1], a[i], m);
+        else
+            _mul_limb(&r_odd[i], &r_odd[i + 1], a[i], m);
+    }
+    /* assert: add() carry here will be equal zero */
+    add(r_even, r_even, r_odd, anum + 1);
+    /*
+     * while here it will not overflow as the max value from multiplication
+     * is -2 while max overflow from addition is 1, so the max value of
+     * carry is -1 (i.e. max int)
+     */
+    carry = add(ret, ret, &r_even[1], anum) + r_even[0];
+
+    return carry;
+}
+
+static ossl_inline size_t mod_montgomery_limb_numb(size_t modnum)
+{
+    return modnum * 2 + _mul_add_limb_numb(modnum);
+}
+
+/*
+ * calculate a % mod, place result in ret
+ * assumes that a is in Montgomery form with the R (Montgomery modulus) being
+ * smallest power of two big enough to fit mod and that's also a power
+ * of the count of number of bits in limb_t (B).
+ * For calculation, we also need n', such that mod * n' == -1 mod B.
+ * anum must be <= 2 * modnum
+ * ret needs to be modnum words long
+ * tmp needs to be mod_montgomery_limb_numb(modnum) limbs long
+ */
+static void mod_montgomery(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
+                           size_t modnum, limb_t ni0, limb_t *tmp)
+{
+    limb_t carry, v;
+    limb_t *res, *rp, *tmp2;
+    ossl_ssize_t i;
+
+    res = tmp;
+    /*
+     * for intermediate result we need an integer twice as long as modulus
+     * but keep the input in the least significant limbs
+     */
+    memset(res, 0, sizeof(limb_t) * (modnum * 2));
+    memcpy(&res[modnum * 2 - anum], a, sizeof(limb_t) * anum);
+    rp = &res[modnum];
+    tmp2 = &res[modnum * 2];
+
+    carry = 0;
+
+    /* add multiples of the modulus to the value until R divides it cleanly */
+    for (i = modnum; i > 0; i--, rp--) {
+        v = _mul_add_limb(rp, mod, modnum, rp[modnum-1] * ni0, tmp2);
+        v = v + carry + rp[-1];
+        carry |= (v != rp[-1]);
+        carry &= (v <= rp[-1]);
+        rp[-1] = v;
+    }
+
+    /* perform the final reduction by mod... */
+    carry -= sub(ret, rp, mod, modnum);
+
+    /* ...conditionally */
+    cselect(carry, ret, rp, ret, modnum);
+}
+
+/* allocated buffer should be freed afterwards */
+static void BN_to_limb(const BIGNUM *bn, limb_t *buf, size_t limbs)
+{
+    int i;
+    int real_limbs = (BN_num_bytes(bn) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    limb_t *ptr = buf + (limbs - real_limbs);
+
+    for (i = 0; i < real_limbs; i++)
+        ptr[i] = bn->d[real_limbs - i - 1];
+}
+
+#if LIMB_BYTE_SIZE == 8
+static ossl_inline uint64_t be64(uint64_t host)
+{
+    uint64_t big = 0;
+    DECLARE_IS_ENDIAN;
+
+    if (!IS_LITTLE_ENDIAN)
+        return host;
+
+    big |= (host & 0xff00000000000000) >> 56;
+    big |= (host & 0x00ff000000000000) >> 40;
+    big |= (host & 0x0000ff0000000000) >> 24;
+    big |= (host & 0x000000ff00000000) >> 8;
+    big |= (host & 0x00000000ff000000) << 8;
+    big |= (host & 0x0000000000ff0000) << 24;
+    big |= (host & 0x000000000000ff00) << 40;
+    big |= (host & 0x00000000000000ff) << 56;
+    return big;
+}
+
+#else
+/* Not all platforms have htobe32(). */
+static ossl_inline uint32_t be32(uint32_t host)
+{
+    uint32_t big = 0;
+    DECLARE_IS_ENDIAN;
+
+    if (!IS_LITTLE_ENDIAN)
+        return host;
+
+    big |= (host & 0xff000000) >> 24;
+    big |= (host & 0x00ff0000) >> 8;
+    big |= (host & 0x0000ff00) << 8;
+    big |= (host & 0x000000ff) << 24;
+    return big;
+}
+#endif
+
+/*
+ * We assume that intermediate, possible_arg2, blinding, and ctx are used
+ * similar to BN_BLINDING_invert_ex() arguments.
+ * to_mod is RSA modulus.
+ * buf and num is the serialization buffer and its length.
+ *
+ * Here we use classic/Montgomery multiplication and modulo. After the calculation finished
+ * we serialize the new structure instead of BIGNUMs taking endianness into account.
+ */
+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
+                           const BN_BLINDING *blinding,
+                           const BIGNUM *possible_arg2,
+                           const BIGNUM *to_mod, BN_CTX *ctx,
+                           unsigned char *buf, int num)
+{
+    limb_t *l_im = NULL, *l_mul = NULL, *l_mod = NULL;
+    limb_t *l_ret = NULL, *l_tmp = NULL, l_buf;
+    size_t l_im_count = 0, l_mul_count = 0, l_size = 0, l_mod_count = 0;
+    size_t l_tmp_count = 0;
+    int ret = 0;
+    size_t i;
+    unsigned char *tmp;
+    const BIGNUM *arg1 = intermediate;
+    const BIGNUM *arg2 = (possible_arg2 == NULL) ? blinding->Ai : possible_arg2;
+
+    l_im_count = (BN_num_bytes(arg1) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    l_mul_count = (BN_num_bytes(arg2) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    l_mod_count = (BN_num_bytes(to_mod) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+
+    l_size = l_im_count > l_mul_count ? l_im_count : l_mul_count;
+    l_im = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
+    l_mul = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
+    l_mod = OPENSSL_zalloc(l_mod_count * LIMB_BYTE_SIZE);
+
+    if ((l_im == NULL) || (l_mul == NULL) || (l_mod == NULL))
+        goto err;
+
+    BN_to_limb(arg1, l_im, l_size);
+    BN_to_limb(arg2, l_mul, l_size);
+    BN_to_limb(to_mod, l_mod, l_mod_count);
+
+    l_ret = OPENSSL_malloc(2 * l_size * LIMB_BYTE_SIZE);
+
+    if (blinding->m_ctx != NULL) {
+        l_tmp_count = mul_limb_numb(l_size) > mod_montgomery_limb_numb(l_mod_count) ?
+                      mul_limb_numb(l_size) : mod_montgomery_limb_numb(l_mod_count);
+        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
+    } else {
+        l_tmp_count = mul_limb_numb(l_size) > mod_limb_numb(2 * l_size, l_mod_count) ?
+                      mul_limb_numb(l_size) : mod_limb_numb(2 * l_size, l_mod_count);
+        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
+    }
+
+    if ((l_ret == NULL) || (l_tmp == NULL))
+        goto err;
+
+    if (blinding->m_ctx != NULL) {
+        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
+        mod_montgomery(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count,
+                       blinding->m_ctx->n0[0], l_tmp);
+    } else {
+        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
+        mod(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, l_tmp);
+    }
+
+    /* modulus size in bytes can be equal to num but after limbs conversion it becomes bigger */
+    if (num < BN_num_bytes(to_mod)) {
+        ERR_raise(ERR_LIB_BN, ERR_R_PASSED_INVALID_ARGUMENT);
+        goto err;
+    }
+
+    memset(buf, 0, num);
+    tmp = buf + num - BN_num_bytes(to_mod);
+    for (i = 0; i < l_mod_count; i++) {
+#if LIMB_BYTE_SIZE == 8
+        l_buf = be64(l_ret[i]);
+#else
+        l_buf = be32(l_ret[i]);
+#endif
+        if (i == 0) {
+            int delta = LIMB_BYTE_SIZE - ((l_mod_count * LIMB_BYTE_SIZE) - num);
+
+            memcpy(tmp, ((char *)&l_buf) + LIMB_BYTE_SIZE - delta, delta);
+            tmp += delta;
+        } else {
+            memcpy(tmp, &l_buf, LIMB_BYTE_SIZE);
+            tmp += LIMB_BYTE_SIZE;
+        }
+    }
+    ret = num;
+
+ err:
+    OPENSSL_free(l_im);
+    OPENSSL_free(l_mul);
+    OPENSSL_free(l_mod);
+    OPENSSL_free(l_tmp);
+    OPENSSL_free(l_ret);
+
+    return ret;
+}
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index 381c659352..7e8b791fba 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -469,13 +469,20 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         BN_free(d);
     }
 
-    if (blinding)
-        if (!rsa_blinding_invert(blinding, ret, unblind, ctx))
+    if (blinding) {
+        /*
+         * ossl_bn_rsa_do_unblind() combines blinding inversion and
+         * 0-padded BN BE serialization
+         */
+        j = ossl_bn_rsa_do_unblind(ret, blinding, unblind, rsa->n, ctx,
+                                   buf, num);
+        if (j == 0)
             goto err;
-
-    j = BN_bn2binpad(ret, buf, num);
-    if (j < 0)
-        goto err;
+    } else {
+        j = BN_bn2binpad(ret, buf, num);
+        if (j < 0)
+            goto err;
+    }
 
     switch (padding) {
     case RSA_PKCS1_PADDING:
diff --git a/include/crypto/bn.h b/include/crypto/bn.h
index cf69bea848..cd45654210 100644
--- a/include/crypto/bn.h
+++ b/include/crypto/bn.h
@@ -114,4 +114,10 @@ OSSL_LIB_CTX *ossl_bn_get_libctx(BN_CTX *ctx);
 
 extern const BIGNUM ossl_bn_inv_sqrt_2;
 
+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
+                           const BN_BLINDING *blinding,
+                           const BIGNUM *possible_arg2,
+                           const BIGNUM *to_mod, BN_CTX *ctx,
+                           unsigned char *buf, int num);
+
 #endif
-- 
2.39.1