Line data Source code
1 : #include "tommath_private.h"
2 : #ifdef BN_S_MP_TOOM_MUL_C
3 : /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 : /* SPDX-License-Identifier: Unlicense */
5 :
6 : /* multiplication using the Toom-Cook 3-way algorithm
7 : *
8 : * Much more complicated than Karatsuba but has a lower
9 : * asymptotic running time of O(N**1.464). This algorithm is
10 : * only particularly useful on VERY large inputs
11 : * (we're talking 1000s of digits here...).
12 : */
13 :
14 : /*
15 : This file contains code from J. Arndt's book "Matters Computational"
16 : and the accompanying FXT-library with permission of the author.
17 : */
18 :
19 : /*
20 : Setup from
21 :
22 : Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae."
23 : 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007.
24 :
25 : The interpolation from above needed one temporary variable more
26 : than the interpolation here:
27 :
28 : Bodrato, Marco, and Alberto Zanoni. "What about Toom-Cook matrices optimality."
29 : Centro Vito Volterra Universita di Roma Tor Vergata (2006)
30 : */
31 :
32 0 : mp_err s_mp_toom_mul(const mp_int *a, const mp_int *b, mp_int *c)
33 : {
34 0 : mp_int S1, S2, T1, a0, a1, a2, b0, b1, b2;
35 0 : int B, count;
36 0 : mp_err err;
37 :
38 : /* init temps */
39 0 : if ((err = mp_init_multi(&S1, &S2, &T1, NULL)) != MP_OKAY) {
40 0 : return err;
41 : }
42 :
43 : /* B */
44 0 : B = MP_MIN(a->used, b->used) / 3;
45 :
46 : /** a = a2 * x^2 + a1 * x + a0; */
47 0 : if ((err = mp_init_size(&a0, B)) != MP_OKAY) goto LBL_ERRa0;
48 :
49 0 : for (count = 0; count < B; count++) {
50 0 : a0.dp[count] = a->dp[count];
51 0 : a0.used++;
52 : }
53 0 : mp_clamp(&a0);
54 0 : if ((err = mp_init_size(&a1, B)) != MP_OKAY) goto LBL_ERRa1;
55 0 : for (; count < (2 * B); count++) {
56 0 : a1.dp[count - B] = a->dp[count];
57 0 : a1.used++;
58 : }
59 0 : mp_clamp(&a1);
60 0 : if ((err = mp_init_size(&a2, B + (a->used - (3 * B)))) != MP_OKAY) goto LBL_ERRa2;
61 0 : for (; count < a->used; count++) {
62 0 : a2.dp[count - (2 * B)] = a->dp[count];
63 0 : a2.used++;
64 : }
65 0 : mp_clamp(&a2);
66 :
67 : /** b = b2 * x^2 + b1 * x + b0; */
68 0 : if ((err = mp_init_size(&b0, B)) != MP_OKAY) goto LBL_ERRb0;
69 0 : for (count = 0; count < B; count++) {
70 0 : b0.dp[count] = b->dp[count];
71 0 : b0.used++;
72 : }
73 0 : mp_clamp(&b0);
74 0 : if ((err = mp_init_size(&b1, B)) != MP_OKAY) goto LBL_ERRb1;
75 0 : for (; count < (2 * B); count++) {
76 0 : b1.dp[count - B] = b->dp[count];
77 0 : b1.used++;
78 : }
79 0 : mp_clamp(&b1);
80 0 : if ((err = mp_init_size(&b2, B + (b->used - (3 * B)))) != MP_OKAY) goto LBL_ERRb2;
81 0 : for (; count < b->used; count++) {
82 0 : b2.dp[count - (2 * B)] = b->dp[count];
83 0 : b2.used++;
84 : }
85 0 : mp_clamp(&b2);
86 :
87 : /** \\ S1 = (a2+a1+a0) * (b2+b1+b0); */
88 : /** T1 = a2 + a1; */
89 0 : if ((err = mp_add(&a2, &a1, &T1)) != MP_OKAY) goto LBL_ERR;
90 :
91 : /** S2 = T1 + a0; */
92 0 : if ((err = mp_add(&T1, &a0, &S2)) != MP_OKAY) goto LBL_ERR;
93 :
94 : /** c = b2 + b1; */
95 0 : if ((err = mp_add(&b2, &b1, c)) != MP_OKAY) goto LBL_ERR;
96 :
97 : /** S1 = c + b0; */
98 0 : if ((err = mp_add(c, &b0, &S1)) != MP_OKAY) goto LBL_ERR;
99 :
100 : /** S1 = S1 * S2; */
101 0 : if ((err = mp_mul(&S1, &S2, &S1)) != MP_OKAY) goto LBL_ERR;
102 :
103 : /** \\S2 = (4*a2+2*a1+a0) * (4*b2+2*b1+b0); */
104 : /** T1 = T1 + a2; */
105 0 : if ((err = mp_add(&T1, &a2, &T1)) != MP_OKAY) goto LBL_ERR;
106 :
107 : /** T1 = T1 << 1; */
108 0 : if ((err = mp_mul_2(&T1, &T1)) != MP_OKAY) goto LBL_ERR;
109 :
110 : /** T1 = T1 + a0; */
111 0 : if ((err = mp_add(&T1, &a0, &T1)) != MP_OKAY) goto LBL_ERR;
112 :
113 : /** c = c + b2; */
114 0 : if ((err = mp_add(c, &b2, c)) != MP_OKAY) goto LBL_ERR;
115 :
116 : /** c = c << 1; */
117 0 : if ((err = mp_mul_2(c, c)) != MP_OKAY) goto LBL_ERR;
118 :
119 : /** c = c + b0; */
120 0 : if ((err = mp_add(c, &b0, c)) != MP_OKAY) goto LBL_ERR;
121 :
122 : /** S2 = T1 * c; */
123 0 : if ((err = mp_mul(&T1, c, &S2)) != MP_OKAY) goto LBL_ERR;
124 :
125 : /** \\S3 = (a2-a1+a0) * (b2-b1+b0); */
126 : /** a1 = a2 - a1; */
127 0 : if ((err = mp_sub(&a2, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
128 :
129 : /** a1 = a1 + a0; */
130 0 : if ((err = mp_add(&a1, &a0, &a1)) != MP_OKAY) goto LBL_ERR;
131 :
132 : /** b1 = b2 - b1; */
133 0 : if ((err = mp_sub(&b2, &b1, &b1)) != MP_OKAY) goto LBL_ERR;
134 :
135 : /** b1 = b1 + b0; */
136 0 : if ((err = mp_add(&b1, &b0, &b1)) != MP_OKAY) goto LBL_ERR;
137 :
138 : /** a1 = a1 * b1; */
139 0 : if ((err = mp_mul(&a1, &b1, &a1)) != MP_OKAY) goto LBL_ERR;
140 :
141 : /** b1 = a2 * b2; */
142 0 : if ((err = mp_mul(&a2, &b2, &b1)) != MP_OKAY) goto LBL_ERR;
143 :
144 : /** \\S2 = (S2 - S3)/3; */
145 : /** S2 = S2 - a1; */
146 0 : if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY) goto LBL_ERR;
147 :
148 : /** S2 = S2 / 3; \\ this is an exact division */
149 0 : if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY) goto LBL_ERR;
150 :
151 : /** a1 = S1 - a1; */
152 0 : if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY) goto LBL_ERR;
153 :
154 : /** a1 = a1 >> 1; */
155 0 : if ((err = mp_div_2(&a1, &a1)) != MP_OKAY) goto LBL_ERR;
156 :
157 : /** a0 = a0 * b0; */
158 0 : if ((err = mp_mul(&a0, &b0, &a0)) != MP_OKAY) goto LBL_ERR;
159 :
160 : /** S1 = S1 - a0; */
161 0 : if ((err = mp_sub(&S1, &a0, &S1)) != MP_OKAY) goto LBL_ERR;
162 :
163 : /** S2 = S2 - S1; */
164 0 : if ((err = mp_sub(&S2, &S1, &S2)) != MP_OKAY) goto LBL_ERR;
165 :
166 : /** S2 = S2 >> 1; */
167 0 : if ((err = mp_div_2(&S2, &S2)) != MP_OKAY) goto LBL_ERR;
168 :
169 : /** S1 = S1 - a1; */
170 0 : if ((err = mp_sub(&S1, &a1, &S1)) != MP_OKAY) goto LBL_ERR;
171 :
172 : /** S1 = S1 - b1; */
173 0 : if ((err = mp_sub(&S1, &b1, &S1)) != MP_OKAY) goto LBL_ERR;
174 :
175 : /** T1 = b1 << 1; */
176 0 : if ((err = mp_mul_2(&b1, &T1)) != MP_OKAY) goto LBL_ERR;
177 :
178 : /** S2 = S2 - T1; */
179 0 : if ((err = mp_sub(&S2, &T1, &S2)) != MP_OKAY) goto LBL_ERR;
180 :
181 : /** a1 = a1 - S2; */
182 0 : if ((err = mp_sub(&a1, &S2, &a1)) != MP_OKAY) goto LBL_ERR;
183 :
184 :
185 : /** P = b1*x^4+ S2*x^3+ S1*x^2+ a1*x + a0; */
186 0 : if ((err = mp_lshd(&b1, 4 * B)) != MP_OKAY) goto LBL_ERR;
187 0 : if ((err = mp_lshd(&S2, 3 * B)) != MP_OKAY) goto LBL_ERR;
188 0 : if ((err = mp_add(&b1, &S2, &b1)) != MP_OKAY) goto LBL_ERR;
189 0 : if ((err = mp_lshd(&S1, 2 * B)) != MP_OKAY) goto LBL_ERR;
190 0 : if ((err = mp_add(&b1, &S1, &b1)) != MP_OKAY) goto LBL_ERR;
191 0 : if ((err = mp_lshd(&a1, 1 * B)) != MP_OKAY) goto LBL_ERR;
192 0 : if ((err = mp_add(&b1, &a1, &b1)) != MP_OKAY) goto LBL_ERR;
193 0 : if ((err = mp_add(&b1, &a0, c)) != MP_OKAY) goto LBL_ERR;
194 :
195 : /** a * b - P */
196 :
197 :
198 0 : LBL_ERR:
199 0 : mp_clear(&b2);
200 0 : LBL_ERRb2:
201 0 : mp_clear(&b1);
202 0 : LBL_ERRb1:
203 0 : mp_clear(&b0);
204 0 : LBL_ERRb0:
205 0 : mp_clear(&a2);
206 0 : LBL_ERRa2:
207 0 : mp_clear(&a1);
208 0 : LBL_ERRa1:
209 0 : mp_clear(&a0);
210 0 : LBL_ERRa0:
211 0 : mp_clear_multi(&S1, &S2, &T1, NULL);
212 0 : return err;
213 : }
214 :
215 : #endif
|