-
Notifications
You must be signed in to change notification settings - Fork 0
/
mpn_extras.h
208 lines (151 loc) · 6.66 KB
/
mpn_extras.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/*============================================================================
Copyright (C) 2007, William Hart, David Harvey
This file is part of FLINT.
FLINT is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
FLINT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FLINT; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===============================================================================*/
#ifndef MPN_EXTRAS_H
#define MPN_EXTRAS_H
#include "flint.h"
#include "ZmodF_poly.h"
#include "longlong_wrapper.h"
#include "longlong.h"
/*============================================================================
"mpn-wannabe" code.
These are functions that I wish were in GMP's mpn layer.
=============================================================================*/
#define pre_limb_t mp_limb_t
static inline
mp_limb_t F_mpn_precompute_inverse(mp_limb_t xl)
{
mp_limb_t dummy, invxl;
udiv_qrnnd (invxl, dummy, ~(xl), ~(0L), xl);
return invxl;
}
/*
Computes the negation of a multiple-precision integer in 2's complement.
Input is count limbs stored at src. Output is stored at dest.
src and dest can be the same buffer. If they're not, they should be disjoint.
todo: currently this code will make only 1 pass over the data, EXCEPT in the
case where all limbs are zero, in which case it will make two passes.
FIX THIS!
todo: try writing another version that makes a block of zeroes and then
uses mpn_sub_n repeatedly. This could be faster, if GMP's assembler
is better than what gcc can come up with.
todo: consider writing this in assembly
todo: write test function for this
todo: consider using GMP's mpn_com_n (undocumented)
*/
static inline void F_mpn_negate(mp_limb_t* dest, mp_limb_t* src, unsigned long count)
{
long i;
for (i = count - 1; i >= 0; i--)
dest[i] = ~src[i];
mpn_add_1(dest, dest, count, 1);
}
/*
Copies a bunch of limbs from one buffer to another.
Input is count limbs stored at src. Output is stored at dest.
src and dest can be the same buffer. If they're not, they should be disjoint.
todo: it's completely insane that we're not using memcpy. But memcpy seems
to have crazy overhead and is slow!! Why is this?
todo: GMP has code to do limb copying. Clearly memcpy wasn't good enough for
them either. Work out how to use their code. It's not a documented
interface, so some hackishness may be necessary.
*/
static inline void F_mpn_copy(mp_limb_t* dest, const mp_limb_t* src, unsigned long count)
{
long i;
for (i = count - 1; i >= 0; i--)
{
dest[i] = src[i];
}
}
static inline void F_mpn_copy_forward(mp_limb_t* dest, const mp_limb_t* src, unsigned long count)
{
long i;
for (i = 0; i < count; i++)
{
dest[i] = src[i];
}
}
/*
Sets a bunch of limbs to zero.
todo: why does memset have so much overhead????!!?
*/
static inline void F_mpn_clear(mp_limb_t* dest, unsigned long count)
{
long i;
for (i = count - 1; i >= 0; i--)
dest[i] = 0;
}
/*
Sets a bunch of limbs to 0xfff....
todo: why does memset have so much overhead????!!?
*/
static inline void F_mpn_set(mp_limb_t* dest, unsigned long count)
{
long i;
for (i = count - 1; i >= 0; i--)
dest[i] = (mp_limb_t)(-1L);
}
mp_limb_t F_mpn_divrem_ui_precomp(mp_limb_t * qp, mp_limb_t * up,
unsigned long un, mp_limb_t d, mp_limb_t dinv);
mp_limb_t F_mpn_addmul(mp_limb_t * rp, mp_limb_t * s1p, unsigned long s1n,
mp_limb_t * s2p, unsigned long s2n);
static inline
void F_mpn_printx(mp_limb_t * mpn, unsigned long count)
{
unsigned long i;
if (count)
for (i = 0; i < count; i++)
printf("%lx ", mpn[i]);
}
/*
Large integer multiplication
*/
typedef enum {FFT_PRE, KAR_PRE} precache_type;
typedef struct
{
precache_type type;
ZmodF_poly_p poly;
unsigned long length;
unsigned long length2;
unsigned long coeff_limbs;
unsigned long limbs1;
unsigned long limbs2;
unsigned long msl_bits;
unsigned long bits;
} F_mpn_precache_s;
typedef F_mpn_precache_s F_mpn_precache_t[1];
void F_mpn_FFT_split_bits(ZmodF_poly_t poly, const mp_limb_t * limbs, const unsigned long total_limbs,
unsigned long bits, unsigned long output_limbs);
void F_mpn_FFT_combine_bits(mp_limb_t * res, ZmodF_poly_t poly, unsigned long bits,
unsigned long output_limbs, unsigned long total_limbs);
mp_limb_t __F_mpn_mul(mp_limb_t * res, const mp_limb_t * data1, const unsigned long limbs1,
const mp_limb_t * data2, const unsigned long limbs2,
unsigned long twk);
mp_limb_t F_mpn_mul(mp_limb_t * res, const mp_limb_t * data1, const unsigned long limbs1,
const mp_limb_t * data2, const unsigned long limbs2);
mp_limb_t F_mpn_mul_trunc(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
mp_limb_t * data2, unsigned long limbs2, unsigned long trunc);
void F_mpn_mul_precache_init(F_mpn_precache_t precache, mp_limb_t * data1, unsigned long limbs1, unsigned long limbs2);
void F_mpn_mul_precache_clear(F_mpn_precache_t precache);
mp_limb_t F_mpn_mul_precache(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precache_t precache);
mp_limb_t F_mpn_mul_precache_trunc(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precache_t precache, unsigned long trunc);
mp_limb_t __F_mpn_mul_middle(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
mp_limb_t * data2, unsigned long limbs2,
unsigned long start, unsigned long trunc);
mp_limb_t __F_mpn_mul_middle_precache(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
F_mpn_precache_t pre,
unsigned long start, unsigned long trunc);
#endif