Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/radix/invmod_bn.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix)
if (n == 2)
return 1;

nn_ptr u, v;
nn_ptr u;
TMP_INIT;
TMP_START;
u = TMP_ALLOC(2 * n * sizeof(ulong));
v = u + n;
/* Todo: can tighten allocation when we always do a mulhigh */
u = TMP_ALLOC(n * sizeof(ulong));

slong a[FLINT_BITS];
slong i, m;
Expand All @@ -144,9 +144,27 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix)
m = n;
n = a[i];

radix_mulmid(u, res, m, res, m, 0, n, radix);
radix_mulmid(v, u, n, x, FLINT_MIN(n, xn), 0, n, radix);
radix_neg(res + m, v + m, n - m, radix);
slong rxn = FLINT_MIN(n, m + xn);

/* Can use middle product */
if (m > 3 && LIMB_RADIX(radix) >= m && rxn > (m - 3))
{
ulong one = 1;
radix_mulmid(u + m - 3, res, m, x, FLINT_MIN(n, xn), m - 3, rxn, radix);
/* We know that the m least significant limbs of the full product
res * x are 00...001. The approximate high product is either
exact or 1 ulp too small. The latter is indicated by a
nonzero m-1 limb (i.e. a borrow to be returned). */
if (u[m - 1] != 0)
radix_add(u + m, u + m, n - m, &one, 1, radix);
radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix);
}
else
{
radix_mulmid(u, res, m, x, FLINT_MIN(n, xn), 0, rxn, radix);
radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix);
}
radix_neg(res + m, res + m, n - m, radix);
}

TMP_END;
Expand Down
74 changes: 74 additions & 0 deletions src/radix/profile/p-add.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
Copyright (C) 2026 Fredrik Johansson

This file is part of FLINT.

FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/

#include <math.h>
#include "profiler.h"
#include "radix.h"

int main()
{
radix_t radix;
slong digits, nd, n1, n2;
nn_ptr a, b, c;
double tmpn, tradix, FLINT_SET_BUT_UNUSED(tt);

flint_rand_t state;
flint_rand_init(state);

flint_printf(" decimal mpn decimal time time relative\n");
flint_printf(" digits limbs limbs mpn_add radix_add time\n\n");

for (nd = 1; nd <= 100000000; nd = FLINT_MAX(nd + 1, nd * 1.5))
{
radix_init(radix, 10, 0);

digits = nd * 19;

/* Number of full-word limbs */
n1 = (slong) (digits * (log(10) / (FLINT_BITS * log(2))) + 1.0);
/* Number of radix 10^e limbs */
n2 = (digits + radix->exp - 1) / radix->exp;

a = flint_malloc(n2 * sizeof(ulong));
b = flint_malloc(n2 * sizeof(ulong));
c = flint_malloc(2 * n2 * sizeof(ulong));

flint_mpn_urandomb(a, state, n1 * FLINT_BITS);
flint_mpn_urandomb(b, state, n1 * FLINT_BITS);

mpn_add(c, a, n1, b, n1);
TIMEIT_START;
mpn_add(c, a, n1, b, n1);
TIMEIT_STOP_VALUES(tt, tmpn);

radix_rand_limbs(a, state, n2, radix);
radix_rand_limbs(b, state, n2, radix);

radix_add(c, a, n2, b, n2, radix);
TIMEIT_START;
radix_add(c, a, n2, b, n2, radix);
TIMEIT_STOP_VALUES(tt, tradix);

flint_printf("%10wd %8wd %8wd %8g %8g %.3fx\n",
digits, n1, n2, tmpn, tradix, tradix / tmpn);

flint_free(a);
flint_free(b);
flint_free(c);

radix_clear(radix);
}

flint_rand_clear(state);
flint_cleanup_master();
return 0;
}

172 changes: 172 additions & 0 deletions src/radix/profile/p-invmod.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
Copyright (C) 2026 Fredrik Johansson

This file is part of FLINT.

FLINT is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version. See <https://www.gnu.org/licenses/>.
*/

#include <stdint.h>
#include "fmpz.h"
#include "gr.h"
#include "profiler.h"
#include "gmp.h"
#include "radix.h"
#include "fmpz_mod.h"
#include "padic.h"
#include "nmod.h"

int main()
{
flint_rand_t state;
ulong p;
slong n, exp;
int have_mpn_mod;

flint_rand_init(state);
radix_t radix;

p = 7;
radix_init(radix, p, 0);
exp = radix->exp;

flint_printf("radix = %wu^%wd\n", p, radix->exp);

for (n = 1; n <= 10000000; n *= 2)
{
fmpz_mod_ctx_t mod_ctx;
gr_ctx_t gr_ctx;
radix_integer_t x, y, z;
gr_ptr gx = NULL, gy = NULL, gz = NULL;
fmpz_t fx, fy, fz, fp, pn;
padic_ctx_t pctx;
padic_t px, py, pz;

fmpz_init_set_ui(fp, p);
fmpz_init(pn);
fmpz_ui_pow_ui(pn, p, exp * n);

fmpz_mod_ctx_init(mod_ctx, pn);
padic_ctx_init(pctx, fp, exp * n, exp * n, PADIC_SERIES);
have_mpn_mod = (gr_ctx_init_mpn_mod(gr_ctx, pn) == GR_SUCCESS);

fmpz_init(fx);
fmpz_init(fy);
fmpz_init(fz);

radix_integer_init(x, radix);
radix_integer_init(y, radix);
radix_integer_init(z, radix);

padic_init2(px, exp * n);
padic_init2(py, exp * n);
padic_init2(pz, exp * n);

do
{
fmpz_randm(fx, state, pn);
} while (fmpz_divisible_ui(fx, p));
do
{
fmpz_randm(fy, state, pn);
} while (fmpz_divisible_ui(fy, p));

padic_set_fmpz(px, fx, pctx);
padic_set_fmpz(py, fy, pctx);

if (have_mpn_mod)
{
gx = gr_heap_init(gr_ctx);
gy = gr_heap_init(gr_ctx);
gz = gr_heap_init(gr_ctx);

GR_MUST_SUCCEED(gr_set_fmpz(gx, fx, gr_ctx));
GR_MUST_SUCCEED(gr_set_fmpz(gy, fy, gr_ctx));
}

do
{
radix_integer_rand_limbs(x, state, n, radix);
} while (x->d[0] % p == 0);
do
{
radix_integer_rand_limbs(y, state, n, radix);
} while (y->d[0] % p == 0);

double t1, t2, t3, t4, FLINT_SET_BUT_UNUSED(__);

TIMEIT_START;
padic_inv(pz, px, pctx);
TIMEIT_STOP_VALUES(__, t1);

TIMEIT_START;
fmpz_mod_inv(fz, fx, mod_ctx);
TIMEIT_STOP_VALUES(__, t2);

if (have_mpn_mod)
{
TIMEIT_START;
GR_IGNORE(gr_inv(gz, gx, gr_ctx));
TIMEIT_STOP_VALUES(__, t3);
}
else
t3 = 0.0;

TIMEIT_START;
radix_integer_invmod_limbs(z, x, n, radix);
TIMEIT_STOP_VALUES(__, t4);

char st1[20];
char st2[20];
char st3[20];
char st4[20];

sprintf(st1, "%.2e", t1);
sprintf(st2, "%.2e", t2);
if (t3 == 0.0)
sprintf(st3, "-");
else
sprintf(st3, "%.2e", t3);
sprintf(st4, "%.2e", t4);

if (n == 1)
flint_printf(" n padic fmpz_mod mpn_mod radix_integer speedup/padic speedup/fmpz_mod\n");

flint_printf("%8wd %8s %8s %8s %8s %.2fx %.2fx\n", n, st1, st2, st3, st4, t1 / t4, t2 / t4);

padic_clear(px);
padic_clear(py);
padic_clear(pz);

radix_integer_clear(x, radix);
radix_integer_clear(y, radix);
radix_integer_clear(z, radix);

fmpz_clear(fx);
fmpz_clear(fy);
fmpz_clear(fz);

if (have_mpn_mod)
{
gr_heap_clear(gx, gr_ctx);
gr_heap_clear(gy, gr_ctx);
gr_heap_clear(gz, gr_ctx);
gr_ctx_clear(gr_ctx);
}

fmpz_mod_ctx_clear(mod_ctx);
padic_ctx_clear(pctx);

fmpz_clear(pn);
fmpz_clear(fp);
}

radix_clear(radix);
flint_rand_clear(state);
flint_cleanup_master();
return 0;
}

Loading