diff --git a/src/radix/invmod_bn.c b/src/radix/invmod_bn.c index 2d347c1b5d..f538bb8e99 100644 --- a/src/radix/invmod_bn.c +++ b/src/radix/invmod_bn.c @@ -127,11 +127,11 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix) if (n == 2) return 1; - nn_ptr u, v; + nn_ptr u; TMP_INIT; TMP_START; - u = TMP_ALLOC(2 * n * sizeof(ulong)); - v = u + n; + /* Todo: can tighten allocation when we always do a mulhigh */ + u = TMP_ALLOC(n * sizeof(ulong)); slong a[FLINT_BITS]; slong i, m; @@ -144,9 +144,27 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix) m = n; n = a[i]; - radix_mulmid(u, res, m, res, m, 0, n, radix); - radix_mulmid(v, u, n, x, FLINT_MIN(n, xn), 0, n, radix); - radix_neg(res + m, v + m, n - m, radix); + slong rxn = FLINT_MIN(n, m + xn); + + /* Can use middle product */ + if (m > 3 && LIMB_RADIX(radix) >= m && rxn > (m - 3)) + { + ulong one = 1; + radix_mulmid(u + m - 3, res, m, x, FLINT_MIN(n, xn), m - 3, rxn, radix); + /* We know that the m least significant limbs of the full product + res * x are 00...001. The approximate high product is either + exact or 1 ulp too small. The latter is indicated by a + nonzero m-1 limb (i.e. a borrow to be returned). */ + if (u[m - 1] != 0) + radix_add(u + m, u + m, n - m, &one, 1, radix); + radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix); + } + else + { + radix_mulmid(u, res, m, x, FLINT_MIN(n, xn), 0, rxn, radix); + radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix); + } + radix_neg(res + m, res + m, n - m, radix); } TMP_END; diff --git a/src/radix/profile/p-add.c b/src/radix/profile/p-add.c new file mode 100644 index 0000000000..fab856e0fc --- /dev/null +++ b/src/radix/profile/p-add.c @@ -0,0 +1,74 @@ +/* + Copyright (C) 2026 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "profiler.h" +#include "radix.h" + +int main() +{ + radix_t radix; + slong digits, nd, n1, n2; + nn_ptr a, b, c; + double tmpn, tradix, FLINT_SET_BUT_UNUSED(tt); + + flint_rand_t state; + flint_rand_init(state); + + flint_printf(" decimal mpn decimal time time relative\n"); + flint_printf(" digits limbs limbs mpn_add radix_add time\n\n"); + + for (nd = 1; nd <= 100000000; nd = FLINT_MAX(nd + 1, nd * 1.5)) + { + radix_init(radix, 10, 0); + + digits = nd * 19; + + /* Number of full-word limbs */ + n1 = (slong) (digits * (log(10) / (FLINT_BITS * log(2))) + 1.0); + /* Number of radix 10^e limbs */ + n2 = (digits + radix->exp - 1) / radix->exp; + + a = flint_malloc(n2 * sizeof(ulong)); + b = flint_malloc(n2 * sizeof(ulong)); + c = flint_malloc(2 * n2 * sizeof(ulong)); + + flint_mpn_urandomb(a, state, n1 * FLINT_BITS); + flint_mpn_urandomb(b, state, n1 * FLINT_BITS); + + mpn_add(c, a, n1, b, n1); + TIMEIT_START; + mpn_add(c, a, n1, b, n1); + TIMEIT_STOP_VALUES(tt, tmpn); + + radix_rand_limbs(a, state, n2, radix); + radix_rand_limbs(b, state, n2, radix); + + radix_add(c, a, n2, b, n2, radix); + TIMEIT_START; + radix_add(c, a, n2, b, n2, radix); + TIMEIT_STOP_VALUES(tt, tradix); + + flint_printf("%10wd %8wd %8wd %8g %8g %.3fx\n", + digits, n1, n2, tmpn, tradix, tradix / tmpn); + + flint_free(a); + flint_free(b); + flint_free(c); + + radix_clear(radix); + } + + flint_rand_clear(state); + flint_cleanup_master(); + return 0; +} + diff --git a/src/radix/profile/p-invmod.c b/src/radix/profile/p-invmod.c new file mode 100644 index 0000000000..54a76d4f18 --- /dev/null +++ b/src/radix/profile/p-invmod.c @@ -0,0 +1,172 @@ +/* + Copyright (C) 2026 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "fmpz.h" +#include "gr.h" +#include "profiler.h" +#include "gmp.h" +#include "radix.h" +#include "fmpz_mod.h" +#include "padic.h" +#include "nmod.h" + +int main() +{ + flint_rand_t state; + ulong p; + slong n, exp; + int have_mpn_mod; + + flint_rand_init(state); + radix_t radix; + + p = 7; + radix_init(radix, p, 0); + exp = radix->exp; + + flint_printf("radix = %wu^%wd\n", p, radix->exp); + + for (n = 1; n <= 10000000; n *= 2) + { + fmpz_mod_ctx_t mod_ctx; + gr_ctx_t gr_ctx; + radix_integer_t x, y, z; + gr_ptr gx = NULL, gy = NULL, gz = NULL; + fmpz_t fx, fy, fz, fp, pn; + padic_ctx_t pctx; + padic_t px, py, pz; + + fmpz_init_set_ui(fp, p); + fmpz_init(pn); + fmpz_ui_pow_ui(pn, p, exp * n); + + fmpz_mod_ctx_init(mod_ctx, pn); + padic_ctx_init(pctx, fp, exp * n, exp * n, PADIC_SERIES); + have_mpn_mod = (gr_ctx_init_mpn_mod(gr_ctx, pn) == GR_SUCCESS); + + fmpz_init(fx); + fmpz_init(fy); + fmpz_init(fz); + + radix_integer_init(x, radix); + radix_integer_init(y, radix); + radix_integer_init(z, radix); + + padic_init2(px, exp * n); + padic_init2(py, exp * n); + padic_init2(pz, exp * n); + + do + { + fmpz_randm(fx, state, pn); + } while (fmpz_divisible_ui(fx, p)); + do + { + fmpz_randm(fy, state, pn); + } while (fmpz_divisible_ui(fy, p)); + + padic_set_fmpz(px, fx, pctx); + padic_set_fmpz(py, fy, pctx); + + if (have_mpn_mod) + { + gx = gr_heap_init(gr_ctx); + gy = gr_heap_init(gr_ctx); + gz = gr_heap_init(gr_ctx); + + GR_MUST_SUCCEED(gr_set_fmpz(gx, fx, gr_ctx)); + GR_MUST_SUCCEED(gr_set_fmpz(gy, fy, gr_ctx)); + } + + do + { + radix_integer_rand_limbs(x, state, n, radix); + } while (x->d[0] % p == 0); + do + { + radix_integer_rand_limbs(y, state, n, radix); + } while (y->d[0] % p == 0); + + double t1, t2, t3, t4, FLINT_SET_BUT_UNUSED(__); + + TIMEIT_START; + padic_inv(pz, px, pctx); + TIMEIT_STOP_VALUES(__, t1); + + TIMEIT_START; + fmpz_mod_inv(fz, fx, mod_ctx); + TIMEIT_STOP_VALUES(__, t2); + + if (have_mpn_mod) + { + TIMEIT_START; + GR_IGNORE(gr_inv(gz, gx, gr_ctx)); + TIMEIT_STOP_VALUES(__, t3); + } + else + t3 = 0.0; + + TIMEIT_START; + radix_integer_invmod_limbs(z, x, n, radix); + TIMEIT_STOP_VALUES(__, t4); + + char st1[20]; + char st2[20]; + char st3[20]; + char st4[20]; + + sprintf(st1, "%.2e", t1); + sprintf(st2, "%.2e", t2); + if (t3 == 0.0) + sprintf(st3, "-"); + else + sprintf(st3, "%.2e", t3); + sprintf(st4, "%.2e", t4); + + if (n == 1) + flint_printf(" n padic fmpz_mod mpn_mod radix_integer speedup/padic speedup/fmpz_mod\n"); + + flint_printf("%8wd %8s %8s %8s %8s %.2fx %.2fx\n", n, st1, st2, st3, st4, t1 / t4, t2 / t4); + + padic_clear(px); + padic_clear(py); + padic_clear(pz); + + radix_integer_clear(x, radix); + radix_integer_clear(y, radix); + radix_integer_clear(z, radix); + + fmpz_clear(fx); + fmpz_clear(fy); + fmpz_clear(fz); + + if (have_mpn_mod) + { + gr_heap_clear(gx, gr_ctx); + gr_heap_clear(gy, gr_ctx); + gr_heap_clear(gz, gr_ctx); + gr_ctx_clear(gr_ctx); + } + + fmpz_mod_ctx_clear(mod_ctx); + padic_ctx_clear(pctx); + + fmpz_clear(pn); + fmpz_clear(fp); + } + + radix_clear(radix); + flint_rand_clear(state); + flint_cleanup_master(); + return 0; +} + diff --git a/src/radix/profile/p-mulmod.c b/src/radix/profile/p-mulmod.c new file mode 100644 index 0000000000..ce06d19f12 --- /dev/null +++ b/src/radix/profile/p-mulmod.c @@ -0,0 +1,172 @@ +/* + Copyright (C) 2026 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "fmpz.h" +#include "gr.h" +#include "profiler.h" +#include "gmp.h" +#include "radix.h" +#include "fmpz_mod.h" +#include "padic.h" +#include "nmod.h" + +int main() +{ + flint_rand_t state; + ulong p; + slong n, exp; + int have_mpn_mod; + + flint_rand_init(state); + radix_t radix; + + p = 7; + radix_init(radix, p, 0); + exp = radix->exp; + + flint_printf("radix = %wu^%wd\n", p, radix->exp); + + for (n = 1; n <= 10000000; n *= 2) + { + fmpz_mod_ctx_t mod_ctx; + gr_ctx_t gr_ctx; + radix_integer_t x, y, z; + gr_ptr gx = NULL, gy = NULL, gz = NULL; + fmpz_t fx, fy, fz, fp, pn; + padic_ctx_t pctx; + padic_t px, py, pz; + + fmpz_init_set_ui(fp, p); + fmpz_init(pn); + fmpz_ui_pow_ui(pn, p, exp * n); + + fmpz_mod_ctx_init(mod_ctx, pn); + padic_ctx_init(pctx, fp, exp * n, exp * n, PADIC_SERIES); + have_mpn_mod = (gr_ctx_init_mpn_mod(gr_ctx, pn) == GR_SUCCESS); + + fmpz_init(fx); + fmpz_init(fy); + fmpz_init(fz); + + radix_integer_init(x, radix); + radix_integer_init(y, radix); + radix_integer_init(z, radix); + + padic_init2(px, exp * n); + padic_init2(py, exp * n); + padic_init2(pz, exp * n); + + do + { + fmpz_randm(fx, state, pn); + } while (fmpz_divisible_ui(fx, p)); + do + { + fmpz_randm(fy, state, pn); + } while (fmpz_divisible_ui(fy, p)); + + padic_set_fmpz(px, fx, pctx); + padic_set_fmpz(py, fy, pctx); + + if (have_mpn_mod) + { + gx = gr_heap_init(gr_ctx); + gy = gr_heap_init(gr_ctx); + gz = gr_heap_init(gr_ctx); + + GR_MUST_SUCCEED(gr_set_fmpz(gx, fx, gr_ctx)); + GR_MUST_SUCCEED(gr_set_fmpz(gy, fy, gr_ctx)); + } + + do + { + radix_integer_rand_limbs(x, state, n, radix); + } while (x->d[0] % p == 0); + do + { + radix_integer_rand_limbs(y, state, n, radix); + } while (y->d[0] % p == 0); + + double t1, t2, t3, t4, FLINT_SET_BUT_UNUSED(__); + + TIMEIT_START; + padic_mul(pz, px, py, pctx); + TIMEIT_STOP_VALUES(__, t1); + + TIMEIT_START; + fmpz_mod_mul(fz, fx, fy, mod_ctx); + TIMEIT_STOP_VALUES(__, t2); + + if (have_mpn_mod) + { + TIMEIT_START; + GR_IGNORE(gr_mul(gz, gx, gy, gr_ctx)); + TIMEIT_STOP_VALUES(__, t3); + } + else + t3 = 0.0; + + TIMEIT_START; + radix_integer_mullow_limbs(z, x, y, n, radix); + TIMEIT_STOP_VALUES(__, t4); + + char st1[20]; + char st2[20]; + char st3[20]; + char st4[20]; + + sprintf(st1, "%.2e", t1); + sprintf(st2, "%.2e", t2); + if (t3 == 0.0) + sprintf(st3, "-"); + else + sprintf(st3, "%.2e", t3); + sprintf(st4, "%.2e", t4); + + if (n == 1) + flint_printf(" n padic fmpz_mod mpn_mod radix_integer speedup/padic speedup/fmpz_mod\n"); + + flint_printf("%8wd %8s %8s %8s %8s %.2fx %.2fx\n", n, st1, st2, st3, st4, t1 / t4, t2 / t4); + + padic_clear(px); + padic_clear(py); + padic_clear(pz); + + radix_integer_clear(x, radix); + radix_integer_clear(y, radix); + radix_integer_clear(z, radix); + + fmpz_clear(fx); + fmpz_clear(fy); + fmpz_clear(fz); + + if (have_mpn_mod) + { + gr_heap_clear(gx, gr_ctx); + gr_heap_clear(gy, gr_ctx); + gr_heap_clear(gz, gr_ctx); + gr_ctx_clear(gr_ctx); + } + + fmpz_mod_ctx_clear(mod_ctx); + padic_ctx_clear(pctx); + + fmpz_clear(pn); + fmpz_clear(fp); + } + + radix_clear(radix); + flint_rand_clear(state); + flint_cleanup_master(); + return 0; +} + diff --git a/src/radix/profile/p-sub.c b/src/radix/profile/p-sub.c new file mode 100644 index 0000000000..367e6cac61 --- /dev/null +++ b/src/radix/profile/p-sub.c @@ -0,0 +1,74 @@ +/* + Copyright (C) 2026 Fredrik Johansson + + This file is part of FLINT. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. See . +*/ + +#include +#include "profiler.h" +#include "radix.h" + +int main() +{ + radix_t radix; + slong digits, nd, n1, n2; + nn_ptr a, b, c; + double tmpn, tradix, FLINT_SET_BUT_UNUSED(tt); + + flint_rand_t state; + flint_rand_init(state); + + flint_printf(" decimal mpn decimal time time relative\n"); + flint_printf(" digits limbs limbs mpn_add radix_add time\n\n"); + + for (nd = 1; nd <= 100000000; nd = FLINT_MAX(nd + 1, nd * 1.5)) + { + radix_init(radix, 10, 0); + + digits = nd * 19; + + /* Number of full-word limbs */ + n1 = (slong) (digits * (log(10) / (FLINT_BITS * log(2))) + 1.0); + /* Number of radix 10^e limbs */ + n2 = (digits + radix->exp - 1) / radix->exp; + + a = flint_malloc(n2 * sizeof(ulong)); + b = flint_malloc(n2 * sizeof(ulong)); + c = flint_malloc(2 * n2 * sizeof(ulong)); + + flint_mpn_urandomb(a, state, n1 * FLINT_BITS); + flint_mpn_urandomb(b, state, n1 * FLINT_BITS); + + mpn_sub(c, a, n1, b, n1); + TIMEIT_START; + mpn_sub(c, a, n1, b, n1); + TIMEIT_STOP_VALUES(tt, tmpn); + + radix_rand_limbs(a, state, n2, radix); + radix_rand_limbs(b, state, n2, radix); + + radix_sub(c, a, n2, b, n2, radix); + TIMEIT_START; + radix_sub(c, a, n2, b, n2, radix); + TIMEIT_STOP_VALUES(tt, tradix); + + flint_printf("%10wd %8wd %8wd %8g %8g %.3fx\n", + digits, n1, n2, tmpn, tradix, tradix / tmpn); + + flint_free(a); + flint_free(b); + flint_free(c); + + radix_clear(radix); + } + + flint_rand_clear(state); + flint_cleanup_master(); + return 0; +} + diff --git a/src/radix/test/t-invmod_bn.c b/src/radix/test/t-invmod_bn.c index d800ba4d3e..8dbc1d73ba 100644 --- a/src/radix/test/t-invmod_bn.c +++ b/src/radix/test/t-invmod_bn.c @@ -24,8 +24,8 @@ TEST_FUNCTION_START(radix_invmod_bn, state) radix_init_randtest(radix, state); - an = 1 + n_randint(state, 10); - n = 1 + n_randint(state, 10); + an = 1 + n_randint(state, 50); + n = 1 + n_randint(state, 50); a = flint_malloc(an * sizeof(ulong)); b = flint_malloc(n * sizeof(ulong));