diff --git a/src/radix/invmod_bn.c b/src/radix/invmod_bn.c
index 2d347c1b5d..f538bb8e99 100644
--- a/src/radix/invmod_bn.c
+++ b/src/radix/invmod_bn.c
@@ -127,11 +127,11 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix)
if (n == 2)
return 1;
- nn_ptr u, v;
+ nn_ptr u;
TMP_INIT;
TMP_START;
- u = TMP_ALLOC(2 * n * sizeof(ulong));
- v = u + n;
+ /* Todo: can tighten allocation when we always do a mulhigh */
+ u = TMP_ALLOC(n * sizeof(ulong));
slong a[FLINT_BITS];
slong i, m;
@@ -144,9 +144,27 @@ radix_invmod_bn(nn_ptr res, nn_srcptr x, slong xn, slong n, const radix_t radix)
m = n;
n = a[i];
- radix_mulmid(u, res, m, res, m, 0, n, radix);
- radix_mulmid(v, u, n, x, FLINT_MIN(n, xn), 0, n, radix);
- radix_neg(res + m, v + m, n - m, radix);
+ slong rxn = FLINT_MIN(n, m + xn);
+
+ /* Can use middle product */
+ if (m > 3 && LIMB_RADIX(radix) >= m && rxn > (m - 3))
+ {
+ ulong one = 1;
+ radix_mulmid(u + m - 3, res, m, x, FLINT_MIN(n, xn), m - 3, rxn, radix);
+ /* We know that the m least significant limbs of the full product
+ res * x are 00...001. The approximate high product is either
+ exact or 1 ulp too small. The latter is indicated by a
+ nonzero m-1 limb (i.e. a borrow to be returned). */
+ if (u[m - 1] != 0)
+ radix_add(u + m, u + m, n - m, &one, 1, radix);
+ radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix);
+ }
+ else
+ {
+ radix_mulmid(u, res, m, x, FLINT_MIN(n, xn), 0, rxn, radix);
+ radix_mulmid(res + m, u + m, rxn - m, res, m, 0, n - m, radix);
+ }
+ radix_neg(res + m, res + m, n - m, radix);
}
TMP_END;
diff --git a/src/radix/profile/p-add.c b/src/radix/profile/p-add.c
new file mode 100644
index 0000000000..fab856e0fc
--- /dev/null
+++ b/src/radix/profile/p-add.c
@@ -0,0 +1,74 @@
+/*
+ Copyright (C) 2026 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "profiler.h"
+#include "radix.h"
+
+int main()
+{
+ radix_t radix;
+ slong digits, nd, n1, n2;
+ nn_ptr a, b, c;
+ double tmpn, tradix, FLINT_SET_BUT_UNUSED(tt);
+
+ flint_rand_t state;
+ flint_rand_init(state);
+
+ flint_printf(" decimal mpn decimal time time relative\n");
+ flint_printf(" digits limbs limbs mpn_add radix_add time\n\n");
+
+ for (nd = 1; nd <= 100000000; nd = FLINT_MAX(nd + 1, nd * 1.5))
+ {
+ radix_init(radix, 10, 0);
+
+ digits = nd * 19;
+
+ /* Number of full-word limbs */
+ n1 = (slong) (digits * (log(10) / (FLINT_BITS * log(2))) + 1.0);
+ /* Number of radix 10^e limbs */
+ n2 = (digits + radix->exp - 1) / radix->exp;
+
+ a = flint_malloc(n2 * sizeof(ulong));
+ b = flint_malloc(n2 * sizeof(ulong));
+ c = flint_malloc(2 * n2 * sizeof(ulong));
+
+ flint_mpn_urandomb(a, state, n1 * FLINT_BITS);
+ flint_mpn_urandomb(b, state, n1 * FLINT_BITS);
+
+ mpn_add(c, a, n1, b, n1);
+ TIMEIT_START;
+ mpn_add(c, a, n1, b, n1);
+ TIMEIT_STOP_VALUES(tt, tmpn);
+
+ radix_rand_limbs(a, state, n2, radix);
+ radix_rand_limbs(b, state, n2, radix);
+
+ radix_add(c, a, n2, b, n2, radix);
+ TIMEIT_START;
+ radix_add(c, a, n2, b, n2, radix);
+ TIMEIT_STOP_VALUES(tt, tradix);
+
+ flint_printf("%10wd %8wd %8wd %8g %8g %.3fx\n",
+ digits, n1, n2, tmpn, tradix, tradix / tmpn);
+
+ flint_free(a);
+ flint_free(b);
+ flint_free(c);
+
+ radix_clear(radix);
+ }
+
+ flint_rand_clear(state);
+ flint_cleanup_master();
+ return 0;
+}
+
diff --git a/src/radix/profile/p-invmod.c b/src/radix/profile/p-invmod.c
new file mode 100644
index 0000000000..54a76d4f18
--- /dev/null
+++ b/src/radix/profile/p-invmod.c
@@ -0,0 +1,172 @@
+/*
+ Copyright (C) 2026 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "fmpz.h"
+#include "gr.h"
+#include "profiler.h"
+#include "gmp.h"
+#include "radix.h"
+#include "fmpz_mod.h"
+#include "padic.h"
+#include "nmod.h"
+
+int main()
+{
+ flint_rand_t state;
+ ulong p;
+ slong n, exp;
+ int have_mpn_mod;
+
+ flint_rand_init(state);
+ radix_t radix;
+
+ p = 7;
+ radix_init(radix, p, 0);
+ exp = radix->exp;
+
+ flint_printf("radix = %wu^%wd\n", p, radix->exp);
+
+ for (n = 1; n <= 10000000; n *= 2)
+ {
+ fmpz_mod_ctx_t mod_ctx;
+ gr_ctx_t gr_ctx;
+ radix_integer_t x, y, z;
+ gr_ptr gx = NULL, gy = NULL, gz = NULL;
+ fmpz_t fx, fy, fz, fp, pn;
+ padic_ctx_t pctx;
+ padic_t px, py, pz;
+
+ fmpz_init_set_ui(fp, p);
+ fmpz_init(pn);
+ fmpz_ui_pow_ui(pn, p, exp * n);
+
+ fmpz_mod_ctx_init(mod_ctx, pn);
+ padic_ctx_init(pctx, fp, exp * n, exp * n, PADIC_SERIES);
+ have_mpn_mod = (gr_ctx_init_mpn_mod(gr_ctx, pn) == GR_SUCCESS);
+
+ fmpz_init(fx);
+ fmpz_init(fy);
+ fmpz_init(fz);
+
+ radix_integer_init(x, radix);
+ radix_integer_init(y, radix);
+ radix_integer_init(z, radix);
+
+ padic_init2(px, exp * n);
+ padic_init2(py, exp * n);
+ padic_init2(pz, exp * n);
+
+ do
+ {
+ fmpz_randm(fx, state, pn);
+ } while (fmpz_divisible_ui(fx, p));
+ do
+ {
+ fmpz_randm(fy, state, pn);
+ } while (fmpz_divisible_ui(fy, p));
+
+ padic_set_fmpz(px, fx, pctx);
+ padic_set_fmpz(py, fy, pctx);
+
+ if (have_mpn_mod)
+ {
+ gx = gr_heap_init(gr_ctx);
+ gy = gr_heap_init(gr_ctx);
+ gz = gr_heap_init(gr_ctx);
+
+ GR_MUST_SUCCEED(gr_set_fmpz(gx, fx, gr_ctx));
+ GR_MUST_SUCCEED(gr_set_fmpz(gy, fy, gr_ctx));
+ }
+
+ do
+ {
+ radix_integer_rand_limbs(x, state, n, radix);
+ } while (x->d[0] % p == 0);
+ do
+ {
+ radix_integer_rand_limbs(y, state, n, radix);
+ } while (y->d[0] % p == 0);
+
+ double t1, t2, t3, t4, FLINT_SET_BUT_UNUSED(__);
+
+ TIMEIT_START;
+ padic_inv(pz, px, pctx);
+ TIMEIT_STOP_VALUES(__, t1);
+
+ TIMEIT_START;
+ fmpz_mod_inv(fz, fx, mod_ctx);
+ TIMEIT_STOP_VALUES(__, t2);
+
+ if (have_mpn_mod)
+ {
+ TIMEIT_START;
+ GR_IGNORE(gr_inv(gz, gx, gr_ctx));
+ TIMEIT_STOP_VALUES(__, t3);
+ }
+ else
+ t3 = 0.0;
+
+ TIMEIT_START;
+ radix_integer_invmod_limbs(z, x, n, radix);
+ TIMEIT_STOP_VALUES(__, t4);
+
+ char st1[20];
+ char st2[20];
+ char st3[20];
+ char st4[20];
+
+ sprintf(st1, "%.2e", t1);
+ sprintf(st2, "%.2e", t2);
+ if (t3 == 0.0)
+ sprintf(st3, "-");
+ else
+ sprintf(st3, "%.2e", t3);
+ sprintf(st4, "%.2e", t4);
+
+ if (n == 1)
+ flint_printf(" n padic fmpz_mod mpn_mod radix_integer speedup/padic speedup/fmpz_mod\n");
+
+ flint_printf("%8wd %8s %8s %8s %8s %.2fx %.2fx\n", n, st1, st2, st3, st4, t1 / t4, t2 / t4);
+
+ padic_clear(px);
+ padic_clear(py);
+ padic_clear(pz);
+
+ radix_integer_clear(x, radix);
+ radix_integer_clear(y, radix);
+ radix_integer_clear(z, radix);
+
+ fmpz_clear(fx);
+ fmpz_clear(fy);
+ fmpz_clear(fz);
+
+ if (have_mpn_mod)
+ {
+ gr_heap_clear(gx, gr_ctx);
+ gr_heap_clear(gy, gr_ctx);
+ gr_heap_clear(gz, gr_ctx);
+ gr_ctx_clear(gr_ctx);
+ }
+
+ fmpz_mod_ctx_clear(mod_ctx);
+ padic_ctx_clear(pctx);
+
+ fmpz_clear(pn);
+ fmpz_clear(fp);
+ }
+
+ radix_clear(radix);
+ flint_rand_clear(state);
+ flint_cleanup_master();
+ return 0;
+}
+
diff --git a/src/radix/profile/p-mulmod.c b/src/radix/profile/p-mulmod.c
new file mode 100644
index 0000000000..ce06d19f12
--- /dev/null
+++ b/src/radix/profile/p-mulmod.c
@@ -0,0 +1,172 @@
+/*
+ Copyright (C) 2026 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "fmpz.h"
+#include "gr.h"
+#include "profiler.h"
+#include "gmp.h"
+#include "radix.h"
+#include "fmpz_mod.h"
+#include "padic.h"
+#include "nmod.h"
+
+int main()
+{
+ flint_rand_t state;
+ ulong p;
+ slong n, exp;
+ int have_mpn_mod;
+
+ flint_rand_init(state);
+ radix_t radix;
+
+ p = 7;
+ radix_init(radix, p, 0);
+ exp = radix->exp;
+
+ flint_printf("radix = %wu^%wd\n", p, radix->exp);
+
+ for (n = 1; n <= 10000000; n *= 2)
+ {
+ fmpz_mod_ctx_t mod_ctx;
+ gr_ctx_t gr_ctx;
+ radix_integer_t x, y, z;
+ gr_ptr gx = NULL, gy = NULL, gz = NULL;
+ fmpz_t fx, fy, fz, fp, pn;
+ padic_ctx_t pctx;
+ padic_t px, py, pz;
+
+ fmpz_init_set_ui(fp, p);
+ fmpz_init(pn);
+ fmpz_ui_pow_ui(pn, p, exp * n);
+
+ fmpz_mod_ctx_init(mod_ctx, pn);
+ padic_ctx_init(pctx, fp, exp * n, exp * n, PADIC_SERIES);
+ have_mpn_mod = (gr_ctx_init_mpn_mod(gr_ctx, pn) == GR_SUCCESS);
+
+ fmpz_init(fx);
+ fmpz_init(fy);
+ fmpz_init(fz);
+
+ radix_integer_init(x, radix);
+ radix_integer_init(y, radix);
+ radix_integer_init(z, radix);
+
+ padic_init2(px, exp * n);
+ padic_init2(py, exp * n);
+ padic_init2(pz, exp * n);
+
+ do
+ {
+ fmpz_randm(fx, state, pn);
+ } while (fmpz_divisible_ui(fx, p));
+ do
+ {
+ fmpz_randm(fy, state, pn);
+ } while (fmpz_divisible_ui(fy, p));
+
+ padic_set_fmpz(px, fx, pctx);
+ padic_set_fmpz(py, fy, pctx);
+
+ if (have_mpn_mod)
+ {
+ gx = gr_heap_init(gr_ctx);
+ gy = gr_heap_init(gr_ctx);
+ gz = gr_heap_init(gr_ctx);
+
+ GR_MUST_SUCCEED(gr_set_fmpz(gx, fx, gr_ctx));
+ GR_MUST_SUCCEED(gr_set_fmpz(gy, fy, gr_ctx));
+ }
+
+ do
+ {
+ radix_integer_rand_limbs(x, state, n, radix);
+ } while (x->d[0] % p == 0);
+ do
+ {
+ radix_integer_rand_limbs(y, state, n, radix);
+ } while (y->d[0] % p == 0);
+
+ double t1, t2, t3, t4, FLINT_SET_BUT_UNUSED(__);
+
+ TIMEIT_START;
+ padic_mul(pz, px, py, pctx);
+ TIMEIT_STOP_VALUES(__, t1);
+
+ TIMEIT_START;
+ fmpz_mod_mul(fz, fx, fy, mod_ctx);
+ TIMEIT_STOP_VALUES(__, t2);
+
+ if (have_mpn_mod)
+ {
+ TIMEIT_START;
+ GR_IGNORE(gr_mul(gz, gx, gy, gr_ctx));
+ TIMEIT_STOP_VALUES(__, t3);
+ }
+ else
+ t3 = 0.0;
+
+ TIMEIT_START;
+ radix_integer_mullow_limbs(z, x, y, n, radix);
+ TIMEIT_STOP_VALUES(__, t4);
+
+ char st1[20];
+ char st2[20];
+ char st3[20];
+ char st4[20];
+
+ sprintf(st1, "%.2e", t1);
+ sprintf(st2, "%.2e", t2);
+ if (t3 == 0.0)
+ sprintf(st3, "-");
+ else
+ sprintf(st3, "%.2e", t3);
+ sprintf(st4, "%.2e", t4);
+
+ if (n == 1)
+ flint_printf(" n padic fmpz_mod mpn_mod radix_integer speedup/padic speedup/fmpz_mod\n");
+
+ flint_printf("%8wd %8s %8s %8s %8s %.2fx %.2fx\n", n, st1, st2, st3, st4, t1 / t4, t2 / t4);
+
+ padic_clear(px);
+ padic_clear(py);
+ padic_clear(pz);
+
+ radix_integer_clear(x, radix);
+ radix_integer_clear(y, radix);
+ radix_integer_clear(z, radix);
+
+ fmpz_clear(fx);
+ fmpz_clear(fy);
+ fmpz_clear(fz);
+
+ if (have_mpn_mod)
+ {
+ gr_heap_clear(gx, gr_ctx);
+ gr_heap_clear(gy, gr_ctx);
+ gr_heap_clear(gz, gr_ctx);
+ gr_ctx_clear(gr_ctx);
+ }
+
+ fmpz_mod_ctx_clear(mod_ctx);
+ padic_ctx_clear(pctx);
+
+ fmpz_clear(pn);
+ fmpz_clear(fp);
+ }
+
+ radix_clear(radix);
+ flint_rand_clear(state);
+ flint_cleanup_master();
+ return 0;
+}
+
diff --git a/src/radix/profile/p-sub.c b/src/radix/profile/p-sub.c
new file mode 100644
index 0000000000..367e6cac61
--- /dev/null
+++ b/src/radix/profile/p-sub.c
@@ -0,0 +1,74 @@
+/*
+ Copyright (C) 2026 Fredrik Johansson
+
+ This file is part of FLINT.
+
+ FLINT is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version. See .
+*/
+
+#include
+#include "profiler.h"
+#include "radix.h"
+
+int main()
+{
+ radix_t radix;
+ slong digits, nd, n1, n2;
+ nn_ptr a, b, c;
+ double tmpn, tradix, FLINT_SET_BUT_UNUSED(tt);
+
+ flint_rand_t state;
+ flint_rand_init(state);
+
+ flint_printf(" decimal mpn decimal time time relative\n");
+ flint_printf(" digits limbs limbs mpn_add radix_add time\n\n");
+
+ for (nd = 1; nd <= 100000000; nd = FLINT_MAX(nd + 1, nd * 1.5))
+ {
+ radix_init(radix, 10, 0);
+
+ digits = nd * 19;
+
+ /* Number of full-word limbs */
+ n1 = (slong) (digits * (log(10) / (FLINT_BITS * log(2))) + 1.0);
+ /* Number of radix 10^e limbs */
+ n2 = (digits + radix->exp - 1) / radix->exp;
+
+ a = flint_malloc(n2 * sizeof(ulong));
+ b = flint_malloc(n2 * sizeof(ulong));
+ c = flint_malloc(2 * n2 * sizeof(ulong));
+
+ flint_mpn_urandomb(a, state, n1 * FLINT_BITS);
+ flint_mpn_urandomb(b, state, n1 * FLINT_BITS);
+
+ mpn_sub(c, a, n1, b, n1);
+ TIMEIT_START;
+ mpn_sub(c, a, n1, b, n1);
+ TIMEIT_STOP_VALUES(tt, tmpn);
+
+ radix_rand_limbs(a, state, n2, radix);
+ radix_rand_limbs(b, state, n2, radix);
+
+ radix_sub(c, a, n2, b, n2, radix);
+ TIMEIT_START;
+ radix_sub(c, a, n2, b, n2, radix);
+ TIMEIT_STOP_VALUES(tt, tradix);
+
+ flint_printf("%10wd %8wd %8wd %8g %8g %.3fx\n",
+ digits, n1, n2, tmpn, tradix, tradix / tmpn);
+
+ flint_free(a);
+ flint_free(b);
+ flint_free(c);
+
+ radix_clear(radix);
+ }
+
+ flint_rand_clear(state);
+ flint_cleanup_master();
+ return 0;
+}
+
diff --git a/src/radix/test/t-invmod_bn.c b/src/radix/test/t-invmod_bn.c
index d800ba4d3e..8dbc1d73ba 100644
--- a/src/radix/test/t-invmod_bn.c
+++ b/src/radix/test/t-invmod_bn.c
@@ -24,8 +24,8 @@ TEST_FUNCTION_START(radix_invmod_bn, state)
radix_init_randtest(radix, state);
- an = 1 + n_randint(state, 10);
- n = 1 + n_randint(state, 10);
+ an = 1 + n_randint(state, 50);
+ n = 1 + n_randint(state, 50);
a = flint_malloc(an * sizeof(ulong));
b = flint_malloc(n * sizeof(ulong));