diff --git a/CMakeLists.txt b/CMakeLists.txt index 528b9e865..90b641733 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,64 +109,67 @@ option_with_default(CMAKE_BUILD_TYPE "Build type (Release or Debug)" Release) ### compiler-only option_with_print(LIBINT2_BUILD_LIBRARY_AS_SUBPROJECT - "[EXPERT] Build generated library as a subproject: if FALSE will configure and build separately" OFF) + "[EXPERT] Build generated library as a subproject: if FALSE will configure and build separately" OFF) ### library-only option_with_print(LIBINT2_REQUIRE_CXX_API - "C++11 Libint API: define library targets + test (requires Eigen3, Boost is optional but strongly recommended)" ON) + "C++11 Libint API: define library targets + test (requires Eigen3, Boost is optional but strongly recommended)" ON) option_with_print(LIBINT2_REQUIRE_CXX_API_COMPILED - "Build C++11 Compiled (not just header-only) targets (requires Eigen3, Boost strongly recommended)" ON) + "Build C++11 Compiled (not just header-only) targets (requires Eigen3, Boost strongly recommended)" ON) option_with_print(LIBINT2_ENABLE_FORTRAN - "Build Fortran03+ Libint interface (requires Fortran)" OFF) + "Build Fortran03+ Libint interface (requires Fortran)" OFF) option_with_print(LIBINT2_ENABLE_PYTHON - "Build Python bindings (requires Python and Pybind11 and Eigen3)" OFF) + "Build Python bindings (requires Python and Pybind11 and Eigen3)" OFF) option_with_print(LIBINT2_PREFIX_PYTHON_INSTALL - "For LIBINT2_ENABLE_PYTHON=ON, whether to install the Python module in the Linux manner to CMAKE_INSTALL_PREFIX or to not install it. See target libint2-python-wheel for alternate installation in the Python manner to Python_EXECUTABLE's site-packages." OFF) + "For LIBINT2_ENABLE_PYTHON=ON, whether to install the Python module in the Linux manner to CMAKE_INSTALL_PREFIX or to not install it. See target libint2-python-wheel for alternate installation in the Python manner to Python_EXECUTABLE's site-packages." OFF) option_with_print(BUILD_SHARED_LIBS - "Build Libint library as shared, not static" OFF) + "Build Libint library as shared, not static" OFF) option_with_print(LIBINT2_BUILD_SHARED_AND_STATIC_LIBS - "Build both shared and static Libint libraries in one shot. Uses -fPIC." OFF) + "Build both shared and static Libint libraries in one shot. Uses -fPIC." OFF) option_with_print(LIBINT2_ENABLE_MPFR - "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) + "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) # <<< Which Integrals Classes, Which Derivative Levels >>> option_with_default(LIBINT2_ENABLE_ONEBODY - "Compile with support for up to N-th derivatives of 1-body integrals (-1 for OFF)" 0) + "Compile with support for up to N-th derivatives of 1-body integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_ERI - "Compile with support for up to N-th derivatives of 4-center electron repulsion integrals (-1 for OFF)" 0) + "Compile with support for up to N-th derivatives of 4-center electron repulsion integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_ERI3 - "Compile with support for up to N-th derivatives of 3-center electron repulsion integrals (-1 for OFF)" -1) + "Compile with support for up to N-th derivatives of 3-center electron repulsion integrals (-1 for OFF)" -1) option_with_default(LIBINT2_ENABLE_ERI2 - "Compile with support for up to N-th derivatives of 2-center electron repulsion integrals (-1 for OFF)" -1) + "Compile with support for up to N-th derivatives of 2-center electron repulsion integrals (-1 for OFF)" -1) +option_with_default(LIBINT2_ENABLE_RKB_ERI + "Compile with support for up to N-th derivatives of relativistic restricted kinetic + balance (RKB) 4-center electron repulsion integrals (-1 for OFF)" 0) option_with_default(LIBINT2_ENABLE_G12 - "Compile with support for N-th derivatives of MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) + "Compile with support for N-th derivatives of MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) option_with_default(LIBINT2_ENABLE_G12DKH - "Compile with support for N-th derivatives of DKH-MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) + "Compile with support for N-th derivatives of DKH-MP2-F12 energies with Gaussian factors (-1 for OFF)" -1) option_with_print(LIBINT2_DISABLE_ONEBODY_PROPERTY_DERIVS - "Disable geometric derivatives of 1-body property integrals (all but overlap, kinetic, elecpot). + "Disable geometric derivatives of 1-body property integrals (all but overlap, kinetic, elecpot). These derivatives are disabled by default to save compile time. (enable with OFF) Note that the libtool build won't enable this- if forcibly enabled, build_libint balks." ON) option_with_print(LIBINT2_ENABLE_T1G12 - "Enable [Ti,G12] integrals when G12 integrals are enabled. Irrelevant when `LIBINT2_ENABLE_G12=OFF`. (disable with OFF)" ON) + "Enable [Ti,G12] integrals when G12 integrals are enabled. Irrelevant when `LIBINT2_ENABLE_G12=OFF`. (disable with OFF)" ON) # <<< Ordering Conventions >>> option_with_default(LIBINT2_SHGAUSS_ORDERING - "Ordering for shells of solid harmonic Gaussians: + "Ordering for shells of solid harmonic Gaussians: standard -- standard ordering (-l, -l+1 ... l) gaussian -- the Gaussian ordering (0, 1, -1, 2, -2, ... l, -l) See https://github.com/evaleev/libint/blob/master/INSTALL.md#solid-harmonic-ordering-scope-and-history ." standard) option_with_default(LIBINT2_CARTGAUSS_ORDERING - "Orderings for shells of cartesian Gaussians: + "Orderings for shells of cartesian Gaussians: standard -- standard ordering (xxx, xxy, xxz, xyy, xyz, xzz, yyy, ...) intv3 -- intv3 ordering (yyy, yyz, yzz, zzz, xyy, xyz, xzz, xxy, xxz, xxx) gamess -- GAMESS ordering (xxx, yyy, zzz, xxy, xxz, yyx, yyz, zzx, zzy, xyz) orca -- ORCA ordering (hydrid between GAMESS and standard) bagel -- axis-permuted version of intv3 (xxx, xxy, xyy, yyy, xxz, xyz, yyz, xzz, yzz, zzz)" standard) option_with_default(LIBINT2_SHELL_SET - "Support computation of shell sets sets subject to these restrictions: + "Support computation of shell sets sets subject to these restrictions: standard -- standard ordering: for (ab|cd): l(a) >= l(b), @@ -195,99 +198,107 @@ option_with_default(LIBINT2_SHELL_SET # `export CMAKE_BUILD_PARALLEL_LEVEL=N`. option_with_default(LIBINT2_MAX_AM - "Support Gaussians of angular momentum up to N. + "Support Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. If ERI3 ints are enabled, this option also controls the AM of the paired centers." 4) option_with_default(LIBINT2_OPT_AM - "Optimize maximally for up to angular momentum N (N <= max-am). + "Optimize maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (libint_max_am/2)+1)" -1) option_with_default(LIBINT2_MULTIPOLE_MAX_ORDER - "Maximum order of spherical multipole integrals. There is no maximum" 4) + "Maximum order of spherical multipole integrals. There is no maximum" 4) option_with_default(LIBINT2_ONEBODY_MAX_AM - "Support 1-body ints for Gaussians of angular momentum up to N. + "Support 1-body ints for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) option_with_default(LIBINT2_ONEBODY_OPT_AM - "Optimize 1-body ints maximally for up to angular momentum N (N <= max-am). + "Optimize 1-body ints maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) option_with_default(LIBINT2_ERI_MAX_AM - "Support 4-center ERIs for Gaussians of angular momentum up to N. + "Support 4-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) option_with_default(LIBINT2_ERI_OPT_AM - "Optimize 4-center ERIs maximally for up to angular momentum N (N <= max-am). + "Optimize 4-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) +option_with_default(LIBINT2_RKB_ERI_MAX_AM + "Support relativistic restricted kinetic balance (RKB) 4-center ERIs for Gaussians of angular momentum up to N. + Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) +option_with_default(LIBINT2_RKB_ERI_OPT_AM + "Optimize relativistic restricted kinetic balance (RKB) 4-center ERIs maximally for up to angular momentum N (N <= max-am). + Can specify values for each derivative level as a semicolon-separated string (default: (max_am/2)+1)" -1) + + option_with_default(LIBINT2_ERI3_MAX_AM - "Support 3-center ERIs for Gaussians of angular momentum up to N. + "Support 3-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am) This option controls only the single fitting center. The paired centers use LIBINT2_MAX_AM." -1) option_with_default(LIBINT2_ERI3_OPT_AM - "Optimize 3-center ERIs maximally for up to angular momentum N (N <= max-am). + "Optimize 3-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (max_am/2)+1)" -1) option_with_print(LIBINT2_ERI3_PURE_SH - "Assume the 'unpaired' center of 3-center ERIs will be transformed to pure solid harmonics" OFF) + "Assume the 'unpaired' center of 3-center ERIs will be transformed to pure solid harmonics" OFF) option_with_default(LIBINT2_ERI2_MAX_AM - "Support 2-center ERIs for Gaussians of angular momentum up to N. + "Support 2-center ERIs for Gaussians of angular momentum up to N. Can specify values for each derivative level as a semicolon-separated string. (default: max_am)" -1) option_with_default(LIBINT2_ERI2_OPT_AM - "Optimize 2-center ERIs maximally for up to angular momentum N (N <= max-am). + "Optimize 2-center ERIs maximally for up to angular momentum N (N <= max-am). Can specify values for each derivative level as a semicolon-separated string. (default: (max_am/2)+1)" -1) option_with_print(LIBINT2_ERI2_PURE_SH - "Assume the 2-center ERIs will be transformed to pure solid harmonics" OFF) + "Assume the 2-center ERIs will be transformed to pure solid harmonics" OFF) option_with_default(LIBINT2_G12_MAX_AM - "Support integrals for G12 methods of angular momentum up to N. (default: max_am)" -1) + "Support integrals for G12 methods of angular momentum up to N. (default: max_am)" -1) option_with_default(LIBINT2_G12_OPT_AM - "Optimize G12 integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) + "Optimize G12 integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) option_with_default(LIBINT2_G12DKH_MAX_AM - "Support integrals for relativistic G12 methods of angular momentum up to N. (default: max_am)" -1) + "Support integrals for relativistic G12 methods of angular momentum up to N. (default: max_am)" -1) option_with_default(LIBINT2_G12DKH_OPT_AM - "Optimize G12DKH integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) + "Optimize G12DKH integrals for up to angular momentum N (N <= max-am). (default: (max_am/2)+1)" -1) # <<< Miscellaneous >>> option_with_print(LIBINT2_CONTRACTED_INTS - "Turn on support for contracted integrals." ON) + "Turn on support for contracted integrals." ON) option_with_default(LIBINT2_ERI_STRATEGY - "(EXPERT) Compute ERIs using the following strategy. (0 for OS, 1 for HGP, 2 for HL)" 1) + "(EXPERT) Compute ERIs using the following strategy. (0 for OS, 1 for HGP, 2 for HL)" 1) option_with_print(LIBINT2_USE_COMPOSITE_EVALUATORS - "Libint will use composite evaluators (i.e. every evaluator will compute one integral type only)" ON) + "Libint will use composite evaluators (i.e. every evaluator will compute one integral type only)" ON) option_with_print(LIBINT2_SINGLE_EVALTYPE - "Generate single evaluator type (i.e. all tasks use the same evaluator). OFF is NYI" ON) + "Generate single evaluator type (i.e. all tasks use the same evaluator). OFF is NYI" ON) option_with_default(LIBINT2_ENABLE_UNROLLING - "Unroll shell sets into integrals (will unroll shell sets larger than N) (0 for never, N for N, 1000000000 for always)" 100) + "Unroll shell sets into integrals (will unroll shell sets larger than N) (0 for never, N for N, 1000000000 for always)" 100) option_with_default(LIBINT2_ALIGN_SIZE - "(EXPERT) if posix_memalign is available, this will specify alignment of Libint data, in units of + "(EXPERT) if posix_memalign is available, this will specify alignment of Libint data, in units of sizeof(LIBINT2_REALTYPE). Default is to use built-in heuristics: system-determined for vectorization off (default) or veclen * sizeof(LIBINT2_REALTYPE) for vectorization on." 0) mark_as_advanced(LIBINT2_ALIGN_SIZE) option_with_default(LIBINT2_REALTYPE - "Specifies the floating-point data type used by the library. Consumed at library build-time." double) + "Specifies the floating-point data type used by the library. Consumed at library build-time." double) option_with_print(LIBINT2_USER_DEFINED_REAL_INCLUDES - "Additional #includes necessary to use the real type." OFF) + "Additional #includes necessary to use the real type." OFF) include(int_userreal) option_with_print(LIBINT2_GENERATE_FMA - "Generate FMA (fused multiply-add) instructions (to benefit must have FMA-capable hardware and compiler)" OFF) + "Generate FMA (fused multiply-add) instructions (to benefit must have FMA-capable hardware and compiler)" OFF) option_with_print(LIBINT2_ENABLE_GENERIC_CODE "Use manually-written generic code" OFF) option_with_default(LIBINT2_API_PREFIX "Prepend this string to every name in the library API (except for the types)." "") option_with_print(LIBINT2_VECTOR_LENGTH - "Compute integrals in vectors of length N." OFF) + "Compute integrals in vectors of length N." OFF) option_with_default(LIBINT2_VECTOR_METHOD - "Specifies how to vectorize integrals. Irrelevant when `LIBINT2_VECTOR_LENGTH=OFF. Allowed values are 'block' (default), and 'line'." block) + "Specifies how to vectorize integrals. Irrelevant when `LIBINT2_VECTOR_LENGTH=OFF. Allowed values are 'block' (default), and 'line'." block) option_with_print(LIBINT2_ACCUM_INTS - "Accumulate integrals to the buffer, rather than copy (OFF for copy, ON for accum)." OFF) + "Accumulate integrals to the buffer, rather than copy (OFF for copy, ON for accum)." OFF) option_with_print(LIBINT2_FLOP_COUNT - "Support (approximate) FLOP counting by the library. (Generated code will require C++11!)" OFF) + "Support (approximate) FLOP counting by the library. (Generated code will require C++11!)" OFF) option_with_print(LIBINT2_PROFILE - "Turn on profiling instrumentation of the library. (Generated code will require C++11!)" OFF) + "Turn on profiling instrumentation of the library. (Generated code will require C++11!)" OFF) option_with_print(LIBINT2_ENABLE_MPFR - "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) + "Use GNU MPFR library for high-precision testing (EXPERTS ONLY). Consumed at library build-time." OFF) option_with_default(LIBINT2_EXPORT_COMPRESSOR - "Export tarball with compression gzip or bzip2" gzip) + "Export tarball with compression gzip or bzip2" gzip) # next one defined by `include(CTest)` message(STATUS "Showing option BUILD_TESTING: ${BUILD_TESTING}") @@ -304,13 +315,13 @@ include(int_am) check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN) if (NOT HAVE_POSIX_MEMALIGN) message(FATAL_ERROR "did not find posix_memalign ... this SHOULD NOT happen. Cannot proceed.") -endif() +endif () check_include_file_cxx(stdint.h HAVE_STDINT_H) # limits.h? if (cxx_std_11 IN_LIST CMAKE_CXX_COMPILE_FEATURES) set(LIBINT_HAS_CXX11 1) -endif() +endif () booleanize01(LIBINT2_ERI3_PURE_SH) booleanize01(LIBINT2_ERI2_PURE_SH) @@ -334,9 +345,9 @@ elseif (LIBINT2_EXPORT_COMPRESSOR STREQUAL "xz") elseif (LIBINT2_EXPORT_COMPRESSOR STREQUAL "bzip2") set(LIBINT_EXPORT_COMPRESSOR_CMD "jcf") set(LIBINT_EXPORT_COMPRESSOR_EXT "tbz2") -else() +else () message(FATAL_ERROR "No valid compressor; invoke CMake with -DLIBINT2_EXPORT_COMPRESSOR=gzip|bzip2") -endif() +endif () ################################## Dependencies ################################# @@ -347,9 +358,9 @@ if (LIBINT2_ENABLE_MPFR) # mpfr detected in CMakeLists.txt.export at appropriate time for library, but prechecking here find_package(Multiprecision MODULE REQUIRED COMPONENTS gmpxx mpfr) set(LIBINT_HAS_MPFR 1) -else() +else () find_package(Multiprecision MODULE REQUIRED COMPONENTS gmpxx) -endif() +endif () get_property(_loc TARGET Multiprecision::gmp PROPERTY LOCATION) message(VERBOSE "${Cyan}Found GMP${ColourReset}: ${_loc}") @@ -358,12 +369,12 @@ message(VERBOSE "${Cyan}Found GMPXX${ColourReset}: ${_loc}") if (TARGET Multiprecision::mpfr) get_property(_loc TARGET Multiprecision::mpfr PROPERTY LOCATION) message(VERBOSE "${Cyan}Found MPFR${ColourReset}: ${_loc} (found version ${MPFR_VERSION})") -endif() +endif () find_package(Boost 1.57 REQUIRED) if (TARGET Boost::headers) set(LIBINT_HAS_SYSTEM_BOOST_PREPROCESSOR_VARIADICS 1) -endif() +endif () # deferring find_package(Eigen3) to library (CMakeLists.txt.export) @@ -373,9 +384,9 @@ endif() set(EXPORT_STAGE_DIR ${PROJECT_BINARY_DIR}/libint-${LIBINT_EXT_VERSION}) configure_file( - cmake/modules/int_computed.cmake.in - cmake/modules/int_computed.cmake - @ONLY) + cmake/modules/int_computed.cmake.in + cmake/modules/int_computed.cmake + @ONLY) # CMake data transmitted to C++ via config.h for generator/compiler (_EXPORT_MODE=0). # Same info is positioned for the library export, but _EXPORT_MODE=1 turns on @@ -383,28 +394,28 @@ configure_file( # library build time. set(_EXPORT_MODE 0) # convert user-facing LIBINT2_ variables to LIBINT_ internal variables -foreach(_var API_PREFIX;ERI3_PURE_SH;ERI2_PURE_SH;DISABLE_ONEBODY_PROPERTY_DERIVS;ENABLE_UNROLLING;ENABLE_GENERIC_CODE;VECTOR_LENGTH;VECTOR_METHOD;ALIGN_SIZE;USER_DEFINED_REAL;USER_DEFINED_REAL_INCLUDES;GENERATE_FMA;ACCUM_INTS;FLOP_COUNT;PROFILE;CONTRACTED_INTS;SINGLE_EVALTYPE;USE_COMPOSITE_EVALUATORS;ERI_STRATEGY;MULTIPOLE_MAX_ORDER) +foreach (_var API_PREFIX;ERI3_PURE_SH;ERI2_PURE_SH;DISABLE_ONEBODY_PROPERTY_DERIVS;ENABLE_UNROLLING;ENABLE_GENERIC_CODE;VECTOR_LENGTH;VECTOR_METHOD;ALIGN_SIZE;USER_DEFINED_REAL;USER_DEFINED_REAL_INCLUDES;GENERATE_FMA;ACCUM_INTS;FLOP_COUNT;PROFILE;CONTRACTED_INTS;SINGLE_EVALTYPE;USE_COMPOSITE_EVALUATORS;ERI_STRATEGY;MULTIPOLE_MAX_ORDER) if (DEFINED LIBINT2_${_var}) if (DEFINED LIBINT_${_var}) message(FATAL_ERROR "renaming user-facing LIBINT2_${_var} variable but internal variable LIBINT_${_var} already exists") else () set(LIBINT_${_var} ${LIBINT2_${_var}}) - endif() - endif() + endif () + endif () endforeach () configure_file( - include/libint2/config.h.cmake.in - include/libint2/config.h - @ONLY) + include/libint2/config.h.cmake.in + include/libint2/config.h + @ONLY) set(_EXPORT_MODE 1) configure_file( - include/libint2/config.h.cmake.in - ${EXPORT_STAGE_DIR}/include/libint2/config.h - @ONLY) + include/libint2/config.h.cmake.in + ${EXPORT_STAGE_DIR}/include/libint2/config.h + @ONLY) configure_file( - include/libint2/config2.h.cmake.in - ${EXPORT_STAGE_DIR}/include/libint2/config2.h.cmake.in - COPYONLY) + include/libint2/config2.h.cmake.in + ${EXPORT_STAGE_DIR}/include/libint2/config2.h.cmake.in + COPYONLY) add_subdirectory(src) diff --git a/cmake/modules/int_am.cmake b/cmake/modules/int_am.cmake index c1d61cd55..350924f49 100644 --- a/cmake/modules/int_am.cmake +++ b/cmake/modules/int_am.cmake @@ -262,7 +262,7 @@ macro(process_integrals_class class) if (LIBINT2_${class}_OPT_AM EQUAL -1) set(LIBINT_${class}_OPT_AM "") else() - set($LIBINT_{class}_OPT_AM ${LIBINT2_${class}_OPT_AM}) + set(LIBINT_${class}_OPT_AM ${LIBINT2_${class}_OPT_AM}) endif() endif() if (LIBINT_OPT_AM_LIST) @@ -357,6 +357,7 @@ endmacro() process_integrals_class(ONEBODY) process_integrals_class(ERI) +process_integrals_class(RKB_ERI) process_integrals_class(ERI3) process_integrals_class(ERI2) # unlike above, these classes (1) don't do AM_LIST and (2) require value in config.h if enabled @@ -396,7 +397,7 @@ list(REVERSE _amlist) list(APPEND Libint2_ERI_COMPONENTS "${_amlist}") message(VERBOSE "setting components ${_amlist}") -foreach(_cls ONEBODY;ERI;ERI3;ERI2;G12;G12DKH) +foreach(_cls ONEBODY;ERI;RKB_ERI;ERI3;ERI2;G12;G12DKH) if((_cls STREQUAL G12) OR (_cls STREQUAL G12DKH)) add_feature_info( "integral class ${_cls}" @@ -433,6 +434,8 @@ foreach(_cls ONEBODY;ERI;ERI3;ERI2;G12;G12DKH) list(APPEND _amlist "onebody_${_am${_l}}${_am${_l}}_d${_d}") elseif (_cls STREQUAL "G12") list(APPEND _amlist "g12_${_am${_l}}${_am${_l}}${_am${_l}}${_am${_l}}_d${_d}") + elseif (_cls STREQUAL "RKB_ERI") + list(APPEND _amlist "rkb_eri_${_am${_l}}${_am${_l}}${_am${_l}}${_am${_l}}_d${_d}") endif() endforeach() if (_cls STREQUAL "ERI3") diff --git a/export/tests/unit/test-1body.cc b/export/tests/unit/test-1body.cc index 8c27be49f..5d0458b60 100644 --- a/export/tests/unit/test-1body.cc +++ b/export/tests/unit/test-1body.cc @@ -244,6 +244,73 @@ TEST_CASE_METHOD(libint2::unit::DefaultFixture, "W correctness", #endif // LIBINT2_SUPPORT_ONEBODY } +TEST_CASE_METHOD(libint2::unit::DefaultFixture, "σpRσp correctness", + "[engine][1-body]") { +#if defined(LIBINT2_SUPPORT_ONEBODY) + if (LIBINT_SHGSHELL_ORDERING != LIBINT_SHGSHELL_ORDERING_STANDARD) return; + + // Two contracted Gaussian shells at distinct centers. We exercise + // `(σ·p) r (σ·p)` over the (s|d) and (d|s) shell pairs and validate + // Hermiticity: trace components (q=0) are symmetric under bra↔ket swap, + // antisym components (q=1,2,3) are antisymmetric. + std::vector obs{ + Shell{{1.0, 3.0}, {{0, true, {1.0, 0.3}}}, {{0.0, 0.0, 0.0}}}, + Shell{{2.0, 5.0}, {{2, true, {1.0, 0.2}}}, {{1.0, 1.0, 1.0}}}}; + + const auto lmax = std::min(2, LIBINT2_MAX_AM_oprop); + if (lmax < 2) return; + + auto engine = Engine(Operator::oprop, 2, lmax); + engine.set_params(std::array{{0.0, 0.0, 0.0}}); + + // (s|σpRσp|d) and (d|σpRσp|s) + engine.compute(obs[0], obs[1]); + std::array, 12> ab; + for (int c = 0; c < 12; ++c) { + const auto* buf = engine.results()[c]; + REQUIRE(buf != nullptr); + ab[c].assign(buf, buf + (1 * 5)); // n_s × n_d_pure = 1 × 5 + } + + engine.compute(obs[1], obs[0]); + std::array, 12> ba; + for (int c = 0; c < 12; ++c) { + const auto* buf = engine.results()[c]; + REQUIRE(buf != nullptr); + ba[c].assign(buf, buf + (5 * 1)); // n_d_pure × n_s + } + + // Hermiticity check: σpRσp is Hermitian, but the Pauli identity routes the + // imaginary i factor on the antisym pieces into a real-stored sign flip. + // q=0 trace: matrix is symmetric ⇒ ab[0+k][i,j] == ba[0+k][j,i] + // q=1..3 : matrix is antisym ⇒ ab[q+k][i,j] == -ba[q+k][j,i] + // (Indices: ab is laid out (i=0..n_s-1, j=0..n_d-1); ba is (j, i).) + const double tol = 1.0e-12; + for (int k = 0; k < 3; ++k) { + for (int q = 0; q < 4; ++q) { + const int comp = 4 * k + q; + const double expected_sign = (q == 0) ? +1.0 : -1.0; + for (int i = 0; i < 1; ++i) { + for (int j = 0; j < 5; ++j) { + const double v_ab = ab[comp][i * 5 + j]; + const double v_ba = ba[comp][j * 1 + i]; + REQUIRE(std::isfinite(v_ab)); + REQUIRE(std::abs(v_ab - expected_sign * v_ba) < tol); + } + } + } + } + + // Sanity: not every component is identically zero (would mask codegen bugs). + bool any_nonzero = false; + for (int c = 0; c < 12; ++c) + for (double v : ab[c]) + if (std::abs(v) > 1.0e-10) any_nonzero = true; + REQUIRE(any_nonzero); +#endif // LIBINT2_SUPPORT_ONEBODY +} + + // Helper: max number of primitives across all centers in q_gau data. static size_t max_nprim(const libint2::GaussianPotentialCentersData& data) { size_t result = 0; diff --git a/export/tests/unit/test-2body.cc b/export/tests/unit/test-2body.cc index fd602a910..dc0c01b73 100644 --- a/export/tests/unit/test-2body.cc +++ b/export/tests/unit/test-2body.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2024 Edward F. Valeev + * Copyright (C) 2004-2026 Edward F. Valeev * * This file is part of Libint library. * @@ -344,6 +344,438 @@ TEST_CASE("eri geometric derivatives", "[engine][2-body]") { } } +TEST_CASE("RKB Coulomb integrals", "[engine][2-body]") { + std::vector obs{// pseudorandom s + Shell{{1.0}, {{0, false, {1.0}}}, {{0.0, 0.0, 0.0}}}, + // pseudorandom p + Shell{{2.0}, {{1, false, {1.0}}}, {{1.0, 1.0, 1.0}}}}; + + const auto max_nprim = libint2::max_nprim(obs); + const auto max_l = libint2::max_l(obs); + typedef std::array der_idx; + + // e.g. d_xx maps the derivative index of derivative w.r.t x + // coord of ket1 and x coord of ket2 in Chemist notation. + // deriv indices for (LL|SS) + der_idx d_xx = {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx d_yy = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx d_zz = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx d_yz = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1}; + der_idx d_zy = {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}; + der_idx d_zx = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}; + der_idx d_xz = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}; + der_idx d_xy = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx d_yx = {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0}; + + // deriv indices for (SS|SS) + // 0th component + der_idx xxxx = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx yyxx = {0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}; + der_idx zzxx = {0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0}; + der_idx yxyx = {0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}; + der_idx xyyx = {1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}; + der_idx yxxy = {0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx xyxy = {1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0}; + der_idx xxyy = {1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx yyyy = {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0}; + der_idx zzyy = {0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0}; + der_idx xxzz = {1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx yyzz = {0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}; + der_idx zzzz = {0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1}; + + // x-component + der_idx zxzx = {0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0}; + der_idx xzzx = {1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0}; + der_idx zyzy = {0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0}; + der_idx yzzy = {0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0}; + der_idx zxxz = {0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1}; + der_idx xzxz = {1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1}; + der_idx zyyz = {0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1}; + der_idx yzyz = {0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1}; + + // y-component + der_idx zyzx = {0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0}; + der_idx yzzx = {0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0}; + der_idx zxzy = {0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0}; + der_idx xzzy = {1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0}; + der_idx zyxz = {0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1}; + der_idx yzxz = {0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1}; + der_idx zxyz = {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1}; + der_idx xzyz = {1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1}; + + // z-component + der_idx yxxx = {0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; + der_idx xyxx = {1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}; + der_idx xxyx = {1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}; + der_idx yyyx = {0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}; + der_idx zzyx = {0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0}; + der_idx xxxy = {1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0}; + der_idx yyxy = {0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0}; + der_idx zzxy = {0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + der_idx yxyy = {0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0}; + der_idx xyyy = {1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0}; + der_idx yxzz = {0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1}; + der_idx xyzz = {1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1}; + + SECTION("Coulombσpσp and σpσpCoulombσpσp") { + Engine engine_llss, engine_ssss; + try { + engine_llss = Engine(Operator::coulomb_opop, max_nprim, max_l, 0); + engine_ssss = Engine(Operator::opop_coulomb_opop, max_nprim, max_l, 0); + // TODO: need another unit test for derivatives of RKB ERIs + } catch ( + Engine::lmax_exceeded &) { // skip the test if lmax exceeded or libint2 + // not configured with RKB support + return; + } + + const auto nshell = obs.size(); + for (int s0 = 0; s0 != nshell; ++s0) { + for (int s1 = 0; s1 != nshell; ++s1) { + for (int s2 = 0; s2 != nshell; ++s2) { + for (int s3 = 0; s3 != nshell; ++s3) { + const auto &results_llss = + engine_llss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + const auto &results_ssss = + engine_ssss.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + assert(results_llss.size() == + 4); // 4 buffers for single-spin quaternion components + + LIBINT2_REF_REALTYPE Aref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + LIBINT2_REF_REALTYPE Bref[3]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + LIBINT2_REF_REALTYPE Cref[3]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + LIBINT2_REF_REALTYPE Dref[3]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + std::array ref_coulomb_opop{0.0, 0.0, 0.0, + 0.0}; + std::array ref_opop_coulomb_opop{}; + ref_opop_coulomb_opop.fill(0.0); + uint p0123 = 0; + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { + for (uint p3 = 0; p3 < obs[s3].nprim(); p3++, p0123++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + + const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + auto eri_drrrr = [&](der_idx d_rrrr) { + return eri(d_rrrr.data(), l0, m0, n0, alpha0, Aref, l1, + m1, n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, + l3, m3, n3, alpha3, Dref, 0); + }; + + // helper: build der_idx from 4 derivative directions + // (0=x, 1=y, 2=z) for centers A, B, C, D + constexpr int X = 0, Y = 1, Z = 2; + auto didx = [](int a, int b, int c, int d) -> der_idx { + der_idx r = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + r[a] = 1; + r[3 + b] = 1; + r[6 + c] = 1; + r[9 + d] = 1; + return r; + }; + // shorthand: evaluate derivative ERI from 4 directions + auto D = [&](int a, int b, int c, int d) { + return eri_drrrr(didx(a, b, c, d)); + }; + + // (LL|SS) + ref_coulomb_opop[0] += + c0123 * + (eri_drrrr(d_xx) + eri_drrrr(d_yy) + eri_drrrr(d_zz)); + ref_coulomb_opop[1] += + c0123 * (eri_drrrr(d_yz) - eri_drrrr(d_zy)); + ref_coulomb_opop[2] += + c0123 * (eri_drrrr(d_zx) - eri_drrrr(d_xz)); + ref_coulomb_opop[3] += + c0123 * (eri_drrrr(d_xy) - eri_drrrr(d_yx)); + + // (SS|SS) — 16 components, Option A: index = 4*bra + ket + // 0:SS 1:SX 2:SY 3:SZ + ref_opop_coulomb_opop[0] += + c0123 * (D(X, X, X, X) + D(X, X, Y, Y) + D(X, X, Z, Z) + + D(Y, Y, X, X) + D(Y, Y, Y, Y) + D(Y, Y, Z, Z) + + D(Z, Z, X, X) + D(Z, Z, Y, Y) + D(Z, Z, Z, Z)); + ref_opop_coulomb_opop[1] += + c0123 * (D(X, X, Y, Z) - D(X, X, Z, Y) + D(Y, Y, Y, Z) - + D(Y, Y, Z, Y) + D(Z, Z, Y, Z) - D(Z, Z, Z, Y)); + ref_opop_coulomb_opop[2] += + c0123 * (D(X, X, Z, X) - D(X, X, X, Z) + D(Y, Y, Z, X) - + D(Y, Y, X, Z) + D(Z, Z, Z, X) - D(Z, Z, X, Z)); + ref_opop_coulomb_opop[3] += + c0123 * (D(X, X, X, Y) - D(X, X, Y, X) + D(Y, Y, X, Y) - + D(Y, Y, Y, X) + D(Z, Z, X, Y) - D(Z, Z, Y, X)); + // 4:XS 5:XX 6:XY 7:XZ + ref_opop_coulomb_opop[4] += + c0123 * (D(Y, Z, X, X) - D(Z, Y, X, X) + D(Y, Z, Y, Y) - + D(Z, Y, Y, Y) + D(Y, Z, Z, Z) - D(Z, Y, Z, Z)); + ref_opop_coulomb_opop[5] += + c0123 * (-D(Y, Z, Y, Z) + D(Y, Z, Z, Y) + + D(Z, Y, Y, Z) - D(Z, Y, Z, Y)); + ref_opop_coulomb_opop[6] += + c0123 * (-D(Y, Z, Z, X) + D(Y, Z, X, Z) + + D(Z, Y, Z, X) - D(Z, Y, X, Z)); + ref_opop_coulomb_opop[7] += + c0123 * (-D(Y, Z, X, Y) + D(Y, Z, Y, X) + + D(Z, Y, X, Y) - D(Z, Y, Y, X)); + // 8:YS 9:YX 10:YY 11:YZ + ref_opop_coulomb_opop[8] += + c0123 * (D(Z, X, X, X) - D(X, Z, X, X) + D(Z, X, Y, Y) - + D(X, Z, Y, Y) + D(Z, X, Z, Z) - D(X, Z, Z, Z)); + ref_opop_coulomb_opop[9] += + c0123 * (-D(Z, X, Y, Z) + D(Z, X, Z, Y) + + D(X, Z, Y, Z) - D(X, Z, Z, Y)); + ref_opop_coulomb_opop[10] += + c0123 * (-D(Z, X, Z, X) + D(Z, X, X, Z) + + D(X, Z, Z, X) - D(X, Z, X, Z)); + ref_opop_coulomb_opop[11] += + c0123 * (-D(Z, X, X, Y) + D(Z, X, Y, X) + + D(X, Z, X, Y) - D(X, Z, Y, X)); + // 12:ZS 13:ZX 14:ZY 15:ZZ + ref_opop_coulomb_opop[12] += + c0123 * (D(X, Y, X, X) - D(Y, X, X, X) + D(X, Y, Y, Y) - + D(Y, X, Y, Y) + D(X, Y, Z, Z) - D(Y, X, Z, Z)); + ref_opop_coulomb_opop[13] += + c0123 * (-D(X, Y, Y, Z) + D(X, Y, Z, Y) + + D(Y, X, Y, Z) - D(Y, X, Z, Y)); + ref_opop_coulomb_opop[14] += + c0123 * (-D(X, Y, Z, X) + D(X, Y, X, Z) + + D(Y, X, Z, X) - D(Y, X, X, Z)); + ref_opop_coulomb_opop[15] += + c0123 * (-D(X, Y, X, Y) + D(X, Y, Y, X) + + D(Y, X, X, Y) - D(Y, X, Y, X)); + } + } + } + } + + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = + 1.0E-9; // For more detail on choice of these thresholds, see + // the comments in the TEST_CASE "eri geometric + // derivatives" + + std::array abs_errs_llss; + std::array rel_abs_errs_llss; + + // (LL|SS) has 4 components + for (auto comp = 0; comp < 4; ++comp) { + abs_errs_llss[comp] = + abs(ref_coulomb_opop[comp] - results_llss[comp][ijkl]); + rel_abs_errs_llss[comp] = + abs(abs_errs_llss[comp] / ref_coulomb_opop[comp]); + + bool llss_not_ok = + rel_abs_errs_llss[comp] > RELATIVE_DEVIATION_THRESHOLD && + abs_errs_llss[comp] > ABSOLUTE_DEVIATION_THRESHOLD; + + if (llss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 + << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_coulomb_opop[comp] + << " libint = " << results_llss[comp][ijkl] + << " relabs_error = " << rel_abs_errs_llss[comp] + << " abs_error = " << abs_errs_llss[comp] + << std::endl; + } + REQUIRE(!llss_not_ok); + } + + // (SS|SS) has 16 components (two independent spin spaces) + for (auto comp = 0; comp < 16; ++comp) { + auto abs_err_ssss = + abs(ref_opop_coulomb_opop[comp] - results_ssss[comp][ijkl]); + auto rel_abs_err_ssss = + abs(abs_err_ssss / ref_opop_coulomb_opop[comp]); + + bool ssss_not_ok = + rel_abs_err_ssss > RELATIVE_DEVIATION_THRESHOLD && + abs_err_ssss > ABSOLUTE_DEVIATION_THRESHOLD; + + if (ssss_not_ok) { + std::cout << "(l0 l1| l2 l3) = " + << "(" << s0 << " " << s1 << " | " << s2 << " " << s3 + << ") " + << "Elem " << ijkl << " comp= " << comp + << " : ref = " << ref_opop_coulomb_opop[comp] + << " libint = " << results_ssss[comp][ijkl] + << " relabs_error = " << rel_abs_err_ssss + << " abs_error = " << abs_err_ssss << std::endl; + } + REQUIRE(!ssss_not_ok); + } + + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART + } + } + } + } + } + + SECTION("op_coulomb_op") { + Engine engine_opCop; + try { + engine_opCop = Engine(Operator::op_coulomb_op, max_nprim, max_l, 0); + } catch (Engine::lmax_exceeded &) { + return; + } + + const auto nshell = obs.size(); + for (int s0 = 0; s0 != nshell; ++s0) { + for (int s1 = 0; s1 != nshell; ++s1) { + for (int s2 = 0; s2 != nshell; ++s2) { + for (int s3 = 0; s3 != nshell; ++s3) { + const auto &results = + engine_opCop.compute(obs[s0], obs[s1], obs[s2], obs[s3]); + assert(results.size() == 9); + + LIBINT2_REF_REALTYPE Aref[3], Bref[3], Cref[3], Dref[3]; + for (int i = 0; i < 3; ++i) Aref[i] = obs[s0].O[i]; + for (int i = 0; i < 3; ++i) Bref[i] = obs[s1].O[i]; + for (int i = 0; i < 3; ++i) Cref[i] = obs[s2].O[i]; + for (int i = 0; i < 3; ++i) Dref[i] = obs[s3].O[i]; + + int ijkl = 0; + + int l0, m0, n0; + FOR_CART(l0, m0, n0, obs[s0].contr[0].l) + int l1, m1, n1; + FOR_CART(l1, m1, n1, obs[s1].contr[0].l) + int l2, m2, n2; + FOR_CART(l2, m2, n2, obs[s2].contr[0].l) + int l3, m3, n3; + FOR_CART(l3, m3, n3, obs[s3].contr[0].l) + + // Raw 3x3 dyadic T_{ab} = (a b∂_a | c d∂_b), accumulated in + // chemist-notation index 3*a+b. After the primitive loop we + // project into the 9 SO(3) irreducible components that the engine + // returns: Scalar, AntisymX/Y/Z, SymTLDiagA/B, SymTLOffXY/XZ/YZ. + std::array ref_raw{}; + ref_raw.fill(0.0); + + for (uint p0 = 0; p0 < obs[s0].nprim(); p0++) { + for (uint p1 = 0; p1 < obs[s1].nprim(); p1++) { + for (uint p2 = 0; p2 < obs[s2].nprim(); p2++) { + for (uint p3 = 0; p3 < obs[s3].nprim(); p3++) { + const LIBINT2_REF_REALTYPE alpha0 = obs[s0].alpha[p0]; + const LIBINT2_REF_REALTYPE alpha1 = obs[s1].alpha[p1]; + const LIBINT2_REF_REALTYPE alpha2 = obs[s2].alpha[p2]; + const LIBINT2_REF_REALTYPE alpha3 = obs[s3].alpha[p3]; + const LIBINT2_REF_REALTYPE c0 = obs[s0].contr[0].coeff[p0]; + const LIBINT2_REF_REALTYPE c1 = obs[s1].contr[0].coeff[p1]; + const LIBINT2_REF_REALTYPE c2 = obs[s2].contr[0].coeff[p2]; + const LIBINT2_REF_REALTYPE c3 = obs[s3].contr[0].coeff[p3]; + const LIBINT2_REF_REALTYPE c0123 = c0 * c1 * c2 * c3; + + // Deriv on ν (center B, index 1) and on λ (center D, idx + // 3). + auto didx_bd = [](int a, int b) -> der_idx { + der_idx r = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + r[3 + a] = 1; + r[9 + b] = 1; + return r; + }; + auto D = [&](int a, int b) { + auto di = didx_bd(a, b); + return eri(di.data(), l0, m0, n0, alpha0, Aref, l1, m1, + n1, alpha1, Bref, l2, m2, n2, alpha2, Cref, l3, + m3, n3, alpha3, Dref, 0); + }; + for (int a = 0; a < 3; ++a) + for (int b = 0; b < 3; ++b) + ref_raw[3 * a + b] += c0123 * D(a, b); + } + } + } + } + + // Project raw dyadic into the 9 SO(3) irrep components used by + // op_coulomb_op (must match σpCoulombσp_Descr::Component order). + const auto Txx = ref_raw[0]; + const auto Txy = ref_raw[1]; + const auto Txz = ref_raw[2]; + const auto Tyx = ref_raw[3]; + const auto Tyy = ref_raw[4]; + const auto Tyz = ref_raw[5]; + const auto Tzx = ref_raw[6]; + const auto Tzy = ref_raw[7]; + const auto Tzz = ref_raw[8]; + std::array ref_op_coulomb_op{ + Txx + Tyy + Tzz, // Scalar + Tyz - Tzy, // AntisymX + Tzx - Txz, // AntisymY + Txy - Tyx, // AntisymZ + Txx - Tyy, // SymTLDiagA + 2.0 * Tzz - Txx - Tyy, // SymTLDiagB + Txy + Tyx, // SymTLOffXY + Txz + Tzx, // SymTLOffXZ + Tyz + Tzy, // SymTLOffYZ + }; + + const double ABSOLUTE_DEVIATION_THRESHOLD = 5.0E-14; + const double RELATIVE_DEVIATION_THRESHOLD = 1.0E-9; + for (auto comp = 0; comp < 9; ++comp) { + auto abs_err = abs(ref_op_coulomb_op[comp] - results[comp][ijkl]); + auto rel_abs_err = abs(abs_err / ref_op_coulomb_op[comp]); + bool not_ok = rel_abs_err > RELATIVE_DEVIATION_THRESHOLD && + abs_err > ABSOLUTE_DEVIATION_THRESHOLD; + if (not_ok) { + std::cout << "(l0 l1| l2 l3) = (" << s0 << " " << s1 << " | " + << s2 << " " << s3 << ") Elem " << ijkl + << " comp= " << comp + << " : ref = " << ref_op_coulomb_op[comp] + << " libint = " << results[comp][ijkl] + << " relabs_error = " << rel_abs_err + << " abs_error = " << abs_err << std::endl; + } + REQUIRE(!not_ok); + } + + ++ijkl; + END_FOR_CART + END_FOR_CART + END_FOR_CART + END_FOR_CART + } + } + } + } + } +} + TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { // pseudorandom s shells std::vector obs{ @@ -374,12 +806,12 @@ TEST_CASE("Erfx_Coulomb integrals", "[engine][2-body]") { REQUIRE(results[0] != nullptr); switch (k) { /* VALIDATION WOLFRAM CODE: -(* Integral of Coulomb kernel damped by (\[Lambda] Erf[\[Omega] r] + \ -\[Sigma] Erfc[\[Omega] r]), over unit-normalized s functions, \ -see Eq 52 in DOI 10.1039/b605188j *) -F0[T_] := If[T == 0, 1, Sqrt[\[Pi]/T]*Erf[Sqrt[T]]/2]; -sN[a_] := ((2 a)/\[Pi])^(3/4); -VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, + (* Integral of Coulomb kernel damped by (\[Lambda] Erf[\[Omega] r] + \ + \[Sigma] Erfc[\[Omega] r]), over unit-normalized s functions, \ + see Eq 52 in DOI 10.1039/b605188j *) + F0[T_] := If[T == 0, 1, Sqrt[\[Pi]/T]*Erf[Sqrt[T]]/2]; + sN[a_] := ((2 a)/\[Pi])^(3/4); + VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, B1_List, \[Beta]2_, B2_List, \[Omega]_, \[Lambda]_, \[Sigma]_] := Module[{\[Gamma]1, \[Gamma]2, P1, P2, K1, K2, T, result, \[Rho]}, \[Gamma]1 = \[Alpha]1 + \[Beta]1; @@ -397,13 +829,13 @@ VVeeErfx[\[Alpha]1_, A1_List, \[Alpha]2_, A2_List, \[Beta]1_, T]) sN[\[Alpha]1] sN[\[Alpha]2] sN[\[Beta]1] sN[\[Beta]2]; Return[result]; ]; -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 1, 0], 20]]] -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 0, 1], 20]]] -Print[CForm[ + Print[CForm[ N[VVeeErfx[1, {0, 0, 0}, 3, {2, 2, 2}, 2, {1, 1, 1}, 4, {3, 3, 3}, 1.1, 2, 3], 20]]] */ diff --git a/include/libint2.h b/include/libint2.h index 59d17ef65..25a231be7 100644 --- a/include/libint2.h +++ b/include/libint2.h @@ -22,18 +22,19 @@ #define _libint2_header_ #define LIBINT_T_SS_EREP_SS(mValue) \ - _aB_s___0__s___1___TwoPRep_s___0__s___1___Ab__up_##mValue + _aB_s____0__s____1___TwoPRep_s____0__s____1___Ab__up_##mValue #define LIBINT_T_SS_Km1G12_SS(mValue) \ - _aB_s___0__s___1___r12_minus_1_g12_s___0__s___1___Ab__up_##mValue + _aB_s____0__s____1___r12_minus_1_g12_s____0__s____1___Ab__up_##mValue #define LIBINT_T_SS_K0G12_SS_0 \ - _aB_s___0__s___1___r12_0_g12_s___0__s___1___Ab__up_0 + _aB_s____0__s____1___r12_0_g12_s____0__s____1___Ab__up_0 #define LIBINT_T_SS_K2G12_SS_0 \ - _aB_s___0__s___1___r12_2_g12_s___0__s___1___Ab__up_0 + _aB_s____0__s____1___r12_2_g12_s____0__s____1___Ab__up_0 #define LIBINT_T_SS_K4G12_SS_0 \ - _aB_s___0__s___1___r12_4_g12_s___0__s___1___Ab__up_0 -#define LIBINT_T_S_OVERLAP_S _aB_s___0___Overlap_s___0___Ab__up_ -#define LIBINT_T_S_KINETIC_S _aB_s___0___Kinetic_s___0___Ab__up_ -#define LIBINT_T_S_ELECPOT_S(mValue) _aB_s___0___ElecPot_s___0___Ab__up_##mValue + _aB_s____0__s____1___r12_4_g12_s____0__s____1___Ab__up_0 +#define LIBINT_T_S_OVERLAP_S _aB_s____0___Overlap_s____0___Ab__up_ +#define LIBINT_T_S_KINETIC_S _aB_s____0___Kinetic_s____0___Ab__up_ +#define LIBINT_T_S_ELECPOT_S(mValue) \ + _aB_s____0___ElecPot_s____0___Ab__up_##mValue #include #include diff --git a/include/libint2/config.h.cmake.in b/include/libint2/config.h.cmake.in index 640e68099..3eb32e2f0 100644 --- a/include/libint2/config.h.cmake.in +++ b/include/libint2/config.h.cmake.in @@ -71,6 +71,13 @@ #undef LIBINT_INCLUDE_ERI #endif +/* Support ERI derivatives up to this order */ +#define LIBINT_INCLUDE_RKB_ERI @LIBINT_INCLUDE_RKB_ERI@ +#if @LIBINT_INCLUDE_RKB_ERI@ == -1 +#undef LIBINT_INCLUDE_RKB_ERI +#endif + + /* Support 3-center ERI derivatives up to this order */ #define LIBINT_INCLUDE_ERI3 @LIBINT_INCLUDE_ERI3@ #if @LIBINT_INCLUDE_ERI3@ == -1 @@ -122,6 +129,18 @@ /* Max optimized AM for ERI and its derivatives */ #cmakedefine LIBINT_ERI_OPT_AM_LIST "@LIBINT_ERI_OPT_AM_LIST@" +/* Max AM for RKB_ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_MAX_AM_LIST) */ +#cmakedefine LIBINT_RKB_ERI_MAX_AM @LIBINT_RKB_ERI_MAX_AM@ + +/* Max AM for RKB_ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI_MAX_AM_LIST "@LIBINT_RKB_ERI_MAX_AM_LIST@" + +/* Max optimized AM for ERI (same for all derivatives; if not defined see LIBINT_RKB_ERI_OPT_AM_LIST) */ +#cmakedefine LIBINT_RKB_ERI_OPT_AM @LIBINT_RKB_ERI_OPT_AM@ + +/* Max optimized AM for ERI and its derivatives */ +#cmakedefine LIBINT_RKB_ERI_OPT_AM_LIST "@LIBINT_RKB_ERI_OPT_AM_LIST@" + /* Max AM for 3-center ERI (same for all derivatives; if not defined see LIBINT_ERI3_MAX_AM_LIST) */ #cmakedefine LIBINT_ERI3_MAX_AM @LIBINT_ERI3_MAX_AM@ diff --git a/include/libint2/cxxapi.h b/include/libint2/cxxapi.h index 0aceb509b..22686f958 100644 --- a/include/libint2/cxxapi.h +++ b/include/libint2/cxxapi.h @@ -35,9 +35,9 @@ #if !defined(LIBINT_INCLUDE_ONEBODY) || \ !(defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_ERI3) || \ - defined(LIBINT_INCLUDE_ERI2)) + defined(LIBINT_INCLUDE_ERI2) || defined(LIBINT_INCLUDE_RKB_ERI)) #error \ - "C++ API is only supported if both 1-body and some (eri, eri3, eri2) 2-body integrals are enabled" + "C++ API is only supported if both 1-body and some (eri, eri3, eri2, rkb_eri) 2-body integrals are enabled" #endif #include diff --git a/include/libint2/engine.h b/include/libint2/engine.h index 8e1a30ed1..0e04b085d 100644 --- a/include/libint2/engine.h +++ b/include/libint2/engine.h @@ -192,12 +192,32 @@ enum class Operator { /// \sa operator_traits /// \sa Surjuse, Deng, Asadchev, Valeev, arXiv:2603.16989 (2025) op_q_gau_op, + /// (1-body) σp . r . σp, the σ·p-on-both-sides analog of the dipole moment. + /// Produces 12 components = 3 dipole directions × 4 Pauli quaternion + /// components (trace + 3 antisym), indexed as `4*k + q` with `k ∈ {x,y,z}` + /// the dipole direction and `q ∈ {0=trace, 1=σ_x, 2=σ_y, 3=σ_z}` the Pauli + /// piece. Origin set via `engine.set_params(std::array)`. + oprop, /// \f$ \delta(\vec{r}_1 - \vec{r}_2) \f$ delta, /// (2-body) Coulomb operator = \f$ r_{12}^{-1} \f$ coulomb, /// alias for Operator::coulomb r12_m1 = coulomb, + /// (2-body) \f$ r_{12}^{-1} (σ.p_{k1})(σ.p_{k2})\f$ where k1 & k2 are + /// centers of ket1 and ket2, respectively + coulomb_opop, + /// (2-body) \f$ (σ.p_{b1})(σ.p_{b2}) r_{12}^{-1} (σ.p_{k1})(σ.p_{k2})\f$ + /// where b1 & b2 are centers of bra1 and bra2 and k1 & k2 are centers of + /// ket1 and ket2, respectively + opop_coulomb_opop, + /// (2-body) \f$ (σ.p_{b2}) r_{12}^{-1} (σ.p_{k2}) \f$ where b2 is the center + /// of bra2 and k2 is the center of ket2; Gaunt LS "bilinear" integral. + /// Produces 9 components (outer product of two Cartesian directions), + /// indexed as `3*a + b` with `a` = bra-side direction, `b` = ket-side + /// direction, and `a,b ∈ {x=0, y=1, z=2}`. Unlike coulomb_opop, the 9 + /// components are NOT contracted via σ·σ — all are kept independent. + op_coulomb_op, /// contracted Gaussian geminal cgtg, /// contracted Gaussian geminal times Coulomb @@ -230,7 +250,7 @@ enum class Operator { // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!keep this // updated!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! first_1body_oper = overlap, - last_1body_oper = op_q_gau_op, + last_1body_oper = oprop, first_2body_oper = delta, last_2body_oper = stg_x_coulomb, first_oper = first_1body_oper, @@ -291,6 +311,7 @@ struct operator_traits typedef const libint2::FmEval_Reference core_eval_type; #endif }; + /// opVop: 4 quaternionic components (scalar, x, y, z) of (σ·p)V(σ·p). /// Inherits point-charge params from nuclear; uses Boys function core eval. template <> @@ -415,6 +436,13 @@ struct operator_traits (LIBINT_MULTIPOLE_MAX_ORDER + 1) * (LIBINT_MULTIPOLE_MAX_ORDER + 1); }; +template <> +struct operator_traits + : public operator_traits { + static constexpr auto nopers = 12; + static constexpr auto intrinsic_deriv_order = 2; +}; + template <> struct operator_traits : public detail::default_operator_traits { @@ -424,6 +452,31 @@ struct operator_traits typedef const libint2::FmEval_Reference core_eval_type; #endif }; + +template <> +struct operator_traits + : public operator_traits { + static constexpr auto nopers = 4; + static constexpr auto intrinsic_deriv_order = 2; +}; +template <> +struct operator_traits + : public operator_traits { + /// 16 components: tensor product of two independent spin-space quaternions + /// index = 4 * bra_spin + ket_spin, where spin in {S=0, X=1, Y=2, Z=3} + static constexpr auto nopers = 16; + static constexpr auto intrinsic_deriv_order = 4; +}; +template <> +struct operator_traits + : public operator_traits { + /// 9 components: Cartesian dyadic of the two (σ·p) directions. + /// index = 3 * a + b, with a = bra-side direction, b = ket-side direction, + /// a,b ∈ {x=0, y=1, z=2}. + static constexpr auto nopers = 9; + static constexpr auto intrinsic_deriv_order = 2; +}; + namespace detail { template struct cgtg_operator_traits : public detail::default_operator_traits { @@ -917,16 +970,16 @@ class Engine { const Shell& ket2, const ShellPair* spbra, const ShellPair* spket); // clang-format off - /** this specifies target precision for computing the integrals, i.e. - * the target absolute (i.e., not relative) error of the integrals. - * It is used to screen out primitive integrals. For some screening - * methods precision can be almost guaranteed (due to finite precision - * of the precomputed interpolation tables used to evaluate the core integrals - * it is not in general possible to guarantee precision rigorously). - * - * @param[in] prec the target precision - * @sa ScreeningMethod - */ + /** this specifies target precision for computing the integrals, i.e. + * the target absolute (i.e., not relative) error of the integrals. + * It is used to screen out primitive integrals. For some screening + * methods precision can be almost guaranteed (due to finite precision + * of the precomputed interpolation tables used to evaluate the core integrals + * it is not in general possible to guarantee precision rigorously). + * + * @param[in] prec the target precision + * @sa ScreeningMethod + */ // clang-format on Engine& set_precision(scalar_type prec) { if (prec <= 0.) { diff --git a/include/libint2/engine.impl.h b/include/libint2/engine.impl.h index b200b6945..01269538d 100644 --- a/include/libint2/engine.impl.h +++ b/include/libint2/engine.impl.h @@ -70,40 +70,44 @@ typename std::remove_all_extents::type* to_ptr1(T (&a)[N]) { /// These MUST appear in the same order as in Operator. /// You must also update BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX when you add /// one-body ints -#define BOOST_PP_NBODY_OPERATOR_LIST \ - (overlap, /* overlap */ \ - (kinetic, /* kinetic */ \ - (elecpot, /* nuclear */ \ - (elecpot, /* erf_nuclear */ \ - (elecpot, /* erfc_nuclear */ \ - (elecpot, /* erfx_nuclear */ \ - (elecpot, /* q_gau */ \ - (1emultipole, /* emultipole1 */ \ - (2emultipole, /* emultipole2 */ \ - (3emultipole, /* emultipole3 */ \ - (sphemultipole, /* sphemultipole */ \ - (opVop, /* opVop */ \ - (opVop, /* op_q_gau_op */ \ - (eri, /* delta */ \ - (eri, /* coulomb */ \ - (eri, /* cgtg */ \ - (eri, /* cgtg_x_coulomb */ \ - (eri, /* delcgtg2 */ \ - (eri, /* r12 */ \ - (eri, /* erf_coulomb */ \ - (eri, /* erfc_coulomb */ \ - (eri, /* erfx_coulomb */ \ - (eri, /* stg */ \ - (eri, /* yukawa */ \ - BOOST_PP_NIL)))))))))))))))))))))))) +#define BOOST_PP_NBODY_OPERATOR_LIST \ + (overlap, /* overlap */ \ + (kinetic, /* kinetic */ \ + (elecpot, /* nuclear */ \ + (elecpot, /* erf_nuclear */ \ + (elecpot, /* erfc_nuclear */ \ + (elecpot, /* erfx_nuclear */ \ + (elecpot, /* q_gau */ \ + (1emultipole, /* emultipole1 */ \ + (2emultipole, /* emultipole2 */ \ + (3emultipole, /* emultipole3 */ \ + (sphemultipole, /* sphemultipole */ \ + (opVop, /* opVop */ \ + (opVop, /* op_q_gau_op */ \ + (oprop, /* oprop */ \ + (eri, /* delta */ \ + (eri, /* coulomb */ \ + (coulomb_opop, /* coulomb_opop */ \ + (opop_coulomb_opop, /* opop_coulomb_opop */ \ + (op_coulomb_op, /* op_coulomb_op */ \ + (eri, /* cgtg */ \ + (eri, /* cgtg_x_coulomb */ \ + (eri, /* delcgtg2 */ \ + (eri, /* r12 */ \ + (eri, /* erf_coulomb */ \ + (eri, /* erfc_coulomb */ \ + (eri, /* erfx_coulomb */ \ + (eri, /* stg */ \ + (eri, /* yukawa */ \ + BOOST_PP_NIL)))))))))))))))))))))))))))) #define BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE \ BOOST_PP_MAKE_TUPLE(BOOST_PP_LIST_SIZE(BOOST_PP_NBODY_OPERATOR_LIST)) #define BOOST_PP_NBODY_OPERATOR_INDEX_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_NBODY_OPERATOR_INDEX_TUPLE) #define BOOST_PP_NBODY_OPERATOR_LAST_ONEBODY_INDEX \ - 12 // op_q_gau_op, the 13th member of BOOST_PP_NBODY_OPERATOR_LIST, is the - // last 1-body operator + 13 // oprop, the 14th member of BOOST_PP_NBODY_OPERATOR_LIST, is the last + // 1-body operator // make list of braket indices for n-body ints #define BOOST_PP_NBODY_BRAKET_INDEX_TUPLE \ @@ -668,23 +672,23 @@ __libint2_engine_inline void Engine::initialize(size_t max_nprim) { // validate braket #ifndef LIBINT_INCLUDE_ONEBODY assert(braket_ != BraKet::x_x && - "this braket type not supported by the library; give --enable-1body " - "to configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT2_ENABLE_ONEBODY >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI assert(braket_ != BraKet::xx_xx && - "this braket type not supported by the library; give --enable-eri to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT2_ENABLE_ERI >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI3 assert((braket_ != BraKet::xs_xx && braket_ != BraKet::xx_xs) && - "this braket type not supported by the library; give --enable-eri3 to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT2_ENABLE_ERI3 >= 0"); #endif #ifndef LIBINT_INCLUDE_ERI2 assert(braket_ != BraKet::xs_xs && - "this braket type not supported by the library; give --enable-eri2 to " - "configure"); + "this braket type not supported by the library; configure with " + "-DLIBINT2_ENABLE_ERI2 >= 0"); #endif // make sure it's no default initialized @@ -706,6 +710,7 @@ __libint2_engine_inline void Engine::initialize(size_t max_nprim) { // target indices. const auto permutable_targets = deriv_order_ > 0 && + (braket_ == BraKet::xx_xx || braket_ == BraKet::xs_xx || braket_ == BraKet::xx_xs); if (permutable_targets) @@ -1050,7 +1055,7 @@ __libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, // } if (oper_ == Operator::emultipole1 || oper_ == Operator::emultipole2 || - oper_ == Operator::emultipole3) { + oper_ == Operator::emultipole3 || oper_ == Operator::oprop) { const auto& O = any_cast< const operator_traits::oper_params_type&>( params_); // same as emultipoleX @@ -1098,7 +1103,8 @@ __libint2_engine_inline void Engine::compute_primdata(Libint_t& primdata, primdata._0_Overlap_0_z[0] = ovlp_ss_z; if (oper_ == Operator::kinetic || (deriv_order_ > 0) || - oper_ == Operator::opVop || oper_ == Operator::op_q_gau_op) { + oper_ == Operator::opVop || oper_ == Operator::op_q_gau_op || + oper_ == Operator::oprop) { #if LIBINT2_DEFINED(eri, two_alpha0_bra) primdata.two_alpha0_bra[0] = 2.0 * alpha1; #endif @@ -1262,21 +1268,96 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( #if LIBINT2_SHELLQUARTET_SET == \ LIBINT2_SHELLQUARTET_SET_STANDARD // standard angular momentum ordering - const auto swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); - const auto swap_tket = (tket1.contr[0].l < tket2.contr[0].l); - const auto swap_braket = - ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l > - tket1.contr[0].l + tket2.contr[0].l)) || - braket_ == BraKet::xx_xs; + bool swap_braket; + bool swap_tbra, swap_tket; + if (oper_ == Operator::opop_coulomb_opop) { + // For σpσpCoulombσpσp: (ab|cd) = (cd|ab) = (ba|dc)* = (dc|ba)* + // Canonical form: lc >= ld (or la >= lb when lc == ld), + // la+lb <= lc+ld (or max(la,lb) <= lc when sums equal) + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + const auto bra_max = std::max(tbra1.contr[0].l, tbra2.contr[0].l); + const auto ket_max = std::max(tket1.contr[0].l, tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (bra_total > ket_total || + (bra_total == ket_total && bra_max > ket_max))) || + braket_ == BraKet::xx_xs; + // Coupled swap: after braket swap, sort the pair that ends up in ket + // position to ensure lc >= ld; when lc == ld, also sort bra (la >= lb) + if (swap_braket) { + // After braket swap: new ket = original bra, new bra = original ket. + // Coupled swap sorts new ket (ensure lc >= ld). + const bool swap_p1p2 = (tbra1.contr[0].l < tbra2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } else { + // No braket swap: ket stays as original ket. + // Coupled swap sorts ket (ensure lc >= ld). + const bool swap_p1p2 = (tket1.contr[0].l < tket2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } + } else if (oper_ == Operator::op_coulomb_op) { + // σpCoulombσp: only bra↔ket (particle 1↔2) swap is a symmetry (with + // (a,b)↔(b,a) component remap). Within-side swap is NOT a symmetry + // because σ·p attaches to one specific function per side; moving it to + // the other function changes the integral in a way IBP cannot recover + // across electrons. Canonical form: la+lb <= lc+ld only. + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + swap_braket = ((braket_ == BraKet::xx_xx) && (bra_total > ket_total)) || + braket_ == BraKet::xx_xs; + swap_tbra = false; + swap_tket = false; + } else { + swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l > + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; + swap_tbra = (tbra1.contr[0].l < tbra2.contr[0].l); + swap_tket = (tket1.contr[0].l < tket2.contr[0].l); + } + + // N.B. cannot swap bra and ket for coulomb_opop since the ket is mutated by + // this operator #else // orca angular momentum ordering - const auto swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); - const auto swap_tket = (tket1.contr[0].l > tket2.contr[0].l); - const auto swap_braket = - ((braket_ == BraKet::xx_xx) && (tbra1.contr[0].l + tbra2.contr[0].l < - tket1.contr[0].l + tket2.contr[0].l)) || - braket_ == BraKet::xx_xs; - assert(false && "feature not implemented"); - abort(); + bool swap_braket; + bool swap_tbra, swap_tket; + if (oper_ == Operator::opop_coulomb_opop) { + // ORCA canonical for σpσpCoulombσpσp: lc <= ld (or la <= lb when lc == ld), + // la+lb >= lc+ld (or min(la,lb) >= lc when sums equal) + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + const auto bra_min = std::min(tbra1.contr[0].l, tbra2.contr[0].l); + const auto ket_min = std::min(tket1.contr[0].l, tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (bra_total < ket_total || + (bra_total == ket_total && bra_min < ket_min))) || + braket_ == BraKet::xx_xs; + if (swap_braket) { + const bool swap_p1p2 = (tbra1.contr[0].l > tbra2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } else { + const bool swap_p1p2 = (tket1.contr[0].l > tket2.contr[0].l); + swap_tbra = swap_tket = swap_p1p2; + } + } else if (oper_ == Operator::op_coulomb_op) { + // σpCoulombσp: only bra↔ket swap is a symmetry (with (a,b)↔(b,a) remap). + // ORCA canonical form: la+lb >= lc+ld only. + const auto bra_total = tbra1.contr[0].l + tbra2.contr[0].l; + const auto ket_total = tket1.contr[0].l + tket2.contr[0].l; + swap_braket = ((braket_ == BraKet::xx_xx) && (bra_total < ket_total)) || + braket_ == BraKet::xx_xs; + swap_tbra = false; + swap_tket = false; + } else { + swap_tbra = (tbra1.contr[0].l > tbra2.contr[0].l); + swap_tket = (tket1.contr[0].l > tket2.contr[0].l); + swap_braket = ((braket_ == BraKet::xx_xx) && + (tbra1.contr[0].l + tbra2.contr[0].l < + tket1.contr[0].l + tket2.contr[0].l) && + (oper_ != Operator::coulomb_opop)) || + braket_ == BraKet::xx_xs; + } #endif const auto& bra1 = swap_braket ? (swap_tket ? tket2 : tket1) : (swap_tbra ? tbra2 : tbra1); @@ -1471,17 +1552,24 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( const scalar_type rho = gammap * gammaq * oogammapq; const scalar_type T = PQ2 * rho; auto* gm_ptr = &(primdata.LIBINT_T_SS_EREP_SS(0)[0]); - const auto mmax = l + deriv_order_; + const auto mmax = l + deriv_order_ + intrinsic_deriv_order(); if (!skip_core_ints) { +// Simple core-eval dispatch: just `core_eval_ptr->eval(gm_ptr, T, mmax)`. +// Applies to Coulomb-family operators whose core integral is the bare Fm. +#define LIBINT2_SIMPLE_CORE_EVAL_CASE(OP) \ + case Operator::OP: { \ + const auto& core_eval_ptr = \ + any_cast&>( \ + core_eval_pack_) \ + .first(); \ + core_eval_ptr->eval(gm_ptr, T, mmax); \ + } break switch (oper_) { - case Operator::coulomb: { - const auto& core_eval_ptr = - any_cast&>(core_eval_pack_) - .first(); - core_eval_ptr->eval(gm_ptr, T, mmax); - } break; + LIBINT2_SIMPLE_CORE_EVAL_CASE(coulomb); + LIBINT2_SIMPLE_CORE_EVAL_CASE(coulomb_opop); + LIBINT2_SIMPLE_CORE_EVAL_CASE(opop_coulomb_opop); + LIBINT2_SIMPLE_CORE_EVAL_CASE(op_coulomb_op); case Operator::cgtg_x_coulomb: { const auto& core_eval_ptr = any_cast 0 || lmax_bra > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0 || lmax_bra > 0) { #if LIBINT2_DEFINED(eri, WP_x) primdata.WP_x[0] = Wx - P[0]; #endif @@ -1712,7 +1801,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( primdata.WP_z[0] = Wz - P[2]; #endif } - if (deriv_order_ > 0 || lmax_ket > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0 || lmax_ket > 0) { #if LIBINT2_DEFINED(eri, WQ_x) primdata.WQ_x[0] = Wx - Q[0]; #endif @@ -1796,7 +1885,7 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( #endif // prefactors for derivative ERI relations - if (deriv_order_ > 0) { + if (deriv_order_ + intrinsic_deriv_order() > 0) { #if LIBINT2_DEFINED(eri, alpha1_rho_over_zeta2) primdata.alpha1_rho_over_zeta2[0] = alpha0 * (oogammap * gammaq_o_gammapgammaq); @@ -1879,7 +1968,8 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( } // compute directly (ss|ss) - const auto compute_directly = lmax == 0 && deriv_order_ == 0; + const auto compute_directly = + lmax == 0 && deriv_order_ == 0 & intrinsic_deriv_order() == 0; if (compute_directly) { #ifdef LIBINT2_ENGINE_TIMERS @@ -1957,8 +2047,10 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( "the angular momentum limit is exceeded"); assert(ket2.contr[0].l <= ket_lmax && "the angular momentum limit is exceeded"); + buildfnidx = (bra1.contr[0].l * ket_lmax + ket1.contr[0].l) * ket_lmax + ket2.contr[0].l; + #ifdef LIBINT_ERI3_PURE_SH if (bra1.contr[0].l > 1) assert(bra1.contr[0].pure && @@ -2141,19 +2233,52 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( tgt_ptr + tgt_col_idx, nr1_tgt, nr2_tgt, Eigen::Stride( nr2_tgt * ncol_tgt, ncol_tgt)); + // Coupled swap sign correction for multi-component operators + Shell::real_t oper_cart_component_phase = 1.0; + if (swap_tket && oper_ == Operator::opop_coulomb_opop) { + const bool bra_is_spin = (s / 4) > 0; + const bool ket_is_spin = (s % 4) > 0; + if (bra_is_spin != ket_is_spin) + oper_cart_component_phase = -1.0; + } + if (swap_tket && oper_ == Operator::coulomb_opop && s > 0) + oper_cart_component_phase = -1.0; + // op_coulomb_op irrep layout under bra↔ket swap: antisym + // components (s ∈ {1,2,3}) flip sign; scalar (0) and sym-TL + // (4..8) are invariant. swap_tket is always false for this + // operator; the sign correction applies on swap_braket alone. + if (oper_ == Operator::op_coulomb_op && s >= 1 && s <= 3) + oper_cart_component_phase = -1.0; if (swap_tbra) - tgt_blk_mat = src_blk_mat.transpose(); + tgt_blk_mat = + oper_cart_component_phase * src_blk_mat.transpose(); else - tgt_blk_mat = src_blk_mat; + tgt_blk_mat = oper_cart_component_phase * src_blk_mat; } else { // source row {r1,r2} is mapped to target row {r1,r2} if // !swap_tbra, else to {r2,r1} const auto tgt_row_idx = !swap_tbra ? r1 * nr2 + r2 : r2 * nr1 + r1; Map tgt_blk_mat(tgt_ptr + tgt_row_idx * ncol, nc1_tgt, nc2_tgt); - if (swap_tket) - tgt_blk_mat = src_blk_mat.transpose(); - else + if (swap_tket) { + Shell::real_t oper_cart_component_phase = 1.0; + if (oper_ == Operator::opop_coulomb_opop) { + // Option A ordering: index = 4*bra + ket + // Coupled swap (a<->b AND c<->d) flips sign when exactly + // one of bra/ket is a cross product (spin != S): + // bra_spin = s/4, ket_spin = s%4 (0=S, 1-3=X/Y/Z) + const bool bra_is_spin = (s / 4) > 0; + const bool ket_is_spin = (s % 4) > 0; + if (bra_is_spin != ket_is_spin) + oper_cart_component_phase = -1.0; + } + if (oper_ == Operator::coulomb_opop && s > 0) + oper_cart_component_phase = + -1.0; // x,y,z quaternion components flip sign on + // swapping ket for coulomb_opop + tgt_blk_mat = + oper_cart_component_phase * src_blk_mat.transpose(); + } else tgt_blk_mat = src_blk_mat; } } // end of loop @@ -2178,7 +2303,20 @@ __libint2_engine_inline const Engine::target_ptr_vec& Engine::compute2( // to primdata_[0].targets targets_[s_target] = source; } - } // loop over shellsets + } // loop over shellsets + + // For opop_coulomb_opop with swap_braket: swapping particles remaps + // component (α,β) → (β,α). With Option A ordering (index=4*bra+ket), + // this is a matrix transpose: s_new = 4*(s%4) + (s/4). + if (permute && oper_ == Operator::opop_coulomb_opop && swap_braket) { + std::array temp; + for (auto s = 0; s != ntargets; ++s) temp[s] = targets_[s]; + for (auto s = 0; s != ntargets; ++s) + targets_[4 * (s % 4) + (s / 4)] = temp[s]; + } + // op_coulomb_op irrep layout: bra↔ket swap is handled in-place by + // oper_cart_component_phase above (sign flip on antisym components, + // identity on scalar / sym-TL); no pointer remap is needed. } // if need_scratch => needed to transpose and/or tform else { // did not use scratch? may still need to update targets_ if (set_targets_) { diff --git a/src/bin/libint/build_libint.cc b/src/bin/libint/build_libint.cc index 8b5ece9ea..67700a630 100644 --- a/src/bin/libint/build_libint.cc +++ b/src/bin/libint/build_libint.cc @@ -31,7 +31,9 @@ */ #include +#include #include +#include #include #include #include @@ -66,6 +68,8 @@ using namespace std; using namespace libint2; +CodeGenProgress g_progress; + enum ShellSetType { ShellSetType_Standard = LIBINT_SHELL_SET_STANDARD, ShellSetType_ORCA = LIBINT_SHELL_SET_ORCA @@ -73,18 +77,39 @@ enum ShellSetType { template struct ShellQuartetSetPredicate { // return true if this set of angular momenta is included - static bool value(int la, int lb, int lc, int ld); + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true); }; + +/** + * standard ordering for angular momenta la, lb, lc, ld + * @param p1p2_swappable whether operator allows swaps of particle 1 and 2 + * functions (e.g., not allowed for Coulombσpσp but allowed + * for Coulomb (TwoPRep)). + * @param bra_ket_coswappable whether need to swap within both bra and ket. + * Not individually swapping of either ket of bra allowed + * ( e.g., for σpσpCoulombσpσp) + */ template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld) { - return la >= lb && lc >= ld && la + lb <= lc + ld; + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, + bool bra_ket_coswappable = false) { + if (bra_ket_coswappable) + return lc >= ld && (la + lb < lc + ld || + (la + lb == lc + ld && std::max(la, lb) <= lc)); + else + return la >= lb && lc >= ld && (!p1p2_swappable || la + lb <= lc + ld); } }; template <> struct ShellQuartetSetPredicate { - static bool value(int la, int lb, int lc, int ld) { - return la <= lb && lc <= ld && (la < lc || (la == lc && lb <= ld)); + static bool value(int la, int lb, int lc, int ld, bool p1p2_swappable = true, + bool bra_ket_coswappable = false) { + if (bra_ket_coswappable) + return lc <= ld && (la + lb > lc + ld || + (la + lb == lc + ld && std::min(la, lb) >= lc)); + else + return la <= lb && lc <= ld && + (!p1p2_swappable || (la < lc || (la == lc && lb <= ld))); } }; template @@ -192,14 +217,20 @@ static void config_to_api(const std::shared_ptr& cparams, #ifdef LIBINT_INCLUDE_ERI #define USE_GENERIC_ERI_BUILD 1 -#if !USE_GENERIC_ERI_BUILD +#endif +#if defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_RKB_ERI) +#if defined(USE_GENERIC_ERI_BUILD) && !USE_GENERIC_ERI_BUILD +template static void build_TwoPRep_2b_2k( - std::ostream& os, const std::shared_ptr& cparams, + std::ostream& os, std::string label, + const std::shared_ptr& cparams, std::shared_ptr& iface); #else +template static void build_TwoPRep_2b_2k( - std::ostream& os, const std::shared_ptr& cparams, - std::shared_ptr& iface, unsigned int deriv_level); + std::ostream& os, std::string label, + const std::shared_ptr& cparams, + std::shared_ptr& iface, unsigned int deriv_level = 0); #endif #endif @@ -249,6 +280,8 @@ struct AuxQuantaType { typedef EmptySet type; }; +} // namespace + template OperDescrType make_descr(int, int = 0, int = 0) { return OperDescrType(); @@ -274,7 +307,21 @@ template <> σpVσp_Descr make_descr<σpVσp_Descr>(int p, int, int) { return σpVσp_Descr(p); } -} // namespace + +template <> +σpRσp_Descr make_descr<σpRσp_Descr>(int p, int, int) { + return σpRσp_Descr(p); +} + +template <> +Coulombσpσp_Descr make_descr(int p, int, int) { + return Coulombσpσp_Descr(p); +} + +template <> +σpσpCoulombσpσp_Descr make_descr<σpσpCoulombσpσp_Descr>(int p, int, int) { + return σpσpCoulombσpσp_Descr(p); +} template void build_onebody_1b_1k(std::ostream& os, std::string label, @@ -403,6 +450,14 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, descrs.emplace_back(make_descr(p)); } } + if (std::is_same<_OperType, σpRσpOper>::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 12 = 3 dipole directions × 4 Pauli components + for (int p = 0; p != 12; ++p) { + descrs.emplace_back(make_descr(p)); + } + } // derivative index is the outermost (slowest running) // operator component is second slowest @@ -480,8 +535,8 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, eval_label = oss.str(); } - std::cout << "working on " << eval_label << " ... "; - std::cout.flush(); + g_progress.current_task = eval_label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; @@ -521,8 +576,6 @@ void build_onebody_1b_1k(std::ostream& os, std::string label, dg->reset(); memman->reset(); - std::cout << "done" << std::endl; - } // end of b loop } // end of a loop } @@ -544,11 +597,11 @@ void try_main(int argc, char* argv[]) { // overlap, kinetic, elecpot cannot be omitted #define BOOST_PP_ONEBODY_TASK_TUPLE \ (overlap, kinetic, elecpot, 1emultipole, 2emultipole, 3emultipole, \ - sphemultipole, opVop) + sphemultipole, opVop, oprop) #define BOOST_PP_ONEBODY_TASK_OPER_TUPLE \ (OverlapOper, KineticOper, ElecPotOper, CartesianMultipoleOper<3u>, \ CartesianMultipoleOper<3u>, CartesianMultipoleOper<3u>, \ - SphericalMultipoleOper, σpVσpOper) + SphericalMultipoleOper, σpVσpOper, σpRσpOper) #define BOOST_PP_ONEBODY_TASK_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_ONEBODY_TASK_TUPLE) #define BOOST_PP_ONEBODY_TASK_OPER_LIST \ @@ -567,6 +620,26 @@ void try_main(int argc, char* argv[]) { taskmgr.add(task_label("eri", d)); } #endif + +#ifdef LIBINT_INCLUDE_RKB_ERI +#define BOOST_PP_RKB_ERI_TASK_TUPLE \ + (coulomb_opop, opop_coulomb_opop, op_coulomb_op) +#define BOOST_PP_RKB_ERI_TASK_OPER_TUPLE \ + (CoulombσpσpOper, σpσpCoulombσpσpOper, σpCoulombσpOper) +#define BOOST_PP_RKB_ERI_TASK_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_TUPLE) +#define BOOST_PP_RKB_ERI_TASK_OPER_LIST \ + BOOST_PP_TUPLE_TO_LIST(BOOST_PP_RKB_ERI_TASK_OPER_TUPLE) + + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR1(r, data, elem) \ + taskmgr.add(task_label(BOOST_PP_STRINGIZE(elem), d)); + + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR1, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR1 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { taskmgr.add(task_label("3eri", d)); @@ -668,6 +741,46 @@ void try_main(int argc, char* argv[]) { cparams->num_bf(task_label("eri", d), 4); } #endif + +#ifdef LIBINT_INCLUDE_RKB_ERI + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#if defined(LIBINT_RKB_ERI_MAX_AM_LIST) +#define BOOST_PP_RKB_ERI_MCR2(r, data, elem) \ + cparams->max_am( \ + task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI_MAX_AM_LIST, ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR2, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR2 +#elif defined(LIBINT_RKB_ERI_MAX_AM) +#define BOOST_PP_RKB_ERI_MCR3(r, data, elem) \ + cparams->max_am(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI_MAX_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR3, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR3 +#endif +#if defined(LIBINT_RKB_ERI_OPT_AM_LIST) +#define BOOST_PP_RKB_ERI_MCR4(r, data, elem) \ + cparams->max_am_opt( \ + task_label(BOOST_PP_STRINGIZE(elem), d), \ + token(LIBINT_RKB_ERI_OPT_AM_LIST, ',', d)); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR4, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR4 +#elif defined(LIBINT_RKB_ERI_OPT_AM) +#define BOOST_PP_RKB_ERI_MCR5(r, data, elem) \ + cparams->max_am_opt(task_label(BOOST_PP_STRINGIZE(elem), d), \ + LIBINT_RKB_ERI_OPT_AM); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR5, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR5 +#endif + } + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR6(r, data, elem) \ + cparams->num_bf(task_label(BOOST_PP_STRINGIZE(elem), d), 4); + BOOST_PP_LIST_FOR_EACH(BOOST_PP_RKB_ERI_MCR6, _, BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR6 + } +#endif // LIBINT_INCLUDE_RKB_ERI + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { #if defined(LIBINT_ERI3_MAX_AM_LIST) @@ -852,6 +965,9 @@ void try_main(int argc, char* argv[]) { #ifdef LIBINT_INCLUDE_ERI max_deriv = std::max(LIBINT_INCLUDE_ERI, max_deriv); #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + max_deriv = std::max(LIBINT_INCLUDE_RKB_ERI, max_deriv); +#endif #ifdef LIBINT_INCLUDE_ERI3 max_deriv = std::max(LIBINT_INCLUDE_ERI3, max_deriv); #endif @@ -867,6 +983,8 @@ void try_main(int argc, char* argv[]) { #endif cparams->print(os); + g_progress.start(); + #ifdef LIBINT_INCLUDE_ONEBODY for (unsigned int d = 0; d <= LIBINT_INCLUDE_ONEBODY; ++d) { #define BOOST_PP_ONEBODY_MCR7(r, data, i, elem) \ @@ -879,13 +997,25 @@ void try_main(int argc, char* argv[]) { #endif #ifdef LIBINT_INCLUDE_ERI #if !USE_GENERIC_ERI_BUILD - build_TwoPRep_2b_2k(os, cparams, iface); + build_TwoPRep_2b_2k(os, "eri", cparams, iface); #else for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI; ++d) { - build_TwoPRep_2b_2k(os, cparams, iface, d); + build_TwoPRep_2b_2k(os, "eri", cparams, iface, d); } #endif #endif + +#ifdef LIBINT_INCLUDE_RKB_ERI + for (unsigned int d = 0; d <= LIBINT_INCLUDE_RKB_ERI; ++d) { +#define BOOST_PP_RKB_ERI_MCR7(r, data, i, elem) \ + build_TwoPRep_2b_2k( \ + os, BOOST_PP_STRINGIZE(elem), cparams, iface, d); + BOOST_PP_LIST_FOR_EACH_I(BOOST_PP_RKB_ERI_MCR7, _, + BOOST_PP_RKB_ERI_TASK_LIST) +#undef BOOST_PP_RKB_ERI_MCR7 + } +#endif + #ifdef LIBINT_INCLUDE_ERI3 for (unsigned int d = 0; d <= LIBINT_INCLUDE_ERI3; ++d) { build_TwoPRep_1b_2k(os, cparams, iface, d); @@ -913,6 +1043,8 @@ void try_main(int argc, char* argv[]) { build_G12DKH_2b_2k(os, cparams, iface); #endif + g_progress.finish(); + // Generate code for the set-level RRs std::deque decl_filenames, def_filenames; generate_rr_code(os, cparams, decl_filenames, def_filenames); @@ -985,15 +1117,26 @@ void print_config(std::ostream& os) { #ifdef LIBINT_INCLUDE_G12DKH os << "Will support G12DKH" << endl; #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + os << "Will support restricted kinetically balance (RKB) 4-center ERIs " + << std::endl; + if (LIBINT_INCLUDE_RKB_ERI > 0) + os << "(deriv order = " << LIBINT_INCLUDE_RKB_ERI << ")"; + os << endl; +#endif } -#ifdef LIBINT_INCLUDE_ERI -void build_TwoPRep_2b_2k(std::ostream& os, - const std::shared_ptr& cparams, - std::shared_ptr& iface, - unsigned int deriv_level) { - const std::string task = task_label("eri", deriv_level); - typedef TwoPRep_11_11_sq TwoPRep_sh_11_11; +#if defined(LIBINT_INCLUDE_ERI) || defined(LIBINT_INCLUDE_RKB_ERI) +template +static void build_TwoPRep_2b_2k( + std::ostream& os, std::string label, + const std::shared_ptr& cparams, + std::shared_ptr& iface, unsigned int deriv_level) { + typedef GenIntegralSet_11_11 TwoBody_sh_11_11; + typedef typename OperType::Descriptor OperDescrType; + + const std::string task = task_label(label, deriv_level); + vector shells; unsigned int lmax = cparams->max_am(task); for (unsigned int l = 0; l <= lmax; l++) { @@ -1005,6 +1148,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, taskmgr.current(task); iface->to_params(iface->macro_define(std::string("MAX_AM_") + task, lmax)); + const auto nullaux = typename TwoBody_sh_11_11::AuxIndexType(0u); // // Construct graphs for each desired target integral and // 1) generate source code for the found traversal path @@ -1019,13 +1163,39 @@ void build_TwoPRep_2b_2k(std::ostream& os, std::shared_ptr context(new CppCodeContext(cparams)); std::shared_ptr memman(new WorstFitMemoryManager()); + // σpCoulombσp has a 2-fold bra↔ket-swap symmetry, with per-component sign + // flips under the swap (Antisym* flip sign; Scalar/SymTL* invariant) — this + // is captured by p1_p2_swappable=true plus a dedicated predicate that + // canonicalizes only la+lb<=lc+ld and emits code for *all* within-side + // orderings (within-side swap is NOT a symmetry — σ·p attaches to one + // specific function per side, and IBP cannot recover the sign across the + // 1/r12 coupling). + bool p1_p2_swappable = !std::is_same::value; + bool bra_ket_coswappable = std::is_same::value; + + // Note: la, lb, lc, ld generate code for chemist notation (ab|O|cd), where O + // is a two-body operator. for (unsigned int la = 0; la <= lmax; la++) { for (unsigned int lb = 0; lb <= lmax; lb++) { for (unsigned int lc = 0; lc <= lmax; lc++) { for (unsigned int ld = 0; ld <= lmax; ld++) { - if (!ShellQuartetSetPredicate( - LIBINT_SHELL_SET)>::value(la, lb, lc, ld)) - continue; + // σpCoulombσp: only bra↔ket (particle 1↔2) swap is a symmetry. + // Within-side swap is NOT (σ·p would move to a different physical + // center; IBP cannot repair the sign across 1/r12). Dedicated + // predicate canonicalizes la+lb<=lc+ld only (ORCA: >=) and accepts + // all within-side orderings. + if constexpr (std::is_same::value) { +#if LIBINT_SHELL_SET == LIBINT_SHELL_SET_STANDARD + if (!(la + lb <= lc + ld)) continue; +#else + if (!(la + lb >= lc + ld)) continue; +#endif + } else { + if (!ShellQuartetSetPredicate( + LIBINT_SHELL_SET)>::value(la, lb, lc, ld, p1_p2_swappable, + bra_ket_coswappable)) + continue; + } // std::shared_ptr tactic(new ParticleDirectionTactic(la+lb > // lc+ld ? false : true)); @@ -1036,13 +1206,46 @@ void build_TwoPRep_2b_2k(std::ostream& os, const int lim = 1; if (!(la == lim && lb == lim && lc == lim && ld == lim)) continue; #endif + // this will hold all target shell sets + std::vector> targets; + + ///////////////////////////////// + // loop over operator components + ///////////////////////////////// + std::vector descrs(1); + if constexpr (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 4 quaternion components (single spin space) + for (int p = 0; p != 4; ++p) { + descrs.emplace_back(OperDescrType(p)); + } + } + if constexpr (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 16 components (tensor product of two spin spaces) + for (int p = 0; p != 16; ++p) { + descrs.emplace_back(OperDescrType(p)); + } + } + if constexpr (std::is_same::value) { + // reset descriptors array + descrs.resize(0); + // iterate over 9 SO(3) irrep components: 1 scalar trace + 3 + // antisym (curl-curl) + 5 sym-traceless + for (int p = 0; p != 9; ++p) { + descrs.emplace_back(OperDescrType(p)); + } + } // unroll only if max_am <= cparams->max_am_opt(task) using std::max; const unsigned int max_am = max(max(la, lb), max(lc, ld)); const bool need_to_optimize = (max_am <= cparams->max_am_opt(task)); + const auto nopers = descrs.size(); const bool need_to_unroll = - l_to_cgshellsize(la) * l_to_cgshellsize(lb) * + nopers * l_to_cgshellsize(la) * l_to_cgshellsize(lb) * l_to_cgshellsize(lc) * l_to_cgshellsize(ld) <= cparams->unroll_threshold(); const unsigned int unroll_threshold = @@ -1050,10 +1253,14 @@ void build_TwoPRep_2b_2k(std::ostream& os, ? std::numeric_limits::max() : 0; dg_xxxx->registry()->unroll_threshold(unroll_threshold); - dg_xxxx->registry()->do_cse(need_to_optimize); - dg_xxxx->registry()->condense_expr(condense_expr( - cparams->unroll_threshold(), cparams->max_vector_length() > 1)); - // dg_xxxx->registry()->condense_expr(true); + // For multi-component operators (RKB), components share no + // intermediates, so CSE/condense_expr is pure overhead — disable. + const bool do_optimize = (nopers > 1) ? false : need_to_optimize; + dg_xxxx->registry()->do_cse(do_optimize); + dg_xxxx->registry()->condense_expr( + do_optimize ? condense_expr(cparams->unroll_threshold(), + cparams->max_vector_length() > 1) + : false); // Need to accumulate integrals? dg_xxxx->registry()->accumulate_targets( cparams->accumulate_targets()); @@ -1067,7 +1274,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, //////////// // NB translational invariance is now handled by CR_DerivGauss CartesianDerivIterator<4> diter(deriv_level); - std::vector> targets; + bool last_deriv = false; do { CGShell a(la); @@ -1084,21 +1291,18 @@ void build_TwoPRep_2b_2k(std::ostream& os, } } - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - targets.push_back(abcd); + // operator component loop + for (unsigned int op = 0; op != descrs.size(); ++op) { + OperType oper(descrs[op]); + + std::shared_ptr abcd = + TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + targets.push_back(abcd); + } + last_deriv = diter.last(); if (!last_deriv) diter.next(); } while (!last_deriv); - // append all derivatives as targets to the graph - for (std::vector>::const_iterator - t = targets.begin(); - t != targets.end(); ++t) { - std::shared_ptr t_ptr = - std::dynamic_pointer_cast(*t); - dg_xxxx->append_target(t_ptr); - } - // make label that characterizes this set of targets // use the label of the nondifferentiated integral as a base std::string abcd_label; @@ -1107,33 +1311,52 @@ void build_TwoPRep_2b_2k(std::ostream& os, CGShell b(lb); CGShell c(lc); CGShell d(ld); - std::shared_ptr abcd = - TwoPRep_sh_11_11::Instance(a, b, c, d, mType(0u)); - abcd_label = abcd->label(); + + if constexpr (std::is_same::value) { + OperType oper; + oper = OperType(descrs[0]); + std::shared_ptr abcd = + TwoBody_sh_11_11::Instance(a, b, c, d, nullaux, oper); + abcd_label = abcd->label(); + } else { + std::ostringstream oss; + oss << "_" << a.label() << "_" << b.label(); + oss << "_" << label; + oss << "_" << c.label() << "_" << d.label(); + abcd_label = oss.str(); + } } // + derivative level (if deriv_level > 0) - std::string label; + std::string eval_label; { - label = ""; + eval_label = ""; if (deriv_level != 0) { std::ostringstream oss; oss << "deriv" << deriv_level; - label += oss.str(); + eval_label += oss.str(); } - label += abcd_label; + eval_label += abcd_label; } - std::cout << "working on " << label << " ... "; - std::cout.flush(); + g_progress.current_task = eval_label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; std::deque def_filenames; - // this will generate code for these targets, and potentially generate - // code for its prerequisites + // append all targets to the graph + for (auto it = targets.begin(); it != targets.end(); ++it) { + std::shared_ptr t_ptr = + std::dynamic_pointer_cast(*it); + dg_xxxx->append_target(t_ptr); + } + + // this will generate code for these targets, and potentially + // generate code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman, - decl_filenames, def_filenames, prefix, label, false); + decl_filenames, def_filenames, prefix, eval_label, + false); // update max stack size and # of targets const std::shared_ptr& tparams = @@ -1147,16 +1370,14 @@ void build_TwoPRep_2b_2k(std::ostream& os, ostringstream oss; oss << context->label_to_function_name("libint2_build_" + task) << "[" << la << "][" << lb << "][" << lc << "][" << ld - << "] = " << context->label_to_function_name(label) + << "] = " << context->label_to_function_name(eval_label) << context->end_of_stat() << endl; iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = - decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } @@ -1166,7 +1387,8 @@ void build_TwoPRep_2b_2k(std::ostream& os, dg_xxxx->reset(); memman->reset(); - std::cout << "done" << std::endl; + ++g_progress.done; + g_progress.print(); } // end of d loop } // end of c loop @@ -1174,7 +1396,7 @@ void build_TwoPRep_2b_2k(std::ostream& os, } // end of a loop } -#endif // LIBINT_INCLUDE_ERI +#endif // LIBINT_INCLUDE_ERI || LIBINT_INCLUDE_RKB_ERI #ifdef LIBINT_INCLUDE_ERI3 @@ -1334,8 +1556,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, label += abcd_label; } - std::cout << "working on " << label << " ... "; - std::cout.flush(); + g_progress.current_task = label; + g_progress.print(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; @@ -1363,10 +1585,9 @@ void build_TwoPRep_1b_2k(std::ostream& os, iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } @@ -1375,7 +1596,8 @@ void build_TwoPRep_1b_2k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - + ++g_progress.done; + g_progress.print(); } // end of d loop } // end of c loop } // end of bra loop @@ -1532,15 +1754,16 @@ void build_TwoPRep_1b_1k(std::ostream& os, label += abcd_label; } - std::cout << "working on " << label << " ... "; + g_progress.current_task = label; + g_progress.print(); std::cout.flush(); std::string prefix(cparams->source_directory()); std::deque decl_filenames; std::deque def_filenames; - // this will generate code for this targets, and potentially generate code - // for its prerequisites + // this will generate code for this targets, and potentially generate + // code for its prerequisites GenerateCode(dg_xxx, context, cparams, strat, tactic, memman, decl_filenames, def_filenames, prefix, label, false); @@ -1549,7 +1772,8 @@ void build_TwoPRep_1b_1k(std::ostream& os, taskmgr.current().params(); tparams->max_stack_size(max_am, memman->max_memory_used()); tparams->max_ntarget(targets.size()); - // os << " Max memory used = " << memman->max_memory_used() << std::endl; + // os << " Max memory used = " << memman->max_memory_used() << + // std::endl; // set pointer to the top-level evaluator function ostringstream oss; @@ -1560,10 +1784,9 @@ void build_TwoPRep_1b_1k(std::ostream& os, iface->to_static_init(oss.str()); // need to declare this function internally - for (std::deque::const_iterator i = decl_filenames.begin(); - i != decl_filenames.end(); ++i) { + for (auto& decl_filename : decl_filenames) { oss.str(""); - oss << "#include <" << *i << ">" << endl; + oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); } @@ -1572,7 +1795,6 @@ void build_TwoPRep_1b_1k(std::ostream& os, #endif dg_xxx->reset(); memman->reset(); - } // end of ket loop } // end of bra loop } @@ -1741,8 +1963,8 @@ void build_R12kG12_2b_2k(std::ostream& os, std::deque decl_filenames; std::deque def_filenames; - // this will generate code for this targets, and potentially generate - // code for its prerequisites + // this will generate code for this targets, and potentially + // generate code for its prerequisites GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman, decl_filenames, def_filenames, prefix, label, false); @@ -2094,11 +2316,11 @@ void build_G12DKH_2b_2k(std::ostream& os, oss << "#include <" << decl_filename << ">" << endl; iface->to_int_iface(oss.str()); - // For the most expensive (i.e. presumably complete) graph extract all - // precomputed quantities -- these will be members of the evaluator - // structure also extract all RRs -- need to keep track of these to - // figure out which external symbols appearing in RR code belong to - // this task also + // For the most expensive (i.e. presumably complete) graph extract + // all precomputed quantities -- these will be members of the + // evaluator structure also extract all RRs -- need to keep track of + // these to figure out which external symbols appearing in RR code + // belong to this task also if (la == lmax && lb == lmax && lc == lmax && ld == lmax) extract_symbols(dg_xxxx); @@ -2136,6 +2358,12 @@ void config_to_api(const std::shared_ptr& cparams, iface->to_params(iface->macro_define("DERIV_ERI_ORDER", LIBINT_INCLUDE_ERI)); max_deriv_order = std::max(max_deriv_order, LIBINT_INCLUDE_ERI); #endif +#ifdef LIBINT_INCLUDE_RKB_ERI + iface->to_params(iface->macro_define("SUPPORT_RKB_ERI", 1)); + iface->to_params( + iface->macro_define("DERIV_RKB_ERI_ORDER", LIBINT_INCLUDE_RKB_ERI)); + max_deriv_order = std::max(max_deriv_order, LIBINT_INCLUDE_RKB_ERI); +#endif #ifdef LIBINT_INCLUDE_ERI3 iface->to_params(iface->macro_define("SUPPORT_ERI3", 1)); iface->to_params( @@ -2165,7 +2393,8 @@ void config_to_api(const std::shared_ptr& cparams, // generated tasks declare all tasks in a range of valid tasks as defined or // not LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); - // the range is defined by max # of centers, max deriv order, and operator set + // the range is defined by max # of centers, max deriv order, and operator + // set const size_t max_ncenter = 4; for (unsigned int ncenter = 0; ncenter <= max_ncenter; ++ncenter) { std::stringstream oss; @@ -2191,8 +2420,9 @@ void config_to_api(const std::shared_ptr& cparams, { // 2-body ints -#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ - ("eri", "r12kg12", "r12_0_g12", "r12_2_g12", "g12_T1_g12", "g12dkh") +#define BOOST_PP_TWOBODY_TASKOPER_TUPLE \ + ("eri", "coulomb_opop", "opop_coulomb_opop", "op_coulomb_op", "r12kg12", \ + "r12_0_g12", "r12_2_g12", "g12_T1_g12", "g12dkh") #define BOOST_PP_TWOBODY_TASKOPER_LIST \ BOOST_PP_TUPLE_TO_LIST(BOOST_PP_TWOBODY_TASKOPER_TUPLE) diff --git a/src/bin/libint/buildtest.h b/src/bin/libint/buildtest.h index a4923c022..0ea5b571e 100644 --- a/src/bin/libint/buildtest.h +++ b/src/bin/libint/buildtest.h @@ -30,13 +30,57 @@ #include #include +#include #include #include +#include #include #include #include #include +/// Progress tracker for code generation. +struct CodeGenProgress { + unsigned int done = 0; + std::string current_task; + std::chrono::steady_clock::time_point start_time; + bool started = false; + + void start() { + start_time = std::chrono::steady_clock::now(); + started = true; + } + + void print() const { + if (!started) return; + static const char spinner[] = "|/-\\"; + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time) + .count(); + const auto mins = elapsed / 60; + const auto secs = elapsed % 60; + std::cerr << "\r " << spinner[done % 4] << " " << std::setfill('0') + << std::setw(2) << mins << ":" << std::setw(2) << secs + << std::setfill(' ') << " [" << done << " functions generated] " + << current_task << " " << std::flush; + } + + void finish() { + if (!started) return; + const auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time) + .count(); + const auto mins = elapsed / 60; + const auto secs = elapsed % 60; + std::cerr << "\r done " << std::setfill('0') << std::setw(2) << mins + << ":" << std::setw(2) << secs << std::setfill(' ') << " [" + << done << " functions generated]" + << " " << std::endl; + started = false; + } +}; +extern CodeGenProgress g_progress; + namespace libint2 { // defined in buildtest.cc @@ -263,7 +307,6 @@ void GenerateCode(const std::shared_ptr& dg, // if there are missing prerequisites -- make a list of them PrerequisitesExtractor pe; if (dg->missing_prerequisites()) { - // std::cout << "missing some prerequisites!" << std::endl; dg->foreach (pe); } std::deque > prereq_list = pe.vertices; @@ -296,6 +339,11 @@ void GenerateCode(const std::shared_ptr& dg, // extract all external symbols extract_symbols(dg); + // Update progress + ++g_progress.done; + g_progress.current_task = label; + g_progress.print(); + #if PRINT_DAG_GRAPHVIZ { std::basic_ofstream dotfile(dg->label() + ".symb.dot"); diff --git "a/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" new file mode 100644 index 000000000..db28cb7ea --- /dev/null +++ "b/src/bin/libint/comp_11_Coulomb\317\203p\317\203p_11.h" @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef LIBINT_COMP_11_COULOMBΣPΣP_11_H +#define LIBINT_COMP_11_COULOMBΣPΣP_11_H + +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** + * this computes integral of + * \f$ \frac{1}{r_{ij}} \sigma \cdot \hat{p}_1 \sigma \cdot \hat{p}_2 \f$ over + * CGShell/CGF by rewriting it as a linear combination of integrals over + * derivatives of \frac{1}{r_{ij}} + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_Coulombσpσp_11 + : public GenericRecurrenceRelation< + CR_11_Coulombσpσp_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_Coulombσpσp_11 ThisType; + typedef F BasisFunctionType; + typedef CoulombσpσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; // TODO figure out + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by Instance that maintains + /// registry of these objects + CR_11_Coulombσpσp_11(const std::shared_ptr&, unsigned int = 0); + + static std::string descr() { return "CR"; } + + // --- Code sharing overrides --- + // All shell combos with the same quaternion component share one function. + + std::string generate_label() const override { + return "CR_Coulombopop_" + + std::to_string(target_->oper()->descr().quaternion_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Hand-emit a simple element-wise loop function. + /// comp 0: target = src0 + src1 + src2 (dot product) + /// comp 1-3: target = src0 - src1 (cross product components) + void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + // declare_function lives in dg.cc + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + // inline assign_symbols_: set symbol names on target/children and + // populate CodeSymbols + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + const unsigned int nc = this->num_children(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsi 1) ? nc - 1 : 0; + def << "/** Number of flops = " << nflops << " */\n"; + def << context->close_block() << std::endl; + def << context->code_postfix(); + } +}; + +template +CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11( + const std::shared_ptr& Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto& oper = Tint->oper(); + + if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + F c_x{c}; + c_x.deriv().inc(x); // d(c)/dx = c_x + F c_y{c}; + c_y.deriv().inc(y); // d(c)/dy = c_y + F c_z{c}; + c_z.deriv().inc(z); // d(c)/dz = c_z + + F d_x{d}; + d_x.deriv().inc(x); // d(d)/dx = d_x + F d_y{d}; + d_y.deriv().inc(y); // d(d)/dy = d_y + F d_z{d}; + d_z.deriv().inc(z); // d(d)/dz = d_z + + // Component wise generation for quaternion ( a b | 1/r12 | (σ.p) c (σ.p) d ) + switch (oper->descr().quaternion_index()) { + case 0: { + // zeroth component = (a b | c_x d_x) + (a b | c_y d_y) + (a b | c_z d_z) + auto a_b_cx_dx = factory.make_child(a, b, c_x, d_x, zero_m); + auto a_b_cy_dy = factory.make_child(a, b, c_y, d_y, zero_m); + auto a_b_cz_dz = factory.make_child(a, b, c_z, d_z, zero_m); + if (is_simple()) { + expr_ = a_b_cx_dx + a_b_cy_dy + a_b_cz_dz; + nflops_ += 2; + } + } break; + case 1: { + // x component = (a b | c_y d_z) - (a b | c_z d_y) + auto a_b_cy_dz = factory.make_child(a, b, c_y, d_z, zero_m); + auto a_b_cz_dy = factory.make_child(a, b, c_z, d_y, zero_m); + if (is_simple()) { + expr_ = a_b_cy_dz - a_b_cz_dy; + nflops_ += 1; + } + } break; + case 2: { + // y component = (a b | c_z d_x) - (a b | c_x d_z) + auto a_b_cz_dx = factory.make_child(a, b, c_z, d_x, zero_m); + auto a_b_cx_dz = factory.make_child(a, b, c_x, d_z, zero_m); + if (is_simple()) { + expr_ = a_b_cz_dx - a_b_cx_dz; + nflops_ += 1; + } + } break; + case 3: { + // z component = (a b | c_x d_y) - (a b | c_y d_x) + auto a_b_cx_dy = factory.make_child(a, b, c_x, d_y, zero_m); + auto a_b_cy_dx = factory.make_child(a, b, c_y, d_x, zero_m); + if (is_simple()) { + expr_ = a_b_cx_dy - a_b_cy_dx; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error( + "CR_11_Coulombσpσp_11: invalid quaternionic index"); + } + +} // CR_11_Coulombσpσp_11::CR_11_Coulombσpσp_11 +}; // namespace libint2 + +#endif // LIBINT_COMP_11_COULOMBΣPΣP_11_H diff --git "a/src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" new file mode 100644 index 000000000..7391d3d31 --- /dev/null +++ "b/src/bin/libint/comp_11_\317\203pCoulomb\317\203p_11.h" @@ -0,0 +1,317 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef LIBINT_COMP_11_ΣPCOULOMBΣP_11_H +#define LIBINT_COMP_11_ΣPCOULOMBΣP_11_H + +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** + * Computes the "Gaunt LS bilinear" integral + * \f$ (\mu\, \sigma\cdot\hat{p}\,\nu | 1/r_{12} | \kappa\, + * \sigma\cdot\hat{p}\,\lambda ) \f$ in the SO(3) irreducible decomposition of + * the rank-2 tensor \f$ T_{ab} = ( \mu \cdot \partial_a \nu | 1/r_{12} | \kappa + * \cdot \partial_b \lambda ) \f$: 1 scalar trace + 3 antisymmetric (curl-curl) + * + 5 symmetric-traceless = 9 components total. Each output is a small linear + * combination of raw deriv-TwoPRep children, mirroring + * comp_11_Coulombσpσp_11.h's pattern of trace/antisym emission. + * + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_σpCoulombσp_11 + : public GenericRecurrenceRelation< + CR_11_σpCoulombσp_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_σpCoulombσp_11 ThisType; + typedef F BasisFunctionType; + typedef σpCoulombσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 3; + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by Instance that maintains + /// registry of these objects + CR_11_σpCoulombσp_11(const std::shared_ptr&, unsigned int = 0); + + static std::string descr() { return "CR"; } + + // --- Code sharing overrides (mirror Coulombσpσp pattern) --- + // All shell quartets with the same quaternion component share one function. + + std::string generate_label() const override { + return "CR_σpCoulombσp_" + + std::to_string(target_->oper()->descr().component_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Hand-emit the per-component irrep linear combination over deriv-ERI + /// children. The combination depends on the target's component index. + void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsitarget_->oper()->descr().component_index(); + std::string rhs; + unsigned int nflops = 0; + switch (comp) { + case σpCoulombσp_Descr::Scalar: + rhs = "src0[hsi] + src1[hsi] + src2[hsi]"; + nflops = 2; + break; + case σpCoulombσp_Descr::AntisymX: + case σpCoulombσp_Descr::AntisymY: + case σpCoulombσp_Descr::AntisymZ: + case σpCoulombσp_Descr::SymTLDiagA: + rhs = "src0[hsi] - src1[hsi]"; + nflops = 1; + break; + case σpCoulombσp_Descr::SymTLDiagB: + rhs = "2.0*src0[hsi] - src1[hsi] - src2[hsi]"; + nflops = 3; + break; + case σpCoulombσp_Descr::SymTLOffXY: + case σpCoulombσp_Descr::SymTLOffXZ: + case σpCoulombσp_Descr::SymTLOffYZ: + rhs = "src0[hsi] + src1[hsi]"; + nflops = 1; + break; + default: + throw std::runtime_error( + "CR_11_σpCoulombσp_11::generate_code: invalid component index"); + } + def << "target[hsi] = " << rhs << ";\n}\n"; + def << "/** Number of flops = " << nflops << " */\n"; + def << context->close_block() << std::endl; + def << context->code_postfix(); + } +}; + +template +CR_11_σpCoulombσp_11::CR_11_σpCoulombσp_11( + const std::shared_ptr& Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto& oper = Tint->oper(); + + if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + // Chemist notation: (a b | op c op d). σ·p acts on one function per electron + // — direction `i` on ket(0,0) = b (electron 1), direction `j` on ket(1,0) = d + // (electron 2). Each output is an SO(3) irrep combination of the raw 3×3 + // T_{ij} dyadic; case bodies build only the children each combination needs. + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + auto T = [&](int i, int j) { + F b_d{b}; + b_d.deriv().inc(i); + F d_d{d}; + d_d.deriv().inc(j); + return factory.make_child(a, b_d, c, d_d, zero_m); + }; + + switch (oper->descr().component_index()) { + case σpCoulombσp_Descr::Scalar: { + auto Txx = T(x, x); + auto Tyy = T(y, y); + auto Tzz = T(z, z); + if (is_simple()) { + expr_ = Txx + Tyy + Tzz; + nflops_ += 2; + } + } break; + case σpCoulombσp_Descr::AntisymX: { + auto Tyz = T(y, z); + auto Tzy = T(z, y); + if (is_simple()) { + expr_ = Tyz - Tzy; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::AntisymY: { + auto Tzx = T(z, x); + auto Txz = T(x, z); + if (is_simple()) { + expr_ = Tzx - Txz; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::AntisymZ: { + auto Txy = T(x, y); + auto Tyx = T(y, x); + if (is_simple()) { + expr_ = Txy - Tyx; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::SymTLDiagA: { + auto Txx = T(x, x); + auto Tyy = T(y, y); + if (is_simple()) { + expr_ = Txx - Tyy; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::SymTLDiagB: { + // 2·T_zz − T_xx − T_yy: child order (Tzz, Txx, Tyy) matches generate_code + auto Tzz = T(z, z); + auto Txx = T(x, x); + auto Tyy = T(y, y); + if (is_simple()) { + expr_ = Scalar(2) * Tzz - Txx - Tyy; + nflops_ += 3; + } + } break; + case σpCoulombσp_Descr::SymTLOffXY: { + auto Txy = T(x, y); + auto Tyx = T(y, x); + if (is_simple()) { + expr_ = Txy + Tyx; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::SymTLOffXZ: { + auto Txz = T(x, z); + auto Tzx = T(z, x); + if (is_simple()) { + expr_ = Txz + Tzx; + nflops_ += 1; + } + } break; + case σpCoulombσp_Descr::SymTLOffYZ: { + auto Tyz = T(y, z); + auto Tzy = T(z, y); + if (is_simple()) { + expr_ = Tyz + Tzy; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error("CR_11_σpCoulombσp_11: invalid component index"); + } + +} // CR_11_σpCoulombσp_11::CR_11_σpCoulombσp_11 + +} // namespace libint2 + +#endif // LIBINT_COMP_11_ΣPCOULOMBΣP_11_H diff --git "a/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" new file mode 100644 index 000000000..64d1fdee6 --- /dev/null +++ "b/src/bin/libint/comp_11_\317\203p\317\203pCoulomb\317\203p\317\203p_11.h" @@ -0,0 +1,471 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H +#define LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H + +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** + * Computes integral of + * \f$ (\sigma_1 \cdot \hat{p}_a)(\sigma_1 \cdot \hat{p}_b) + * \frac{1}{r_{12}} + * (\sigma_2 \cdot \hat{p}_c)(\sigma_2 \cdot \hat{p}_d) \f$ + * over CGShell/CGF by rewriting it as a linear combination of integrals + * over derivatives of \f$ \frac{1}{r_{12}} \f$. + * + * The two sigma operators act on independent spin spaces (electron 1 and + * electron 2). Using the Dirac identity (see e.g. Eq. 1.27 of I. P. Grant, + * "Relativistic Quantum Theory of Atoms and Molecules", Springer, 2007): + * \f$ (\sigma \cdot a)(\sigma \cdot b) = (a \cdot b)I + * + i\sigma \cdot (a \times b) \f$ + * applied independently to each particle's spin space gives a tensor product + * of two quaternions with \f$ 4 \times 4 = 16 \f$ components: + * + * index = 4 * bra_spin_index + ket_spin_index + * + * where spin indices are: 0=S (scalar/dot product), 1=X, 2=Y, 3=Z + * (cross product components). + * + * The 16 components map to: + * T1 (index 0): SS = (a.b)(c.d) [scalar x scalar] + * T2 (indices 1-3): SX,SY,SZ = (a.b)(cxd)_{x,y,z} [scalar x spin] + * T3 (indices 4-6): XS,YS,ZS = (axb)_{x,y,z}(c.d) [spin x scalar] + * T4 (indices 7-15): XX..ZZ = -(axb)_i(cxd)_j [spin x spin] + * + * Sign convention: T4 components include the minus sign from \f$ i^2 = -1 \f$, + * arising from the product of two \f$ i \f$ factors in the Dirac identity: + * \f$ [i\sigma_1 \cdot (a \times b)] \otimes [i\sigma_2 \cdot (c \times d)] + * = -\sigma_{1,i} \otimes \sigma_{2,j}\; (a \times b)_i\, (c \times d)_j + * \f$ + * + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_11_σpσpCoulombσpσp_11 + : public GenericRecurrenceRelation< + CR_11_σpσpCoulombσpσp_11, F, + GenIntegralSet_11_11> { + public: + typedef CR_11_σpσpCoulombσpσp_11 ThisType; + typedef F BasisFunctionType; + typedef σpσpCoulombσpσpOper OperType; + typedef GenIntegralSet_11_11 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; // TODO figure out + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + /// Constructor is private, used by Instance that maintains + /// registry of these objects + CR_11_σpσpCoulombσpσp_11(const std::shared_ptr&, + unsigned int = 0); + + static std::string descr() { return "CR"; } + + // --- Code sharing overrides --- + // All shell combos with the same quaternion component share one function. + + std::string generate_label() const override { + return "CR_opopCoulombopop_" + + std::to_string(target_->oper()->descr().quaternion_index()); + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override { + std::ostringstream os; + os << context->label_to_function_name(this->label()) << "(inteval, " + << context->value_to_pointer(this->rr_target()->symbol()); + const unsigned int nc = this->num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(this->rr_child(c)->symbol()); + } + // total_dim = product of all shell dims (all 4 shells are spectators) + unsigned int total_dim = 1; + for (unsigned int p = 0; p < 2; p++) { + SubIterator* si = target_->bra().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + si = target_->ket().member_subiter(p, 0); + total_dim *= si->num_iter(); + delete si; + } + os << "," << total_dim; + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(total_dim); + os << ")" << context->end_of_stat() << std::endl; + return os.str(); + } + + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override { + auto high_dim = std::make_shared>("highdim"); + return std::make_shared(high_dim, dims->low(), + dims->vecdim()); + } + + /// Hand-emit a simple element-wise loop function. + /// Cannot use S-shell dummy because TwoPRep particle-swap canonicalization + /// deduplicates children (e.g., (S_x S_x|S_y S_y) = (S_y S_y|S_x S_x)), + /// giving fewer children than the real instance. + void generate_code(const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::string& funcname, std::ostream& decl, + std::ostream& def) override { + // declare_function lives in dg.cc + extern std::string declare_function( + const std::shared_ptr& context, + const std::shared_ptr& dims, + const std::shared_ptr& args, const std::string& tlabel, + const std::string& function_descr, std::ostream& decl); + + std::shared_ptr localdims = adapt_dims_(dims); + // inline assign_symbols_: set symbol names on target/children and + // populate CodeSymbols + std::shared_ptr symbols(new CodeSymbols); + this->rr_target()->set_symbol("target"); + symbols->append_symbol("target"); + for (unsigned int c = 0; c < this->num_children(); c++) { + std::string symb = "src" + std::to_string(c); + this->rr_child(c)->set_symbol(symb); + symbols->append_symbol(symb); + } + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + const std::string tlabel = taskmgr.current().label(); + const std::string func_decl = + declare_function(context, localdims, symbols, tlabel, funcname, decl); + def << context->std_header(); + def << "#include <" << context->label_to_name(funcname) << ".h>\n\n"; + def << context->code_prefix(); + def << func_decl << context->open_block() << std::endl; + def << context->std_function_header(); + // Sign patterns for each component, indexed by child order in constructor. + // comp 0 (SS): 9 children, all +1 + // comp 1-3 (SX,SY,SZ): 6 children, alternating +1,-1 + // comp 4,8,12 (XS,YS,ZS): 6 children, alternating +1,-1 + // comp 5-7,9-11,13-15 (XX..ZZ): 4 children, pattern -1,+1,+1,-1 + const unsigned int nc = this->num_children(); + def << "#ifdef __INTEL_COMPILER\n#pragma ivdep\n#endif\n"; + def << "for(int hsi = 0; hsi 1) ? nc - 1 : 0; + def << "/** Number of flops = " << nflops << " */\n"; + def << context->close_block() << std::endl; + def << context->code_postfix(); + } +}; + +template +CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11( + const std::shared_ptr& Tint, unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_bra(/* particle */ 1) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 1) == 1); + + F a(Tint->bra(0, 0)); + F b(Tint->ket(0, 0)); + F c(Tint->bra(1, 0)); + F d(Tint->ket(1, 0)); + + const auto& oper = Tint->oper(); + + if (a.contracted() || b.contracted() || c.contracted() || d.contracted()) + return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + const mType zero_m(0u); + + ChildFactory> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + auto mc = [&](const int r1, const int r2, const int r3, const int r4) { + F a_r1{a}; + a_r1.deriv().inc(r1); + F b_r2{b}; + b_r2.deriv().inc(r2); + F c_r3{c}; + c_r3.deriv().inc(r3); + F d_r4{d}; + d_r4.deriv().inc(r4); + return factory.make_child(a_r1, b_r2, c_r3, d_r4, zero_m); + }; + + // 16-component generation for two independent spin spaces: + // ( (σ₁.p) a (σ₁.p) b | 1/r12 | (σ₂.p) c (σ₂.p) d ) + // + // Option A (tensor product) ordering: index = 4 * bra_spin + ket_spin + // bra_spin = index / 4, ket_spin = index % 4 + // spin indices: S=0, X=1, Y=2, Z=3 + // + // Row bra=S: 0=SS, 1=SX, 2=SY, 3=SZ (T1 + T2) + // Row bra=X: 4=XS, 5=XX, 6=XY, 7=XZ (T3 + T4) + // Row bra=Y: 8=YS, 9=YX, 10=YY, 11=YZ (T3 + T4) + // Row bra=Z: 12=ZS, 13=ZX, 14=ZY, 15=ZZ (T3 + T4) + // + // T4 components include minus sign from i^2 = -1. + switch (oper->descr().quaternion_index()) { + // ===== 0: SS = (a.b)(c.d) ===== + case 0: { + auto xxxx = mc(x, x, x, x); + auto xxyy = mc(x, x, y, y); + auto xxzz = mc(x, x, z, z); + auto yyxx = mc(y, y, x, x); + auto yyyy = mc(y, y, y, y); + auto yyzz = mc(y, y, z, z); + auto zzxx = mc(z, z, x, x); + auto zzyy = mc(z, z, y, y); + auto zzzz = mc(z, z, z, z); + if (is_simple()) { + expr_ = xxxx + xxyy + xxzz + yyxx + yyyy + yyzz + zzxx + zzyy + zzzz; + nflops_ += 8; + } + } break; + // ===== 1: SX = (a.b)(c×d)_x ===== + case 1: { + auto xxyz = mc(x, x, y, z); + auto xxzy = mc(x, x, z, y); + auto yyyz = mc(y, y, y, z); + auto yyzy = mc(y, y, z, y); + auto zzyz = mc(z, z, y, z); + auto zzzy = mc(z, z, z, y); + if (is_simple()) { + expr_ = xxyz - xxzy + yyyz - yyzy + zzyz - zzzy; + nflops_ += 5; + } + } break; + // ===== 2: SY = (a.b)(c×d)_y ===== + case 2: { + auto xxzx = mc(x, x, z, x); + auto xxxz = mc(x, x, x, z); + auto yyzx = mc(y, y, z, x); + auto yyxz = mc(y, y, x, z); + auto zzzx = mc(z, z, z, x); + auto zzxz = mc(z, z, x, z); + if (is_simple()) { + expr_ = xxzx - xxxz + yyzx - yyxz + zzzx - zzxz; + nflops_ += 5; + } + } break; + // ===== 3: SZ = (a.b)(c×d)_z ===== + case 3: { + auto xxxy = mc(x, x, x, y); + auto xxyx = mc(x, x, y, x); + auto yyxy = mc(y, y, x, y); + auto yyyx = mc(y, y, y, x); + auto zzxy = mc(z, z, x, y); + auto zzyx = mc(z, z, y, x); + if (is_simple()) { + expr_ = xxxy - xxyx + yyxy - yyyx + zzxy - zzyx; + nflops_ += 5; + } + } break; + // ===== 4: XS = (a×b)_x(c.d) ===== + case 4: { + auto yzxx = mc(y, z, x, x); + auto zyxx = mc(z, y, x, x); + auto yzyy = mc(y, z, y, y); + auto zyyy = mc(z, y, y, y); + auto yzzz = mc(y, z, z, z); + auto zyzz = mc(z, y, z, z); + if (is_simple()) { + expr_ = yzxx - zyxx + yzyy - zyyy + yzzz - zyzz; + nflops_ += 5; + } + } break; + // ===== 5: XX = -(a×b)_x(c×d)_x (minus from i²=-1) ===== + case 5: { + auto yzyz = mc(y, z, y, z); + auto yzzy = mc(y, z, z, y); + auto zyyz = mc(z, y, y, z); + auto zyzy = mc(z, y, z, y); + if (is_simple()) { + expr_ = yzzy - yzyz + zyyz - zyzy; + nflops_ += 3; + } + } break; + // ===== 6: XY = -(a×b)_x(c×d)_y ===== + case 6: { + auto yzzx = mc(y, z, z, x); + auto yzxz = mc(y, z, x, z); + auto zyzx = mc(z, y, z, x); + auto zyxz = mc(z, y, x, z); + if (is_simple()) { + expr_ = yzxz - yzzx + zyzx - zyxz; + nflops_ += 3; + } + } break; + // ===== 7: XZ = -(a×b)_x(c×d)_z ===== + case 7: { + auto yzxy = mc(y, z, x, y); + auto yzyx = mc(y, z, y, x); + auto zyxy = mc(z, y, x, y); + auto zyyx = mc(z, y, y, x); + if (is_simple()) { + expr_ = yzyx - yzxy + zyxy - zyyx; + nflops_ += 3; + } + } break; + // ===== 8: YS = (a×b)_y(c.d) ===== + case 8: { + auto zxxx = mc(z, x, x, x); + auto xzxx = mc(x, z, x, x); + auto zxyy = mc(z, x, y, y); + auto xzyy = mc(x, z, y, y); + auto zxzz = mc(z, x, z, z); + auto xzzz = mc(x, z, z, z); + if (is_simple()) { + expr_ = zxxx - xzxx + zxyy - xzyy + zxzz - xzzz; + nflops_ += 5; + } + } break; + // ===== 9: YX = -(a×b)_y(c×d)_x ===== + case 9: { + auto zxyz = mc(z, x, y, z); + auto zxzy = mc(z, x, z, y); + auto xzyz = mc(x, z, y, z); + auto xzzy = mc(x, z, z, y); + if (is_simple()) { + expr_ = zxzy - zxyz + xzyz - xzzy; + nflops_ += 3; + } + } break; + // ===== 10: YY = -(a×b)_y(c×d)_y ===== + case 10: { + auto zxzx = mc(z, x, z, x); + auto zxxz = mc(z, x, x, z); + auto xzzx = mc(x, z, z, x); + auto xzxz = mc(x, z, x, z); + if (is_simple()) { + expr_ = zxxz - zxzx + xzzx - xzxz; + nflops_ += 3; + } + } break; + // ===== 11: YZ = -(a×b)_y(c×d)_z ===== + case 11: { + auto zxxy = mc(z, x, x, y); + auto zxyx = mc(z, x, y, x); + auto xzxy = mc(x, z, x, y); + auto xzyx = mc(x, z, y, x); + if (is_simple()) { + expr_ = zxyx - zxxy + xzxy - xzyx; + nflops_ += 3; + } + } break; + // ===== 12: ZS = (a×b)_z(c.d) ===== + case 12: { + auto xyxx = mc(x, y, x, x); + auto yxxx = mc(y, x, x, x); + auto xyyy = mc(x, y, y, y); + auto yxyy = mc(y, x, y, y); + auto xyzz = mc(x, y, z, z); + auto yxzz = mc(y, x, z, z); + if (is_simple()) { + expr_ = xyxx - yxxx + xyyy - yxyy + xyzz - yxzz; + nflops_ += 5; + } + } break; + // ===== 13: ZX = -(a×b)_z(c×d)_x ===== + case 13: { + auto xyyz = mc(x, y, y, z); + auto xyzy = mc(x, y, z, y); + auto yxyz = mc(y, x, y, z); + auto yxzy = mc(y, x, z, y); + if (is_simple()) { + expr_ = xyzy - xyyz + yxyz - yxzy; + nflops_ += 3; + } + } break; + // ===== 14: ZY = -(a×b)_z(c×d)_y ===== + case 14: { + auto xyzx = mc(x, y, z, x); + auto xyxz = mc(x, y, x, z); + auto yxzx = mc(y, x, z, x); + auto yxxz = mc(y, x, x, z); + if (is_simple()) { + expr_ = xyxz - xyzx + yxzx - yxxz; + nflops_ += 3; + } + } break; + // ===== 15: ZZ = -(a×b)_z(c×d)_z ===== + case 15: { + auto xyxy = mc(x, y, x, y); + auto xyyx = mc(x, y, y, x); + auto yxxy = mc(y, x, x, y); + auto yxyx = mc(y, x, y, x); + if (is_simple()) { + expr_ = xyyx - xyxy + yxxy - yxyx; + nflops_ += 3; + } + } break; + default: + throw std::runtime_error( + "CR_11_σpσpCoulombσpσp_11: invalid component index (expected 0-15)"); + } + +} // CR_11_σpσpCoulombσpσp_11::CR_11_σpσpCoulombσpσp_11 +}; // namespace libint2 + +#endif // LIBINT_COMP_11_ΣPΣPCOULOMBΣPΣP_11_H diff --git "a/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" "b/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" new file mode 100644 index 000000000..e698c9047 --- /dev/null +++ "b/src/bin/libint/comp_1_\317\203pR\317\203p_1.h" @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef LIBINT_COMP_1_ΣPRΣP_1_H +#define LIBINT_COMP_1_ΣPRΣP_1_H + +#include + +namespace libint2 { + +/** + * Computes the integral of \f$ \sigma \cdot \hat{p}\, r_k\, \sigma \cdot + * \hat{p} \f$ over CGShell/CGF by folding the 9 raw \f$ \sigma_a \partial_a r_k + * \sigma_b \partial_b \f$ dyadics per dipole direction \f$ k \f$ to 4 + * Pauli-quaternion components via \f$ \sigma_a \sigma_b = \delta_{ab} + + * i\epsilon_{abc}\sigma_c \f$. 12 outputs total = 3 dipole directions × 4 + * Pauli components, mirroring the σpVσp fold but with the central operator + * being a Cartesian dipole instead of the electrostatic potential V. + * + * @tparam F basis function type. valid choices are CGShell or CGF + */ +template +class CR_1_σpRσp_1 + : public GenericRecurrenceRelation< + CR_1_σpRσp_1, F, GenIntegralSet_1_1> { + public: + typedef CR_1_σpRσp_1 ThisType; + typedef F BasisFunctionType; + typedef σpRσpOper OperType; + typedef GenIntegralSet_1_1 TargetType; + typedef GenericRecurrenceRelation + ParentType; + friend class GenericRecurrenceRelation; + static const unsigned int max_nchildren = 100; + + using ParentType::Instance; + + static bool directional() { return false; } + + private: + using ParentType::is_simple; + using ParentType::target_; + using ParentType::RecurrenceRelation::expr_; + using ParentType::RecurrenceRelation::nflops_; + + CR_1_σpRσp_1(const std::shared_ptr &, unsigned int = 0); + + static std::string descr() { return "CR"; } +}; + +template +CR_1_σpRσp_1::CR_1_σpRσp_1(const std::shared_ptr &Tint, + unsigned int) + : ParentType(Tint, 0) { + assert(Tint->num_func_bra(/* particle */ 0) == 1); + assert(Tint->num_func_ket(/* particle */ 0) == 1); + const auto &a = Tint->bra(0, 0); + const auto &b = Tint->ket(0, 0); + const auto &oper = Tint->oper(); + + // express σ·p r_k σ·p in terms of derivative integrals of the dipole + // operator r_k for primitive Gaussians only + if (a.contracted() || b.contracted()) return; + + using namespace libint2::algebra; + using namespace libint2::prefactor; + using libint2::algebra::operator*; + + ChildFactory, + EmptySet>> + factory(this); + + constexpr auto x = 0; + constexpr auto y = 1; + constexpr auto z = 2; + + F Dx_a{a}; + Dx_a.deriv().inc(x); + F Dx_b{b}; + Dx_b.deriv().inc(x); + F Dy_a{a}; + Dy_a.deriv().inc(y); + F Dy_b{b}; + Dy_b.deriv().inc(y); + F Dz_a{a}; + Dz_a.deriv().inc(z); + F Dz_b{b}; + Dz_b.deriv().inc(z); + + // Build the dipole multipole descriptor for direction k. + const auto k = oper->descr().dipole_dir(); + CartesianMultipole_Descr<3u> mu_k; + mu_k.inc(k, 1); // r_k = (kx,ky,kz) with k_k = 1, others 0 + + // Pauli quaternion fold per (k, q): + // q=0: trace δ_ab → Σ_a (∂_a μ | r_k | ∂_a ν) + // q=1: σ_x antisym → (∂_y μ | r_k | ∂_z ν) − (∂_z μ | r_k | ∂_y ν) + // q=2: σ_y antisym → (∂_z μ | r_k | ∂_x ν) − (∂_x μ | r_k | ∂_z ν) + // q=3: σ_z antisym → (∂_x μ | r_k | ∂_y ν) − (∂_y μ | r_k | ∂_x ν) + switch (oper->descr().quaternion_index()) { + case 0: { + auto Dx_a_R_Dx_b = factory.make_child(Dx_a, Dx_b, EmptySet(), mu_k); + auto Dy_a_R_Dy_b = factory.make_child(Dy_a, Dy_b, EmptySet(), mu_k); + auto Dz_a_R_Dz_b = factory.make_child(Dz_a, Dz_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dx_a_R_Dx_b + Dy_a_R_Dy_b + Dz_a_R_Dz_b; + nflops_ += 2; + } + } break; + case 1: { + auto Dy_a_R_Dz_b = factory.make_child(Dy_a, Dz_b, EmptySet(), mu_k); + auto Dz_a_R_Dy_b = factory.make_child(Dz_a, Dy_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dy_a_R_Dz_b - Dz_a_R_Dy_b; + nflops_ += 1; + } + } break; + case 2: { + auto Dz_a_R_Dx_b = factory.make_child(Dz_a, Dx_b, EmptySet(), mu_k); + auto Dx_a_R_Dz_b = factory.make_child(Dx_a, Dz_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dz_a_R_Dx_b - Dx_a_R_Dz_b; + nflops_ += 1; + } + } break; + case 3: { + auto Dx_a_R_Dy_b = factory.make_child(Dx_a, Dy_b, EmptySet(), mu_k); + auto Dy_a_R_Dx_b = factory.make_child(Dy_a, Dx_b, EmptySet(), mu_k); + if (is_simple()) { + expr_ = Dx_a_R_Dy_b - Dy_a_R_Dx_b; + nflops_ += 1; + } + } break; + default: + throw std::runtime_error("CR_1_σpRσp_1: invalid quaternionic index"); + } + +} // CR_1_σpRσp_1::CR_1_σpRσp_1 + +}; // namespace libint2 + +#endif // LIBINT_COMP_1_ΣPRΣP_1_H diff --git "a/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" "b/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" index 5ecfc796b..9fbdef361 100644 --- "a/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" +++ "b/src/bin/libint/comp_1_\317\203pV\317\203p_1.h" @@ -107,7 +107,7 @@ CR_1_σpVσp_1::CR_1_σpVσp_1(const std::shared_ptr &Tint, // (a|W0|b) = (d a/dAx | V | d b/dBx) + (d a/dAy | V | d b/dBy) + (d a/dAz | V // | d b/dBz) - switch (oper->descr().quaternionic_index()) { + switch (oper->descr().quaternion_index()) { case 0: { auto Dx_a_V_Dx_b = factory.make_child(Dx_a, Dx_b, zero_m); auto Dy_a_V_Dy_b = factory.make_child(Dy_a, Dy_b, zero_m); @@ -146,7 +146,7 @@ CR_1_σpVσp_1::CR_1_σpVσp_1(const std::shared_ptr &Tint, } } break; default: - throw std::runtime_error("CR_1_σpVσp_1: invalid Pauli index"); + throw std::runtime_error("CR_1_σpVσp_1: invalid quaternionic index"); } } // CR_1_σpVσp_1::CR_1_σpVσp_1 diff --git a/src/bin/libint/comp_deriv_gauss_v2.h b/src/bin/libint/comp_deriv_gauss_v2.h new file mode 100644 index 000000000..970bab0ae --- /dev/null +++ b/src/bin/libint/comp_deriv_gauss_v2.h @@ -0,0 +1,547 @@ +/* + * Copyright (C) 2004-2026 Edward F. Valeev + * + * This file is part of Libint compiler. + * + * Libint compiler is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Libint compiler is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Libint compiler. If not, see . + * + */ + +#ifndef _libint2_src_bin_libint_compderivgaussv2_h_ +#define _libint2_src_bin_libint_compderivgaussv2_h_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace libint2 { + +/** Optimized compute relation for (geometric) derivative Gaussian integrals. + * + * Like CR_DerivGauss, this expands derivative Gaussians via: + * d/dr G(a) = 2*alpha * G(a+1) - a_i * G(a-1) + * + * Unlike CR_DerivGauss, this uses the HRR-like code-sharing optimization: + * since differentiation of a Gaussian at position (part, where) depends only + * on that shell's quanta (not spectator shells), we generate code once per + * unique differentiated shell and pass spectator dimensions at the call site. + * + * @tparam IntType integral type + * @tparam part particle index of the function to be differentiated + * @tparam where position of the function to be differentiated (InBra/InKet) + * @tparam trans_inv_part if non-negative, specifies the particle index for + * translational invariance + * @tparam trans_inv_where position for translational invariance + */ +template +class DerivGaussV2 : public RecurrenceRelation { + private: + static constexpr auto trans_inv_oper = + not IntType::OperType::Properties::odep; + static constexpr auto using_trans_inv = + trans_inv_oper && (part == trans_inv_part) && (where == trans_inv_where); + + public: + typedef RecurrenceRelation ParentType; + typedef typename IntType::BasisFunctionType BasisFunctionType; + typedef DerivGaussV2 ThisType; + typedef IntType TargetType; + typedef IntType ChildType; + typedef RecurrenceRelation::ExprType ExprType; + + static const unsigned int max_nchildren_ = + using_trans_inv ? (IntType::num_bf - 1) : 2u; + + static std::shared_ptr Instance( + const std::shared_ptr& Tint, unsigned int dir = 0); + virtual ~DerivGaussV2() {} + + /// always directional + static bool directional() { return true; } + + unsigned int num_children() const override { return nchildren_; } + std::shared_ptr rr_target() const override { + return std::static_pointer_cast(target_); + } + std::shared_ptr rr_child(unsigned int i) const override { + return children_.at(i); + } + bool is_simple() const override { + return TrivialBFSet::result; + } + + std::string spfunction_call( + const std::shared_ptr& context, + const std::shared_ptr& dims) const override; + + private: + DerivGaussV2(const std::shared_ptr& Tint, unsigned int dir); + + unsigned int dir_; + std::shared_ptr target_; + std::vector> children_; + unsigned int nchildren_; + + std::string generate_label() const override; + std::shared_ptr adapt_dims_( + const std::shared_ptr& dims) const override; + bool register_with_rrstack() const; + bool expl_high_dim() const; + bool expl_low_dim() const; + + /// add child, deduplicating + const std::shared_ptr& add_child( + const std::shared_ptr& child) { + for (auto& c : children_) { + if (c == child) return c; + } + children_.push_back(child); + ++nchildren_; + return children_.back(); + } +}; + +// +// Implementation +// + +template +std::shared_ptr< + DerivGaussV2> +DerivGaussV2::Instance( + const std::shared_ptr& Tint, unsigned int dir) { + std::shared_ptr this_ptr(new ThisType(Tint, dir)); + if (this_ptr->num_children() != 0) { + this_ptr->register_with_rrstack(); + return this_ptr; + } + return std::shared_ptr(); +} + +template +DerivGaussV2:: + DerivGaussV2(const std::shared_ptr& Tint, unsigned int dir) + : dir_(dir), target_(Tint), nchildren_(0) { + using namespace libint2::algebra; + using namespace libint2::prefactor; + using namespace libint2::braket; + typedef BasisFunctionType F; + const F& _1 = unit(is_simple() ? dir : 0); + + const typename IntType::AuxQuantaType& aux = Tint->aux(); + const typename IntType::OperType& oper = Tint->oper(); + + children_.reserve(max_nchildren_); + + // the Gaussian must be differentiated in direction dir + { + if (where == InBra && Tint->bra(part, 0).deriv().d(dir) == 0) return; + if (where == InKet && Tint->ket(part, 0).deriv().d(dir) == 0) return; + } + + // if not using translational invariance, can only expand primitives + if (not using_trans_inv) { + if (where == InBra && Tint->bra(part, 0).contracted()) return; + if (where == InKet && Tint->ket(part, 0).contracted()) return; + } + + typedef typename IntType::BraType IBraType; + typedef typename IntType::KetType IKetType; + IBraType* bra = new IBraType(Tint->bra()); + IKetType* ket = new IKetType(Tint->ket()); + + if (not using_trans_inv) { // differentiate + + if (where == InBra) { + F a(bra->member(part, 0)); + + // add a+1 + F ap1(bra->member(part, 0) + _1); + ap1.deriv().dec(dir); + bra->set_member(ap1, part, 0); + auto int_ap1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, part, 0); + if (is_simple()) { + std::ostringstream oss; + oss << "two_alpha" << part << "_bra"; + expr_ = Scalar(oss.str()) * int_ap1; + nflops_ += 1; + } + + // See if a-1 exists + F am1(bra->member(part, 0) - _1); + if (exists(am1)) { + am1.deriv().dec(dir); + bra->set_member(am1, part, 0); + auto int_am1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, part, 0); + if (is_simple()) { + expr_ -= Scalar(a[dir]) * int_am1; + nflops_ += 2; + } + } + delete bra; + delete ket; + return; + } + + if (where == InKet) { + F a(ket->member(part, 0)); + + // add a+1 + F ap1(ket->member(part, 0) + _1); + ap1.deriv().dec(dir); + ket->set_member(ap1, part, 0); + auto int_ap1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, part, 0); + if (is_simple()) { + std::ostringstream oss; + oss << "two_alpha" << part << "_ket"; + expr_ = Scalar(oss.str()) * int_ap1; + nflops_ += 1; + } + + // See if a-1 exists + F am1(ket->member(part, 0) - _1); + if (exists(am1)) { + am1.deriv().dec(dir); + ket->set_member(am1, part, 0); + auto int_am1 = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, part, 0); + if (is_simple()) { + expr_ -= Scalar(a[dir]) * int_am1; + nflops_ += 2; + } + } + delete bra; + delete ket; + return; + } + + } else { // use translational invariance + + // remove one deriv quantum from the target function + if (where == InBra) bra->member(part, 0).deriv().dec(dir); + if (where == InKet) ket->member(part, 0).deriv().dec(dir); + + int term_count = 0; + for (int p = 0; p != IntType::num_particles; ++p) { + typedef BasisFunctionType F; + if (p != trans_inv_part || trans_inv_where != InBra) { + F a(bra->member(p, 0)); + if (not a.is_unit()) { + F da(a); + da.deriv().inc(dir); + bra->set_member(da, p, 0); + auto int_da = add_child(IntType::Instance(*bra, *ket, aux, oper)); + bra->set_member(a, p, 0); + if (is_simple()) { + if (term_count == 0) + expr_ = Scalar(-1) * int_da; + else + expr_ -= int_da; + ++term_count; + nflops_ += 1; + } + } + } + if (p != trans_inv_part || trans_inv_where != InKet) { + F a(ket->member(p, 0)); + if (not a.is_unit()) { + F da(a); + da.deriv().inc(dir); + ket->set_member(da, p, 0); + auto int_da = add_child(IntType::Instance(*bra, *ket, aux, oper)); + ket->set_member(a, p, 0); + if (is_simple()) { + if (term_count == 0) + expr_ = Scalar(-1) * int_da; + else + expr_ -= int_da; + ++term_count; + nflops_ += 1; + } + } + } + } + } + + delete bra; + delete ket; +} + +template +bool DerivGaussV2::register_with_rrstack() const { + using std::swap; + + // only register RRs for shell sets (not individual integrals) + if (TrivialBFSet::result) return false; + + // translational invariance path not optimized yet — register as-is + if (using_trans_inv) { + std::shared_ptr rrstack = RRStack::Instance(); + std::shared_ptr this_ptr = + std::const_pointer_cast( + std::static_pointer_cast( + std::enable_shared_from_this::shared_from_this())); + rrstack->find(this_ptr); + return true; + } + + typedef typename IntType::BraType IBraType; + typedef typename IntType::KetType IKetType; + const IBraType& bra = target_->bra(); + const IKetType& ket = target_->ket(); + + // check if all spectator shells already have zero quanta + bool nonzero_quanta = false; + unsigned const int npart = IntType::OperatorType::Properties::np; + for (unsigned int p = 0; p < npart; p++) { + int nfbra = bra.num_members(p); + for (int f = 0; f < nfbra; f++) { + // skip the differentiated position + if (static_cast(p) == part && where == InBra) continue; + if (!bra.member(p, f).zero() || !bra.member(p, f).deriv().zero()) + nonzero_quanta = true; + } + int nfket = ket.num_members(p); + for (int f = 0; f < nfket; f++) { + if (static_cast(p) == part && where == InKet) continue; + if (!ket.member(p, f).zero() || !ket.member(p, f).deriv().zero()) + nonzero_quanta = true; + } + } + + // if all spectators are zero, register this instance directly + if (!nonzero_quanta) { + std::shared_ptr rrstack = RRStack::Instance(); + std::shared_ptr this_ptr = + std::const_pointer_cast( + std::static_pointer_cast( + std::enable_shared_from_this::shared_from_this())); + rrstack->find(this_ptr); + return true; + } + + // Otherwise, zero out all spectator shells and register a dummy + IBraType bra_zero(bra); + IKetType ket_zero(ket); + for (unsigned int p = 0; p < npart; p++) { + int nfbra = bra_zero.num_members(p); + for (int f = 0; f < nfbra; f++) { + if (static_cast(p) == part && where == InBra) continue; + typedef typename IBraType::bfs_type bfs_type; + typedef typename IBraType::bfs_ref bfs_ref; + bfs_ref bfs = bra_zero.member(p, f); + if (!bfs.zero() || !bfs.deriv().zero()) { + bfs_type null_bfs; + swap(bfs, null_bfs); + } + } + int nfket = ket_zero.num_members(p); + for (int f = 0; f < nfket; f++) { + if (static_cast(p) == part && where == InKet) continue; + typedef typename IKetType::bfs_type bfs_type; + typedef typename IKetType::bfs_ref bfs_ref; + bfs_ref bfs = ket_zero.member(p, f); + if (!bfs.zero() || !bfs.deriv().zero()) { + bfs_type null_bfs; + swap(bfs, null_bfs); + } + } + } + + // create a generic integral with a dummy operator + typedef GenOper> + DummyOper; + typedef EmptySet DummyQuanta; + typedef GenIntegralSet + DummyIntegral; + DummyOper dummy_oper; + DummyQuanta dummy_quanta(std::vector(0, 0)); + std::shared_ptr dummy_integral = + DummyIntegral::Instance(bra_zero, ket_zero, dummy_quanta, dummy_oper); + + // construct a DerivGaussV2 over the dummy integral and register it + typedef DerivGaussV2 DummyDerivGaussV2; + std::shared_ptr dummy_rr = + DummyDerivGaussV2::Instance(dummy_integral, dir_); + std::shared_ptr rrstack = RRStack::Instance(); + rrstack->find(dummy_rr); + return true; +} + +template +std::string DerivGaussV2::generate_label() const { + std::ostringstream os; + + // For translational invariance, children depend on ALL shells, so + // the label must include full integral info (no code sharing). + // For direct differentiation, only the differentiated shell matters. + if constexpr (using_trans_inv) { + typedef typename TargetType::AuxIndexType mType; + static std::shared_ptr aux0(new mType(0u)); + os << "CR_DerivGauss" + << "P" << part << to_string(where) + << genintegralset_label(target_->bra(), target_->ket(), aux0, + target_->oper()); + return os.str(); + } + + os << "DerivGaussV2 P" << part << " " << to_string(where) << " "; + + // Only encode the differentiated shell — not spectators + if (where == InBra) { + BasisFunctionType sh(target_->bra(part, 0)); + sh.uncontract(); + os << sh.label(); + } else { + BasisFunctionType sh(target_->ket(part, 0)); + sh.uncontract(); + os << sh.label(); + } + + return os.str(); +} + +template +std::string +DerivGaussV2:: + spfunction_call(const std::shared_ptr& context, + const std::shared_ptr& dims) const { + std::ostringstream os; + os << context->label_to_function_name(label()) << "(inteval, " + << context->value_to_pointer(rr_target()->symbol()); + + const unsigned int nc = num_children(); + for (unsigned int c = 0; c < nc; c++) { + os << ", " << context->value_to_pointer(rr_child(c)->symbol()); + } + + // compute hsr and lsr — dimensions of spectator shells + // canonical order: for each particle p, bra then ket + // hsr = product of dims before (part, where) + // lsr = product of dims after (part, where) + unsigned int hsr = 1; + unsigned int lsr = 1; + const unsigned int np = IntType::OperType::Properties::np; + for (int p = 0; p < static_cast(np); p++) { + unsigned int nbra = target_->bra().num_members(p); + assert(nbra == 1); + for (unsigned int i = 0; i < nbra; i++) { + SubIterator* iter = target_->bra().member_subiter(p, i); + if (p < part || (p == part && where == InKet)) hsr *= iter->num_iter(); + // skip p == part && where == InBra (the differentiated shell) + if (p > part) lsr *= iter->num_iter(); + delete iter; + } + unsigned int nket = target_->ket().num_members(p); + assert(nket == 1); + for (unsigned int i = 0; i < nket; i++) { + SubIterator* iter = target_->ket().member_subiter(p, i); + if (p < part) hsr *= iter->num_iter(); + // skip p == part && where == InKet (the differentiated shell) + if (p > part || (p == part && where == InBra)) lsr *= iter->num_iter(); + delete iter; + } + } + + // Use TaskParameters to keep track of maximum ranks + LibraryTaskManager& taskmgr = LibraryTaskManager::Instance(); + taskmgr.current().params()->max_hrr_hsrank(hsr); + + if (expl_high_dim()) os << "," << hsr; + if (expl_low_dim()) os << "," << lsr; + os << ")" << context->end_of_stat() << std::endl; + return os.str(); +} + +template +bool DerivGaussV2::expl_high_dim() const { + // translational invariance: no code sharing, no explicit dims + if (using_trans_inv) return false; + // need explicit high dim unless this is the first position + if (part == 0 && where == InBra) return false; + return true; +} + +template +bool DerivGaussV2::expl_low_dim() const { + // translational invariance: no code sharing, no explicit dims + if (using_trans_inv) return false; + unsigned int np = IntType::OperType::Properties::np; + // need explicit low dim unless this is the last position + if (static_cast(np) - 1 == part && where == InKet) return false; + // corner case: 1-particle operator + if (np == 1) return true; + return true; +} + +template +std::shared_ptr +DerivGaussV2:: + adapt_dims_(const std::shared_ptr& dims) const { + bool high_rank = expl_high_dim(); + bool low_rank = expl_low_dim(); + + std::shared_ptr high_dim, low_dim; + if (high_rank) { + high_dim = + std::shared_ptr(new RTimeEntity("highdim")); + } else { + high_dim = dims->high(); + } + if (low_rank) { + low_dim = + std::shared_ptr(new RTimeEntity("lowdim")); + } else { + low_dim = dims->low(); + } + + std::shared_ptr localdims( + new ImplicitDimensions(high_dim, low_dim, dims->vecdim())); + return localdims; +} + +}; // namespace libint2 + +#endif diff --git a/src/bin/libint/dg.cc b/src/bin/libint/dg.cc index eded6a54f..9e03277ae 100644 --- a/src/bin/libint/dg.cc +++ b/src/bin/libint/dg.cc @@ -499,10 +499,11 @@ void DirectedGraph::apply_to(const std::shared_ptr& vertex, // Optimize out simple recurrence relations void DirectedGraph::optimize_rr_out( - const std::shared_ptr& context) { + const std::shared_ptr& context, + const std::shared_ptr& dims) { replace_rr_with_expr(); remove_trivial_arithmetics(); - handle_trivial_nodes(context); + handle_trivial_nodes(context, dims); remove_disconnected_vertices(); find_subtrees(); } @@ -797,7 +798,8 @@ inline std::string to_vector_symbol(const std::shared_ptr& v) { // refer to another node so that no code is generated for it. // void DirectedGraph::handle_trivial_nodes( - const std::shared_ptr& context) { + const std::shared_ptr& context, + const std::shared_ptr& dims) { typedef vertices::iterator iter; for (iter v = stack_.begin(); v != stack_.end(); ++v) { const ver_ptr& vptr = vertex_ptr(*v); @@ -821,8 +823,6 @@ void DirectedGraph::handle_trivial_nodes( // if (child->symbol_set() == false) { const std::string stack_name("stack"); - const std::shared_ptr& dims = - ImplicitDimensions::default_dims(); std::string low_rank = dims->low_label(); std::string veclen = dims->vecdim_label(); diff --git a/src/bin/libint/dg.h b/src/bin/libint/dg.h index 0f85abc8f..19e96074c 100644 --- a/src/bin/libint/dg.h +++ b/src/bin/libint/dg.h @@ -22,6 +22,7 @@ #define _libint2_src_bin_libint_dg_h_ #include +#include #include #include #include @@ -253,7 +254,9 @@ class DirectedGraph : public std::enable_shared_from_this { optimized away. optimize_rr_out() will replace all simple recurrence relations with code representing them. */ - void optimize_rr_out(const std::shared_ptr& context); + void optimize_rr_out(const std::shared_ptr& context, + const std::shared_ptr& dims = + ImplicitDimensions::default_dims()); /** after all apply's have been called, traverse() construct a heuristic order of traversal for the graph. @@ -438,7 +441,9 @@ class DirectedGraph : public std::enable_shared_from_this { to their equivalents (such as (ss|ss) shell quartet can only be connected to (ss|ss) integral) */ - void handle_trivial_nodes(const std::shared_ptr& context); + void handle_trivial_nodes(const std::shared_ptr& context, + const std::shared_ptr& dims = + ImplicitDimensions::default_dims()); /// This functions removes vertices not connected to other vertices void remove_disconnected_vertices(); /** Finds (binary) subtrees. The subtrees correspond to a single-line code (no diff --git a/src/bin/libint/gauss.cc b/src/bin/libint/gauss.cc index 3899283ab..aa60b1de7 100644 --- a/src/bin/libint/gauss.cc +++ b/src/bin/libint/gauss.cc @@ -115,8 +115,15 @@ std::string CGF::label() const { unsigned int am = qn_[0] + qn_[1] + qn_[2]; std::string deriv_label; if (!deriv_.zero()) deriv_label = deriv_.label(); - const std::string am_string = am_to_symbol(am, contracted()); + std::string am_string = am_to_symbol(am, contracted()); std::ostringstream oss; + + // Some OSs can have case-insensitive filesystem e.g., MacOS. So here we add + // additional identifier for primitive function labels + if (!this->contracted()) { + am_string += "_"; + } + oss << (pure_sh_ && am > 0 ? "W" : "") << am_string << deriv_label << "_"; if (am == 0) return oss.str(); @@ -223,8 +230,16 @@ CGShell::~CGShell() {} std::string CGShell::label() const { if (is_unit()) return "unit"; - std::string result = std::string(pure_sh_ && qn_[0] > 0 ? "W" : "") + - am_to_symbol(qn_[0], contracted()); + std::string am_symbol = am_to_symbol(qn_[0], contracted()); + + // Some OSs can have case-insensitive filesystem e.g., MacOS. So here we add + // additional identifier for primitive shell labels + if (!this->contracted()) { + am_symbol += "_"; + } + + std::string result = + std::string(pure_sh_ && qn_[0] > 0 ? "W" : "") + am_symbol; if (!deriv_.zero()) result += deriv_.label(); return result; } diff --git a/src/bin/libint/master_ints_list.h b/src/bin/libint/master_ints_list.h index 1aa8c3e64..158052fba 100644 --- a/src/bin/libint/master_ints_list.h +++ b/src/bin/libint/master_ints_list.h @@ -21,12 +21,15 @@ #ifndef _libint2_src_bin_libint_masterintslist_h_ #define _libint2_src_bin_libint_masterintslist_h_ -// need extra-long mpl list +// need extra-long mpl list — split across two sub-lists and joined via +// boost::mpl::joint_view, since boost only ships pre-generated list headers up +// to size 50. #define BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS #define BOOST_MPL_LIMIT_LIST_SIZE 50 #include #include +#include #include #if LIBINT_SUPPORT_ONEBODYINTS #include @@ -47,6 +50,8 @@ typedef GenIntegralSet_1_1 ElecPot_1_1_sh; typedef GenIntegralSet_1_1 ElecPot_1_1_int; typedef GenIntegralSet_1_1 σpVσp_1_1_sh; typedef GenIntegralSet_1_1 σpVσp_1_1_int; +typedef GenIntegralSet_1_1 σpRσp_1_1_sh; +typedef GenIntegralSet_1_1 σpRσp_1_1_int; typedef GenIntegralSet_1_1, EmptySet> CMultipole_1_1_sh; typedef GenIntegralSet_1_1, EmptySet> @@ -106,6 +111,16 @@ typedef GenIntegralSet_1_1, CartesianMultipoleOper<1u>, ////////////////////////// typedef GenIntegralSet_11_11 TwoPRep_11_11_sq; typedef GenIntegralSet_11_11 TwoPRep_11_11_int; +typedef GenIntegralSet_11_11 + Coulombσpσp_11_11_sq; +typedef GenIntegralSet_11_11 Coulombσpσp_11_11_int; +typedef GenIntegralSet_11_11 + σpσpCoulombσpσp_11_11_sq; +typedef GenIntegralSet_11_11 + σpσpCoulombσpσp_11_11_int; +typedef GenIntegralSet_11_11 + σpCoulombσp_11_11_sq; +typedef GenIntegralSet_11_11 σpCoulombσp_11_11_int; typedef GenIntegralSet_11_11 R12kG12_11_11_sq; typedef GenIntegralSet_11_11 R12kG12_11_11_int; typedef GenIntegralSet_11_11 @@ -138,17 +153,24 @@ typedef boost::mpl::list< Overlap_1_1_sh_y, Overlap_1_1_int_y, Overlap_1_1_sh_z, Overlap_1_1_int_z, Kinetic_1_1_sh, Kinetic_1_1_int, Kinetic_1_1_sh_x, Kinetic_1_1_int_x, Kinetic_1_1_sh_y, Kinetic_1_1_int_y, Kinetic_1_1_sh_z, Kinetic_1_1_int_z, - ElecPot_1_1_sh, ElecPot_1_1_int, σpVσp_1_1_sh, σpVσp_1_1_int, - CMultipole_1_1_sh, CMultipole_1_1_int, CMultipole_1_1_sh_x, + ElecPot_1_1_sh, ElecPot_1_1_int, σpVσp_1_1_sh, σpVσp_1_1_int, σpRσp_1_1_sh, + σpRσp_1_1_int, CMultipole_1_1_sh, CMultipole_1_1_int, CMultipole_1_1_sh_x, CMultipole_1_1_sh_y, CMultipole_1_1_sh_z, CMultipole_1_1_int_x, CMultipole_1_1_int_y, CMultipole_1_1_int_z, SMultipole_1_1_sh, - SMultipole_1_1_int, #endif - TwoPRep_11_11_sq, TwoPRep_11_11_int, R12kG12_11_11_sq, R12kG12_11_11_int, - R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, TiG12_11_11_sq, - TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, + SMultipole_1_1_int> + MasterIntegralTypeList_1body_part; +typedef boost::mpl::list< + TwoPRep_11_11_sq, TwoPRep_11_11_int, Coulombσpσp_11_11_sq, + Coulombσpσp_11_11_int, σpσpCoulombσpσp_11_11_sq, σpσpCoulombσpσp_11_11_int, + σpCoulombσp_11_11_sq, σpCoulombσp_11_11_int, R12kG12_11_11_sq, + R12kG12_11_11_int, R12kR12lG12_11_11_sq, R12kR12lG12_11_11_int, + TiG12_11_11_sq, TiG12_11_11_int, G12TiG12_11_11_sq, G12TiG12_11_11_int, DivG12prime_xTx_11_11_sq, DivG12prime_xTx_11_11_int, DummySymmIntegral_11_11_sq, DummySymmIntegral_11_11_int> + MasterIntegralTypeList_2body_part; +typedef boost::mpl::joint_view MasterIntegralTypeList; }; // namespace libint2 diff --git a/src/bin/libint/master_rrs_list.h b/src/bin/libint/master_rrs_list.h index f3ec4e2d2..ae4e32311 100644 --- a/src/bin/libint/master_rrs_list.h +++ b/src/bin/libint/master_rrs_list.h @@ -21,12 +21,17 @@ #ifndef _libint2_src_bin_libint_masterrrslist_h_ #define _libint2_src_bin_libint_masterrrslist_h_ +#include #include #include +#include #include #include +#include +#include #include #include +#include #include #include #include @@ -178,6 +183,9 @@ typedef VRR_1_ElecPot_1 VRR_b_1_ElecPot_1_int; typedef CR_1_σpVσp_1 CR_1_σpVσp_1_sh; typedef CR_1_σpVσp_1 CR_1_σpVσp_1_int; +typedef CR_1_σpRσp_1 CR_1_σpRσp_1_sh; +typedef CR_1_σpRσp_1 CR_1_σpRσp_1_int; + // TODO investigate whether need to stay away from HRR for now to be sure that // multipoles are computed as precisely as possible typedef HRR @@ -266,6 +274,59 @@ typedef CR_DerivGauss Deriv_d_11_TwoPRep_11_int; +// DerivGaussV2 for TwoPRep (shell sets) +typedef DerivGaussV2 + DerivV2_a_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_b_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_c_11_TwoPRep_11_sh; +typedef DerivGaussV2 + DerivV2_d_11_TwoPRep_11_sh; +// DerivGaussV2 for TwoPRep (individual integrals) +typedef DerivGaussV2 + DerivV2_a_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_b_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_c_11_TwoPRep_11_int; +typedef DerivGaussV2 + DerivV2_d_11_TwoPRep_11_int; + +// DerivGaussV2 for DummySymmIntegral (used by register_with_rrstack) +typedef DerivGaussV2 + DerivV2_a_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_b_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_c_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_d_11_Dummy_11_sh; +typedef DerivGaussV2 + DerivV2_a_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_b_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_c_11_Dummy_11_int; +typedef DerivGaussV2 + DerivV2_d_11_Dummy_11_int; + +typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_sh; +typedef CR_11_Coulombσpσp_11 CR_11_Coulombσpσp_11_int; + +typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_sh; +typedef CR_11_σpσpCoulombσpσp_11 CR_11_σpσpCoulombσpσp_11_int; + +typedef CR_11_σpCoulombσp_11 CR_11_σpCoulombσp_11_sh; +typedef CR_11_σpCoulombσp_11 CR_11_σpCoulombσp_11_int; }; // namespace libint2 #endif // header guard diff --git a/src/bin/libint/oper.h b/src/bin/libint/oper.h index 6afbff587..29b367378 100644 --- a/src/bin/libint/oper.h +++ b/src/bin/libint/oper.h @@ -289,23 +289,22 @@ BOOST_PP_LIST_FOR_EACH(BOOST_PP_DECLARE_HERMITIAN_ONEBODY_DESCRIPTOR, struct σpVσp_Descr : public Contractable<σpVσp_Descr> { typedef MultiplicativeODep1Body_Props Properties; - σpVσp_Descr() : quaternionic_index_(0) {} - σpVσp_Descr(int quaternionic_index) - : quaternionic_index_(quaternionic_index) { - assert(quaternionic_index <= 3); + σpVσp_Descr() : quaternion_index_(0) {} + σpVσp_Descr(int quaternion_index) : quaternion_index_(quaternion_index) { + assert(quaternion_index <= 3); } static const unsigned int max_key = 4; - unsigned int key() const { return quaternionic_index(); } + unsigned int key() const { return quaternion_index(); } std::string description() const { std::string descr("opVop["); - if (quaternionic_index() == 0) + if (quaternion_index() == 0) descr += "0"; - else if (quaternionic_index() == 1) + else if (quaternion_index() == 1) descr += "X"; - else if (quaternionic_index() == 2) + else if (quaternion_index() == 2) descr += "Y"; - else if (quaternionic_index() == 3) + else if (quaternion_index() == 3) descr += "Z"; else abort(); @@ -315,13 +314,52 @@ struct σpVσp_Descr : public Contractable<σpVσp_Descr> { int psymm(int i, int j) const { abort(); } int hermitian(int i) const { return +1; } - int quaternionic_index() const { return quaternionic_index_; } + int quaternion_index() const { return quaternion_index_; } private: - const int quaternionic_index_ = -1; + const int quaternion_index_ = -1; }; typedef GenOper<σpVσp_Descr> σpVσpOper; +/** opRop: (μ σ·p | r | σ·p ν), one-body σ·p-on-both-sides analog of dipole. + * σ_a σ_b = δ_ab + iε_abc σ_c folds the 9 raw σ_a∂_a r_k σ_b∂_b dyadics per + * dipole direction k down to 4 Pauli-quaternion components (trace + 3 + * antisym), mirroring σpVσp's fold of σ·p V σ·p. 12 outputs total = 3 dipole + * directions × 4 Pauli components, indexed composite_index = 4·k + q. + */ +struct σpRσp_Descr : public Contractable<σpRσp_Descr> { + typedef MultiplicativeODep1Body_Props Properties; + + σpRσp_Descr() : composite_index_(0) {} + σpRσp_Descr(int composite_index) : composite_index_(composite_index) { + assert(composite_index >= 0 && composite_index < 12); + } + + static const unsigned int max_key = 12; + unsigned int key() const { return composite_index(); } + std::string description() const { + static const char* dipole_lbl[] = {"x", "y", "z"}; + static const char* pauli_lbl[] = {"0", "X", "Y", "Z"}; + const auto ci = composite_index(); + if (ci < 0 || ci >= 12) abort(); + return std::string("opRop[") + dipole_lbl[ci / 4] + "," + + pauli_lbl[ci % 4] + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int composite_index() const { return composite_index_; } + /// dipole direction ∈ {0=x, 1=y, 2=z} + int dipole_dir() const { return composite_index_ / 4; } + /// Pauli quaternion ∈ {0=trace, 1=σ_x, 2=σ_y, 3=σ_z} + int quaternion_index() const { return composite_index_ % 4; } + + private: + const int composite_index_ = -1; +}; +typedef GenOper<σpRσp_Descr> σpRσpOper; + /// cartesian multipole operator in \c NDIM dimensions /// \f$ \hat{O}(\vec{k}) \equiv \vec{r}^{\cdot \vec{k}} = r_1^{k_1} r_2^{k_2} /// \cdots \f$ \internal OriginDerivative is used to store cartesian @@ -400,6 +438,146 @@ struct TwoPRep_Descr : public Contractable { }; typedef GenOper TwoPRep; +/** Coulombσpσp is the two-body repulsion operator. + */ +struct Coulombσpσp_Descr : public Contractable { + typedef MultiplicativeSymm2Body_Props Properties; + + Coulombσpσp_Descr() : quaternion_index_(0) {} + Coulombσpσp_Descr(int quaternion_index) + : quaternion_index_(quaternion_index) { + assert(quaternion_index <= 3); + } + + static const unsigned int max_key = 4; + unsigned int key() const { return quaternion_index(); } + std::string description() const { + std::string descr("coulomb_opop["); + if (quaternion_index() == 0) + descr += "0"; + else if (quaternion_index() == 1) + descr += "X"; + else if (quaternion_index() == 2) + descr += "Y"; + else if (quaternion_index() == 3) + descr += "Z"; + else + abort(); + return descr + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int quaternion_index() const { return quaternion_index_; } + + private: + const int quaternion_index_ = -1; +}; +typedef GenOper CoulombσpσpOper; + +struct σpσpCoulombσpσp_Descr : public Contractable<σpσpCoulombσpσp_Descr> { + typedef MultiplicativeSymm2Body_Props Properties; + + σpσpCoulombσpσp_Descr() : quaternion_index_(0) {} + σpσpCoulombσpσp_Descr(int quaternion_index) + : quaternion_index_(quaternion_index) { + assert(quaternion_index >= 0 && quaternion_index <= 15); + } + + /// 16 components from tensor product of two independent spin spaces: + /// index = 4 * bra_spin_index + ket_spin_index + /// where spin indices are: 0=S (scalar), 1=X, 2=Y, 3=Z (cross product) + static const unsigned int max_key = 16; + unsigned int key() const { return quaternion_index(); } + std::string description() const { + // clang-format off + // Option A (tensor product order): index = 4 * bra_spin + ket_spin + static const char* labels[] = { + "SS", "SX", "SY", "SZ", + "XS", "XX", "XY", "XZ", + "YS", "YX", "YY", "YZ", + "ZS", "ZX", "ZY", "ZZ" + }; + // clang-format on + const auto qi = quaternion_index(); + if (qi > 15) abort(); + return std::string("opop_coulomb_opop[") + labels[qi] + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int quaternion_index() const { return quaternion_index_; } + + private: + const int quaternion_index_ = -1; +}; +typedef GenOper<σpσpCoulombσpσp_Descr> σpσpCoulombσpσpOper; + +/** σpCoulombσp: (μ σ·p ν | 1/r_{12} | κ σ·p λ). + * Gaunt LS "bilinear" operator with one σ·p on each side. + * Outputs the SO(3) irreducible decomposition of the 3×3 gradient-gradient + * tensor T_{ab} = ∂_a ∂_b (μν|κλ): 1 scalar trace + 3 antisymmetric + * (curl-curl) + 5 symmetric-traceless = 9 components. Same storage as the + * raw dyadic, but indexed by physics-meaningful irreps so consumers do not + * need to hand-build trace/antisym/sym-TL combinations at every contraction + * site. + */ +struct σpCoulombσp_Descr : public Contractable<σpCoulombσp_Descr> { + typedef MultiplicativeSymm2Body_Props Properties; + + /// SO(3) irreducible components of the rank-2 Cartesian tensor T_{ab}. + enum Component : int { + Scalar = 0, ///< T_xx + T_yy + T_zz (trace, ∇·∇) + AntisymX = 1, ///< T_yz − T_zy = (∇×∇)_x + AntisymY = 2, ///< T_zx − T_xz = (∇×∇)_y + AntisymZ = 3, ///< T_xy − T_yx = (∇×∇)_z + SymTLDiagA = 4, ///< T_xx − T_yy + SymTLDiagB = 5, ///< 2·T_zz − T_xx − T_yy + SymTLOffXY = 6, ///< T_xy + T_yx + SymTLOffXZ = 7, ///< T_xz + T_zx + SymTLOffYZ = 8, ///< T_yz + T_zy + }; + + σpCoulombσp_Descr() : component_index_(0) {} + σpCoulombσp_Descr(int component_index) : component_index_(component_index) { + assert(component_index >= 0 && component_index <= 8); + } + + static const unsigned int max_key = 9; + unsigned int key() const { return component_index(); } + std::string description() const { + static const char* labels[] = { + "scalar", "antisym_x", "antisym_y", + "antisym_z", "symtl_diag_a", "symtl_diag_b", + "symtl_off_xy", "symtl_off_xz", "symtl_off_yz", + }; + const auto ci = component_index(); + if (ci > 8) abort(); + return std::string("op_coulomb_op[") + labels[ci] + "]"; + } + std::string label() const { return description(); } + int psymm(int i, int j) const { abort(); } + int hermitian(int i) const { return +1; } + + int component_index() const { return component_index_; } + + bool is_scalar() const { return component_index_ == Scalar; } + bool is_antisym() const { + return component_index_ >= AntisymX && component_index_ <= AntisymZ; + } + bool is_sym_tl() const { + return component_index_ >= SymTLDiagA && component_index_ <= SymTLOffYZ; + } + /// for antisym components: 0=x, 1=y, 2=z (only valid if is_antisym()) + int antisym_cart() const { return component_index_ - AntisymX; } + + private: + const int component_index_ = -1; +}; +typedef GenOper<σpCoulombσp_Descr> σpCoulombσpOper; + /** GTG_1d is the two-body 1-dimensional Gaussian geminal */ struct GTG_1d_Descr : public Contractable { diff --git a/src/bin/libint/rr.cc b/src/bin/libint/rr.cc index 32bbb9df9..a653bf4b2 100644 --- a/src/bin/libint/rr.cc +++ b/src/bin/libint/rr.cc @@ -127,8 +127,11 @@ void RecurrenceRelation::generate_code( // Assign symbols for the target and source integral sets std::shared_ptr symbols(new CodeSymbols); assign_symbols_(symbols); + // Compute local dimensions before optimize_rr_out so that + // handle_trivial_nodes uses the correct dims (e.g., "lowdim" instead of "1") + std::shared_ptr localdims = adapt_dims_(dims); // Traverse the graph - dg->optimize_rr_out(context); + dg->optimize_rr_out(context, localdims); dg->traverse(); #if PRINT_DAG_GRAPHVIZ { @@ -138,7 +141,6 @@ void RecurrenceRelation::generate_code( #endif // Generate code std::shared_ptr memman(new WorstFitMemoryManager()); - std::shared_ptr localdims = adapt_dims_(dims); dg->generate_code(context, memman, localdims, symbols, funcname, decl, def); // extract all external symbols -- these will be members of the evaluator diff --git a/src/bin/libint/strategy.cc b/src/bin/libint/strategy.cc index bbfc3fe21..e01f76a7d 100644 --- a/src/bin/libint/strategy.cc +++ b/src/bin/libint/strategy.cc @@ -70,51 +70,80 @@ struct MasterStrategy; #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_a_11_TwoPRep_11_sh, VRR_c_11_TwoPRep_11_sh> value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_a_11_TwoPRep_11_int, VRR_c_11_TwoPRep_11_int> value; }; #else // 0B0D strategy template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_b_11_TwoPRep_11_sh, VRR_d_11_TwoPRep_11_sh> value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + VRR_b_11_TwoPRep_11_int, VRR_d_11_TwoPRep_11_int> value; }; #endif +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpσpCoulombσpσp_11_11_sq> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpσpCoulombσpσp_11_11_int> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpCoulombσp_11_11_sq> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpCoulombσp_11_11_int> { + typedef boost::mpl::list value; +}; + #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { @@ -197,21 +226,31 @@ struct MasterStrategy { #if LIBINT_SHELLQUARTET_STRATEGY == LIBINT_SHELLQUARTET_STRATEGY_A0C0 template <> struct MasterStrategy { - typedef boost::mpl::list value; + typedef boost::mpl::list + value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + typedef boost::mpl::list value; }; #else // 0B0D strategy template <> struct MasterStrategy { - typedef boost::mpl::list value; + typedef boost::mpl::list + value; }; template <> struct MasterStrategy { - typedef boost::mpl::list + typedef boost::mpl::list value; }; #endif @@ -276,6 +315,14 @@ struct MasterStrategy<σpVσp_1_1_int> { typedef boost::mpl::list value; }; template <> +struct MasterStrategy<σpRσp_1_1_sh> { + typedef boost::mpl::list value; +}; +template <> +struct MasterStrategy<σpRσp_1_1_int> { + typedef boost::mpl::list value; +}; +template <> struct MasterStrategy { typedef boost::mpl::list> value; };