Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ jobs:
run: pip install .

- name: Run simulation examples (a2a3sim, ${{ matrix.runtime }})
run: ./ci.sh -p a2a3sim -r ${{ matrix.runtime }} -c 6622890 -t 600 --clone-protocol https
run: python tools/ci.py -p a2a3sim -r ${{ matrix.runtime }} -c 6622890 -t 600 --clone-protocol https

st-sim-a5:
needs: pre-commit
Expand Down Expand Up @@ -169,7 +169,7 @@ jobs:
run: pip install .

- name: Run simulation examples (a5sim, ${{ matrix.runtime }})
run: ./ci.sh -p a5sim -r ${{ matrix.runtime }} -c 6622890 -t 600 --clone-protocol https
run: python tools/ci.py -p a5sim -r ${{ matrix.runtime }} -c 6622890 -t 600 --clone-protocol https

# ---------- Python unit tests (a2a3 hardware) ----------
ut-py-a2a3:
Expand Down Expand Up @@ -205,7 +205,7 @@ jobs:
- name: Run on-device examples (a2a3)
run: |
export PATH="$HOME/.local/bin:$PATH"
source ${ASCEND_HOME_PATH}/bin/setenv.bash && ./ci.sh -p a2a3 -d ${DEVICE_RANGE} --parallel -c 6622890 -t 600 --clone-protocol https
source ${ASCEND_HOME_PATH}/bin/setenv.bash && python tools/ci.py -p a2a3 -d ${DEVICE_RANGE} --parallel -c 6622890 -t 600 --clone-protocol https


# ---------- Detect A5 changes (runs on GitHub server, not A5 machine) ----------
Expand Down Expand Up @@ -278,4 +278,4 @@ jobs:
- name: Run on-device examples (a5)
run: |
export PATH="$HOME/.local/bin:$PATH"
source ${ASCEND_HOME_PATH}/bin/setenv.bash && ./ci.sh -p a5 -d ${DEVICE_RANGE} --parallel -c 6622890 -t 600 --clone-protocol https
source ${ASCEND_HOME_PATH}/bin/setenv.bash && python tools/ci.py -p a5 -d ${DEVICE_RANGE} --parallel -c 6622890 -t 600 --clone-protocol https
81 changes: 57 additions & 24 deletions src/a2a3/platform/sim/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "device_runner.h"

#include <stdlib.h>

#include <cstdio>
#include <string>
#include <vector>
Expand All @@ -40,6 +42,44 @@ typedef void (*aicore_execute_func_t)(
Runtime* runtime, int block_idx, CoreType core_type, uint32_t physical_core_id, uint64_t regs);
typedef void (*set_platform_regs_func_t)(uint64_t regs);

namespace {

/**
 * Write exactly `size` bytes starting at `data` to the descriptor `fd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated on
 * the unwritten tail until nothing is left.
 *
 * @param fd    Descriptor to write to.
 * @param data  First byte of the payload.
 * @param size  Number of bytes to write; when 0, write() is never called.
 * @return true once every byte has been written, false as soon as write()
 *         returns zero or a negative value.
 */
bool write_all_bytes(int fd, const uint8_t* data, size_t size) {
    const uint8_t* cursor = data;
    size_t remaining = size;
    while (remaining != 0) {
        const ssize_t chunk = write(fd, cursor, remaining);
        if (chunk <= 0) {
            return false;
        }
        cursor += chunk;
        remaining -= static_cast<size_t>(chunk);
    }
    return true;
}

/**
 * Create a temporary file via mkstemp() and fill it with `size` bytes from
 * `data`.
 *
 * @param path_template  Template handed to mkstemp() (callers in this file
 *                       pass paths ending in "XXXXXX").
 * @param data           Bytes to store in the file.
 * @param size           Number of bytes in `data`.
 * @param out_path       Receives the path of the created file on success;
 *                       left unmodified on failure.
 * @return true if the file was created, fully written and closed without
 *         error. On a write or close failure the file is unlinked and false
 *         is returned; false is also returned when mkstemp() fails.
 */
bool create_temp_so_file(const std::string& path_template, const uint8_t* data, size_t size, std::string* out_path) {
    // mkstemp() rewrites its argument in place, so hand it a private mutable copy.
    std::string generated = path_template;

    const int fd = mkstemp(&generated[0]);
    if (fd < 0) {
        return false;
    }

    // The descriptor is always closed, whether or not the write succeeded.
    const bool wrote_everything = write_all_bytes(fd, data, size);
    const bool closed_cleanly = (close(fd) == 0);

    if (wrote_everything && closed_cleanly) {
        *out_path = generated.c_str();
        return true;
    }

    // Do not leave a partially written file behind.
    unlink(generated.c_str());
    return false;
}

}  // namespace

// =============================================================================
// DeviceRunner Implementation
// =============================================================================
Expand All @@ -64,14 +104,11 @@ int DeviceRunner::ensure_binaries_loaded(

// Write AICPU binary to temp file and dlopen
if (!aicpu_so_binary.empty()) {
aicpu_so_path_ = "/tmp/aicpu_sim_" + std::to_string(getpid()) + ".so";
std::ofstream ofs(aicpu_so_path_, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file for AICPU SO: %s", aicpu_so_path_.c_str());
if (!create_temp_so_file("/tmp/aicpu_sim_XXXXXX", aicpu_so_binary.data(), aicpu_so_binary.size(),
&aicpu_so_path_)) {
LOG_ERROR("Failed to create temp file for AICPU SO");
return -1;
}
ofs.write(reinterpret_cast<const char*>(aicpu_so_binary.data()), aicpu_so_binary.size());
ofs.close();

aicpu_so_handle_ = dlopen(aicpu_so_path_.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (aicpu_so_handle_ == nullptr) {
Expand All @@ -96,14 +133,13 @@ int DeviceRunner::ensure_binaries_loaded(

// Write AICore binary to temp file and dlopen
if (!aicore_kernel_binary.empty()) {
aicore_so_path_ = "/tmp/aicore_sim_" + std::to_string(getpid()) + ".so";
std::ofstream ofs(aicore_so_path_, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file for AICore SO: %s", aicore_so_path_.c_str());
if (!create_temp_so_file("/tmp/aicore_sim_XXXXXX",
aicore_kernel_binary.data(),
aicore_kernel_binary.size(),
&aicore_so_path_)) {
LOG_ERROR("Failed to create temp file for AICore SO");
return -1;
}
ofs.write(reinterpret_cast<const char*>(aicore_kernel_binary.data()), aicore_kernel_binary.size());
ofs.close();

aicore_so_handle_ = dlopen(aicore_so_path_.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (aicore_so_handle_ == nullptr) {
Expand Down Expand Up @@ -474,25 +510,22 @@ uint64_t DeviceRunner::upload_kernel_binary(int func_id, const uint8_t* bin_data
size_t kernel_size = callable->binary_size();

// 1. Generate temp file path
char tmpfile[256];
snprintf(tmpfile, sizeof(tmpfile), "/tmp/kernel_%d_%d.so", func_id, getpid());

// 2. Write extracted kernel binary to temp file
std::ofstream ofs(tmpfile, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file: %s", tmpfile);
std::string tmpfile;
if (!create_temp_so_file("/tmp/kernel_" + std::to_string(func_id) + "_XXXXXX",
reinterpret_cast<const uint8_t*>(kernel_binary),
kernel_size,
&tmpfile)) {
LOG_ERROR("Failed to create temp file for kernel func_id=%d", func_id);
return 0;
}
ofs.write(reinterpret_cast<const char*>(kernel_binary), kernel_size);
ofs.close();

LOG_DEBUG("Uploading kernel .so: %s (size=%zu bytes)", tmpfile, kernel_size);
LOG_DEBUG("Uploading kernel .so: %s (size=%zu bytes)", tmpfile.c_str(), kernel_size);

// 3. dlopen to load .so (RTLD_NOW ensures all symbols resolved immediately)
void* handle = dlopen(tmpfile, RTLD_NOW | RTLD_LOCAL);
void* handle = dlopen(tmpfile.c_str(), RTLD_NOW | RTLD_LOCAL);

// 4. Remove temp file immediately (.so is already in memory)
std::remove(tmpfile);
std::remove(tmpfile.c_str());

if (!handle) {
LOG_ERROR("dlopen failed: %s", dlerror());
Expand Down
62 changes: 44 additions & 18 deletions src/a2a3/runtime/host_build_graph/host/runtime_maker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

#include "callable.h" // NOLINT(build/include_subdir)
#include "runtime.h" // Includes unified_log.h and provides LOG_* macros // NOLINT(build/include_subdir)
Expand All @@ -47,6 +48,42 @@
*/
typedef int (*OrchestrationFunc)(Runtime* runtime, const ChipStorageTaskArgs& orch_args);

namespace {

/**
 * Write exactly `size` bytes starting at `data` to the descriptor `fd`.
 *
 * A single write() may be short, so the loop keeps writing from the current
 * offset until the whole buffer has been handed over.
 *
 * @param fd    Descriptor to write to.
 * @param data  First byte of the payload.
 * @param size  Number of bytes to write; when 0, write() is never called.
 * @return true once every byte has been written, false as soon as write()
 *         returns zero or a negative value.
 */
bool write_all_bytes(int fd, const uint8_t* data, size_t size) {
    for (size_t offset = 0; offset < size;) {
        const ssize_t chunk = write(fd, data + offset, size - offset);
        if (chunk <= 0) {
            return false;
        }
        offset += static_cast<size_t>(chunk);
    }
    return true;
}

/**
 * Create a temporary file from the template "/tmp/orch_so_XXXXXX" via
 * mkstemp() and fill it with `size` bytes from `data`.
 *
 * @param data      Bytes to store in the file.
 * @param size      Number of bytes in `data`.
 * @param out_path  Receives the path of the created file on success; left
 *                  unmodified on failure.
 * @return true if the file was created, fully written and closed without
 *         error. On a write or close failure the file is unlinked and false
 *         is returned; false is also returned when mkstemp() fails.
 */
bool create_temp_so_file(const uint8_t* data, size_t size, std::string* out_path) {
    // Must be a writable array: mkstemp() overwrites the template in place.
    char generated_path[] = "/tmp/orch_so_XXXXXX";

    const int fd = mkstemp(generated_path);
    if (fd < 0) {
        return false;
    }

    // The descriptor is always closed, whether or not the write succeeded.
    const bool wrote_everything = write_all_bytes(fd, data, size);
    const bool closed_cleanly = (close(fd) == 0);

    if (wrote_everything && closed_cleanly) {
        *out_path = generated_path;
        return true;
    }

    // Do not leave a partially written file behind.
    unlink(generated_path);
    return false;
}

}  // namespace

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -101,26 +138,14 @@ int init_runtime_impl(Runtime* runtime, const ChipCallable* callable, const Chip
}

// Load orchestration SO from binary data via temp file
char fd_path[128];
snprintf(fd_path, sizeof(fd_path), "/tmp/orch_so_%d.so", getpid());

int fd = open(fd_path, O_WRONLY | O_CREAT | O_TRUNC, 0700);
if (fd < 0) {
std::string fd_path;
if (!create_temp_so_file(orch_so_binary, orch_so_size, &fd_path)) {
LOG_ERROR("Failed to create temp SO file");
return -1;
}

ssize_t written = write(fd, orch_so_binary, static_cast<size_t>(orch_so_size));
if (written < 0 || static_cast<uint64_t>(written) != orch_so_size) {
LOG_ERROR("Failed to write orchestration SO to temp file");
close(fd);
unlink(fd_path);
return -1;
}
close(fd);

void* handle = dlopen(fd_path, RTLD_NOW | RTLD_LOCAL);
unlink(fd_path);
void* handle = dlopen(fd_path.c_str(), RTLD_NOW | RTLD_LOCAL);
unlink(fd_path.c_str());
if (handle == nullptr) {
LOG_ERROR("dlopen failed: %s", dlerror());
return -1;
Expand Down Expand Up @@ -159,8 +184,9 @@ int init_runtime_impl(Runtime* runtime, const ChipCallable* callable, const Chip

LOG_INFO("Runtime initialized. Ready for execution from Python.");

// Note: We intentionally leak the dlopen handle to keep the SO loaded
// for the lifetime of the process.
// Host orchestration is complete once orch_func returns. The task graph now
// lives in Runtime, so the orchestration SO can be closed immediately.
dlclose(handle);

return 0;
}
Expand Down
81 changes: 57 additions & 24 deletions src/a5/platform/sim/host/device_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "device_runner.h"

#include <stdlib.h>

#include <cstdio>
#include <string>
#include <vector>
Expand All @@ -40,6 +42,44 @@ typedef void (*aicore_execute_func_t)(
Runtime* runtime, int block_idx, CoreType core_type, uint32_t physical_core_id, uint64_t regs);
typedef void (*set_platform_regs_func_t)(uint64_t regs);

namespace {

/**
 * Write exactly `size` bytes starting at `data` to the descriptor `fd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated on
 * the unwritten tail until nothing is left.
 *
 * @param fd    Descriptor to write to.
 * @param data  First byte of the payload.
 * @param size  Number of bytes to write; when 0, write() is never called.
 * @return true once every byte has been written, false as soon as write()
 *         returns zero or a negative value.
 */
bool write_all_bytes(int fd, const uint8_t* data, size_t size) {
    const uint8_t* cursor = data;
    size_t remaining = size;
    while (remaining != 0) {
        const ssize_t chunk = write(fd, cursor, remaining);
        if (chunk <= 0) {
            return false;
        }
        cursor += chunk;
        remaining -= static_cast<size_t>(chunk);
    }
    return true;
}

/**
 * Create a temporary file via mkstemp() and fill it with `size` bytes from
 * `data`.
 *
 * @param path_template  Template handed to mkstemp() (callers in this file
 *                       pass paths ending in "XXXXXX").
 * @param data           Bytes to store in the file.
 * @param size           Number of bytes in `data`.
 * @param out_path       Receives the path of the created file on success;
 *                       left unmodified on failure.
 * @return true if the file was created, fully written and closed without
 *         error. On a write or close failure the file is unlinked and false
 *         is returned; false is also returned when mkstemp() fails.
 */
bool create_temp_so_file(const std::string& path_template, const uint8_t* data, size_t size, std::string* out_path) {
    // mkstemp() rewrites its argument in place, so hand it a private mutable copy.
    std::string generated = path_template;

    const int fd = mkstemp(&generated[0]);
    if (fd < 0) {
        return false;
    }

    // The descriptor is always closed, whether or not the write succeeded.
    const bool wrote_everything = write_all_bytes(fd, data, size);
    const bool closed_cleanly = (close(fd) == 0);

    if (wrote_everything && closed_cleanly) {
        *out_path = generated.c_str();
        return true;
    }

    // Do not leave a partially written file behind.
    unlink(generated.c_str());
    return false;
}

}  // namespace

// =============================================================================
// DeviceRunner Implementation
// =============================================================================
Expand All @@ -64,14 +104,11 @@ int DeviceRunner::ensure_binaries_loaded(

// Write AICPU binary to temp file and dlopen
if (!aicpu_so_binary.empty()) {
aicpu_so_path_ = "/tmp/aicpu_sim_" + std::to_string(getpid()) + ".so";
std::ofstream ofs(aicpu_so_path_, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file for AICPU SO: %s", aicpu_so_path_.c_str());
if (!create_temp_so_file("/tmp/aicpu_sim_XXXXXX", aicpu_so_binary.data(), aicpu_so_binary.size(),
&aicpu_so_path_)) {
LOG_ERROR("Failed to create temp file for AICPU SO");
return -1;
}
ofs.write(reinterpret_cast<const char*>(aicpu_so_binary.data()), aicpu_so_binary.size());
ofs.close();

aicpu_so_handle_ = dlopen(aicpu_so_path_.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (aicpu_so_handle_ == nullptr) {
Expand All @@ -96,14 +133,13 @@ int DeviceRunner::ensure_binaries_loaded(

// Write AICore binary to temp file and dlopen
if (!aicore_kernel_binary.empty()) {
aicore_so_path_ = "/tmp/aicore_sim_" + std::to_string(getpid()) + ".so";
std::ofstream ofs(aicore_so_path_, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file for AICore SO: %s", aicore_so_path_.c_str());
if (!create_temp_so_file("/tmp/aicore_sim_XXXXXX",
aicore_kernel_binary.data(),
aicore_kernel_binary.size(),
&aicore_so_path_)) {
LOG_ERROR("Failed to create temp file for AICore SO");
return -1;
}
ofs.write(reinterpret_cast<const char*>(aicore_kernel_binary.data()), aicore_kernel_binary.size());
ofs.close();

aicore_so_handle_ = dlopen(aicore_so_path_.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (aicore_so_handle_ == nullptr) {
Expand Down Expand Up @@ -477,25 +513,22 @@ uint64_t DeviceRunner::upload_kernel_binary(int func_id, const uint8_t* bin_data
size_t kernel_size = callable->binary_size();

// 1. Generate temp file path
char tmpfile[256];
snprintf(tmpfile, sizeof(tmpfile), "/tmp/kernel_%d_%d.so", func_id, getpid());

// 2. Write extracted kernel binary to temp file
std::ofstream ofs(tmpfile, std::ios::binary);
if (!ofs) {
LOG_ERROR("Failed to create temp file: %s", tmpfile);
std::string tmpfile;
if (!create_temp_so_file("/tmp/kernel_" + std::to_string(func_id) + "_XXXXXX",
reinterpret_cast<const uint8_t*>(kernel_binary),
kernel_size,
&tmpfile)) {
LOG_ERROR("Failed to create temp file for kernel func_id=%d", func_id);
return 0;
}
ofs.write(reinterpret_cast<const char*>(kernel_binary), kernel_size);
ofs.close();

LOG_DEBUG("Uploading kernel .so: %s (size=%zu bytes)", tmpfile, kernel_size);
LOG_DEBUG("Uploading kernel .so: %s (size=%zu bytes)", tmpfile.c_str(), kernel_size);

// 3. dlopen to load .so (RTLD_NOW ensures all symbols resolved immediately)
void* handle = dlopen(tmpfile, RTLD_NOW | RTLD_LOCAL);
void* handle = dlopen(tmpfile.c_str(), RTLD_NOW | RTLD_LOCAL);

// 4. Remove temp file immediately (.so is already in memory)
std::remove(tmpfile);
std::remove(tmpfile.c_str());

if (!handle) {
LOG_ERROR("dlopen failed: %s", dlerror());
Expand Down
Loading
Loading