From 4e680e46d01dfb512a14a2ecca52898ed968bfaf Mon Sep 17 00:00:00 2001 From: lipracer Date: Mon, 15 Dec 2025 13:28:05 +0800 Subject: [PATCH 1/2] fix: correct naming typo in unused file --- fserver/csrc/public.hpp | 2 +- include/ps/internal/assign_op.h | 28 ++++++++++++++-------------- include/ps/internal/postoffice.h | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fserver/csrc/public.hpp b/fserver/csrc/public.hpp index a1143e3..59d2b92 100644 --- a/fserver/csrc/public.hpp +++ b/fserver/csrc/public.hpp @@ -180,7 +180,7 @@ void init() { worker_mask_ = (1 << num_worker_) - 1; q_.resize(num_worker_); - q_signal_.store(0);; + q_signal_.store(0); ps::StartPS(0, role_, group_size_ * node_rank_ + gpu_ + offset, true); if (role_ == Node::WORKER) { fworker_ = new AFTensorWorker(instance_id_ ); diff --git a/include/ps/internal/assign_op.h b/include/ps/internal/assign_op.h index 3e90e8b..31e98b5 100644 --- a/include/ps/internal/assign_op.h +++ b/include/ps/internal/assign_op.h @@ -27,22 +27,22 @@ template inline void AssignFunc(const T& lhs, AssignOp op, T* rhs) { switch (op) { case ASSIGN: - *right = left; + *rhs = lhs; break; case PLUS: - *right += left; + *rhs += lhs; break; case MINUS: - *right -= left; + *rhs -= lhs; break; case TIMES: - *right *= left; + *rhs *= lhs; break; case DIVIDE: - *right /= left; + *rhs /= lhs; break; default: - LOG(FATAL) << "use AssignOpInt.."; + PS_LOG(FATAL) << "use AssignOpInt.."; } } @@ -54,28 +54,28 @@ template inline void AssignFuncInt(const T& lhs, AssignOp op, T* rhs) { switch (op) { case ASSIGN: - *right = left; + *rhs = lhs; break; case PLUS: - *right += left; + *rhs += lhs; break; case MINUS: - *right -= left; + *rhs -= lhs; break; case TIMES: - *right *= left; + *rhs *= lhs; break; case DIVIDE: - *right /= left; + *rhs /= lhs; break; case AND: - *right &= left; + *rhs &= lhs; break; case OR: - *right |= left; + *rhs |= lhs; break; case XOR: - *right ^= left; + *rhs ^= lhs; break; } } diff --git a/include/ps/internal/postoffice.h b/include/ps/internal/postoffice.h index c471600..47ba7be 100644 --- a/include/ps/internal/postoffice.h +++ b/include/ps/internal/postoffice.h @@ -155,7 +155,7 @@ class Postoffice { /** * \brief convert a server group's rank into a instance id with the - * provded instance offset from that group + * provided instance offset from that group * \param rank the server group rank * \param instance_idx the offset of the instance in the group */ From f5b87dc9604517d01a4bc54fa1789396ed559fe4 Mon Sep 17 00:00:00 2001 From: lipracer Date: Mon, 15 Dec 2025 19:11:28 +0800 Subject: [PATCH 2/2] refine --- fserver/csrc/public.hpp | 9 ++++----- include/ps/internal/backend.h | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fserver/csrc/public.hpp b/fserver/csrc/public.hpp index 59d2b92..7299ef9 100644 --- a/fserver/csrc/public.hpp +++ b/fserver/csrc/public.hpp @@ -55,12 +55,11 @@ void RequestHandler(const AFTensorMeta& req_meta, AFTensorServer* server) { std::lock_guard lock(mu_); meta_map_[handler_counter_] = req_meta; - q_[req_meta.sender_rank].emplace_back(handler_counter_, - std::move(tensors), + q_[req_meta.sender_rank].emplace_back(handler_counter_, std::move(tensors), keys); q_signal_.fetch_or(1 << req_meta.sender_rank); + ++handler_counter_; } - ++handler_counter_; } std::vector get_batch() { @@ -183,10 +182,10 @@ void init() { q_signal_.store(0); ps::StartPS(0, role_, group_size_ * node_rank_ + gpu_ + offset, true); if (role_ == Node::WORKER) { - fworker_ = new AFTensorWorker(instance_id_ ); + fworker_ = new AFTensorWorker(instance_id_); barrier(true, true); } else if (role_ == Node::SERVER) { - fserver_ = new AFTensorServer(instance_id_ ); + fserver_ = new AFTensorServer(instance_id_); fserver_->SetRequestHandle(RequestHandler); ps::RegisterExitCallback([]() { delete fserver_; }); barrier(true, true); diff --git a/include/ps/internal/backend.h b/include/ps/internal/backend.h index 1c84aa0..63dfb39 100644 --- a/include/ps/internal/backend.h +++ b/include/ps/internal/backend.h @@ -109,6 +109,9 @@ class Backend { static Backend* backend_impl = nullptr; if (backend_impl == nullptr) { std::unique_lock lock(backends_mutex_); + if (backend_impl) { + return backend_impl; + } std::string backend_type = "GPU"; backend_type = Environment::Get()->find("STEPMESH_BAKCEND", backend_type); PS_CHECK_NE(backends_.find(backend_type), backends_.end())