Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea/
81 changes: 41 additions & 40 deletions manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tensorflow as tf
#from tensorflow.python.client import device_lib


def check_gpus():
'''
GPU available check
Expand All @@ -33,8 +34,9 @@ def check_gpus():
return False
return True


if check_gpus():
def parse(line,qargs):
def parse(line, qargs):
'''
line:
a line of text
Expand All @@ -45,11 +47,11 @@ def parse(line,qargs):
Parsing a line of csv format text returned by nvidia-smi
解析一行nvidia-smi返回的csv格式文本
'''
numberic_args = ['memory.free', 'memory.total', 'power.draw', 'power.limit']#可计数的参数
power_manage_enable=lambda v:(not 'Not Support' in v)#lambda表达式,显卡是否滋瓷power management(笔记本可能不滋瓷
to_numberic=lambda v:float(v.upper().strip().replace('MIB','').replace('W',''))#带单位字符串去掉单位
process = lambda k,v:((int(to_numberic(v)) if power_manage_enable(v) else 1) if k in numberic_args else v.strip())
return {k:process(k,v) for k,v in zip(qargs,line.strip().split(','))}
numberic_args = ['memory.free', 'memory.total', 'power.draw', 'power.limit'] # 可计数的参数
power_manage_enable = lambda v: ('Not Support' not in v) # lambda表达式,显卡是否支持power management(笔记本可能不支持
to_numberic = lambda v: float(v.upper().strip().replace('MIB', '').replace('W', '')) # 带单位字符串去掉单位
process = lambda k, v: ((int(to_numberic(v)) if power_manage_enable(v) else 1) if k in numberic_args else v.strip())
return {k: process(k, v) for k, v in zip(qargs, line.strip().split(','))}

def query_gpu(qargs=[]):
'''
Expand All @@ -60,16 +62,16 @@ def query_gpu(qargs=[]):
Querying GPUs infos
查询GPU信息
'''
qargs =['index','gpu_name', 'memory.free', 'memory.total', 'power.draw', 'power.limit']+ qargs
qargs =['index', 'gpu_name', 'memory.free', 'memory.total', 'power.draw', 'power.limit'] + qargs
cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(','.join(qargs))
results = os.popen(cmd).readlines()
return [parse(line,qargs) for line in results]
return [parse(line, qargs) for line in results]

def by_power(d):
'''
helper function for sorting gpus by power
'''
power_infos=(d['power.draw'],d['power.limit'])
power_infos = (d['power.draw'], d['power.limit'])
if any(v==1 for v in power_infos):
print('Power management unable for GPU {}'.format(d['index']))
return 1
Expand All @@ -86,34 +88,33 @@ class GPUManager():
最空闲的设备。在一个GPUManager对象内会记录每个GPU是否已被指定,
优先选择未指定的GPU。
'''
def __init__(self,qargs=[]):
'''
'''
self.qargs=qargs
self.gpus=query_gpu(qargs)

def __init__(self, qargs=[]):
self.qargs = qargs
self.gpus = query_gpu(qargs)
for gpu in self.gpus:
gpu['specified']=False
self.gpu_num=len(self.gpus)
gpu['specified'] = False
self.gpu_num = len(self.gpus)

def _sort_by_memory(self,gpus,by_size=False):
def _sort_by_memory(self, gpus, by_size=False):
if by_size:
print('Sorted by free memory size')
return sorted(gpus,key=lambda d:d['memory.free'],reverse=True)
return sorted(gpus, key=lambda d: d['memory.free'], reverse=True)
else:
print('Sorted by free memory rate')
return sorted(gpus,key=lambda d:float(d['memory.free'])/ d['memory.total'],reverse=True)
return sorted(gpus, key=lambda d: float(d['memory.free'])/ d['memory.total'], reverse=True)

def _sort_by_power(self,gpus):
return sorted(gpus,key=by_power)
def _sort_by_power(self, gpus):
return sorted(gpus, key=by_power)

def _sort_by_custom(self,gpus,key,reverse=False,qargs=[]):
if isinstance(key,str) and (key in qargs):
return sorted(gpus,key=lambda d:d[key],reverse=reverse)
if isinstance(key,type(lambda a:a)):
return sorted(gpus,key=key,reverse=reverse)
raise ValueError("The argument 'key' must be a function or a key in query args,please read the documention of nvidia-smi")
def _sort_by_custom(self, gpus, key, reverse=False, qargs=[]):
if isinstance(key, str) and (key in qargs):
return sorted(gpus, key=lambda d: d[key], reverse=reverse)
if isinstance(key, type(lambda a: a)):
return sorted(gpus, key=key, reverse=reverse)
raise ValueError("The argument 'key' must be a function or a key in query args, please read the documention of nvidia-smi")

def auto_choice(self,mode=0):
def auto_choice(self, mode=0):
'''
mode:
0:(default)sorted by free memory size
Expand All @@ -123,25 +124,25 @@ def auto_choice(self,mode=0):
ones
自动选择最空闲GPU
'''
for old_infos,new_infos in zip(self.gpus,query_gpu(self.qargs)):
for old_infos, new_infos in zip(self.gpus, query_gpu(self.qargs)):
old_infos.update(new_infos)
unspecified_gpus=[gpu for gpu in self.gpus if not gpu['specified']] or self.gpus
unspecified_gpus = [gpu for gpu in self.gpus if not gpu['specified']] or self.gpus

if mode==0:
if mode == 0:
print('Choosing the GPU device has largest free memory...')
chosen_gpu=self._sort_by_memory(unspecified_gpus,True)[0]
elif mode==1:
chosen_gpu = self._sort_by_memory(unspecified_gpus, True)[0]
elif mode == 1:
print('Choosing the GPU device has highest free memory rate...')
chosen_gpu=self._sort_by_power(unspecified_gpus)[0]
elif mode==2:
chosen_gpu = self._sort_by_power(unspecified_gpus)[0]
elif mode == 2:
print('Choosing the GPU device by power...')
chosen_gpu=self._sort_by_power(unspecified_gpus)[0]
chosen_gpu = self._sort_by_power(unspecified_gpus)[0]
else:
print('Given an unaviliable mode,will be chosen by memory')
chosen_gpu=self._sort_by_memory(unspecified_gpus)[0]
chosen_gpu['specified']=True
index=chosen_gpu['index']
print('Using GPU {i}:\n{info}'.format(i=index,info='\n'.join([str(k)+':'+str(v) for k,v in chosen_gpu.items()])))
chosen_gpu = self._sort_by_memory(unspecified_gpus)[0]
chosen_gpu['specified'] = True
index = chosen_gpu['index']
print('Using GPU {i}:\n{info}'.format(i=index, info='\n'.join([str(k)+':'+str(v) for k, v in chosen_gpu.items()])))
return tf.device('/gpu:{}'.format(index))
else:
raise ImportError('GPU available check failed')