|
| 1 | +import os |
| 2 | +import logging |
| 3 | +import numpy as np |
| 4 | +import tensorrt as trt |
| 5 | +import pycuda.driver as cuda |
| 6 | +import pycuda.autoinit |
| 7 | + |
| 8 | +logging.basicConfig(level=logging.DEBUG, |
| 9 | + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", |
| 10 | + datefmt="%Y-%m-%d %H:%M:%S") |
| 11 | +logger = logging.getLogger(__name__) |
| 12 | + |
class SimpleCalibrator(trt.IInt8EntropyCalibrator2):
    """Feed random float32 batches to TensorRT's INT8 entropy calibrator.

    TensorRT drives calibration by repeatedly calling get_batch() until it
    returns None; read_calibration_cache()/write_calibration_cache() let
    subsequent builds skip calibration entirely.  Input shapes come from the
    builder config's calibration profile when one is set, otherwise from the
    network inputs with any dynamic dimensions pinned to 1.
    """

    def __init__(self, network, config):
        """
        Args:
            network: trt.INetworkDefinition, used as a shape fallback when no
                calibration profile is set on the config.
            config: trt.IBuilderConfig whose calibration profile (if any)
                supplies the calibration input shapes.
        """
        super().__init__()
        # Notes from experimentation:
        #   get_batch_size() must not raise, but returning 0 or -1 appears to
        #   have no consequence with or without a calibration cache;
        #   get_batch() is what actually drives calibration.
        self.batch_size = -1          # sentinel; derived from shapes in get_batch()
        self.shapes = []              # one concrete shape per network input
        self.device_inputs = None     # lazily allocated device buffers, reused per batch
        # The iterator doubles as the calibration-sample counter, so input
        # names don't have to be passed to the constructor.
        num_calibration_samples = 1000
        self.iterator = iter(range(num_calibration_samples))
        self.cache_file = "simple_calibration.cache"
        self.network = network
        self.calib_profile = config.get_calibration_profile()

    def get_batch(self, input_names, p_str=None):
        """Copy one random batch per input to device memory.

        Returns:
            list[int]: device pointers, one per name in input_names, or
            None once num_calibration_samples batches have been served,
            which signals TensorRT that calibration data is exhausted.
        """
        try:
            # Advance the sample counter; StopIteration ends calibration.
            next(self.iterator)
            if not self.shapes:
                self.set_shapes(input_names)

            if self.device_inputs is None:
                # One float32-sized device buffer per input, sized once and
                # reused for every batch.
                self.device_inputs = [
                    cuda.mem_alloc(np.zeros(shape, dtype=np.float32).nbytes)
                    for shape in self.shapes
                ]

            if self.batch_size < 1:
                # BUG FIX: the original test was `if not self.batch_size:`,
                # which is never true for the -1 sentinel (-1 is truthy), so
                # the batch size was never derived from the shapes.  Assumes
                # every input shares the first input's batch dimension.
                self.batch_size = self.shapes[0][0]

            batches = [np.random.random(shape).astype(np.float32) for shape in self.shapes]
            for device_input, batch in zip(self.device_inputs, batches):
                cuda.memcpy_htod(device_input, batch)

            return [int(ptr) for ptr in self.device_inputs]
        except StopIteration:
            # Calibration data exhausted.
            return None

    def get_batch_size(self):
        """Return the calibration batch size (-1 until derived in get_batch)."""
        return self.batch_size

    def set_shapes(self, input_names):
        """Resolve one concrete shape per input and cache them in self.shapes."""
        if self.calib_profile:
            # BUG FIX: IOptimizationProfile.get_shape() returns the
            # [min, opt, max] shape triple, not a single shape; the original
            # stored the whole triple, which is not a valid numpy shape.  A
            # calibration profile requires min == opt == max, so the opt
            # entry is the concrete shape.
            self.shapes = [self.calib_profile.get_shape(name)[1] for name in input_names]
        else:
            self.shapes = []
            # Assumes the order of input_names matches the network input indices.
            for i, name in enumerate(input_names):
                shape = self.network.get_input(i).shape
                resolved = []
                found_dynamic = False
                for dim in shape:
                    # Pin dynamic (negative) dimensions to 1 so a concrete
                    # buffer can be allocated.
                    if dim < 0:
                        dim = 1
                        found_dynamic = True
                    resolved.append(dim)

                resolved = tuple(resolved)
                if found_dynamic:
                    logger.warning("[{}] has dynamic shape: {}. Set to {} instead.".format(name, shape, resolved))

                self.shapes.append(resolved)

    def read_calibration_cache(self):
        """Return cached calibration data if present; implicitly None otherwise.

        Returning None tells TensorRT to run calibration from scratch.
        """
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "rb") as f:
                logger.info("Using calibration cache to save time: {:}".format(self.cache_file))
                return f.read()

    def write_calibration_cache(self, cache):
        """Persist TensorRT's calibration scales for reuse by later builds."""
        with open(self.cache_file, "wb") as f:
            logger.info("Caching calibration data for future use: {:}".format(self.cache_file))
            f.write(cache)
0 commit comments