88from .common_structs import *
99from .streams .SystemInfoStream import PROCESSOR_ARCHITECTURE
1010
11- class AMinidumpBufferedMemorySegment :
12- def __init__ (self ):
13- self .start_address = None
14- self .end_address = None
11+
class VirtualSegment:
    """
    A cached slice of a memory segment.

    start / end are the bounds of the slice, start_file_address is the file
    offset the slice was read from, and data holds the bytes once loaded.
    """
    def __init__(self, start, end, start_file_address):
        self.start, self.end = start, end
        self.start_file_address = start_file_address

        # filled in by whoever reads the bytes from the file
        self.data = None

    def inrange(self, start, end):
        """
        Tells whether the requested [start, end] span lies fully inside this slice
        """
        lower_ok = self.start <= start
        return lower_ok and end <= self.end
1622
17- async def load (self , memory_segment , file_handle ):
23+ class AMinidumpBufferedMemorySegment :
24+ def __init__ (self , memory_segment , chunksize = 10 * 1024 ):
1825 self .start_address = memory_segment .start_virtual_address
1926 self .end_address = memory_segment .end_virtual_address
20- await file_handle .seek (memory_segment .start_file_address )
21- self .data = await file_handle .read (memory_segment .size )
27+ self .total_size = memory_segment .end_virtual_address - memory_segment .start_virtual_address
28+ self .start_file_address = memory_segment .start_file_address
29+ self .chunksize = chunksize
30+ self .chunks = []
2231
def inrange(self, position):
    """
    Tells whether position falls between start_address and end_address (both bounds included)
    """
    if position < self.start_address:
        return False
    return position <= self.end_address
@@ -28,10 +37,43 @@ def remaining_len(self, position):
2837 return None
2938 return self .end_address - position
3039
async def find(self, file_handle, pattern, startpos=0):
    """
    Searches for pattern in the whole segment.

    file_handle: async file object the segment data is read from
    pattern: the byte sequence to look for
    startpos: offset (relative to the segment start) where the search begins.
              Defaults to 0, because some callers do not pass it.

    Returns the offset of the first match relative to the segment start,
    or -1 when the pattern is not present.
    """
    # Ask for the full [0, total_size) span explicitly. Passing -1 as the end
    # would be used as a slice bound: it drops the last byte and, for large
    # segments, only the first chunk would be searched.
    data = await self.read(file_handle, 0, self.total_size)
    return data.find(pattern, startpos)
43+
async def read(self, file_handle, start, end):
    """
    Returns the bytes of the segment between the offsets start and end.

    file_handle: async file object providing seek() and read()
    start: offset of the first byte, relative to the beginning of the segment
    end: offset one past the last byte, relative to the beginning of the segment.
         None means "until the end of the segment"; that read is not cached.

    Slices fetched from the file are kept in self.chunks and reused by later
    calls whose [start, end] span is fully covered by one of them.
    """
    if end is None:
        # The length must be computed from segment-relative values only.
        # Mixing the virtual end address with a file offset gives a huge or
        # negative length, which reads far beyond the segment.
        await file_handle.seek(self.start_file_address + start)
        return await file_handle.read(self.total_size - start)

    for cached in self.chunks:
        if cached.inrange(start, end):
            return cached.data[start - cached.start:end - cached.start]

    if self.total_size <= 2 * self.chunksize:
        # small segment: cache it as a whole
        chunk_start = 0
        chunk_len = self.total_size
    else:
        chunk_start = start
        # fetch at least one chunksize, but never past the end of the segment
        # (start is an offset, so it is compared with total_size, not with a
        # virtual address)
        chunk_len = min(max(end - start, self.chunksize), self.total_size - start)

    vs = VirtualSegment(chunk_start, chunk_start + chunk_len, self.start_file_address + chunk_start)
    await file_handle.seek(vs.start_file_address)
    vs.data = await file_handle.read(chunk_len)
    self.chunks.append(vs)

    return vs.data[start - vs.start:end - vs.start]
71+
3172class AMinidumpBufferedReader :
32- def __init__ (self , reader ):
73+ def __init__ (self , reader , segment_chunk_size = 10 * 1024 ):
3374 self .reader = reader
3475 self .memory_segments = []
76+ self .segment_chunk_size = segment_chunk_size
3577
3678 self .current_segment = None
3779 self .current_position = None
@@ -50,8 +92,7 @@ async def _select_segment(self, requested_position):
5092 # not in cache, check if it's present in memory space. if yes then create a new buffered memory object, and copy data
5193 for memory_segment in self .reader .memory_segments :
5294 if memory_segment .inrange (requested_position ):
53- newsegment = AMinidumpBufferedMemorySegment ()
54- await newsegment .load (memory_segment , self .reader .file_handle )
95+ newsegment = AMinidumpBufferedMemorySegment (memory_segment , chunksize = self .segment_chunk_size )
5596 self .memory_segments .append (newsegment )
5697 self .current_segment = newsegment
5798 self .current_position = requested_position
@@ -118,7 +159,7 @@ async def peek(self, length):
118159 t = self .current_position + length
119160 if not self .current_segment .inrange (t ):
120161 raise Exception ('Would read over segment boundaries!' )
121- return self .current_segment .data [ self .current_position - self .current_segment .start_address : t - self .current_segment .start_address ]
162+ return await self .current_segment .read ( self .reader . file_handle , self . current_position - self .current_segment .start_address , t - self .current_segment .start_address )
122163
123164 async def read (self , size = - 1 ):
124165 """
@@ -133,15 +174,15 @@ async def read(self, size = -1):
133174
134175 old_new_pos = self .current_position
135176 self .current_position = self .current_segment .end_address
136- return self .current_segment .data [ old_new_pos - self .current_segment .start_address :]
177+ return await self .current_segment .read ( self . reader . file_handle , old_new_pos - self .current_segment .start_address , None )
137178
138179 t = self .current_position + size
139180 if not self .current_segment .inrange (t ):
140181 raise Exception ('Would read over segment boundaries!' )
141182
142183 old_new_pos = self .current_position
143184 self .current_position = t
144- return self .current_segment .data [ old_new_pos - self .current_segment .start_address : t - self .current_segment .start_address ]
185+ return await self .current_segment .read ( self . reader . file_handle , old_new_pos - self .current_segment .start_address , t - self .current_segment .start_address )
145186
146187 async def read_int (self ):
147188 """
@@ -173,7 +214,7 @@ async def find(self, pattern):
173214 """
174215 Searches for a pattern in the current memory segment
175216 """
176- pos = self .current_segment .data . find (pattern )
217+ pos = await self .current_segment .find (self . reader . file_handle , pattern )
177218 if pos == - 1 :
178219 return - 1
179220 return pos + self .current_position
@@ -185,7 +226,7 @@ async def find_all(self, pattern):
185226 pos = []
186227 last_found = - 1
187228 while True :
188- last_found = self .current_segment .data . find (pattern , last_found + 1 )
229+ last_found = await self .current_segment .find (self . reader . file_handle , pattern , last_found + 1 )
189230 if last_found == - 1 :
190231 break
191232 pos .append (last_found + self .current_segment .start_address )
@@ -227,7 +268,7 @@ async def get_ptr_with_offset(self, pos):
227268 return await self .read_uint ()
228269
async def find_in_module(self, module_name, pattern, find_first=False, reverse_order=False):
    """
    Searches for pattern in the memory of the given module.

    The call is forwarded to the underlying reader's search_module, using this
    buffered reader's segment_chunk_size as chunksize; its result is returned as-is.
    """
    return await self.reader.search_module(
        module_name,
        pattern,
        find_first=find_first,
        reverse_order=reverse_order,
        chunksize=self.segment_chunk_size,
    )
232273
233274
@@ -262,32 +303,32 @@ def __init__(self, minidumpfile):
262303 else :
263304 raise Exception ('Unknown processor architecture %s! Please fix and submit PR!' % self .sysinfo .ProcessorArchitecture )
264305
def get_buffered_reader(self, segment_chunk_size=10 * 1024):
    """
    Creates a new AMinidumpBufferedReader on top of this reader.

    segment_chunk_size: passed to the buffered reader unchanged
    """
    buffered_reader = AMinidumpBufferedReader(self, segment_chunk_size=segment_chunk_size)
    return buffered_reader
267308
def get_module_by_name(self, module_name):
    """
    Returns the first module whose file name (the path's directory part is
    ignored) contains module_name, or None if there is no such module.
    """
    candidates = (
        module for module in self.modules
        if module_name in ntpath.basename(module.name)
    )
    return next(candidates, None)
273314
async def search_module(self, module_name, pattern, find_first=False, reverse_order=False, chunksize=10 * 1024):
    """
    Searches for pattern in the memory segments that start inside the given module.

    module_name: name (or part of the name) of the module, resolved with get_module_by_name
    pattern: the byte sequence to look for
    find_first: when True the search stops at the first segment that yields a hit
    reverse_order: accepted but not used by this method
    chunksize: forwarded to the asearch method of each segment

    Returns the list of hits collected from the segments.
    Raises Exception when the module cannot be found.
    """
    module = self.get_module_by_name(module_name)
    if module is None:
        raise Exception('Could not find module! %s' % module_name)

    hits = []
    for segment in self.memory_segments:
        # only segments whose start address lies inside the module are searched
        if not (module.baseaddress <= segment.start_virtual_address < module.endaddress):
            continue
        hits += await segment.asearch(pattern, self.file_handle, find_first=find_first, chunksize=chunksize)
        if find_first is True and len(hits) > 0:
            return hits

    return hits
286327
async def search(self, pattern, find_first=False, chunksize=10 * 1024):
    """
    Searches for pattern in every memory segment of the dump.

    pattern: the byte sequence to look for
    find_first: when True the search stops at the first segment that yields a
                hit (same behaviour as search_module) instead of scanning the
                remaining segments as well
    chunksize: forwarded to the asearch method of each segment

    Returns the list of hits collected from the segments.
    """
    hits = []
    for ms in self.memory_segments:
        hits += await ms.asearch(pattern, self.file_handle, find_first=find_first, chunksize=chunksize)
        # without this early exit find_first would still return one hit per segment
        if find_first is True and len(hits) > 0:
            return hits

    return hits
293334
0 commit comments