perf: TTS 파이프라인 ArrayPool 완전 적용으로 LOH 제거

ImGdevel · ImGdevel · commit ca62d4d595d3 · 2025-09-14T15:14:39.000+09:00
TextToSpeechResponse에 IMemoryOwner<byte> AudioMemoryOwner 및 AudioDataSize 속성 추가 (기존 AudioData는 레거시 호환용으로 유지)
TextToSpeechClient에서 MemoryPool<byte>.Shared.Rent로 결과 버퍼를 대여하고, ReadAudioDataWithPoolAsync가 (IMemoryOwner<byte>?, 실제 크기) 튜플을 반환하도록 변경
ChatTTSProcessor에서 WithAudioData 대신 WithAudioMemory를 호출하도록 변경
이중/삼중 LOH 할당 제거로 GC 압박 대폭 감소 (단, 청크를 모으는 중간 MemoryStream 버퍼는 이 변경 후에도 그대로 할당됨)
diff --git a/ProjectVG.Application/Services/Chat/Processors/ChatTTSProcessor.cs b/ProjectVG.Application/Services/Chat/Processors/ChatTTSProcessor.cs
@@ -46,10 +46,14 @@ public async Task ProcessAsync(ChatProcessContext context)
             var processedCount = 0;
 
             foreach (var (idx, ttsResult) in ttsResults.OrderBy(x => x.idx)) {
-                if (ttsResult.Success == true && ttsResult.AudioData != null) {
+                if (ttsResult.Success == true && ttsResult.AudioMemoryOwner != null) {
                     var segment = context.Segments?[idx];
                     if (segment != null && context.Segments != null) {
-                        context.Segments[idx] = segment.WithAudioData(ttsResult.AudioData, ttsResult.ContentType!, ttsResult.AudioLength ?? 0f);
+                        context.Segments[idx] = segment.WithAudioMemory(
+                            ttsResult.AudioMemoryOwner,
+                            ttsResult.AudioDataSize,
+                            ttsResult.ContentType!,
+                            ttsResult.AudioLength ?? 0f);
                     }
                     
                     if (ttsResult.AudioLength.HasValue) {
diff --git a/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/Models/TextToSpeechResponse.cs b/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/Models/TextToSpeechResponse.cs
@@ -1,4 +1,5 @@
 using System.Text.Json.Serialization;
+using System.Buffers;
 
 namespace ProjectVG.Infrastructure.Integrations.TextToSpeechClient.Models
 {
@@ -17,11 +18,23 @@ public class TextToSpeechResponse
         public string? ErrorMessage { get; set; }
 
         /// <summary>
-        /// 오디오 데이터 (바이트 배열)
+        /// 오디오 데이터 (바이트 배열) - 레거시 호환성용
         /// </summary>
         [JsonIgnore]
         public byte[]? AudioData { get; set; }
 
+        /// <summary>
+        /// ArrayPool 기반 오디오 메모리 소유자 (LOH 방지)
+        /// </summary>
+        [JsonIgnore]
+        public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
+
+        /// <summary>
+        /// 실제 오디오 데이터 크기
+        /// </summary>
+        [JsonIgnore]
+        public int AudioDataSize { get; set; }
+
         /// <summary>
         /// 오디오 길이 (초)
         /// </summary>
diff --git a/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/TextToSpeechClient.cs b/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/TextToSpeechClient.cs
@@ -50,8 +50,10 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
                     return voiceResponse;
                 }
 
-                // 스트림 기반으로 음성 데이터 읽기 (LOH 방지)
-                voiceResponse.AudioData = await ReadAudioDataWithPoolAsync(response.Content);
+                // ArrayPool 기반으로 음성 데이터 읽기 (LOH 방지)
+                var (memoryOwner, dataSize) = await ReadAudioDataWithPoolAsync(response.Content);
+                voiceResponse.AudioMemoryOwner = memoryOwner;
+                voiceResponse.AudioDataSize = dataSize;
                 voiceResponse.ContentType = response.Content.Headers.ContentType?.ToString();
 
                 if (response.Headers.Contains("X-Audio-Length"))
@@ -64,7 +66,7 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
                 }
 
                 _logger.LogDebug("[TTS][Response] 오디오 길이: {AudioLength:F2}초, ContentType: {ContentType}, 바이트: {Length}, 소요시간: {Elapsed}ms",
-                    voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioData?.Length ?? 0, elapsed);
+                    voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioDataSize, elapsed);
 
                 return voiceResponse;
             }
@@ -82,42 +84,50 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
         /// <summary>
         /// ArrayPool을 사용하여 스트림 기반으로 음성 데이터를 읽습니다 (LOH 할당 방지)
         /// </summary>
-        private async Task<byte[]?> ReadAudioDataWithPoolAsync(HttpContent content)
+        private async Task<(IMemoryOwner<byte>?, int)> ReadAudioDataWithPoolAsync(HttpContent content)
         {
             const int chunkSize = 32768; // 32KB 청크 크기
-            byte[]? buffer = null;
+            byte[]? readBuffer = null;
             MemoryStream? memoryStream = null;
 
             try
             {
-                buffer = _arrayPool.Rent(chunkSize);
+                readBuffer = _arrayPool.Rent(chunkSize);
                 memoryStream = new MemoryStream();
 
                 using var stream = await content.ReadAsStreamAsync();
                 int bytesRead;
 
                 // 청크 단위로 데이터 읽어서 MemoryStream에 복사
-                while ((bytesRead = await stream.ReadAsync(buffer, 0, chunkSize)) > 0)
+                while ((bytesRead = await stream.ReadAsync(readBuffer, 0, chunkSize)) > 0)
                 {
-                    await memoryStream.WriteAsync(buffer, 0, bytesRead);
+                    await memoryStream.WriteAsync(readBuffer, 0, bytesRead);
                 }
 
-                var result = memoryStream.ToArray();
+                var totalSize = (int)memoryStream.Length;
+
+                // ArrayPool에서 최종 데이터 크기만큼 메모리 할당
+                var resultMemoryOwner = MemoryPool<byte>.Shared.Rent(totalSize);
+
+                // MemoryStream에서 최종 메모리로 복사
+                memoryStream.Position = 0;
+                await memoryStream.ReadAsync(resultMemoryOwner.Memory.Slice(0, totalSize));
+
                 _logger.LogDebug("[TTS][ArrayPool] 음성 데이터 읽기 완료: {Size} bytes, 청크 크기: {ChunkSize}",
-                    result.Length, chunkSize);
+                    totalSize, chunkSize);
 
-                return result;
+                return (resultMemoryOwner, totalSize);
             }
             catch (Exception ex)
             {
                 _logger.LogError(ex, "[TTS][ArrayPool] 음성 데이터 읽기 실패");
-                return null;
+                return (null, 0);
             }
             finally
             {
-                if (buffer != null)
+                if (readBuffer != null)
                 {
-                    _arrayPool.Return(buffer);
+                    _arrayPool.Return(readBuffer);
                 }
                 memoryStream?.Dispose();
             }

Original file line number	Diff line number	Diff line change
`@@ -50,8 +50,10 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re`
`50`	`50`	`return voiceResponse;`
`51`	`51`	`}`
`52`	`52`
`53`		`- // 스트림 기반으로 음성 데이터 읽기 (LOH 방지)`
`54`		`- voiceResponse.AudioData = await ReadAudioDataWithPoolAsync(response.Content);`
	`53`	`+ // ArrayPool 기반으로 음성 데이터 읽기 (LOH 방지)`
	`54`	`+ var (memoryOwner, dataSize) = await ReadAudioDataWithPoolAsync(response.Content);`
	`55`	`+ voiceResponse.AudioMemoryOwner = memoryOwner;`
	`56`	`+ voiceResponse.AudioDataSize = dataSize;`
`55`	`57`	`voiceResponse.ContentType = response.Content.Headers.ContentType?.ToString();`
`56`	`58`
`57`	`59`	`if (response.Headers.Contains("X-Audio-Length"))`
`@@ -64,7 +66,7 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re`
`64`	`66`	`}`
`65`	`67`
`66`	`68`	`_logger.LogDebug("[TTS][Response] 오디오 길이: {AudioLength:F2}초, ContentType: {ContentType}, 바이트: {Length}, 소요시간: {Elapsed}ms",`
`67`		`- voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioData?.Length ?? 0, elapsed);`
	`69`	`+ voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioDataSize, elapsed);`
`68`	`70`
`69`	`71`	`return voiceResponse;`
`70`	`72`	`}`
`@@ -82,42 +84,50 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re`
`82`	`84`	`/// <summary>`
`83`	`85`	`/// ArrayPool을 사용하여 스트림 기반으로 음성 데이터를 읽습니다 (LOH 할당 방지)`
`84`	`86`	`/// </summary>`
`85`		`- private async Task<byte[]?> ReadAudioDataWithPoolAsync(HttpContent content)`
	`87`	`+ private async Task<(IMemoryOwner<byte>?, int)> ReadAudioDataWithPoolAsync(HttpContent content)`
`86`	`88`	`{`
`87`	`89`	`const int chunkSize = 32768; // 32KB 청크 크기`
`88`		`- byte[]? buffer = null;`
	`90`	`+ byte[]? readBuffer = null;`
`89`	`91`	`MemoryStream? memoryStream = null;`
`90`	`92`
`91`	`93`	`try`
`92`	`94`	`{`
`93`		`- buffer = _arrayPool.Rent(chunkSize);`
	`95`	`+ readBuffer = _arrayPool.Rent(chunkSize);`
`94`	`96`	`memoryStream = new MemoryStream();`
`95`	`97`
`96`	`98`	`using var stream = await content.ReadAsStreamAsync();`
`97`	`99`	`int bytesRead;`
`98`	`100`
`99`	`101`	`// 청크 단위로 데이터 읽어서 MemoryStream에 복사`
`100`		`- while ((bytesRead = await stream.ReadAsync(buffer, 0, chunkSize)) > 0)`
	`102`	`+ while ((bytesRead = await stream.ReadAsync(readBuffer, 0, chunkSize)) > 0)`
`101`	`103`	`{`
`102`		`- await memoryStream.WriteAsync(buffer, 0, bytesRead);`
	`104`	`+ await memoryStream.WriteAsync(readBuffer, 0, bytesRead);`
`103`	`105`	`}`
`104`	`106`
`105`		`- var result = memoryStream.ToArray();`
	`107`	`+ var totalSize = (int)memoryStream.Length;`
	`108`	`+`
	`109`	`+ // ArrayPool에서 최종 데이터 크기만큼 메모리 할당`
	`110`	`+ var resultMemoryOwner = MemoryPool<byte>.Shared.Rent(totalSize);`
	`111`	`+`
	`112`	`+ // MemoryStream에서 최종 메모리로 복사`
	`113`	`+ memoryStream.Position = 0;`
	`114`	`+ await memoryStream.ReadAsync(resultMemoryOwner.Memory.Slice(0, totalSize));`
	`115`	`+`
`106`	`116`	`_logger.LogDebug("[TTS][ArrayPool] 음성 데이터 읽기 완료: {Size} bytes, 청크 크기: {ChunkSize}",`
`107`		`- result.Length, chunkSize);`
	`117`	`+ totalSize, chunkSize);`
`108`	`118`
`109`		`- return result;`
	`119`	`+ return (resultMemoryOwner, totalSize);`
`110`	`120`	`}`
`111`	`121`	`catch (Exception ex)`
`112`	`122`	`{`
`113`	`123`	`_logger.LogError(ex, "[TTS][ArrayPool] 음성 데이터 읽기 실패");`
`114`		`- return null;`
	`124`	`+ return (null, 0);`
`115`	`125`	`}`
`116`	`126`	`finally`
`117`	`127`	`{`
`118`		`- if (buffer != null)`
	`128`	`+ if (readBuffer != null)`
`119`	`129`	`{`
`120`		`- _arrayPool.Return(buffer);`
	`130`	`+ _arrayPool.Return(readBuffer);`
`121`	`131`	`}`
`122`	`132`	`memoryStream?.Dispose();`
`123`	`133`	`}`