@@ -1,18 +1,18 @@
 import asyncio
 import logging
+from asyncio import TimeoutError
+from asyncio.queues import QueueEmpty
 from datetime import datetime, timezone
-from typing import Optional, Any, Dict, Iterator, AsyncIterator
+from typing import Any, AsyncIterator, Dict, Optional

+from aiobotocore.session import AioSession
 from aiohttp import ClientConnectionError
-from asyncio import TimeoutError
-from asyncio.queues import QueueEmpty
 from botocore.exceptions import ClientError
-from aiobotocore.session import AioSession

-from .utils import Throttler
 from .base import Base
-from .checkpointers import MemoryCheckPointer, CheckPointer
+from .checkpointers import CheckPointer, MemoryCheckPointer
 from .processors import JsonProcessor, Processor
+from .utils import Throttler

 log = logging.getLogger(__name__)

@@ -106,7 +106,8 @@ async def close(self):

         if self.checkpointer:
             await self.checkpointer.close()
-        await self.client.close()
+        if self.client is not None:
+            await self.client.close()

     async def flush(self):

@@ -123,15 +124,29 @@ async def flush(self):
                 await shard["fetch"]

     async def _fetch(self):
+        error_count = 0
+        max_errors = 10
+
         while self.is_fetching:
             # Ensure fetch is performed at most 5 times per second (the limit per shard)
             await asyncio.sleep(0.2)
             try:
                 await self.fetch()
+                error_count = 0  # Reset error count on successful fetch
             except asyncio.CancelledError:
-                pass
+                log.debug("Fetch task cancelled")
+                self.is_fetching = False
+                break
             except Exception as e:
                 log.exception(e)
+                error_count += 1
+                if error_count >= max_errors:
+                    log.error(
+                        f"Too many fetch errors ({max_errors}), stopping fetch task"
+                    )
+                    self.is_fetching = False
+                    break
+                await asyncio.sleep(min(2 ** error_count, 30))  # Exponential backoff

     async def fetch(self):

@@ -218,7 +233,13 @@ async def fetch(self):
                             for n, output in enumerate(
                                 self.processor.parse(row["Data"])
                             ):
-                                await self.queue.put(output)
+                                try:
+                                    await asyncio.wait_for(
+                                        self.queue.put(output), timeout=30.0
+                                    )
+                                except asyncio.TimeoutError:
+                                    log.warning("Queue put timed out, skipping record")
+                                    continue
                                 total_items += n + 1

                         # Get approx minutes behind..
@@ -253,14 +274,23 @@ async def fetch(self):

                         # Add checkpoint record
                         last_record = result["Records"][-1]
-                        await self.queue.put(
-                            {
-                                "__CHECKPOINT__": {
-                                    "ShardId": shard["ShardId"],
-                                    "SequenceNumber": last_record["SequenceNumber"],
-                                }
-                            }
-                        )
+                        try:
+                            await asyncio.wait_for(
+                                self.queue.put(
+                                    {
+                                        "__CHECKPOINT__": {
+                                            "ShardId": shard["ShardId"],
+                                            "SequenceNumber": last_record[
+                                                "SequenceNumber"
+                                            ],
+                                        }
+                                    }
+                                ),
+                                timeout=30.0,
+                            )
+                        except asyncio.TimeoutError:
+                            log.warning("Checkpoint queue put timed out")
+                            # Continue without checkpoint - not critical

                         shard["LastSequenceNumber"] = last_record["SequenceNumber"]

@@ -302,7 +332,7 @@ async def get_records(self, shard):
                 shard["stats"].succeded()
                 return result

-            except ClientConnectionError as e:
+            except ClientConnectionError:
                 await self.get_conn()
             except TimeoutError as e:
                 log.warning("Timeout {}. sleeping..".format(e))
@@ -358,17 +388,17 @@ async def get_shard_iterator(self, shard_id, last_sequence_number=None):

         params = {
             "ShardId": shard_id,
-            "ShardIteratorType": "AFTER_SEQUENCE_NUMBER"
-            if last_sequence_number
-            else self.iterator_type,
+            "ShardIteratorType": (
+                "AFTER_SEQUENCE_NUMBER" if last_sequence_number else self.iterator_type
+            ),
         }
         params.update(self.address)

         if last_sequence_number:
             params["StartingSequenceNumber"] = last_sequence_number

-        if self.iterator_type == 'AT_TIMESTAMP' and self.timestamp:
-            params['Timestamp'] = self.timestamp
+        if self.iterator_type == "AT_TIMESTAMP" and self.timestamp:
+            params["Timestamp"] = self.timestamp

         response = await self.client.get_shard_iterator(**params)
         return response["ShardIterator"]
@@ -397,7 +427,12 @@ async def __anext__(self):
         # Raise exception from Fetch Task to main task otherwise raise exception inside
         # Fetch Task will fail silently
         if self.fetch_task.done():
-            raise self.fetch_task.exception()
+            exception = self.fetch_task.exception()
+            if exception:
+                raise exception
+
+        checkpoint_count = 0
+        max_checkpoints = 100  # Prevent infinite checkpoint processing

         while True:
             try:
@@ -409,6 +444,12 @@ async def __anext__(self):
409444 item ["__CHECKPOINT__" ]["ShardId" ],
410445 item ["__CHECKPOINT__" ]["SequenceNumber" ],
411446 )
447+ checkpoint_count += 1
448+ if checkpoint_count >= max_checkpoints :
449+ log .warning (
450+ f"Processed { max_checkpoints } checkpoints, stopping iteration"
451+ )
452+ raise StopAsyncIteration
412453 continue
413454
414455 return item
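The two safeguards added above in `_fetch` and the record loop (capped retries with exponential backoff, plus `asyncio.wait_for` around `queue.put` so a full queue cannot block the fetch task forever) can be exercised in isolation. Below is a minimal standalone sketch of the same pattern; `fake_fetch`, `producer`, and `main` are hypothetical names and not part of this codebase, though the 30-second timeout and backoff cap mirror the values used in the diff.

```python
import asyncio
import logging
import random

log = logging.getLogger(__name__)


async def fake_fetch():
    # Stand-in for a GetRecords call; fails randomly to exercise the backoff path.
    if random.random() < 0.3:
        raise ConnectionError("simulated fetch failure")
    return {"Records": [{"Data": b"x"}]}


async def producer(queue, max_errors=10):
    error_count = 0
    while True:
        await asyncio.sleep(0.2)  # at most ~5 fetches per second
        try:
            result = await fake_fetch()
            error_count = 0  # reset on success
        except Exception as e:
            log.exception(e)
            error_count += 1
            if error_count >= max_errors:
                log.error("Too many fetch errors (%s), stopping", max_errors)
                break
            await asyncio.sleep(min(2 ** error_count, 30))  # exponential backoff
            continue
        for record in result["Records"]:
            try:
                # Bounded put: a full queue can no longer wedge the fetch task forever.
                await asyncio.wait_for(queue.put(record), timeout=30.0)
            except asyncio.TimeoutError:
                log.warning("Queue put timed out, skipping record")


async def main():
    queue = asyncio.Queue(maxsize=100)
    task = asyncio.create_task(producer(queue))
    for _ in range(5):
        print(await queue.get())
    task.cancel()


if __name__ == "__main__":
    asyncio.run(main())
```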
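The `__anext__` change also tightens how a failed fetch task surfaces: `Task.exception()` returns `None` when the task finished without error, and `raise None` would itself fail with a `TypeError`, so the new code only re-raises when an exception is actually present. A compact sketch of that pattern, using hypothetical `Stream`/`_fetch` names rather than this library's classes:

```python
import asyncio
from typing import Optional


class Stream:
    """Minimal async iterator mirroring the fetch-task error handling above."""

    def __init__(self):
        self.queue: asyncio.Queue = asyncio.Queue()
        self.fetch_task: Optional[asyncio.Task] = None

    async def _fetch(self):
        # Simulated background failure; the real consumer's loop calls fetch() here.
        await asyncio.sleep(0.05)
        raise RuntimeError("simulated fetch failure")

    def __aiter__(self):
        self.fetch_task = asyncio.ensure_future(self._fetch())
        return self

    async def __anext__(self):
        while True:
            if self.fetch_task and self.fetch_task.done():
                # Re-raise the background failure in the consumer instead of letting
                # it die silently; exception() is None when the task exited cleanly.
                exception = self.fetch_task.exception()
                if exception:
                    raise exception
                raise StopAsyncIteration
            try:
                return self.queue.get_nowait()
            except asyncio.QueueEmpty:
                await asyncio.sleep(0.1)


async def main():
    try:
        async for item in Stream():
            print(item)
    except RuntimeError as e:
        print("fetch task failed:", e)


if __name__ == "__main__":
    asyncio.run(main())
```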