Skip to content

Commit 8be760a

Browse files
authored
fix lints (#41)
* fix lints
* .
1 parent b1abf01 commit 8be760a

File tree

12 files changed

+183
-126
lines changed

12 files changed

+183
-126
lines changed

kinesis/__init__.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
from .producer import Producer
1+
from .aggregators import (
2+
ListAggregator,
3+
NetstringAggregator,
4+
NewlineAggregator,
5+
SimpleAggregator,
6+
)
7+
from .checkpointers import MemoryCheckPointer, RedisCheckPointer
28
from .consumer import Consumer
39
from .processors import (
4-
StringProcessor,
5-
JsonProcessor,
610
JsonLineProcessor,
711
JsonListProcessor,
12+
JsonProcessor,
813
MsgpackProcessor,
14+
StringProcessor,
915
)
10-
from .serializers import StringSerializer, JsonSerializer, MsgpackSerializer
11-
from .checkpointers import MemoryCheckPointer, RedisCheckPointer
12-
from .aggregators import (
13-
SimpleAggregator,
14-
NewlineAggregator,
15-
NetstringAggregator,
16-
ListAggregator,
17-
)
16+
from .producer import Producer
17+
from .serializers import JsonSerializer, MsgpackSerializer, StringSerializer

kinesis/aggregators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import logging
22
import math
33
from collections import namedtuple
4-
from .exceptions import ValidationError
5-
from .exceptions import ExceededPutLimit
4+
5+
from .exceptions import ExceededPutLimit, ValidationError
66

77
try:
88
import aws_kinesis_agg

kinesis/base.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
import asyncio
22
import logging
33
import time
4-
from typing import Optional, Any, Dict, List, Union
4+
from asyncio import CancelledError
5+
from typing import Any, Dict, List, Optional
56

6-
from .timeout_compat import timeout
77
from aiobotocore.session import AioSession
8-
from asyncio import CancelledError
9-
from botocore.exceptions import ClientError
108
from botocore.config import Config
9+
from botocore.exceptions import ClientError
1110

1211
from . import exceptions
12+
from .timeout_compat import timeout
1313

1414
log = logging.getLogger(__name__)
1515

@@ -44,6 +44,7 @@ def __init__(
4444
self.shards: Optional[List[Dict[str, Any]]] = None
4545

4646
self.stream_status: Optional[str] = None
47+
self.client: Optional[Any] = None
4748

4849
self.retry_limit: Optional[int] = retry_limit
4950
self.expo_backoff: Optional[float] = expo_backoff
@@ -75,14 +76,15 @@ async def __aenter__(self) -> "Base":
7576
except exceptions.StreamDoesNotExist:
7677
await self.close()
7778
raise
78-
except:
79+
except Exception:
7980
raise
8081

8182
return self
8283

8384
async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
8485
await self.close()
85-
await self.client.__aexit__(exc_type, exc, tb)
86+
if self.client is not None:
87+
await self.client.__aexit__(exc_type, exc, tb)
8688

8789
@property
8890
def address(self) -> Dict[str, str]:
@@ -187,7 +189,7 @@ async def get_conn(self):
187189
if self.stream_status == self.INITIALIZE:
188190
try:
189191
await self.start()
190-
log.info(f"Connection Successfully Initialized")
192+
log.info("Connection Successfully Initialized")
191193
except exceptions.StreamDoesNotExist:
192194
# Do not attempt to reconnect if stream does not exist
193195
log.error(f"Stream does not exist ({self.stream_name})")
@@ -230,14 +232,17 @@ async def _get_reconn_helper(self):
230232
raise e
231233
log.warning(e)
232234
conn_attempts += 1
233-
if isinstance(self.retry_limit, int):
234-
if conn_attempts >= (self.retry_limit + 1):
235-
await self.close()
236-
raise ConnectionError(
237-
f"Kinesis client has exceeded {self.retry_limit} connection attempts"
238-
)
235+
# Default retry limit of 5 if not specified
236+
retry_limit = (
237+
self.retry_limit if isinstance(self.retry_limit, int) else 5
238+
)
239+
if conn_attempts >= (retry_limit + 1):
240+
await self.close()
241+
raise ConnectionError(
242+
f"Kinesis client has exceeded {retry_limit} connection attempts"
243+
)
239244
if self.expo_backoff:
240-
backoff_delay = (conn_attempts ** 2) * self.expo_backoff
245+
backoff_delay = (conn_attempts**2) * self.expo_backoff
241246
if backoff_delay >= self.expo_backoff_limit:
242247
backoff_delay = self.expo_backoff_limit
243248
await self.close()

kinesis/checkpointers.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
1-
import logging
21
import asyncio
3-
import os
42
import json
5-
from datetime import timezone, datetime
6-
from typing import Protocol, Optional, Union, Dict, Tuple, Any
3+
import logging
4+
import os
5+
from datetime import datetime, timezone
6+
from typing import Any, Dict, Optional, Protocol, Tuple, Union
77

88
log = logging.getLogger(__name__)
99

1010

1111
class CheckPointer(Protocol):
1212
"""Protocol for checkpointer implementations."""
13-
13+
1414
async def allocate(self, shard_id: str) -> Tuple[bool, Optional[str]]:
1515
"""Allocate a shard for processing."""
1616
...
17-
17+
1818
async def deallocate(self, shard_id: str) -> None:
1919
"""Deallocate a shard."""
2020
...
21-
21+
2222
async def checkpoint(self, shard_id: str, sequence_number: str) -> None:
2323
"""Checkpoint progress for a shard."""
2424
...
25-
25+
2626
def get_all_checkpoints(self) -> Dict[str, str]:
2727
"""Get all checkpoints."""
2828
...
29-
29+
3030
async def close(self) -> None:
3131
"""Close the checkpointer."""
3232
...
@@ -111,7 +111,7 @@ def is_allocated(self, shard_id):
111111
async def allocate(self, shard_id):
112112
if self.is_allocated(shard_id):
113113
return False, None
114-
114+
115115
if shard_id not in self._items:
116116
self._items[shard_id] = {"sequence": None}
117117

kinesis/consumer.py

Lines changed: 65 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
import asyncio
22
import logging
3+
from asyncio import TimeoutError
4+
from asyncio.queues import QueueEmpty
35
from datetime import datetime, timezone
4-
from typing import Optional, Any, Dict, Iterator, AsyncIterator
6+
from typing import Any, AsyncIterator, Dict, Optional
57

8+
from aiobotocore.session import AioSession
69
from aiohttp import ClientConnectionError
7-
from asyncio import TimeoutError
8-
from asyncio.queues import QueueEmpty
910
from botocore.exceptions import ClientError
10-
from aiobotocore.session import AioSession
1111

12-
from .utils import Throttler
1312
from .base import Base
14-
from .checkpointers import MemoryCheckPointer, CheckPointer
13+
from .checkpointers import CheckPointer, MemoryCheckPointer
1514
from .processors import JsonProcessor, Processor
15+
from .utils import Throttler
1616

1717
log = logging.getLogger(__name__)
1818

@@ -106,7 +106,8 @@ async def close(self):
106106

107107
if self.checkpointer:
108108
await self.checkpointer.close()
109-
await self.client.close()
109+
if self.client is not None:
110+
await self.client.close()
110111

111112
async def flush(self):
112113

@@ -123,15 +124,29 @@ async def flush(self):
123124
await shard["fetch"]
124125

125126
async def _fetch(self):
127+
error_count = 0
128+
max_errors = 10
129+
126130
while self.is_fetching:
127131
# Ensure fetch is performed at most 5 times per second (the limit per shard)
128132
await asyncio.sleep(0.2)
129133
try:
130134
await self.fetch()
135+
error_count = 0 # Reset error count on successful fetch
131136
except asyncio.CancelledError:
132-
pass
137+
log.debug("Fetch task cancelled")
138+
self.is_fetching = False
139+
break
133140
except Exception as e:
134141
log.exception(e)
142+
error_count += 1
143+
if error_count >= max_errors:
144+
log.error(
145+
f"Too many fetch errors ({max_errors}), stopping fetch task"
146+
)
147+
self.is_fetching = False
148+
break
149+
await asyncio.sleep(min(2**error_count, 30)) # Exponential backoff
135150

136151
async def fetch(self):
137152

@@ -218,7 +233,13 @@ async def fetch(self):
218233
for n, output in enumerate(
219234
self.processor.parse(row["Data"])
220235
):
221-
await self.queue.put(output)
236+
try:
237+
await asyncio.wait_for(
238+
self.queue.put(output), timeout=30.0
239+
)
240+
except asyncio.TimeoutError:
241+
log.warning("Queue put timed out, skipping record")
242+
continue
222243
total_items += n + 1
223244

224245
# Get approx minutes behind..
@@ -253,14 +274,23 @@ async def fetch(self):
253274

254275
# Add checkpoint record
255276
last_record = result["Records"][-1]
256-
await self.queue.put(
257-
{
258-
"__CHECKPOINT__": {
259-
"ShardId": shard["ShardId"],
260-
"SequenceNumber": last_record["SequenceNumber"],
261-
}
262-
}
263-
)
277+
try:
278+
await asyncio.wait_for(
279+
self.queue.put(
280+
{
281+
"__CHECKPOINT__": {
282+
"ShardId": shard["ShardId"],
283+
"SequenceNumber": last_record[
284+
"SequenceNumber"
285+
],
286+
}
287+
}
288+
),
289+
timeout=30.0,
290+
)
291+
except asyncio.TimeoutError:
292+
log.warning("Checkpoint queue put timed out")
293+
# Continue without checkpoint - not critical
264294

265295
shard["LastSequenceNumber"] = last_record["SequenceNumber"]
266296

@@ -302,7 +332,7 @@ async def get_records(self, shard):
302332
shard["stats"].succeded()
303333
return result
304334

305-
except ClientConnectionError as e:
335+
except ClientConnectionError:
306336
await self.get_conn()
307337
except TimeoutError as e:
308338
log.warning("Timeout {}. sleeping..".format(e))
@@ -358,17 +388,17 @@ async def get_shard_iterator(self, shard_id, last_sequence_number=None):
358388

359389
params = {
360390
"ShardId": shard_id,
361-
"ShardIteratorType": "AFTER_SEQUENCE_NUMBER"
362-
if last_sequence_number
363-
else self.iterator_type,
391+
"ShardIteratorType": (
392+
"AFTER_SEQUENCE_NUMBER" if last_sequence_number else self.iterator_type
393+
),
364394
}
365395
params.update(self.address)
366396

367397
if last_sequence_number:
368398
params["StartingSequenceNumber"] = last_sequence_number
369399

370-
if self.iterator_type == 'AT_TIMESTAMP' and self.timestamp:
371-
params['Timestamp'] = self.timestamp
400+
if self.iterator_type == "AT_TIMESTAMP" and self.timestamp:
401+
params["Timestamp"] = self.timestamp
372402

373403
response = await self.client.get_shard_iterator(**params)
374404
return response["ShardIterator"]
@@ -397,7 +427,12 @@ async def __anext__(self):
397427
# Raise exception from Fetch Task to main task otherwise raise exception inside
398428
# Fetch Task will fail silently
399429
if self.fetch_task.done():
400-
raise self.fetch_task.exception()
430+
exception = self.fetch_task.exception()
431+
if exception:
432+
raise exception
433+
434+
checkpoint_count = 0
435+
max_checkpoints = 100 # Prevent infinite checkpoint processing
401436

402437
while True:
403438
try:
@@ -409,6 +444,12 @@ async def __anext__(self):
409444
item["__CHECKPOINT__"]["ShardId"],
410445
item["__CHECKPOINT__"]["SequenceNumber"],
411446
)
447+
checkpoint_count += 1
448+
if checkpoint_count >= max_checkpoints:
449+
log.warning(
450+
f"Processed {max_checkpoints} checkpoints, stopping iteration"
451+
)
452+
raise StopAsyncIteration
412453
continue
413454

414455
return item

kinesis/processors.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
1-
from typing import Protocol, Any, Iterator, List
21
from .aggregators import (
2+
KPLAggregator,
3+
ListAggregator,
4+
NetstringAggregator,
35
NewlineAggregator,
46
SimpleAggregator,
5-
NetstringAggregator,
6-
ListAggregator,
7-
KPLAggregator,
8-
OutputItem,
97
)
10-
from .serializers import StringSerializer, JsonSerializer, MsgpackSerializer
8+
from .serializers import JsonSerializer, MsgpackSerializer, StringSerializer
119

1210

1311
class Processor:
1412
"""Base class for processors that combine aggregation and serialization."""
13+
1514
pass
1615

1716

0 commit comments

Comments (0)