diff --git a/scrapegraphai/telemetry/telemetry.py b/scrapegraphai/telemetry/telemetry.py index 7b186cb5..0dc87e02 100644 --- a/scrapegraphai/telemetry/telemetry.py +++ b/scrapegraphai/telemetry/telemetry.py @@ -1,20 +1,3 @@ -""" -This module contains code that relates to sending ScrapeGraphAI usage telemetry. - -To disable sending telemetry there are three ways: - -1. Set it to false programmatically in your driver: - >>> from scrapegraphai import telemetry - >>> telemetry.disable_telemetry() -2. Set it to `false` in ~/.scrapegraphai.conf under `DEFAULT` - [DEFAULT] - telemetry_enabled = False -3. Set SCRAPEGRAPHAI_TELEMETRY_ENABLED=false as an environment variable: - SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python run.py - or: - export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false -""" - import configparser import functools import importlib.metadata @@ -27,17 +10,19 @@ from typing import Callable, Dict from urllib import request +# Load version VERSION = importlib.metadata.version("scrapegraphai") STR_VERSION = ".".join([str(i) for i in VERSION]) -HOST = "https://eu.i.posthog.com" -TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints -API_KEY = "phc_orsfU4aHhtpTSLVcUE2hdUkQDLM4OEQZndKGFBKMEtn" + +# 🚀 Your proxy service endpoint (instead of PostHog) +PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/" + TIMEOUT = 2 DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf") logger = logging.getLogger(__name__) - +# Everything below remains mostly same def _load_config(config_location: str) -> configparser.ConfigParser: config = configparser.ConfigParser() try: @@ -59,28 +44,22 @@ def _load_config(config_location: str) -> configparser.ConfigParser: return config -def _check_config_and_environ_for_telemetry_flag( - telemetry_default: bool, config_obj: configparser.ConfigParser -) -> bool: - telemetry_enabled = telemetry_default +def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj): + telemetry_enabled = default_value if "telemetry_enabled" in config_obj["DEFAULT"]: try: telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - f"""Unable to parse value for - `telemetry_enabled` from config. Encountered {e}""" - ) + except Exception: + pass + if os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") is not None: - env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") - config_obj["DEFAULT"]["telemetry_enabled"] = env_value try: - telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` - from environment. Encountered {e}""" + telemetry_enabled = config_obj.getboolean( + "DEFAULT", "telemetry_enabled" ) + except Exception: + pass + return telemetry_enabled @@ -90,87 +69,70 @@ def _check_config_and_environ_for_telemetry_flag( CALL_COUNTER = 0 MAX_COUNT_SESSION = 1000 + BASE_PROPERTIES = { "os_type": os.name, "os_version": platform.platform(), "python_version": f"{platform.python_version()}/{platform.python_implementation()}", "distinct_id": g_anonymous_id, "scrapegraphai_version": VERSION, - "telemetry_version": "0.0.3", + "telemetry_version": "0.0.4-proxy", } def disable_telemetry(): - """ - function for disabling the telemetries - """ global g_telemetry_enabled g_telemetry_enabled = False def is_telemetry_enabled() -> bool: - """ - function for checking if a telemetry is enables - """ if g_telemetry_enabled: global CALL_COUNTER - if CALL_COUNTER == 0: - logger.debug( - "Note: ScrapeGraphAI collects anonymous usage data to improve the library. " - "You can disable telemetry by setting SCRAPEGRAPHAI_TELEMETRY_ENABLED=false or " - "by editing ~/.scrapegraphai.conf." - ) CALL_COUNTER += 1 if CALL_COUNTER > MAX_COUNT_SESSION: return False return True - else: - return False + return False +# ⭐ UPDATED FOR PROXY — send without API key def _send_event_json(event_json: dict): headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {API_KEY}", "User-Agent": f"scrapegraphai/{STR_VERSION}", } try: data = json.dumps(event_json).encode() - req = request.Request(TRACK_URL, data=data, headers=headers) + req = request.Request(PROXY_URL, data=data, headers=headers) + with request.urlopen(req, timeout=TIMEOUT) as f: - res = f.read() + response_body = f.read() if f.code != 200: - raise RuntimeError(res) + raise RuntimeError(response_body) except Exception as e: - logger.debug(f"Failed to send telemetry data: {e}") + logger.debug(f"Failed to send telemetry data to proxy: {e}") else: - logger.debug(f"Telemetry data sent: {data}") + logger.debug(f"Telemetry payload forwarded to proxy: {data}") def send_event_json(event_json: dict): - """ - fucntion for sending event json - """ if not g_telemetry_enabled: raise RuntimeError("Telemetry tracking is disabled!") try: th = threading.Thread(target=_send_event_json, args=(event_json,)) th.start() except Exception as e: - logger.debug(f"Failed to send telemetry data in a thread: {e}") + logger.debug(f"Telemetry dispatch thread failed: {e}") def log_event(event: str, properties: Dict[str, any]): - """ - function for logging the events - """ if is_telemetry_enabled(): - event_json = { - "api_key": API_KEY, + payload = { "event": event, + "distinct_id": g_anonymous_id, "properties": {**BASE_PROPERTIES, **properties}, } - send_event_json(event_json) + send_event_json(payload) def log_graph_execution( @@ -188,10 +150,7 @@ def log_graph_execution( exception: str = None, total_tokens: int = None, ): - """ - function for logging the graph execution - """ - properties = { + props = { "graph_name": graph_name, "source": source, "prompt": prompt, @@ -207,26 +166,15 @@ def log_graph_execution( "total_tokens": total_tokens, "type": "community-library", } - log_event("graph_execution", properties) + log_event("graph_execution", props) def capture_function_usage(call_fn: Callable) -> Callable: - """ - function that captures the usage - """ - @functools.wraps(call_fn) def wrapped_fn(*args, **kwargs): try: return call_fn(*args, **kwargs) finally: if is_telemetry_enabled(): - try: - function_name = call_fn.__name__ - log_event("function_usage", {"function_name": function_name}) - except Exception as e: - logger.debug( - f"Failed to send telemetry for function usage. Encountered: {e}" - ) - - return wrapped_fn + log_event("function_usage", {"function_name": call_fn.__name__}) + return wrapped_fn \ No newline at end of file