-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
added posthog proxy #1031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
added posthog proxy #1031
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,20 +1,3 @@ | ||
| """ | ||
| This module contains code that relates to sending ScrapeGraphAI usage telemetry. | ||
|
|
||
| To disable sending telemetry there are three ways: | ||
|
|
||
| 1. Set it to false programmatically in your driver: | ||
| >>> from scrapegraphai import telemetry | ||
| >>> telemetry.disable_telemetry() | ||
| 2. Set it to `false` in ~/.scrapegraphai.conf under `DEFAULT` | ||
| [DEFAULT] | ||
| telemetry_enabled = False | ||
| 3. Set SCRAPEGRAPHAI_TELEMETRY_ENABLED=false as an environment variable: | ||
| SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python run.py | ||
| or: | ||
| export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false | ||
| """ | ||
|
|
||
| import configparser | ||
| import functools | ||
| import importlib.metadata | ||
|
|
@@ -27,17 +10,19 @@ | |
| from typing import Callable, Dict | ||
| from urllib import request | ||
|
|
||
| # Load version | ||
| VERSION = importlib.metadata.version("scrapegraphai") | ||
| STR_VERSION = ".".join([str(i) for i in VERSION]) | ||
| HOST = "https://eu.i.posthog.com" | ||
| TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints | ||
| API_KEY = "phc_orsfU4aHhtpTSLVcUE2hdUkQDLM4OEQZndKGFBKMEtn" | ||
|
|
||
| # 🚀 Your proxy service endpoint (instead of PostHog) | ||
| PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/" | ||
|
|
||
| TIMEOUT = 2 | ||
| DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf") | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # Everything below remains mostly same | ||
| def _load_config(config_location: str) -> configparser.ConfigParser: | ||
| config = configparser.ConfigParser() | ||
| try: | ||
|
|
@@ -59,28 +44,22 @@ def _load_config(config_location: str) -> configparser.ConfigParser: | |
| return config | ||
|
|
||
|
|
||
| def _check_config_and_environ_for_telemetry_flag( | ||
| telemetry_default: bool, config_obj: configparser.ConfigParser | ||
| ) -> bool: | ||
| telemetry_enabled = telemetry_default | ||
| def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj): | ||
| telemetry_enabled = default_value | ||
| if "telemetry_enabled" in config_obj["DEFAULT"]: | ||
| try: | ||
| telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") | ||
| except ValueError as e: | ||
| logger.debug( | ||
| f"""Unable to parse value for | ||
| `telemetry_enabled` from config. Encountered {e}""" | ||
| ) | ||
| except Exception: | ||
| pass | ||
|
|
||
| if os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") is not None: | ||
| env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") | ||
| config_obj["DEFAULT"]["telemetry_enabled"] = env_value | ||
| try: | ||
| telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") | ||
| except ValueError as e: | ||
| logger.debug( | ||
| f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` | ||
| from environment. Encountered {e}""" | ||
| telemetry_enabled = config_obj.getboolean( | ||
| "DEFAULT", "telemetry_enabled" | ||
| ) | ||
| except Exception: | ||
| pass | ||
|
|
||
| return telemetry_enabled | ||
|
|
||
|
|
||
|
|
@@ -90,87 +69,70 @@ def _check_config_and_environ_for_telemetry_flag( | |
| CALL_COUNTER = 0 | ||
| MAX_COUNT_SESSION = 1000 | ||
|
|
||
|
|
||
| BASE_PROPERTIES = { | ||
| "os_type": os.name, | ||
| "os_version": platform.platform(), | ||
| "python_version": f"{platform.python_version()}/{platform.python_implementation()}", | ||
| "distinct_id": g_anonymous_id, | ||
| "scrapegraphai_version": VERSION, | ||
| "telemetry_version": "0.0.3", | ||
| "telemetry_version": "0.0.4-proxy", | ||
| } | ||
|
|
||
|
|
||
| def disable_telemetry(): | ||
| """ | ||
| function for disabling the telemetries | ||
| """ | ||
| global g_telemetry_enabled | ||
| g_telemetry_enabled = False | ||
|
|
||
|
|
||
| def is_telemetry_enabled() -> bool: | ||
| """ | ||
| function for checking if a telemetry is enables | ||
| """ | ||
| if g_telemetry_enabled: | ||
| global CALL_COUNTER | ||
| if CALL_COUNTER == 0: | ||
| logger.debug( | ||
| "Note: ScrapeGraphAI collects anonymous usage data to improve the library. " | ||
| "You can disable telemetry by setting SCRAPEGRAPHAI_TELEMETRY_ENABLED=false or " | ||
| "by editing ~/.scrapegraphai.conf." | ||
| ) | ||
| CALL_COUNTER += 1 | ||
| if CALL_COUNTER > MAX_COUNT_SESSION: | ||
| return False | ||
| return True | ||
| else: | ||
| return False | ||
| return False | ||
|
|
||
|
|
||
| # ⭐ UPDATED FOR PROXY — send without API key | ||
| def _send_event_json(event_json: dict): | ||
| headers = { | ||
| "Content-Type": "application/json", | ||
| "Authorization": f"Bearer {API_KEY}", | ||
| "User-Agent": f"scrapegraphai/{STR_VERSION}", | ||
| } | ||
| try: | ||
| data = json.dumps(event_json).encode() | ||
| req = request.Request(TRACK_URL, data=data, headers=headers) | ||
| req = request.Request(PROXY_URL, data=data, headers=headers) | ||
|
|
||
| with request.urlopen(req, timeout=TIMEOUT) as f: | ||
| res = f.read() | ||
| response_body = f.read() | ||
| if f.code != 200: | ||
| raise RuntimeError(res) | ||
| raise RuntimeError(response_body) | ||
| except Exception as e: | ||
| logger.debug(f"Failed to send telemetry data: {e}") | ||
| logger.debug(f"Failed to send telemetry data to proxy: {e}") | ||
| else: | ||
| logger.debug(f"Telemetry data sent: {data}") | ||
| logger.debug(f"Telemetry payload forwarded to proxy: {data}") | ||
|
|
||
|
|
||
| def send_event_json(event_json: dict): | ||
| """ | ||
| fucntion for sending event json | ||
| """ | ||
| if not g_telemetry_enabled: | ||
| raise RuntimeError("Telemetry tracking is disabled!") | ||
| try: | ||
| th = threading.Thread(target=_send_event_json, args=(event_json,)) | ||
| th.start() | ||
| except Exception as e: | ||
| logger.debug(f"Failed to send telemetry data in a thread: {e}") | ||
| logger.debug(f"Telemetry dispatch thread failed: {e}") | ||
|
|
||
|
|
||
| def log_event(event: str, properties: Dict[str, any]): | ||
| """ | ||
| function for logging the events | ||
| """ | ||
| if is_telemetry_enabled(): | ||
| event_json = { | ||
| "api_key": API_KEY, | ||
| payload = { | ||
| "event": event, | ||
| "distinct_id": g_anonymous_id, | ||
| "properties": {**BASE_PROPERTIES, **properties}, | ||
| } | ||
| send_event_json(event_json) | ||
| send_event_json(payload) | ||
|
|
||
|
|
||
| def log_graph_execution( | ||
|
|
@@ -188,10 +150,7 @@ def log_graph_execution( | |
| exception: str = None, | ||
| total_tokens: int = None, | ||
| ): | ||
| """ | ||
| function for logging the graph execution | ||
| """ | ||
| properties = { | ||
| props = { | ||
| "graph_name": graph_name, | ||
| "source": source, | ||
| "prompt": prompt, | ||
|
|
@@ -207,26 +166,15 @@ def log_graph_execution( | |
| "total_tokens": total_tokens, | ||
| "type": "community-library", | ||
| } | ||
| log_event("graph_execution", properties) | ||
| log_event("graph_execution", props) | ||
|
|
||
|
|
||
| def capture_function_usage(call_fn: Callable) -> Callable: | ||
| """ | ||
| function that captures the usage | ||
| """ | ||
|
|
||
| @functools.wraps(call_fn) | ||
| def wrapped_fn(*args, **kwargs): | ||
| try: | ||
| return call_fn(*args, **kwargs) | ||
| finally: | ||
| if is_telemetry_enabled(): | ||
| try: | ||
| function_name = call_fn.__name__ | ||
| log_event("function_usage", {"function_name": function_name}) | ||
| except Exception as e: | ||
| logger.debug( | ||
| f"Failed to send telemetry for function usage. Encountered: {e}" | ||
| ) | ||
|
|
||
| return wrapped_fn | ||
| log_event("function_usage", {"function_name": call_fn.__name__}) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Removed error handling exposes race condition in decorator. The |
||
| return wrapped_fn | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug: Environment variable telemetry override is completely broken
The environment variable
`SCRAPEGRAPHAI_TELEMETRY_ENABLED` check is broken. The code checks if the variable is set (`os.environ.get(...) is not None`) but then reads from `config_obj` instead of using the actual environment variable value. The original code first assigned the env var value into `config_obj["DEFAULT"]["telemetry_enabled"] = env_value` before reading it back, but this assignment was removed. Now setting `SCRAPEGRAPHAI_TELEMETRY_ENABLED=false` won't actually disable telemetry since the env var value is never used.