From b7c05c289532cd25c8d30b82d1676ec1487e80c2 Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Fri, 3 May 2024 01:13:05 +0100 Subject: [PATCH] feat: add analytics (#335) * feat: add telemetry features. * feat: add analytics to streamlit app * docs: Add analytics information to the documentation. * fix: fix issues related with imports * fix: add request * fix: remove print * fix: cleaning code --------- Co-authored-by: Fabiana Clemente --- docs/support/analytics.md | 44 +++++++++++++++++++ mkdocs.yml | 1 + .../pages/1_Train_a_synthesizer.py | 8 ++++ .../synthesizers/regular/model.py | 7 +++ .../synthesizers/timeseries/model.py | 6 +++ src/ydata_synthetic/utils/logger.py | 23 ++++++++++ src/ydata_synthetic/utils/utils.py | 32 ++++++++++++++ 7 files changed, 121 insertions(+) create mode 100644 docs/support/analytics.md create mode 100644 src/ydata_synthetic/utils/logger.py create mode 100644 src/ydata_synthetic/utils/utils.py diff --git a/docs/support/analytics.md b/docs/support/analytics.md new file mode 100644 index 00000000..6a4955e9 --- /dev/null +++ b/docs/support/analytics.md @@ -0,0 +1,44 @@ + +# Analytics & Telemetry + +## Overview + +`ydata-synthetic` is a powerful library designed to generate synthetic data. +As part of our ongoing efforts to improve user experience and functionality, `ydata-synthetic` +includes a telemetry feature. This feature collects anonymous usage data, helping us understand +how the library is used and identify areas for improvement. 
+ +The primary goal of collecting telemetry data is to: + +- Enhance the functionality and performance of the ydata-synthetic library +- Prioritize new features based on user engagement +- Identify common issues and bugs to improve overall user experience + +### Data Collected +The telemetry system collects non-personal, anonymous information such as: + +- Python version +- `ydata-synthetic` version +- Frequency of use of `ydata-synthetic` features +- Errors or exceptions thrown within the library + +## Disabling usage analytics + +We respect your choice to not participate in our telemetry collection. +If you prefer to disable telemetry, you can do so by setting an environment +variable on your system. Disabling telemetry will not affect the functionality +of the ydata-synthetic library, except for the ability to contribute to its usage analytics. + +### Set an Environment Variable +In your notebook or script make sure to set the `YDATA_SYNTHETIC_NO_ANALYTICS` +environment variable to `True`. 
+ +````python + import os + + os.environ['YDATA_SYNTHETIC_NO_ANALYTICS']='True' +```` + + + + diff --git a/mkdocs.yml b/mkdocs.yml index 6f9bbba9..c8c15442 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,6 +34,7 @@ nav: - Help & Troubleshooting: 'support/help-troubleshooting.md' - Contribution Guidelines: 'support/contribute.md' - Contribution Guidelines: 'support/contribute.md' + - Analytics: 'support/analytics.md' - Reference: - Changelog: 'reference/changelog.md' - API: diff --git a/src/ydata_synthetic/streamlit_app/pages/1_Train_a_synthesizer.py b/src/ydata_synthetic/streamlit_app/pages/1_Train_a_synthesizer.py index f7c88298..a5c573f0 100644 --- a/src/ydata_synthetic/streamlit_app/pages/1_Train_a_synthesizer.py +++ b/src/ydata_synthetic/streamlit_app/pages/1_Train_a_synthesizer.py @@ -1,11 +1,14 @@ from typing import Union import os import json +import logging + import streamlit as st from ydata.sdk.synthesizers import RegularSynthesizer from ydata.sdk.common.client import get_client +from ydata_synthetic.utils.logger import SynthesizersLogger from ydata_synthetic.synthesizers import ModelParameters, TrainParameters from ydata_synthetic.synthesizers.regular.model import Model @@ -13,6 +16,9 @@ from ydata_synthetic.streamlit_app.pages.functions.train import DataType, __CONDITIONAL_MODELS from ydata_synthetic.streamlit_app.pages.functions.train import init_synth, advanced_setttings, training_parameters +logger = SynthesizersLogger(name='streamlitSynthesizer.logger') +logger.setLevel(logging.INFO) + def get_available_models(type: Union[str, DataType]): dtype = DataType(type) @@ -114,6 +120,8 @@ def run(): st.subheader("3. 
Train your synthesizer") if st.button('Click here to start the training process', disabled=not valid_token): + + logger.info_def_report(model='fabric') model = RegularSynthesizer() with st.spinner("Please wait while your synthesizer trains..."): dtypes = {} diff --git a/src/ydata_synthetic/synthesizers/regular/model.py b/src/ydata_synthetic/synthesizers/regular/model.py index 3e3b2cfc..b03e81fc 100644 --- a/src/ydata_synthetic/synthesizers/regular/model.py +++ b/src/ydata_synthetic/synthesizers/regular/model.py @@ -2,6 +2,7 @@ Main synthesizer class """ from enum import Enum, unique +import logging from joblib import load @@ -17,6 +18,10 @@ from ydata_synthetic.synthesizers.regular.ctgan.model import CTGAN from ydata_synthetic.synthesizers.regular.gmm.model import GMM +from ydata_synthetic.utils.logger import SynthesizersLogger + +logger = SynthesizersLogger(name='regularsynthesizer.logger') +logger.setLevel(logging.INFO) @unique class Model(Enum): @@ -54,6 +59,8 @@ def __new__(cls, modelname: str, model_parameters =None, **kwargs): model=Model(modelname).function(**kwargs) else: model=Model(modelname).function(model_parameters, **kwargs) + + logger.info_def_report(model=modelname) return model @staticmethod diff --git a/src/ydata_synthetic/synthesizers/timeseries/model.py b/src/ydata_synthetic/synthesizers/timeseries/model.py index 1b985313..436055e9 100644 --- a/src/ydata_synthetic/synthesizers/timeseries/model.py +++ b/src/ydata_synthetic/synthesizers/timeseries/model.py @@ -3,6 +3,7 @@ """ from enum import Enum, unique import os +import logging from joblib import load from tensorflow import config as tfconfig @@ -10,6 +11,10 @@ from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN from ydata_synthetic.synthesizers.timeseries.doppelganger.model import DoppelGANger +from ydata_synthetic.utils.logger import SynthesizersLogger + +logger = SynthesizersLogger(name='timseriesSynthesizer.logger') +logger.setLevel(logging.INFO) @unique class 
Model(Enum): @@ -28,6 +33,7 @@ def function(self): class TimeSeriesSynthesizer(): "Abstraction class " def __new__(cls, modelname: str, model_parameters=None, **kwargs): + logger.info_def_report(model=modelname) return Model(modelname).function(model_parameters, **kwargs) @staticmethod diff --git a/src/ydata_synthetic/utils/logger.py b/src/ydata_synthetic/utils/logger.py new file mode 100644 index 00000000..33d6431c --- /dev/null +++ b/src/ydata_synthetic/utils/logger.py @@ -0,0 +1,23 @@ +""" + ydata-synthetic logger +""" +from __future__ import absolute_import, division, print_function + +import logging + +from ydata_synthetic.utils.utils import analytics_features + +class SynthesizersLogger(logging.Logger): + def __init__(self, name, level=logging.INFO): + super().__init__(name, level) + + def info( + self, + msg: object, + ) -> None: + super().info(f'[SYNTHESIZER] - {msg}.') + + def info_def_report(self, model: str): + analytics_features(model=model) + + super().info(f'[SYNTHESIZER] Creating a synthetic data generator with the following model - {model}.') \ No newline at end of file diff --git a/src/ydata_synthetic/utils/utils.py b/src/ydata_synthetic/utils/utils.py new file mode 100644 index 00000000..8ea2e213 --- /dev/null +++ b/src/ydata_synthetic/utils/utils.py @@ -0,0 +1,32 @@ +""" + Utility functions that are common to ydata-synthetic project +""" +import os +import subprocess +import platform +import requests + +from ydata_synthetic.version import __version__ +def analytics_features(model: str): + endpoint= "https://packages.ydata.ai/ydata-synthetic?" 
+ + if bool(os.getenv("YDATA_SYNTHETIC_NO_ANALYTICS"))!= True: + package_version = __version__ + try: + subprocess.check_output("nvidia-smi") + gpu_present = True + except Exception: + gpu_present = False + + python_version = ".".join(platform.python_version().split(".")[:2]) + + try: + request_message = f"{endpoint}version={package_version}" \ + f"&python_version={python_version}" \ + f"&model={model}" \ + f"&os={platform.system()}" \ + f"&gpu={str(gpu_present)}" + + requests.get(request_message) + except Exception: + pass