Moving Current SQL Executor changes to new branch #119

Merged 18 commits on Oct 23, 2020
2 changes: 1 addition & 1 deletion lux/action/correlation.py
@@ -47,7 +47,7 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
recommendation = {"action": "Correlation",
"description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes."}
ignore_rec_flag = False
if (len(ldf)<5): # Doesn't make sense to compute correlation if less than 4 data values
if (len(ldf)<5 and ldf.executor_type == "Pandas"): # Doesn't make sense to compute correlation if less than 4 data values
ignore_rec_flag = True
# Then use the data populated in the vis list to compute score
for vis in vlist:
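
The added clause restricts this small-data guard to the Pandas executor: a SQL-backed LuxDataFrame keeps no rows in memory, so len(ldf) reports 0 and the old check would have suppressed the recommendation for every database-backed table. A minimal sketch of the pattern, assuming only that executor_type is set to either "Pandas" or "SQL"; the helper function itself is hypothetical, not part of Lux:

def should_skip_small_data_action(ldf, min_rows: int) -> bool:
    # Only trust len(ldf) when the rows actually live in the local dataframe;
    # a SQL-backed frame reports len() == 0 even when the table is large.
    if ldf.executor_type != "Pandas":
        return False
    return len(ldf) < min_rows

The same guard is applied to the histogram and line-chart thresholds in univariate.py below.
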
4 changes: 2 additions & 2 deletions lux/action/univariate.py
@@ -49,7 +49,7 @@ def univariate(ldf, *args):
intent.extend(filter_specs)
recommendation = {"action":"Distribution",
"description":"Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes."}
if (len(ldf)<5): # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
if (len(ldf)<5 and ldf.executor_type == "Pandas"): # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
ignore_rec_flag = True
elif (data_type_constraint == "nominal"):
intent = [lux.Clause("?",data_type="nominal")]
@@ -61,7 +61,7 @@
intent.extend(filter_specs)
recommendation = {"action":"Temporal",
"description":"Show trends over <p class='highlight-descriptor'>time-related</p> attributes."}
if (len(ldf)<3): # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
if (len(ldf)<3 and ldf.executor_type == "Pandas"): # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
ignore_rec_flag = True
if (ignore_rec_flag):
recommendation["collection"] = []
115 changes: 17 additions & 98 deletions lux/core/frame.py
@@ -18,6 +18,7 @@
from lux.vis.Vis import Vis
from lux.vis.VisList import VisList
from lux.history.history import History
from lux.utils.date_utils import is_datetime_series
from lux.utils.message import Message
from lux.utils.utils import check_import_lux_widget
from typing import Dict, Union, List, Callable
@@ -28,7 +29,7 @@ class LuxDataFrame(pd.DataFrame):
A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations.
'''
# MUST register here for new properties!!
_metadata = ['_intent','data_type_lookup','data_type',
_metadata = ['_intent','data_type_lookup','data_type', 'length',
'data_model_lookup','data_model','unique_values','cardinality','_rec_info', '_pandas_only',
'_min_max','plot_config', '_current_vis','_widget', '_recommendation','_prev','_history', '_saved_export']

@@ -54,6 +55,7 @@ def __init__(self,*args, **kw):
self._message = Message()
self._pandas_only=False
# Metadata
self.length = None
self.data_type_lookup = None
self.data_type = None
self.data_model_lookup = None
@@ -77,7 +79,7 @@ def history(self):
return self._history
def maintain_metadata(self):
if (not hasattr(self,"_metadata_fresh") or not self._metadata_fresh ): # Check that metadata has not yet been computed
if (len(self)>0): #only compute metadata information if the dataframe is non-empty
if (len(self)>0 or self.executor_type == "SQL"): #only compute metadata information if the dataframe is non-empty
self.executor.compute_stats(self)
self.executor.compute_dataset_metadata(self)
self._infer_structure()
@@ -92,6 +94,7 @@ def expire_recs(self):
def expire_metadata(self):
# Set metadata as null
self._metadata_fresh = False
self.length = None
self.data_type_lookup = None
self.data_type = None
self.data_model_lookup = None
@@ -113,14 +116,14 @@ def _set_axis(self, axis, labels):
super(LuxDataFrame, self)._set_axis(axis, labels)
self.expire_metadata()
self.expire_recs()
def _set_item(self, key, value):
super(LuxDataFrame, self)._set_item(key, value)
self.expire_metadata()
self.expire_recs()
def _update_inplace(self,*args,**kwargs):
super(LuxDataFrame, self)._update_inplace(*args,**kwargs)
self.expire_metadata()
self.expire_recs()
def _set_item(self, key, value):
super(LuxDataFrame, self)._set_item(key, value)
self.expire_metadata()
self.expire_recs()
@property
def default_display(self):
if (self._default_pandas_display):
@@ -146,12 +149,12 @@ def _infer_structure(self):
# If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data
is_multi_index_flag = self.index.nlevels !=1
not_int_index_flag = self.index.dtype !='int64'
small_df_flag = len(self)<100
small_df_flag = len(self)<100 and len(self)>0
self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag
if ("Number of Records" in self.columns):
self.pre_aggregated = True
very_small_df_flag = len(self)<=10
if (very_small_df_flag):
if (very_small_df_flag and len(self)>0):
self.pre_aggregated = True
def set_executor_type(self, exe):
if (exe =="SQL"):
@@ -161,7 +164,7 @@ def set_executor_type(self, exe):
else:
import psycopg2
from lux.executor.SQLExecutor import SQLExecutor
self.executor = SQLExecutor
self.executor = SQLExecutor()
else:
from lux.executor.PandasExecutor import PandasExecutor
self.executor = PandasExecutor()
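
The added parentheses store an instance of SQLExecutor rather than the class itself, matching the Pandas branch directly above. Because methods such as compute_stats(self, ldf) are defined as instance methods, calls like self.executor.compute_stats(self) only bind correctly against an instance. A toy illustration of the difference; the class below is a stand-in, not the Lux executor:

class ToyExecutor:
    def compute_stats(self, df):
        # Instance method: needs a ToyExecutor object bound as `self`.
        return "stats over {} rows".format(len(df))

rows = [1, 2, 3]
executor = ToyExecutor()              # analogous to `self.executor = SQLExecutor()`
print(executor.compute_stats(rows))   # works: `self` is bound automatically
# ToyExecutor.compute_stats(rows)     # analogous to the old code: `rows` would be
                                      # treated as `self` and `df` would be missing
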
@@ -242,11 +245,11 @@ def _parse_validate_compile_intent(self):
from lux.processor.Parser import Parser
from lux.processor.Validator import Validator
self._intent = Parser.parse(self._intent)
Validator.validate_intent(self._intent,self)
self.maintain_metadata()
Validator.validate_intent(self._intent,self)
from lux.processor.Compiler import Compiler
self.current_vis = Compiler.compile_intent(self, self._intent)

def copy_intent(self):
#creates a true copy of the dataframe's intent
output = []
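
Swapping these two calls runs maintain_metadata() before the intent is validated, so the metadata the validator may consult (data types, unique values) is populated first, including for SQL-backed frames whose metadata cannot be derived from local rows. A toy sketch of the ordering constraint; every name below is hypothetical:

metadata = {}

def maintain_metadata():
    # Populate the lookup tables the validator depends on.
    metadata["data_type_lookup"] = {"horsepower": "quantitative"}

def validate_intent(attribute):
    # Validation reads the metadata, so it must run second.
    if attribute not in metadata.get("data_type_lookup", {}):
        raise ValueError("unknown attribute: " + attribute)

maintain_metadata()            # new order: compute metadata first
validate_intent("horsepower")  # then check the user's intent against it
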
@@ -294,92 +297,9 @@ def __repr__(self):
def set_SQL_connection(self, connection, t_name):
self.SQLconnection = connection
self.table_name = t_name
self.compute_SQL_dataset_metadata()
self.set_executor_type("SQL")
self.executor.compute_dataset_metadata(self)

def compute_SQL_dataset_metadata(self):
self.get_SQL_attributes()
for attr in list(self.columns):
self[attr] = None
self.data_type_lookup = {}
self.data_type = {}
#####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this
##### in the initialization and do it just once
self.compute_SQL_data_type()
self.compute_SQL_stats()
self.data_model_lookup = {}
self.data_model = {}
self.compute_data_model()

def compute_SQL_stats(self):
# precompute statistics
self.unique_values = {}
self._min_max = {}

self.get_SQL_unique_values()
#self.get_SQL_cardinality()
for attribute in self.columns:
if self.data_type_lookup[attribute] == 'quantitative':
self._min_max[attribute] = (self[attribute].min(), self[attribute].max())

def get_SQL_attributes(self):
if "." in self.table_name:
table_name = self.table_name[self.table_name.index(".")+1:]
else:
table_name = self.table_name
attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format(table_name)
attributes = list(pd.read_sql(attr_query, self.SQLconnection)['column_name'])
for attr in attributes:
self[attr] = None

def get_SQL_cardinality(self):
cardinality = {}
for attr in list(self.columns):
card_query = pd.read_sql("SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), self.SQLconnection)
cardinality[attr] = list(card_query["count"])[0]
self.cardinality = cardinality

def get_SQL_unique_values(self):
unique_vals = {}
for attr in list(self.columns):
unique_query = pd.read_sql("SELECT Distinct({}) FROM {}".format(attr, self.table_name), self.SQLconnection)
unique_vals[attr] = list(unique_query[attr])
self.unique_values = unique_vals

def compute_SQL_data_type(self):
data_type_lookup = {}
sql_dtypes = {}
self.get_SQL_cardinality()
if "." in self.table_name:
table_name = self.table_name[self.table_name.index(".")+1:]
else:
table_name = self.table_name
#get the data types of the attributes in the SQL table
for attr in list(self.columns):
datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(table_name, attr)
datatype = list(pd.read_sql(datatype_query, self.SQLconnection)['data_type'])[0]
sql_dtypes[attr] = datatype

data_type = {"quantitative":[], "nominal":[], "temporal":[]}
for attr in list(self.columns):
if str(attr).lower() in ["month", "year"]:
data_type_lookup[attr] = "temporal"
data_type["temporal"].append(attr)
elif sql_dtypes[attr] in ["character", "character varying", "boolean", "uuid", "text"]:
data_type_lookup[attr] = "nominal"
data_type["nominal"].append(attr)
elif sql_dtypes[attr] in ["integer", "real", "smallint", "smallserial", "serial"]:
if self.cardinality[attr] < 13:
data_type_lookup[attr] = "nominal"
data_type["nominal"].append(attr)
else:
data_type_lookup[attr] = "quantitative"
data_type["quantitative"].append(attr)
elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]:
data_type_lookup[attr] = "temporal"
data_type["temporal"].append(attr)
self.data_type_lookup = data_type_lookup
self.data_type = data_type
def _append_rec(self,rec_infolist,recommendations:Dict):
if (recommendations["collection"] is not None and len(recommendations["collection"])>0):
rec_infolist.append(recommendations)
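
With the table-introspection helpers removed from the frame, set_SQL_connection now only records the connection and table name, switches the executor, and delegates metadata computation to the executor instance. A usage sketch under assumptions: a reachable local Postgres database, a table named cars, and psycopg2 installed; the connection string and table name are hypothetical:

import psycopg2
from lux.core.frame import LuxDataFrame

# Hypothetical local database and table; replace with real credentials.
connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=postgres")

sql_df = LuxDataFrame()
sql_df.set_SQL_connection(connection, "cars")
# Internally this now calls set_executor_type("SQL") and then
# self.executor.compute_dataset_metadata(self) on a SQLExecutor() instance,
# replacing the frame-level compute_SQL_* helpers deleted above.
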
@@ -557,8 +477,7 @@ def _repr_html_(self):
)
display(self.display_pandas())
return

if (len(self)<=0):
if (len(self)<=0 and self.executor_type == "Pandas"):
warnings.warn("\nLux can not operate on an empty dataframe.\nPlease check your input again.\n",stacklevel=2)
display(self.display_pandas())
return
@@ -567,7 +486,7 @@ def _repr_html_(self):
display(self.display_pandas())
return
self.maintain_metadata()

if (self._intent!=[] and (not hasattr(self,"_compiled") or not self._compiled)):
from lux.processor.Compiler import Compiler
self.current_vis = Compiler.compile_intent(self, self._intent)
27 changes: 14 additions & 13 deletions lux/executor/Executor.py
@@ -1,16 +1,17 @@
# Copyright 2019-2020 The Lux Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from lux.vis.VisList import VisList
from lux.utils import utils
2 changes: 1 addition & 1 deletion lux/executor/PandasExecutor.py
@@ -18,7 +18,6 @@
from lux.core.frame import LuxDataFrame
from lux.executor.Executor import Executor
from lux.utils import utils
from lux.utils.date_utils import is_datetime_series
from lux.utils.utils import check_import_lux_widget, check_if_id_like
from lux.utils.date_utils import is_datetime_series
import warnings
@@ -388,6 +387,7 @@ def compute_stats(self, ldf:LuxDataFrame):
ldf.unique_values = {}
ldf._min_max = {}
ldf.cardinality = {}
ldf.length = len(ldf)

for attribute in ldf.columns:

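
Storing the row count on the frame during compute_stats gives later checks a cached length field to consult; the Pandas executor fills it from len(ldf), and a SQL-backed executor would presumably fill the same field from a COUNT query. A hedged sketch of that counterpart, not taken from this diff; the function name, query, and column alias are assumptions:

import pandas as pd

def compute_length_from_sql(ldf, connection, table_name):
    # Hypothetical SQL-side analogue of `ldf.length = len(ldf)`:
    # ask the database for the row count instead of the empty local frame.
    count_query = "SELECT COUNT(*) AS length FROM {}".format(table_name)
    ldf.length = int(pd.read_sql(count_query, connection)["length"][0])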