tableau · bryceglarsen · Dec 28, 2023 · Dec 28, 2023 · Dec 28, 2023 · Dec 29, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 012 (December 2023)
+* Add new worksheet class
+* Add worksheet items attribute to workbook class
+
 ## 011 (November 2022)
 * Remove extraneous debug print statements
 

diff --git a/docs/docs/api-ref.md b/docs/docs/api-ref.md
@@ -34,12 +34,35 @@ Saves any changes to the workbook to a new file specified by the `new_file` para
 
 `self.worksheets:` Returns a list of worksheets found in the workbook.
 
+`self.worksheet_items:` Returns a list of Worksheet objects found in the workbook.
+
 `self.datasources:` Returns a list of Datasource objects found in the workbook.
 
 `self.filename:` Returns the filename of the workbook.
 
 `self.shapes` Returns a list of strings with the names of shapes found in the workbook.
 
+## Worksheets
+```python
+class Worksheet(wsxml):
+```
+
+The Worksheet class represents the worksheets found in a Tableau Workbook. The library will access key attributes of each worksheet it finds.
+
+**Properties:**
+
+`self.name`: Returns the name of the worksheet.
+
+`self.datasources`: Returns list of the Datasource objects that are used in the worksheet.
+
+`self.fields`: Returns list of Field objects that are used somewhere within the sheet.
+
+`self.rows`: Returns list of Field objects present on the rows shelf. Certain items will return a string value, such as "Measure Names" which is not a field.
+
+`self.cols`: Returns list of Field objects present on the columns shelf. Certain items will return a string value, such as "Measure Names" which is not a field.
+
+`self.filter_fields`: Returns list of Field objects that are present on the Filter pane. Certain items will return a string value, such as Filter Actions.
+
 ## Datasources
 ```python
 class Datasource(dsxml, filename=None)

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='tableaudocumentapi',
-    version='0.11',
+    version='0.12',
     author='Tableau',
     author_email='github@tableau.com',
     url='https://github.com/tableau/document-api-python',

diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py
@@ -1,9 +1,157 @@
 import weakref
+import re
 
-from tableaudocumentapi import Datasource, xfile
+from tableaudocumentapi import Datasource, Field, xfile
 from tableaudocumentapi.xfile import xml_open, TableauInvalidFileException
 
+def _remove_brackets(text):
+    return text.lstrip("[").rstrip("]")
+
+def _clean_columns(marks):
+    """
+    Extract rows/cols data that is stored such as [datasource].[column]
+    We use a regex to find multiple marks and another regex to extract the field name
+
+    We return a dictionary of datasource: [fields] so we can map them to field items
+    """
+    if marks is None:
+        return None
+    # find all [datasource].[column] strings by positive lookahead of ), space, or string end
+    # some will have three parts so we need to use a lookahead to ensure we capture the entire object
+    matching_marks = re.findall(r"(\[.*?\])(?=\)|\s|$)",str(marks))
+    datasource_fields = {}
+    for mark in matching_marks:
+        # split column into datasource and field display
+        column = mark.split("].[")
+        datasource = _remove_brackets(column[0])
+        # initialize dictionary entry
+        if datasource not in datasource_fields:
+            datasource_fields[datasource] = []
+        # the field is always the last item in the list
+        field_display = _remove_brackets(column[-1])
+        # use ordinal (ok), quantitative (qk), nominal (nk), or string end as lookahead
+        field_match = re.match(r".*?(?<=:)([^:]+)(?=:ok|:qk|:nk|$)", field_display)
+        # if no match, eg. Measure Names, just return the string
+        if field_match:
+            field = field_match.groups(1)[0]
+        else:
+            field = field_display
+        datasource_fields[datasource].append(field)
+    return datasource_fields
+
+def _ds_fields_to_tems(ds_fields, ds_index):
+    fields = []
+    for ds, field_ids in ds_fields.items():
+        fields_dict = ds_index[ds].fields
+        for field_id in field_ids:
+            # many field ids include brackets, so we need to check for these as well
+            field_id_brackets = f"[{field_id}]"
+            if field_id in fields_dict:
+                field = fields_dict.get(field_id)
+            elif field_id_brackets in fields_dict:
+                field = fields_dict.get(field_id_brackets)
+            else:
+                field = field_id
+            fields.append(field)
+    return fields
+
+class Worksheet(object):
+    """
+    A class to parse key attributes of a worksheet.
+    """
+
+    def __init__(self, worksheet_element, ds_index):
+        self._worksheetRoot = worksheet_element
+        self.name = worksheet_element.attrib['name']
+        self._datasource_index = ds_index
+        self._datasources = self._prepare_datasources(self._worksheetRoot, self._datasource_index)
+        self._fields = self._prepare_datasource_dependencies(self._worksheetRoot)
+        self._rows = self._prepare_rows(self._worksheetRoot, self._datasource_index)
+        self._cols = self._prepare_cols(self._worksheetRoot, self._datasource_index)
+        self._filter_fields = self._prepare_filter_fields(self._worksheetRoot, self._datasource_index)
+
+    def __repr__(self):
+        name = self.name
+        datasources = ", ".join([ds.caption or ds.name for ds in self._datasources])
+        fields = ", ".join([f.name for f in self._fields])
+        return f"name: {name}, datasources: {datasources}, fields: {fields}"
+
+    def __iter__(self):
+        keys = self.__dict__.keys()
+        filtered_keys = [key for key in keys if key != "_worksheetRoot"]
+        for key in filtered_keys:
+            yield key.lstrip("_"), getattr(self, key)
 
+    @staticmethod
+    def _prepare_filter_fields(worksheet_element, ds_index):
+        filters = []
+        slices_list = worksheet_element.find(".//slices")
+        if slices_list is None:
+            return filters
+        slices = [column.text for column in slices_list]
+        # combine slices into single string to use same function as rows/cols
+        ds_fields = _clean_columns(" ".join(slices))
+        if ds_fields == None or len(ds_fields) == 0:
+            return None
+        fields = _ds_fields_to_tems(ds_fields, ds_index)
+        return fields
+
+    @staticmethod
+    def _prepare_datasources(worksheet_element, ds_index):
+        worksheet_datasources = worksheet_element.find(".//datasources")
+        datasource_names = [ds.attrib["name"] for ds in worksheet_datasources]
+        datasource_list = [ds_index[name] for name in datasource_names]
+        return datasource_list
+
+    @property
+    def datasources(self):
+        return self._datasources
+
+    @staticmethod
+    def _prepare_datasource_dependencies(worksheet_element):
+        dependencies = worksheet_element.findall('.//datasource-dependencies')
+        for dependency in dependencies:
+            columns = dependency.findall('.//column')
+            return [Field.from_column_xml(column) for column in columns]
+
+    @property
+    def fields(self):
+        return self._prepare_datasource_dependencies
+
+    @property
+    def fields_list(self):
+        return [field.caption for field in self._fields]
+
+    @staticmethod
+    def _prepare_rows(worksheet_element, ds_index):
+        rows = worksheet_element.find('.//rows')
+        ds_fields = _clean_columns(rows.text)
+        if ds_fields == None or len(ds_fields) == 0:
+            return None
+        fields = _ds_fields_to_tems(ds_fields, ds_index)
+        return fields
+
+    @staticmethod
+    def _prepare_cols(worksheet_element, ds_index):
+        cols = worksheet_element.find('.//cols')
+        ds_fields = _clean_columns(cols.text)
+        if ds_fields == None or len(ds_fields) == 0:
+            return None
+        fields = _ds_fields_to_tems(ds_fields, ds_index)
+        return fields
+
+    @property
+    def rows(self):
+        return self._rows
+
+    @property
+    def cols(self):
+        return self._cols
+
+    @property
+    def filter_fields(self):
+        return self._filter_fields
+
 class Workbook(object):
     """A class for writing Tableau workbook files."""
 
@@ -31,6 +179,8 @@ def __init__(self, filename):
 
         self._worksheets = self._prepare_worksheets(
             self._workbookRoot, self._datasource_index)
+
+        self._worksheet_items = self._prepare_worksheet_items(self._workbookRoot, self._datasource_index)
 
         self._shapes = self._prepare_shapes(self._workbookRoot)
 
@@ -45,6 +195,10 @@ def datasources(self):
     @property
     def worksheets(self):
         return self._worksheets
+
+    @property
+    def worksheet_items(self):
+        return self._worksheet_items
 
     @property
     def filename(self):
@@ -142,7 +296,16 @@ def _prepare_worksheets(xml_root, ds_index):
                         datasource.fields[column_name].add_used_in(worksheet_name)
 
         return worksheets
-
+
+    @staticmethod
+    def _prepare_worksheet_items(xml_root, ds_index):
+        worksheets = []
+        worksheets_element = xml_root.find('.//worksheets')
+        if worksheets_element is None:
+            return worksheets
+        worksheets = [Worksheet(worksheet_element, ds_index) for worksheet_element in worksheets_element]
+        return worksheets
+
     @staticmethod
     def _prepare_shapes(xml_root):
         shapes = []

diff --git a/test/test_workbook.py b/test/test_workbook.py
@@ -49,3 +49,11 @@ def test_dashboards_setup(self):
         wb = Workbook(DASHBOARDS_FILE)
         self.assertIsNotNone(wb)
         self.assertEqual(wb.dashboards, ['setTest'])
+
+class Worksheets(unittest.TestCase):
+    def test_worksheets_setup(self):
+        wb = Workbook(DASHBOARDS_FILE)
+        self.assertEqual(len(wb.worksheet_items), 2)
+        worksheet_names = [ws.name for ws in wb.worksheet_items]
+        worksheet_names.sort()
+        self.assertEqual(worksheet_names[0], 'Sheet 1')