bids-standard · sappelhoff · Aug 10, 2020 · Jul 31, 2020 · Jul 31, 2020 · Aug 4, 2020
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -42,10 +42,13 @@ jobs:
   build_docs_pdf:
     working_directory: ~/bids-specification/pdf_build_src
     docker:
-      - image: danteev/texlive:TL2017
+      - image: danteev/texlive:latest
     steps:
       - checkout:
           path: ~/bids-specification
+      - run:
+          command: |
+            pip3 install numpy
       - run:
           name: generate pdf version docs
           command: sh build_pdf.sh

diff --git a/pdf_build_src/pandoc_script.py b/pdf_build_src/pandoc_script.py
@@ -4,8 +4,7 @@
 """
 import os
 import subprocess
-
-
+
 def build_pdf(filename):
     """Construct command with required pandoc flags and run using subprocess.
 
@@ -40,4 +39,4 @@ def build_pdf(filename):
 
 
 if __name__ == "__main__":
-    build_pdf('bids-spec.pdf')
+    build_pdf('bids-spec.pdf')
diff --git a/pdf_build_src/process_markdowns.py b/pdf_build_src/process_markdowns.py
@@ -10,6 +10,7 @@
 import subprocess
 import re
 from datetime import datetime
+import numpy as np
 
 
 def run_shell_cmd(command):
@@ -141,6 +142,194 @@ def modify_changelog():
         file.writelines(data)
 
 
+# Approximate maximum number of characters on one line of the generated PDF
+# (estimated from a full line of text in the PDF).
+NB_CHARS_LINE_PDF = 100
+
+
+def _format_table_cell(elem, width, is_dashes_row):
+    """Pad one inner table cell to `width` characters.
+
+    Cells of the dashes row are rebuilt from dashes while keeping their
+    alignment colons (``:--``, ``--:``, ``:-:``). Every other cell, and any
+    cell holding inline code (a backtick), is left-justified with spaces.
+    """
+    if not is_dashes_row or '`' in elem:
+        return ' ' + elem.ljust(width) + ' '
+
+    left_aligned = ':-' in elem
+    right_aligned = '-:' in elem
+    dashes = (':-' if left_aligned else '-').ljust(width, '-')
+    return ' ' + dashes + (': ' if right_aligned else ' ')
+
+
+def correct_table(table, offset=(20, 80), debug=False):
+    """Create the corrected table.
+
+    Compute the maximal number of characters in each column and reformat
+    every row so that the dashes row has enough dashes (in proportion) and
+    the fences are correctly aligned for correct rendering in the generated
+    PDF.
+
+    Rows are lists of cells as obtained by splitting a table line on ``|``.
+    The first and the last cell of each row are copied unchanged (for a table
+    fenced with leading and trailing pipes they are the empty strings outside
+    of the fences), so the "first" and "second" columns mentioned below are
+    the cells at index 1 and 2.
+
+    Parameters
+    ----------
+    table : list of list of str
+        Table content extracted from the markdown file. Row 0 is the header
+        and row 1 is the dashes row. Rows may have different lengths; missing
+        cells count as empty when measuring the columns.
+    offset : sequence of two int
+        Number of extra characters (x, y) added to the width of the first (x)
+        and second (y) columns when these columns get widened.
+    debug : bool
+        If True, print debugging information (By default: False).
+
+    Returns
+    -------
+    new_table : list of list of str
+        Rows of the input table with the corrected number of dashes and
+        aligned fences, to be later joined with ``|``. An empty table gives
+        an empty list.
+    """
+    if not table:
+        return []
+
+    # Use the longest row (dashes row included) so that every cell index met
+    # while formatting has a measured column width.
+    nb_of_cols = max(len(row) for row in table)
+
+    # Count the characters of every cell, ignoring the dashes row (index 1)
+    counted_rows = [row for i, row in enumerate(table) if i != 1]
+    nb_of_chars_arr = np.zeros((len(counted_rows), nb_of_cols), dtype=int)
+    for row_nb, row in enumerate(counted_rows):
+        nb_of_chars_arr[row_nb, :len(row)] = [len(elem) for elem in row]
+    max_chars_in_cols = nb_of_chars_arr.max(axis=0)
+
+    # Compute the proportion of dashes from the maximal number of characters
+    # in each column
+    nb_of_dashes = max_chars_in_cols
+    total_chars = nb_of_dashes.sum()
+    prop_of_dashes = nb_of_dashes / total_chars
+    nb_of_chars_in_pdf = prop_of_dashes * NB_CHARS_LINE_PDF
+
+    # Corrected widths of the first and second columns. They stay None when
+    # the table has no such column.
+    first_column_width = None
+    second_column_width = None
+
+    # Correction for first column (Rules could be changed here for instance)
+    if nb_of_cols > 1:
+        value = max_chars_in_cols[1]
+        if value < NB_CHARS_LINE_PDF and prop_of_dashes[1] < 0.2:
+            # A very long second column calls for a larger extension
+            second_col_chars = max_chars_in_cols[2] if nb_of_cols > 2 else 0
+            if second_col_chars > 2 * NB_CHARS_LINE_PDF:
+                extra = 6 * offset[0]
+            else:
+                extra = offset[0]
+            first_column_width = int(
+                total_chars * (value / NB_CHARS_LINE_PDF) + extra)
+        else:
+            first_column_width = int(value)
+
+    # Correction for second column
+    if nb_of_cols > 2:
+        value = max_chars_in_cols[2]
+        if value < NB_CHARS_LINE_PDF and prop_of_dashes[2] < 0.2:
+            second_column_width = int(
+                total_chars * (value / NB_CHARS_LINE_PDF) + offset[1])
+        else:
+            second_column_width = int(value)
+
+    if debug:
+        print('    - Number of chars in table cells: {}'.format(max_chars_in_cols))
+        print('    - Number of dashes (per column): {}'.format(nb_of_dashes))
+        print('    - Proportion of dashes (per column): {}'.format(prop_of_dashes))
+        print('    - Number of chars max in column (PDF): {}'.format(nb_of_chars_in_pdf))
+        print('    - Final number of chars in first column: {}'.format(first_column_width))
+        print('    - Final number of chars in second column: {}'.format(second_column_width))
+
+    # Format the lines with correct number of dashes or whitespaces and
+    # correct alignment of fences and populate the new table
+    new_table = []
+    for i, row in enumerate(table):
+        last_index = len(row) - 1
+        row_content = []
+        for j, elem in enumerate(row):
+            # Outer cells are kept as they are
+            if j == 0 or j == last_index:
+                row_content.append(elem)
+                continue
+
+            # Set the column width
+            if j == 1:
+                column_width = first_column_width
+            elif j == 2:
+                column_width = second_column_width
+            else:
+                column_width = int(max_chars_in_cols[j])
+
+            # Only the dashes row (index 1) is rebuilt from dashes; a body
+            # cell that merely contains ':-' or '-:' keeps its text.
+            row_content.append(
+                _format_table_cell(elem, column_width, is_dashes_row=(i == 1)))
+
+        if debug:
+            print(row_content)
+
+        new_table.append(row_content)
+
+    return new_table
+
+
+def _split_table_row(line):
+    """Split a markdown table line on the fences and strip every cell."""
+    return [cell.strip() for cell in line.split('|')]
+
+
+def _correct_tables_in_lines(content):
+    """Return the lines of `content` with every detected table corrected.
+
+    A table starts at a line containing both ``--`` and ``|`` (the dashes
+    row) whose previous line (the header row) also contains a fence, and
+    runs until the next blank line or the end of `content`. The blank line
+    ending a table is folded into the newline ending of its last row.
+    """
+    new_content = []
+    table = None  # Rows of the table being collected; None outside a table
+    for line_nb, line in enumerate(content):
+        if table is None:
+            # Use dashes to detect where a table starts. The header must be
+            # the previous line, so a dashes row cannot be the first line.
+            starts_table = (
+                line_nb > 0
+                and '--' in line
+                and '|' in line
+                and '|' in content[line_nb - 1]
+            )
+            if not starts_table:
+                new_content.append(line)
+                continue
+
+            print('  * Detected table starting line {}'.format(line_nb - 1))
+            # The header was already copied as a plain line: take it back,
+            # it is written again as part of the corrected table.
+            new_content.pop()
+            table = [_split_table_row(content[line_nb - 1]),
+                     _split_table_row(line)]
+            continue
+
+        row = _split_table_row(line)
+        if row != ['']:
+            # Add row to table if this is not the end of the table
+            table.append(row)
+            continue
+
+        # A blank line ends the table: write the corrected rows
+        rows = ['|'.join(cells) for cells in correct_table(table, debug=True)]
+        new_content.extend(new_row + ' \n' for new_row in rows[:-1])
+        new_content.append(rows[-1] + ' \n\n')
+        table = None
+
+    if table is not None:
+        # The file ends inside a table: write it as well instead of
+        # dropping its rows.
+        rows = ['|'.join(cells) for cells in correct_table(table, debug=True)]
+        new_content.extend(new_row + ' \n' for new_row in rows)
+
+    return new_content
+
+
+def correct_tables(root_path):
+    """Change tables in markdown files for correct rendering in PDF.
+
+    This modification makes sure that the proportion and number of dashes
+    (---) are sufficient for correct PDF rendering and that fences (|) are
+    correctly aligned. Every markdown file found under `root_path` is
+    overwritten in place, except ``index.md``, ``01-contributors.md`` and
+    ``04-entity-table.md``, which are skipped.
+
+    Parameters
+    ----------
+    root_path : str
+        Path to the root directory containing the markdown files
+
+    """
+    skipped_files = ('index.md', '01-contributors.md', '04-entity-table.md')
+    for root, _, files in os.walk(root_path):
+        for file in files:
+            if not file.endswith(".md") or file in skipped_files:
+                continue
+
+            markdown_path = os.path.join(root, file)
+            print('Check tables in {}'.format(markdown_path))
+
+            # Explicit encoding: do not depend on the locale of the build
+            # environment. NOTE(review): assumes the markdown sources are
+            # UTF-8 encoded -- confirm.
+            with open(markdown_path, 'r', encoding='utf-8') as f:
+                content = f.readlines()
+
+            new_content = _correct_tables_in_lines(content)
+
+            # Overwrite with the new markdown content
+            with open(markdown_path, 'w', encoding='utf-8') as f:
+                f.writelines(new_content)
+
+
 def edit_titlepage():
     """Add title and version number of the specification to the titlepage."""
     title, version_number, build_date = extract_header_string()
@@ -188,4 +377,7 @@ def edit_titlepage():
 
     # Step 6: remove all internal links
     remove_internal_links(duplicated_src_dir_path, 'cross')
-    remove_internal_links(duplicated_src_dir_path, 'same')
+    remove_internal_links(duplicated_src_dir_path, 'same')
+
+    # Step 7: correct number of dashes and fences alignment for rendering tables in PDF
+    correct_tables(duplicated_src_dir_path)
diff --git a/src/99-appendices/01-contributors.md b/src/99-appendices/01-contributors.md
@@ -150,7 +150,7 @@ your name is not listed, please add it.
 -   Nicole C. Swann 📖
 -   François Tadel 📖🔌💡
 -   Roberto Toro 🔧
--   Sébastien Tourbier 🤔👀📢
+-   Sébastien Tourbier 🤔👀📢🐛📖
 -   William Triplett 📖
 -   Jessica A. Turner 📖
 -   Bradley Voytek 📖