confluence-asciidoctor

#!/usr/bin/env python3
# FIXME: urllib2.Error if http is used and server tries to redirect to https
# FIXME: putting directories as publishing arguments --> should be ignored or fail?

import sys
import re
PYTHON_VERSION = sys.version_info[0]

if PYTHON_VERSION == 3:
    import urllib3
    from configparser   import ConfigParser, ExtendedInterpolation
    from urllib.parse   import urlencode
    from urllib.parse   import urlparse
    from urllib.parse   import unquote

elif PYTHON_VERSION == 2:
    import urllib2
    from ConfigParser   import ConfigParser
    from urllib         import urlencode
    from urlparse       import urlparse
    import base64
    from poster.encode        import multipart_encode
    from poster.streaminghttp import register_openers
    from poster.encode        import gen_boundary
    from poster.encode        import get_headers
    from poster.encode        import MultipartParam
    import mimetypes

import json
import os
import netrc
import io
import argparse
import subprocess
from   io import StringIO # According to pylint before lxml
import lxml.etree as ET
#import xml
#import xml.etree.ElementTree as ET


# do not raise exceptions on these return codes

if PYTHON_VERSION == 2:
    class HTTPErrorHandlerExtension(urllib2.BaseHandler):
        """urllib2 handler that hands back 201, 204 and 206 responses unchanged.

        Each ``http_error_<code>`` hook simply returns the response object it
        was given instead of raising.
        """

        def _return_response(request, response, code, msg, hdrs):
            # Shared implementation for all accepted status codes.
            return response

        http_error_201 = staticmethod(_return_response)
        http_error_204 = staticmethod(_return_response)
        http_error_206 = staticmethod(_return_response)

        # Keep the class namespace limited to the three public hooks.
        del _return_response

    class HTTP(object):
        """Python 2 HTTP helper built on urllib2 with Basic authentication.

        Offers GET/POST/PUT helpers that return the response body for a
        200 status (``None`` otherwise) and a multipart file upload.
        """

        def __init__(self, basic_auth):
            """Install the custom opener and store the encoded credentials.

            :param basic_auth: credentials in ``login:password`` form.
            """
            handler_extension = urllib2.build_opener(HTTPErrorHandlerExtension)
            urllib2.install_opener(handler_extension)
            self.basic_auth = base64.b64encode(basic_auth)
            # File Upload
            register_openers()

        def add_headers(self, request, headers=None):
            """Add the default headers, then any caller supplied ones.

            :param request: ``urllib2.Request`` to decorate.
            :param headers: optional mapping of extra header names to values.
            """
            request.add_header('Authorization', 'Basic %s' % self.basic_auth)
            request.add_header('Content-Type', 'application/json')
            if headers is None:
                return
            # Will overwrite an already existing key/value pair if the key
            # name collides.
            for key, value in headers.items():
                request.add_header(key, value)

        @staticmethod
        def handle_response(request):
            """Return the body for a 200 response; print the status otherwise.

            :param request: the opened response object (despite the name).
            :returns: the body, or ``None`` when the status is not 200.
            """
            response_status = request.getcode()
            if response_status != 200:
                print(response_status)
                return None
            return request.read()

        def GET(self, url):
            """Issue a GET request and return the handled response."""
            request = urllib2.Request(url)
            self.add_headers(request)
            response = urllib2.urlopen(request)
            return self.handle_response(response)

        def POST(self, url, data, headers=None):
            """Issue a POST request with ``data`` and optional extra headers."""
            request = urllib2.Request(url, data)
            self.add_headers(request, headers)
            request.get_method = lambda: 'POST'
            response = urllib2.urlopen(request)
            return self.handle_response(response)

        def PUT(self, url, data):
            """Issue a PUT request with ``data``."""
            request = urllib2.Request(url, data)
            self.add_headers(request)
            request.get_method = lambda: 'PUT'
            response = urllib2.urlopen(request)
            return self.handle_response(response)

        @classmethod
        def from_params(cls, params):
            """Convert a mapping or sequence of pairs into ``MultipartParam``s.

            Values exposing ``read`` are treated as file objects; only the
            base name of their ``name`` attribute is sent as the file name.

            :param params: mapping, or iterable of ``(name, value)`` pairs
                and/or ready made ``MultipartParam`` objects.
            :returns: list of ``MultipartParam`` objects.
            """
            if hasattr(params, 'items'):
                params = params.items()
            retval = []

            for item in params:
                if isinstance(item, MultipartParam):
                    retval.append(item)
                    continue

                name, value = item
                if hasattr(value, 'read'):
                    # Looks like a file object
                    filename = getattr(value, 'name', None)
                    if filename is not None:
                        # Only strip the directory when there is a name;
                        # os.path.basename(None) would raise.
                        filename = os.path.basename(filename)
                        filetype = mimetypes.guess_type(filename)[0]
                    else:
                        filetype = None

                    retval.append(MultipartParam(name=name,
                        filename=filename, filetype=filetype, fileobj=value))
                else:
                    retval.append(MultipartParam(name, value))

            return retval

        def multipart_encode(self, params, boundary=None):
            """Encode ``params`` as multipart/form-data.

            :param params: parameters accepted by :meth:`from_params`.
            :param boundary: optional boundary; generated when omitted.
            :returns: tuple ``(body_generator, headers)``.
            """
            if boundary is None:
                boundary = gen_boundary()
            else:
                # Imported locally: the file only imports ``urlencode`` from
                # urllib, so the module name itself is not in scope.
                import urllib
                boundary = urllib.quote_plus(boundary)

            headers = get_headers(params, boundary)
            params = self.from_params(params)

            def yielder():
                for param in params:
                    for block in param.iter_encode(boundary):
                        yield block
                yield "--%s--\r\n" % boundary

            return yielder(), headers

        def POST_FILE(self, url, headers, fields, filepath, field_name, filename=None):
            """Upload ``filepath`` as multipart field ``field_name``.

            The ``headers`` argument is replaced by the multipart headers and
            ``filename`` is accepted for signature compatibility only.

            :returns: the handled response of the POST request.
            """
            # The context manager closes the file even if the request raises.
            with open(filepath, 'rb') as file_pointer:
                fields[field_name] = file_pointer
                data, headers = self.multipart_encode(fields)
                return self.POST(url, data, headers)


elif PYTHON_VERSION == 3:
    class HTTP(object):
        """Python 3 HTTP helper built on urllib3 with optional Basic auth.

        Requests go through a proxy when one of the usual proxy environment
        variables is set. Any response with a status other than 200 prints
        the status and terminates the process.
        """

        def __init__(self, basic_auth=None):
            """Create the connection manager and the default header set.

            :param basic_auth: credentials in ``login:password`` form.
            """
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

            proxy = self.get_proxy_url()
            self.http = urllib3.PoolManager() if proxy is None else urllib3.ProxyManager(proxy)

            self.basic_auth = basic_auth
            default_headers = urllib3.util.make_headers(basic_auth=basic_auth)
            default_headers['Content-Type'] = 'application/json'
            self.headers = default_headers

        @staticmethod
        def get_proxy_url():
            """Return the first proxy URL found in the environment, else None."""
            variable_names = ('HTTPS_PROXY', 'https_proxy', 'HTTP_PROXY', 'http_proxy')
            configured = (os.environ.get(name) for name in variable_names)
            return next((url for url in configured if url is not None), None)

        def add_headers(self, headers):
            """Return fresh auth headers merged with ``headers``.

            Caller supplied entries win when a key collides.
            """
            merged = urllib3.util.make_headers(basic_auth=self.basic_auth)
            if headers is not None:
                merged.update(headers)
            return merged

        @staticmethod
        def handle_response(response):
            """Decode the body of a 200 response; exit with code 1 otherwise."""
            status = response.status
            if status == 200:
                return response.data.decode("utf-8", "strict")
            print(status)
            sys.exit(1)

        def _send(self, method, url, data):
            # Shared path for the body carrying verbs using the default headers.
            reply = self.http.request(method, url, headers=self.headers, body=data)
            return self.handle_response(reply)

        def GET(self, url):
            """Issue a GET request with the default headers."""
            reply = self.http.request('GET', url, headers=self.headers)
            return self.handle_response(reply)

        def POST(self, url, data):
            """Issue a POST request with ``data`` as the body."""
            return self._send('POST', url, data)

        def PUT(self, url, data):
            """Issue a PUT request with ``data`` as the body."""
            return self._send('PUT', url, data)

        def POST_MULTIPART(self, url, headers, fields):
            """Issue a multipart POST built from ``fields``."""
            reply = self.http.request('POST', url, headers=headers, fields=fields)
            return self.handle_response(reply)

        def POST_FILE(self, url, headers, fields, filepath, field_name, filename=None):
            """Upload the content of ``filepath`` as field ``field_name``.

            The uploaded file name defaults to the base name of ``filepath``.
            """
            upload_headers = self.add_headers(headers)

            with open(filepath, "rb") as handle:
                payload = handle.read()

            upload_name = os.path.basename(filepath) if filename is None else filename
            fields[field_name] = (upload_name, payload)

            return self.POST_MULTIPART(url, fields=fields, headers=upload_headers)


class ConfluenceRestAPI(object):
    """Small wrapper around the Confluence ``/rest/api/content`` endpoints.

    All requests are delegated to the ``http`` object handed to the
    constructor; response bodies are decoded from JSON.
    """

    def __init__(self, http, base_url, space_key):
        """
        :param http: object providing ``GET``, ``POST``, ``PUT`` and
            ``POST_FILE`` methods.
        :param base_url: base URL that the REST paths are appended to.
        :param space_key: key of the space sent with page requests.
        """
        self.http = http
        self.base_url = base_url
        self.space_key = space_key

    def _content_url(self, suffix=''):
        # Single place that knows the content endpoint prefix.
        return self.base_url + '/rest/api/content/' + suffix

    def build_url(self, param_dict):
        """Return the content search URL with ``param_dict`` as query string."""
        return self.base_url + '/rest/api/content?' + urlencode(param_dict)

    @staticmethod
    def handle_json(result):
        """Decode a JSON body; a ``None`` body becomes an empty dict."""
        if result is None:
            return {}
        return json.loads(result)

    def GET_JSON(self, url):
        """GET ``url`` and return the decoded JSON response."""
        return self.handle_json(self.http.GET(url))

    def POST_JSON(self, url, json_dict):
        """POST ``json_dict`` as JSON and return the decoded response."""
        return self.handle_json(self.http.POST(url, json.dumps(json_dict)))

    def PUT_JSON(self, url, json_dict):
        """PUT ``json_dict`` as JSON and return the decoded response."""
        return self.handle_json(self.http.PUT(url, json.dumps(json_dict)))

    def _find_by_title(self, title, expand):
        # Look up a page by title in the configured space.
        url = self.build_url({'spaceKey': self.space_key, 'expand': expand, 'title': title})
        return self.GET_JSON(url)

    def ancestors(self, title):
        """Query the page called ``title`` with ``expand=ancestors``."""
        return self._find_by_title(title, 'ancestors')

    def version(self, title):
        """Query the page called ``title`` with ``expand=version``."""
        return self._find_by_title(title, 'version')

    def dump(self, page_id):
        """Fetch page ``page_id`` with ``expand=body.storage``."""
        return self.GET_JSON(self._content_url(page_id + '?expand=body.storage'))

    @staticmethod
    def merge_dict(orig_dict, new_dict):
        """Return a shallow copy of ``orig_dict`` updated with ``new_dict``."""
        result_dict = orig_dict.copy()
        result_dict.update(new_dict)
        return result_dict

    def _page_payload(self, title, value, ancestor_id):
        # Request body shared by page creation and page update.
        payload = {
            'type': 'page',
            'title': title,
            'space': {'key': self.space_key},
            'body': {'storage': {'value': value, 'representation': 'storage'}},
        }
        if ancestor_id is not None:
            payload = self.merge_dict(
                payload, {'ancestors': [{'type': 'page', 'id': ancestor_id}]})
        return payload

    def create_page(self, title, xhtml, ancestor_id=None):
        """Create a page with storage-format body ``xhtml``.

        :param ancestor_id: optional id of the parent page.
        :returns: decoded JSON response.
        """
        payload = self._page_payload(title, xhtml, ancestor_id)
        return self.POST_JSON(self._content_url(), payload)

    def update_page(self, page_id, version, title, value, ancestor_id=None):
        """Replace the body of page ``page_id``, sending ``version`` as its number.

        :param ancestor_id: optional id of the parent page.
        :returns: decoded JSON response.
        """
        payload = self._page_payload(title, value, ancestor_id)
        payload = self.merge_dict(payload, {'id': page_id, 'version': {'number': version}})
        return self.PUT_JSON(self._content_url(page_id), payload)

    def list_attachments(self, page_id):
        """Return the decoded attachment listing of page ``page_id``."""
        return self.GET_JSON(self._content_url(page_id + "/child/attachment"))

    # https://community.atlassian.com/t5/Confluence-questions/How-to-post-attachments-to-confluence-page/qaq-p/214853
    # https://community.atlassian.com/t5/Confluence-questions/Can-i-update-file-attachments-using-the-rest-api/qaq-p/308327

    def attach_file(self, page_id, filename, comment=""):
        """Upload the file ``filename`` as an attachment of ``page_id``.

        :returns: decoded JSON response.
        """
        url = self._content_url(page_id + "/child/attachment")
        headers = {'X-Atlassian-Token': 'nocheck'}
        fields = {'comment': comment}
        result = self.http.POST_FILE(url, headers, fields, filename, 'file')
        return self.handle_json(result)

    def attach_file_to_page(self, page_id, filepath, comment=""):
        """Upload ``filepath`` unless it is missing or already attached.

        An attachment counts as existing when its title equals the base
        name of ``filepath``.

        :returns: decoded JSON response of the upload, or ``None`` when the
            upload was skipped.
        """
        if not os.path.exists(filepath):
            print("Attachment File does not exist: " + filepath)
            return None

        wanted_name = os.path.basename(filepath)
        listing = self.list_attachments(page_id)
        # handle_json yields {} for an empty response, so 'results' may be
        # absent; treat that as "no attachments yet".
        for attachment in listing.get('results', []):
            existing_filename = attachment['title']
            if wanted_name == existing_filename:
                print("Attachment already exists(skipped): " + str(existing_filename))
                return None

        print("Uploading attachment: " + filepath)
        return self.attach_file(page_id, filepath, comment)

class ConfluenceFixes:
    """Convert asciidoctor XHTML5 output into Confluence storage format.

    The conversion replaces ``img`` elements with attachment markup, keeps
    only the children of the ``content`` div, and runs XSLT passes for code
    listings and tables. The class also pretty prints hand written
    Confluence XML.
    """

    # Prefix to namespace URI table shared by XPath queries and generated markup.
    _NAMESPACES = {
        "xhtml":    "http://www.w3.org/1999/xhtml",
        "ac":       "http://www.atlassian.com/schema/confluence/4/ac/",
        "ri":       "http://www.atlassian.com/schema/confluence/4/ri/"
    }
    # Clark notation prefixes for building elements and attributes.
    _AC = '{http://www.atlassian.com/schema/confluence/4/ac/}'
    _RI = '{http://www.atlassian.com/schema/confluence/4/ri/}'
    # Language names rewritten after lower-casing in transform_code_section.
    _LANGUAGE_ALIASES = {"python": "py", "python3": "py", "yaml": "yml"}

    def  __init__(self, config):
        """
        :param config: nested mapping; ``config['default']['code_theme']``
            is read when code listings are transformed.
        """
        self.image_src_list = []
        self.a_href_list = []
        self.namespaces = dict(self._NAMESPACES)
        self.config = config

    # This gives the same result as the "-s" switch in asciidoctor for xhtml5
    @staticmethod
    def extract_content_div_xsl():
        """Return the stylesheet that keeps the children of ``div#content``.

        XHTML elements are re-created without their namespace.
        """
        return '''<xsl:stylesheet
            version="1.0"
            xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
            xmlns:x="http://www.w3.org/1999/xhtml"
            xmlns:ac="http://www.atlassian.com/schema/confluence/4/ac/"
            xmlns:ri="http://www.atlassian.com/schema/confluence/4/ri/"
            exclude-result-prefixes="x"
            >

            <xsl:output indent="yes" method="xml" omit-xml-declaration="yes"  />

            <xsl:template match="/">
                    <div>
                    <xsl:apply-templates select="x:html/x:body/x:div[@id='content']/x:*"/>
                    </div>
            </xsl:template>


            <xsl:template match="@*|node()">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
            </xsl:template>

            <!-- Remove namespace magic -->
            <xsl:template match="x:*">
                    <xsl:element name="{local-name()}">
                            <xsl:copy-of select="namespace::*[not(. = namespace-uri(..))]"/>
                            <xsl:apply-templates select="@*|node()"/>
                    </xsl:element>
            </xsl:template>

</xsl:stylesheet>
'''


    @staticmethod
    def transform_code_section_xsl():
        """Return the stylesheet that turns highlighted listings into code macros.

        The stylesheet references the ``$theme`` and ``$title`` variables.
        """
        return '''
<xsl:stylesheet
            version="1.0"
            xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
            xmlns:x="http://www.w3.org/1999/xhtml"
            xmlns:ac="http://www.atlassian.com/schema/confluence/4/ac/"
            xmlns:ri="http://www.atlassian.com/schema/confluence/4/ri/"
            exclude-result-prefixes="x"
            >

            <xsl:output indent="yes" method="xml" omit-xml-declaration="yes"  cdata-section-elements="ac:plain-text-body"  />

    <xsl:template match="node()|@*">
    	<xsl:copy>
    		<xsl:apply-templates select="node()|@*"/>
    		</xsl:copy>
    </xsl:template>


    <xsl:template match="//div[div[@class='content']/pre[@class='highlight'] ]" >
    	<ac:structured-macro ac:name="code" ac:schema-version="1" ac:macro-id="bcb394c5-e0b8-4630-9e1a-1c3a48076738">
    		<ac:parameter ac:name="language"><xsl:value-of select="div/pre/code/@data-lang" /></ac:parameter>
    		<ac:parameter ac:name="theme"><xsl:copy-of select="$theme"/></ac:parameter>
    		<ac:parameter ac:name="title"><xsl:copy-of select="$title"/></ac:parameter>
    		<ac:plain-text-body><xsl:value-of select="div/pre/code" /></ac:plain-text-body>
    	</ac:structured-macro>
            </xsl:template>

</xsl:stylesheet>
'''
    @staticmethod
    def transform_table_xsl():
        """Return the stylesheet that rebuilds XHTML tables.

        Header and body rows are merged into one ``tbody`` and the cells
        are re-created without their attributes.
        """
        return '''
<xsl:stylesheet
            version="1.0"
            xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
            xmlns:x="http://www.w3.org/1999/xhtml"
            xmlns:ac="http://www.atlassian.com/schema/confluence/4/ac/"
            exclude-result-prefixes="x"
            >

    <xsl:output indent="yes" method="xml" omit-xml-declaration="yes"  cdata-section-elements="ac:plain-text-body"  />

    <xsl:template match="node()|@*">
    	<xsl:copy>
    		<xsl:apply-templates select="node()|@*"/>
    		</xsl:copy>
    </xsl:template>


    <xsl:template match="x:table">
    	<xsl:element name="table" namespace="http://www.w3.org/1999/xhtml"  >
    		<xsl:attribute name="class">fixed-table wrapped</xsl:attribute>
    	
    	<xsl:element name="colgroup" namespace="http://www.w3.org/1999/xhtml" >
    		<xsl:copy-of select="./x:colgroup/x:col"/>
    	</xsl:element>

    	<!-- for confluence, thead and tbody rows have to be merged into tbody
    	     Additionally we need to remove the class attributes which are applied to th and td,
    	     therefore we generate this out of a table -->

    	<xsl:element name="tbody" namespace="http://www.w3.org/1999/xhtml" >
    		<!--<xsl:copy-of select="./x:thead/x:tr"/> -->
    		<!-- <xsl:copy-of select="./x:tbody/x:tr"/> -->
    		<xsl:for-each select="./x:thead/x:tr">
    			<xsl:element name="tr" namespace="http://www.w3.org/1999/xhtml" >
    				<xsl:for-each select="./x:th">
    				<xsl:element name="th" namespace="http://www.w3.org/1999/xhtml" >
    						<xsl:copy-of select="node()"/>	
    				</xsl:element>
    				</xsl:for-each>
    			</xsl:element>
    		</xsl:for-each>

    		<xsl:for-each select="./x:tbody/x:tr">
    			<xsl:element name="tr" namespace="http://www.w3.org/1999/xhtml" >
    				<xsl:for-each select="./x:td/x:p">
    				<xsl:element name="td" namespace="http://www.w3.org/1999/xhtml" >
    						<xsl:copy-of select="node()"/>	
    				</xsl:element>
    				</xsl:for-each>
    			</xsl:element>
    		</xsl:for-each>


    	</xsl:element>

    	</xsl:element>

    </xsl:template>
    
</xsl:stylesheet>
'''
    def transform_table(self, dom):
        """Apply the table stylesheet to ``dom`` and return the result tree."""
        transform = ET.XSLT(ET.XML(self.transform_table_xsl()))
        return transform(dom)

    def transform_code_section(self, dom):
        """Apply the code stylesheet and normalise the language parameters.

        Language names are lower-cased; ``python``/``python3`` become ``py``
        and ``yaml`` becomes ``yml``. Listings without a language are left
        untouched.

        :returns: the transformed tree.
        """
        theme = self.config['default']['code_theme']
        title = ''
        transform = ET.XSLT(ET.XML(self.transform_code_section_xsl()))
        # Sets two variables for the XSL template
        newdom = transform(dom, theme=ET.XSLT.strparam(theme), title=ET.XSLT.strparam(title))

        elements = newdom.xpath("//ac:parameter[@ac:name='language']", namespaces=self.namespaces)
        for element in elements:
            if not element.text:
                # No data-lang on the listing: the parameter is empty and
                # there is nothing to normalise.
                continue
            value = element.text.lower()
            element.text = self._LANGUAGE_ALIASES.get(value, value)
        return newdom

    def extract_content(self, dom):
        """Apply the content extraction stylesheet and return the result tree."""
        transform = ET.XSLT(ET.XML(self.extract_content_div_xsl()))
        return transform(dom)

    def create_standard_attachment(self, filename):
        """Build ``<ac:image><ri:attachment ri:filename=.../></ac:image>``."""
        image = ET.Element(self._AC + 'image', nsmap=self.namespaces)
        ET.SubElement(image,
                      self._RI + 'attachment',
                      {self._RI + 'filename': filename},
                      nsmap=self.namespaces)
        return image

    def create_macro_attachment(self, filename, height="250"):
        """Build a ``view-file`` structured macro for attachment ``filename``.

        :param height: text of the macro's ``height`` parameter.
        """
        macro = ET.Element(self._AC + 'structured-macro',
                           {self._AC + 'name': 'view-file',
                            self._AC + 'schema-version': '1'},
                           nsmap=self.namespaces)

        name_parameter = ET.SubElement(macro,
                                       self._AC + 'parameter',
                                       {self._AC + 'name': 'name'},
                                       nsmap=self.namespaces)
        ET.SubElement(name_parameter,
                      self._RI + 'attachment',
                      {self._RI + 'filename': filename},
                      nsmap=self.namespaces)

        height_parameter = ET.SubElement(macro,
                                         self._AC + 'parameter',
                                         {self._AC + 'name': 'height'},
                                         nsmap=self.namespaces)
        height_parameter.text = height
        return macro

    def img_query(self, dom):
        """Return all XHTML ``img`` elements below the document body."""
        return dom.xpath('/xhtml:html/xhtml:body//xhtml:img', namespaces=self.namespaces)

    def xxx(self, dom):
        """Replace every ``img`` element in ``dom`` with attachment markup.

        Only the unquoted base name of ``src`` is referenced. Names ending
        in ``pptx`` or ``pdf`` get a ``view-file`` macro, everything else an
        ``ac:image`` element. ``dom`` is modified in place.
        """
        for img in self.img_query(dom):
            # Only basename
            src = unquote(os.path.basename(img.attrib['src']))
            if src.endswith(('pptx', 'pdf')):
                replacement = self.create_macro_attachment(src)
            else:
                replacement = self.create_standard_attachment(src)
            img.getparent().replace(img, replacement)

    def extract_img_source(self, dom):
        """Remember the unquoted ``src`` of every ``img`` element in ``dom``."""
        sources = dom.xpath('/xhtml:html/xhtml:body//xhtml:img/@src', namespaces=self.namespaces)
        self.image_src_list = [unquote(src) for src in sources]

    def get_image_src_list(self):
        """Return the sources collected by :meth:`extract_img_source`."""
        return self.image_src_list

    def extract_a_source(self, dom):
        """Remember the ``href`` of every ``a`` element in ``dom``."""
        self.a_href_list = dom.xpath('/xhtml:html/xhtml:body//xhtml:a/@href', namespaces=self.namespaces)

    def get_a_href_list(self):
        """Return the links collected by :meth:`extract_a_source`."""
        return self.a_href_list

    def process_asciidoctor_dom(self, dom):
        """Run the complete conversion on a parsed asciidoctor document.

        Image sources are collected before the ``img`` elements are
        replaced.

        :returns: the converted markup as a string.
        """
        self.extract_img_source(dom)
        self.xxx(dom)

        content = self.extract_content(dom)
        with_code_macros = self.transform_code_section(content)
        with_tables = self.transform_table(with_code_macros)

        return ET.tostring(with_tables, pretty_print=True, encoding="UTF-8").decode('utf-8')

    def process_asciidoctor_xml(self, xhtml_textstring):
        """Parse ``xhtml_textstring`` and convert it, see :meth:`process_asciidoctor_dom`."""
        dom = ET.XML(xhtml_textstring)
        return self.process_asciidoctor_dom(dom)

    @staticmethod
    def get_namespaces():
        """Return the ``xmlns:prefix="uri"`` declarations as one string."""
        declarations = [
            "=".join(["xmlns:" + prefix, "\"" + uri + "\""])
            for prefix, uri in ConfluenceFixes._NAMESPACES.items()
        ]
        return " ".join(declarations)

    @staticmethod
    def get_entitities():
        """Return ``<!ENTITY ...>`` declarations for the supported entities."""
        entities = {
#                "amp": "&",
                "auml": "ä",
                "Auml": "Ä",
                "euro": "€",
                "gt": ">",
                "lt": "<",
                # Written as an escape so the non-breaking space is visible.
                "nbsp": "\u00a0",
                "ouml": "ö",
                "Ouml": "Ö",
                "szlig": "ß",
                "uuml": "ü",
                # The entity is called "Uuml"; a name starting with "Ü"
                # would never match a "&Uuml;" reference.
                "Uuml": "Ü"
        }
        declarations = [
            "<!ENTITY " + " ".join([name, "\"" + value + "\""]) + ">"
            for name, value in entities.items()
        ]
        return "".join(declarations)

    # Read XML with Confluence formatting (ac: tags ...) and process it for
    # consumption, so that a parse failure happens before anything is uploaded.
    def process_confluence_xml(self, xhtml_textstring):
        """Parse a Confluence storage fragment and return it pretty printed.

        The fragment is wrapped in an ``ac:root`` element that declares the
        namespaces and entities; the wrapper is cut off again afterwards.
        """
        namespace_string = self.get_namespaces()
        entities = self.get_entitities()
        document = ('<!DOCTYPE ac:root SYSTEM "test" [' + entities + "]><ac:root "
                    + namespace_string + ">" + xhtml_textstring + "</ac:root>")
        dom = ET.fromstring(document)
        result = ET.tostring(dom, pretty_print=True, encoding="UTF-8").decode('utf-8')
        # cut  the outer root tag
        result = result[(result.find(">")+1):]
        result = result[:result.rfind("<")]
        return result


class NETRC:
    """Optional netrc file: lookups yield empty credentials when it is absent."""

    def __init__(self, filepath):
        """Parse ``filepath`` if it is an existing file, otherwise store None."""
        self.netrc = netrc.netrc(filepath) if os.path.isfile(filepath) else None

    def get_entry(self, hostname):
        """Return the authenticators for ``hostname``.

        Without a parsed file the result is ``(None, None, None)``.
        """
        if self.netrc is not None:
            return self.netrc.authenticators(hostname)
        return (None, None, None)

class ToolConfiguration2:
    """Helper that overlays an INI file on a default configuration."""

    @staticmethod
    def merge(filepath, configuration_default):
        """Return a copy of ``configuration_default`` overlaid with ``filepath``.

        Every section found in the file replaces the section of the same
        name as a whole; the input mapping itself is not modified.
        """
        merged = configuration_default.copy()
        parser = ConfigParser()
        parser.read(filepath)
        merged.update({
            section: {
                option: parser.get(section, option)
                for option in parser.options(section)
            }
            for section in parser.sections()
        })
        return merged

class ShellCmd(object):
    """Run an external command and capture its standard output as text.

    The exit status of the last command is kept in ``exit_code``.
    """

    def __init__(self):
        self.exit_code = 0

    @staticmethod
    def subprocess(args):
        """Start ``args`` with stdout and stderr connected to pipes.

        :returns: the ``subprocess.Popen`` object.
        """
        proc = subprocess.Popen(args,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        return proc

    @staticmethod
    def get_input_stream(stream):
        """Return a text view of ``stream``.

        Python 2 ``file`` objects are returned unchanged; anything else is
        wrapped in a UTF-8 ``io.TextIOWrapper``.
        """
        # Python 2 ( file doesn't work in python3, therefore stringify
        if str(type(stream)) == "<type 'file'>":
            return stream
        return io.TextIOWrapper(stream, encoding='utf-8')

    @staticmethod
    def read_stream(stream):
        """Read ``stream`` to its end and return the content."""
        return stream.read()

    def execute(self, args):
        """Run ``args`` and return its standard output as text.

        Standard error is captured and discarded. The exit status is stored
        in ``exit_code``.

        :param args: command and arguments as a list.
        :returns: decoded standard output.
        """
        proc = self.subprocess(args)
        # communicate() drains both pipes concurrently. Reading stdout to
        # EOF on its own blocks forever once the child has filled the
        # stderr pipe.
        stdout_data, _stderr_data = proc.communicate()
        self.exit_code = proc.returncode
        input_stream = self.get_input_stream(io.BytesIO(stdout_data))
        return self.read_stream(input_stream)

class Confluence(object):
    """Command line front end that publishes files as Confluence pages.

    ``.txt`` files are rendered with asciidoctor, ``.xml`` files are taken
    as Confluence storage format. Images referenced by a document are
    uploaded as attachments of the resulting page.
    """

    def __init__(self, net_rc, config):
        """
        :param net_rc: object whose ``authenticators(hostname)`` returns a
            ``(login, account, password)`` tuple or ``None``.
        :param config: nested mapping; ``config['default']`` supplies
            ``base_url`` and ``space_key``.

        Exits with status 2 when ``net_rc`` has no entry for the host.
        """
        self.config = config

        base_url = config['default']['base_url']
        space_key = config['default']['space_key']

        # The netrc lookup uses the host name without the port.
        hostname = urlparse(base_url).netloc.split(":")[0]

        credentials = net_rc.authenticators(hostname)
        if credentials is None:
            print("hostname not found: "+hostname)
            sys.exit(2)

        self.hostname = hostname

        login, _account, password = credentials
        self.http = HTTP(login + ":" + password)
        self.c = ConfluenceRestAPI(self.http, base_url, space_key)
        # Maps a parent page title to its page id.
        self.cache = {}

    @staticmethod
    def json_pretty_print(json_data):
        """Print ``json_data`` as indented JSON."""
        print(json.dumps(json_data, indent=2))

    @staticmethod
    def parse_cmdline():
        """Parse ``sys.argv`` and return the argparse namespace."""
        parser = argparse.ArgumentParser("confluence-ascii")
        parser.add_argument('file', metavar='file', nargs='+', help='file')
        parser.add_argument('-p', '--parent-page-title', metavar='parent_page_title', help='title of the parent page')
        parser.add_argument('-d', '--dump', help='dump page content')
        return parser.parse_args()

    @staticmethod
    def filename_to_title(name):
        """Return the base name of ``name`` without its last extension.

        When stripping the extension leaves nothing, the full base name is
        returned.
        """
        basename = os.path.basename(name)
        title = ('.').join(basename.split('.')[:-1])  # Remove .txt or .xml extension
        if title == "":
            title = basename
        return title

    @staticmethod
    def relpath_to_lookup_name(relpath):
        """Return ``relpath`` unchanged."""
        return relpath

    @staticmethod
    def parse_path(path):
        """Split a path such as ``results[0].id`` into its elements.

        :returns: list of strings, e.g. ``['results', '0', 'id']``, or
            ``None`` when an index between brackets is not a number.
        """
        result = ""
        stack = []
        expect_number = False
        for c in path:
            if expect_number:
                if c == ']':
                    if not result.isdigit():
                        print("no digit")
                        return None
                    stack.append(result)
                    result = ""
                    expect_number = False
                    continue
            else:
                if c == '[':
                    stack.append(result)
                    result = ""
                    expect_number = True
                    continue
                if c == '.':
                    if result != "":
                        stack.append(result)
                    result = ""
                    continue

            result = result + c

        # A path ending in "]" leaves nothing pending; appending the empty
        # string would add a bogus element that can never be resolved.
        if result != "":
            stack.append(result)
        return stack

    def get_element_by_path(self, path, lookup):
        """Resolve ``path`` inside the nested structure ``lookup``.

        Numeric elements index into sequences, all others are used as keys.

        :returns: the addressed value, or ``None`` when the path is invalid
            or a key or index does not exist.
        """
        elements = self.parse_path(path)
        if elements is None:
            return None
        for element in elements:
            if element.isdigit():
                pos = int(element)
                if pos >= len(lookup):
                    return None
                lookup = lookup[pos]
            else:
                if element in lookup:
                    lookup = lookup[element]
                else:
                    return None

        return lookup

    @staticmethod
    def read_from_asciidoctor(filename):
        """Render ``filename`` with asciidoctor (xhtml5 backend) and return the output."""
        shell_cmd = ShellCmd()
        args = ['asciidoctor', '-b', 'xhtml5', '-o', '-', filename]
        return shell_cmd.execute(args)

    def get_ancestor_id(self, title):
        """Return the id of the page called ``title`` or ``None``."""
        json_data = self.c.version(title)
        return self.get_element_by_path("results[0].id", json_data)

    def get_cached_ancestor_id(self, title):
        """Like :meth:`get_ancestor_id`, but each title is looked up only once."""
        if title not in self.cache:
            self.cache[title] = self.get_ancestor_id(title)
        return self.cache[title]

    def publish(self, title, content, ancestor_id):
        """Create the page ``title`` or update it when it already exists.

        An update sends the current version number plus one.

        :returns: decoded JSON response of the create or update request.
        """
        json_result = self.c.version(title)
        if not json_result.get('results', None):
            return self.c.create_page(title, content, ancestor_id)

        page_id = self.get_element_by_path("results[0].id", json_result)
        version = self.get_element_by_path("results[0].version.number", json_result)
        return self.c.update_page(page_id, version + 1, title, content, ancestor_id)

    def process_file(self, filename, parent_page_title=None):
        """Publish ``filename`` and attach the images it references.

        Without an explicit ``parent_page_title`` the name of the directory
        containing the file is used, unless the file lives directly in
        ``config['internal']['root_dir']``.
        """
        title = self.filename_to_title(filename)
        abspath = os.path.dirname(os.path.abspath(filename))
        root_dir = self.config['internal']['root_dir']

        if abspath == root_dir:
            relpath = ""
        else:
            relpath = abspath[(len(root_dir)+1):]

        if parent_page_title is None and relpath != "":
            parent_page_title = self.relpath_to_lookup_name(relpath)
            parent_page_title = os.path.basename(parent_page_title)

        # Without a parent title there is nothing to look up.
        ancestor_id = None
        if parent_page_title:
            ancestor_id = self.get_cached_ancestor_id(parent_page_title)

        xml, image_src_list = self.file_to_xml(filename)

        message = "Publishing '{0}' to '{1}'".format(title, self.hostname)
        if parent_page_title:
            message = message + " under parent page '{0}'".format(parent_page_title)
        print(message)

        result = self.publish(title, xml, ancestor_id)
        page_id = result.get("id")
        if page_id is None:
            # The server response carried no page id, so there is no page
            # to attach the images to.
            print("Publishing '{0}' failed".format(title))
            return

        for img in image_src_list:
            print("  Appending image '{0}'".format(img))
            self.c.attach_file_to_page(page_id, img, "initial")

    def file_to_xml(self, filename):
        """Convert ``filename`` into Confluence storage markup.

        :returns: tuple ``(markup, image_sources)``.
        :raises ValueError: when the file is neither ``.xml`` nor ``.txt``.
        """
        fix = ConfluenceFixes(self.config)
        if filename.endswith(".xml"):
            with open(filename) as myfile:
                xhtml = fix.process_confluence_xml(myfile.read())
        elif filename.endswith(".txt"):
            xhtml = self.read_from_asciidoctor(filename)
            xhtml = fix.process_asciidoctor_xml(xhtml)
        else:
            raise ValueError("Unsupported file type (expected .xml or .txt): " + filename)

        return (xhtml, fix.get_image_src_list())

    def dump(self, filename, page_name):
        """Print the storage body of the page called ``page_name``.

        ``filename`` is accepted for signature compatibility only.
        """
        json_data = self.c.version(page_name)
        page_id = self.get_element_by_path("results[0].id", json_data)
        if page_id is None:
            print("Page not found: " + str(page_name))
            return
        result = self.c.dump(page_id)
        body = self.get_element_by_path("body.storage.value", result)
        print(body)

    def dump_xhtml(self, filename):
        """Print the asciidoctor output of ``filename`` and its converted form."""
        xhtml = self.read_from_asciidoctor(filename)
        print(xhtml)
        if xhtml == "":
            return
        fix = ConfluenceFixes(self.config)
        print(fix.process_asciidoctor_xml(xhtml))

    def run(self):
        """Entry point: dump a page when ``--dump`` was given, else publish."""
        parsed_args = self.parse_cmdline()
        # argparse always defines the attribute, so test its value; a
        # membership test on the namespace is true even without --dump.
        if parsed_args.dump is not None:
            for filename in parsed_args.file:
                self.dump(filename, parsed_args.dump)
            return

        for filename in parsed_args.file:
            self.process_file(filename, parsed_args.parent_page_title)

def load_netrc():
    """Load the ``.netrc`` file from the user's home directory.

    The home directory is taken from the ``HOME`` environment variable and,
    when that variable is not set, from ``os.path.expanduser('~')``.

    Returns the parsed ``netrc.netrc`` object. If the file does not exist,
    a message is printed and the process exits with status 1.
    """
    home_directory = os.environ.get('HOME')
    if home_directory is None:
        # An unset HOME used to raise KeyError; fall back to the platform's
        # notion of the home directory instead.
        home_directory = os.path.expanduser('~')
    filepath = os.path.join(home_directory, '.netrc')
    if not os.path.isfile(filepath):
        print("Not found: "+filepath)
        # sys.exit() instead of exit(): the latter is only injected by the
        # site module and is not guaranteed to exist in every interpreter.
        sys.exit(1)
    return netrc.netrc(filepath)

class SimpleConfiguration:
    """Locate the hidden configuration directory and its config file.

    Starting at the current working directory, the directory tree is
    searched upwards for a directory named ``directory_name``. The computed
    paths are available as the attributes ``work_dir``, ``root_dir``,
    ``config_dir`` and ``config_file``.
    """

    def __init__(self, directory_name='.confluence-asciidoctor', configfile_name='config'):
        self.directory_name = directory_name
        self.configfile_name = configfile_name
        self.configuration = self.init(os.getcwd())

    # provide an abspath to the repo or a file in the repo
    def init(self, work_dir):
        """Compute the configuration paths for ``work_dir``.

        Returns a dict with the keys ``work_dir``, ``root_dir``,
        ``config_dir`` and ``config_file``. When no configuration directory
        is found in ``work_dir`` or above, ``work_dir`` itself becomes the
        root and the configuration directory is placed directly below it.
        """
        configuration = {}
        configuration['work_dir'] = work_dir

        # The directory where the hidden "."-directory resides is the root_dir

        config_dir = self.find_root_init(work_dir, self.directory_name)
        if config_dir is None:
            root_dir = work_dir
            config_dir = os.path.join(root_dir, self.directory_name)
        else:
            root_dir = os.path.dirname(config_dir)

        configuration["root_dir"] = root_dir
        configuration["config_dir"] = config_dir

        config_file = os.path.join(config_dir, self.configfile_name)
        configuration["config_file"] = config_file

        # NOTE: this replaces the whole instance dict, so the returned
        # mapping and the instance attributes are one and the same object and
        # ``directory_name`` / ``configfile_name`` are no longer attributes
        # afterwards. main() reads ``__dict__`` directly, so this is kept.
        self.__dict__ = configuration
        return configuration


    def find_root_init(self, abspath, config_dir):
        """Find ``config_dir`` starting from a path that may not exist.

        ``abspath`` is first reduced to its nearest existing ancestor; if
        that is a file, its parent directory is used. The search itself is
        delegated to ``find_root``. Returns the path of the configuration
        directory or None.
        """
        # Normalise so that walking up always ends at a real root instead of
        # the empty string that os.path.dirname yields for relative paths.
        abspath = os.path.abspath(abspath)

        # Check existence , if not try root dir
        while not os.path.exists(abspath):
            parent = os.path.dirname(abspath)
            if parent == abspath:
                # dirname() no longer shortens the path (e.g. a drive that
                # does not exist): nothing to search, avoid looping forever.
                return None
            abspath = parent

        # If it is a file, use parent dir
        if not os.path.isdir(abspath):
            abspath = os.path.dirname(abspath)

        return self.find_root(abspath, config_dir)

    @staticmethod
    def find_root(dirpath, config_dir):
        """Search ``dirpath`` and its ancestors for a ``config_dir`` directory.

        Returns the path of the first match, or None when ``os.sep`` is
        reached (``os.sep`` itself is not examined) or the path cannot be
        shortened any further.
        """
        while dirpath != os.sep:
            target_dir = os.path.join(dirpath, config_dir)
            if os.path.isdir(target_dir):
                return target_dir
            parent = os.path.dirname(dirpath)
            if parent == dirpath:
                # Fixed point other than os.sep ("" for relative paths, a
                # drive root on Windows); the old loop never terminated here.
                return None
            dirpath = parent
        return None


def get_default_config():
    """Return the built-in default configuration as INI-formatted text."""
    # Leading and trailing empty entries reproduce the blank first line and
    # the final newline of the configuration text.
    lines = (
        "",
        "[default]",
        "base_url=https://example.com/space",
        "space_key=EXA",
        "code_theme=Confluence",
        "",
    )
    return "\n".join(lines)

def get_internal_default_config():
    """Parse the built-in default configuration into a nested dict.

    The text from ``get_default_config`` is fed to a ConfigParser (with
    extended interpolation on Python 3) and flattened with
    ``serialize_config``.
    """
    defaults_text = get_default_config()
    if PYTHON_VERSION != 3:
        parser = ConfigParser()
        parser.readfp(io.BytesIO(defaults_text))
        return serialize_config(parser)

    parser = ConfigParser(interpolation=ExtendedInterpolation())
    parser.read_string(defaults_text)
    return serialize_config(parser)

def serialize_config(config_parser):
    """Flatten a ConfigParser into a ``{section: {option: value}}`` dict.

    Every option reported by ``options()`` for a section is read with
    ``get()``, so the values are whatever the parser returns for them.
    """
    return {
        section: {
            option: config_parser.get(section, option)
            for option in config_parser.options(section)
        }
        for section in config_parser.sections()
    }

def parse_cmdline():
    """Parse ``sys.argv`` and return the resulting argparse namespace.

    Supported sub-commands are ``publish`` (alias ``pu``), ``download``
    (alias ``do``), ``dump``, ``dumpxhtml`` and ``init``; the aliases are
    only registered on Python 3. The chosen sub-command is stored in
    ``command`` and aliases are rewritten to their canonical name.

    When the script is started without any argument the help text is
    printed and the process exits with status 0.
    """
    parser = argparse.ArgumentParser("main")
    parser.add_argument('--code-theme', metavar='code_theme', help='confluence ui theme to pick')

    # "dest" parameter helps to determine which subparser was used later on in namespace result (stored in 'command')
    subparsers = parser.add_subparsers(help='commands', dest='command')

    # Publish page
    if PYTHON_VERSION == 2:
        parser_publish = subparsers.add_parser('publish', help='publish help')
    else:
        parser_publish = subparsers.add_parser('publish', aliases=['pu'], help='publish help')
    parser_publish.add_argument('-p', '--parent-page-title', metavar='parent_page_title', help='title of the parent page')
    parser_publish.add_argument('file', metavar='file', nargs='+', help='file')

    # Download Page
    if PYTHON_VERSION == 2:
        parser_download = subparsers.add_parser('download', help='download help')
    else:
        parser_download = subparsers.add_parser('download', aliases=['do'], help='download help')

    parser_download.add_argument('page_title', help='page title')

    # Dump page
    parser_dump = subparsers.add_parser('dump', help='dump')
    parser_dump.add_argument('page_title', help='page_title')

    # Dump xhtml
    parser_dumpxhtml = subparsers.add_parser('dumpxhtml', help='dump xhtml')
    parser_dumpxhtml.add_argument('file', help='file')

    # initialize environment
    parser_init = subparsers.add_parser('init', help='init')
    parser_init.add_argument('--url', default='http://127.0.0.1/display/EXAMPLE', help='url')

    # no arguments given, print help. This has to happen after all options
    # and sub-commands are registered, otherwise the help text is empty.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(0)

    parsed_args = parser.parse_args()

    # make command canonical

    if parsed_args.command == 'do':
        parsed_args.command = 'download'
    elif parsed_args.command == 'pu':
        parsed_args.command = 'publish'

    return parsed_args

def merge_dict_deep(target, source):
    """Return a copy of ``target`` with ``source`` merged into it.

    Values from ``source`` win. When a key holds a dict on both sides the
    two dicts are merged recursively instead of being replaced. ``target``
    itself is not modified.
    """
    merged = target.copy()
    for key, incoming in source.items():
        both_dicts = (
            key in merged
            and isinstance(merged[key], dict)
            and isinstance(incoming, dict)
        )
        merged[key] = merge_dict_deep(merged[key], incoming) if both_dicts else incoming
    return merged


def main():
    """Command-line entry point.

    ``init`` creates the configuration directory and, if missing, a config
    file derived from ``--url``, then exits. Every other command requires
    an existing config file: the built-in defaults are merged with the file
    contents, ``--code-theme`` (if given) overrides the result, the user's
    netrc is loaded and the requested command is run on a ``Confluence``
    instance.
    """
    default_config = get_internal_default_config()
    simple_config = SimpleConfiguration()

    parsed_args = parse_cmdline()

    if parsed_args.command == 'init':
        if not os.path.exists(simple_config.config_dir):
            os.mkdir(simple_config.config_dir)

        match = re.match(r'^(.*)?/display/([^/]+)', parsed_args.url)
        if not match:
            print("Could not match url: " + parsed_args.url)
            sys.exit(1)
        base_url, space_key = match.groups()

        # Never overwrite an existing config file.
        if not os.path.exists(simple_config.config_file):
            # "with" guarantees the file is closed even if the write fails.
            with open(simple_config.config_file, "w") as file_pointer:
                file_pointer.write("[default]\nbase_url={0}\nspace_key={1}\ncode_theme=Confluence\n".format(base_url, space_key))
        sys.exit(0)

    default_config['internal'] = simple_config.__dict__

    if not os.path.exists(simple_config.config_file):
        print("No config found. confluence-asciidoctor init might be required")
        sys.exit(0)

    config_parser = ConfigParser()
    config_parser.read(simple_config.config_file)
    file_config = serialize_config(config_parser)
    # merge file_config into default_config
    default_config = merge_dict_deep(default_config, file_config)

    # Apply the command-line override only after the merge. The config file
    # written by "init" always contains code_theme, so applying the option
    # before merging meant it was silently overridden by the file.
    if parsed_args.code_theme:
        default_config['default']['code_theme'] = parsed_args.code_theme

    net_rc = load_netrc()
    confluence = Confluence(net_rc, default_config)

    if parsed_args.command in ('download', 'dump'):
        # Both commands print the stored page body.
        confluence.dump("", parsed_args.page_title)
    elif parsed_args.command == 'publish':
        for filename_iterator in parsed_args.file:
            confluence.process_file(filename_iterator, parsed_args.parent_page_title)
    elif parsed_args.command == 'dumpxhtml':
        confluence.dump_xhtml(parsed_args.file)

# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()