From b13ab8baa7f46950d3d71682b5a576f7841771d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6ttl?= Date: Tue, 3 Oct 2017 15:42:22 +0200 Subject: [PATCH 1/3] Update script for current version of Google Docs --- gdoc2latex.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gdoc2latex.py b/gdoc2latex.py index f9c01d9..fb3bfaf 100644 --- a/gdoc2latex.py +++ b/gdoc2latex.py @@ -59,7 +59,7 @@ def fetchGoogleDoc(urlOrGdocFile,email='',password=''): # find the doc url if urlOrGdocFile.startswith("https://"): url = urlOrGdocFile - elif urlOrGdocFile.endswith(".gdoc"): + elif urlOrGdocFile.endswith(".gdoc") or urlOrGdocFile.endswith(".gddoc"): filename = urlOrGdocFile f = open(filename, "r") content = json.load(f) @@ -69,7 +69,11 @@ def fetchGoogleDoc(urlOrGdocFile,email='',password=''): raise Exception(str(urlOrGdocFile) + " not a google doc URL or .gdoc filename") # pull out the document id try: - docId = re.search("/document/d/([^/]+)/", url).group(1) + result = re.search("/document/d/([^/]+)/|/open\?id=([^&/]+)", url) + docId = result.group(1) or result.group(2) + # Two possible formats (2017-10-03): + # https://drive.google.com/open?id=idididid + # https://docs.google.com/document/d/idididid/edit?usp=sharing except Exception: raise Exception("can't find a google document ID in " + str(urlOrGdocFile)) # construct an export URL From e2377d578f1df256595aa948ffe8116fe1541676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6ttl?= Date: Tue, 3 Oct 2017 15:44:52 +0200 Subject: [PATCH 2/3] Add script to download document as plain text --- gdoc2text.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 gdoc2text.py diff --git a/gdoc2text.py b/gdoc2text.py new file mode 100644 index 0000000..cd5473e --- /dev/null +++ b/gdoc2text.py @@ -0,0 +1,40 @@ + +# Note that the Google Drive REST API v3 is great for downloading files in a range of formats +# using the HTTP Accept header (e.g. 
Accept: text/plain). +# https://developers.google.com/drive/v3/web/manage-downloads#downloading_google_documents + +""" +usage: + python gdoc2text.py <google doc URL> [<username>] + python gdoc2text.py <.gdoc or .gddoc filename> [<username>] + +example: + python gdoc2text.py https://docs.google.com/document/d/1yEyXxtEeQ5_E7PibjYpofPC6kP4jMG-EieKhwkK7oQE/edit + python gdoc2text.py test.gddoc + +example for private documents: + python gdoc2text.py https://docs.google.com/document/d/1yEyXxtEeQ5_E7PibjYpofPC6kP4jMG-EieKhwkK7oQE/edit USERNAME +""" + +from gdoc2latex import fetchGoogleDoc, html_to_text +import getpass +import sys + +def main(): + arg_count = len(sys.argv) - 1 + if arg_count == 0 or arg_count > 2: + sys.stderr.write(__doc__) + sys.exit(1) + + if arg_count == 1: + html = fetchGoogleDoc(sys.argv[1]) + else: + password = getpass.getpass() + html = fetchGoogleDoc(sys.argv[1], sys.argv[2], password) + + text = html_to_text(html) + sys.stdout.write(text) + + +if __name__ == '__main__': + main() From f3457d8fc57273af8117258f15d0063d6fe902d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Sch=C3=B6ttl?= Date: Tue, 3 Oct 2017 16:05:34 +0200 Subject: [PATCH 3/3] Make script executable --- gdoc2text.py | 2 ++ 1 file changed, 2 insertions(+) mode change 100644 => 100755 gdoc2text.py diff --git a/gdoc2text.py b/gdoc2text.py old mode 100644 new mode 100755 index cd5473e..c74c2e4 --- a/gdoc2text.py +++ b/gdoc2text.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python2.7 +# gdoc2latex.py uses Python 2.7 syntax. # Note that the Google Drive REST API v3 is great for downloading files in a range of formats # using the HTTP Accept header (e.g. Accept: text/plain).