From 0b294dccd0bd7c5c6421c4448c8e1c18ab3d4c2d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sat, 6 Apr 2024 09:11:54 -0700 Subject: [PATCH] Added initial code brought over from datasette-paste Closes #1 Made very slight modifications - mainly replacing 'paste' with 'import' Revision history of this code is here: https://github.com/datasette/datasette-paste/commits/b16ded30471cdb2737026cf4c43a1c61438ebe19/ --- README.md | 14 +- datasette_import/__init__.py | 61 ++- .../static/papaparse-5-4-1.min.js | 7 + .../templates/import_create_table.html | 391 ++++++++++++++++++ pyproject.toml | 5 +- tests/test_import.py | 26 +- 6 files changed, 495 insertions(+), 9 deletions(-) create mode 100644 datasette_import/static/papaparse-5-4-1.min.js create mode 100644 datasette_import/templates/import_create_table.html diff --git a/README.md b/README.md index 7ee6bf5..be55d4b 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,19 @@ datasette install datasette-import ``` ## Usage -Usage instructions go here. +This plugin adds a database action item called "Create table with imported data". + +This action is available to users with the `create-table` permission. + +It links to a page that allows users to upload files or paste in CSV, TSV or JSON data, and then use that to create and populate a new table in Datasette. + +CSV and TSV data must have headers on the first row. + +JSON data must be an array of objects with the same keys, or a container object object where one of the keys is an array of objects. + +## Credits + +The CSV and TSV parsing is performed using [Papa Parse](https://www.papaparse.com/), an MIT licensed JavaScript library that is bundled with this plugin. ## Development diff --git a/datasette_import/__init__.py b/datasette_import/__init__.py index 399cf9f..b0c02f8 100644 --- a/datasette_import/__init__.py +++ b/datasette_import/__init__.py @@ -1 +1,60 @@ -from datasette import hookimpl +from datasette import hookimpl, Response, Forbidden + + +async def import_create_table(datasette, request): + database = request.url_vars["database"] + if not await can_import(datasette, request.actor, database): + raise Forbidden("Permission denied to import data") + return Response.html( + await datasette.render_template( + "import_create_table.html", + { + "database": database, + "papaparse_url": datasette.urls.static_plugins( + "datasette-import", "papaparse-5-4-1.min.js" + ), + }, + request=request, + ) + ) + + +@hookimpl +def register_routes(): + return [ + (r"^/(?P[^/]+)/-/import$", import_create_table), + ] + + +@hookimpl +def database_actions(datasette, actor, database): + async def inner(): + if not await can_import(datasette, actor, database): + return [] + return [ + { + "href": datasette.urls.database(database) + "/-/import", + "label": "Create table with imported data", + "description": "Import using JSON, CSV or TSV data (e.g. from Google Sheets)", + } + ] + + return inner + + +async def can_import(datasette, actor, database_name, to_table=None): + if actor is None: + return False + if not to_table: + # Need create-table for database + can_create_table = await datasette.permission_allowed( + actor, "create-table", resource=database_name + ) + if not can_create_table: + return False + return True + else: + # Need insert-row for that table + return await datasette.permission_allowed( + actor, "insert-row", resource=(database_name, to_table) + ) diff --git a/datasette_import/static/papaparse-5-4-1.min.js b/datasette_import/static/papaparse-5-4-1.min.js new file mode 100644 index 0000000..eeaf983 --- /dev/null +++ b/datasette_import/static/papaparse-5-4-1.min.js @@ -0,0 +1,7 @@ +/* @license +Papa Parse +v5.4.1 +https://github.com/mholt/PapaParse +License: MIT +*/ +!function(e,t){"function"==typeof define&&define.amd?define([],t):"object"==typeof module&&"undefined"!=typeof exports?module.exports=t():e.Papa=t()}(this,function s(){"use strict";var f="undefined"!=typeof self?self:"undefined"!=typeof window?window:void 0!==f?f:{};var n=!f.document&&!!f.postMessage,o=f.IS_PAPA_WORKER||!1,a={},u=0,b={parse:function(e,t){var r=(t=t||{}).dynamicTyping||!1;J(r)&&(t.dynamicTypingFunction=r,r={});if(t.dynamicTyping=r,t.transform=!!J(t.transform)&&t.transform,t.worker&&b.WORKERS_SUPPORTED){var i=function(){if(!b.WORKERS_SUPPORTED)return!1;var e=(r=f.URL||f.webkitURL||null,i=s.toString(),b.BLOB_URL||(b.BLOB_URL=r.createObjectURL(new Blob(["var global = (function() { if (typeof self !== 'undefined') { return self; } if (typeof window !== 'undefined') { return window; } if (typeof global !== 'undefined') { return global; } return {}; })(); global.IS_PAPA_WORKER=true; ","(",i,")();"],{type:"text/javascript"})))),t=new f.Worker(e);var r,i;return t.onmessage=_,t.id=u++,a[t.id]=t}();return i.userStep=t.step,i.userChunk=t.chunk,i.userComplete=t.complete,i.userError=t.error,t.step=J(t.step),t.chunk=J(t.chunk),t.complete=J(t.complete),t.error=J(t.error),delete t.worker,void i.postMessage({input:e,config:t,workerId:i.id})}var n=null;b.NODE_STREAM_INPUT,"string"==typeof e?(e=function(e){if(65279===e.charCodeAt(0))return e.slice(1);return e}(e),n=t.download?new l(t):new p(t)):!0===e.readable&&J(e.read)&&J(e.on)?n=new g(t):(f.File&&e instanceof File||e instanceof Object)&&(n=new c(t));return n.stream(e)},unparse:function(e,t){var n=!1,_=!0,m=",",y="\r\n",s='"',a=s+s,r=!1,i=null,o=!1;!function(){if("object"!=typeof t)return;"string"!=typeof t.delimiter||b.BAD_DELIMITERS.filter(function(e){return-1!==t.delimiter.indexOf(e)}).length||(m=t.delimiter);("boolean"==typeof t.quotes||"function"==typeof t.quotes||Array.isArray(t.quotes))&&(n=t.quotes);"boolean"!=typeof t.skipEmptyLines&&"string"!=typeof t.skipEmptyLines||(r=t.skipEmptyLines);"string"==typeof t.newline&&(y=t.newline);"string"==typeof t.quoteChar&&(s=t.quoteChar);"boolean"==typeof t.header&&(_=t.header);if(Array.isArray(t.columns)){if(0===t.columns.length)throw new Error("Option columns is empty");i=t.columns}void 0!==t.escapeChar&&(a=t.escapeChar+s);("boolean"==typeof t.escapeFormulae||t.escapeFormulae instanceof RegExp)&&(o=t.escapeFormulae instanceof RegExp?t.escapeFormulae:/^[=+\-@\t\r].*$/)}();var u=new RegExp(Q(s),"g");"string"==typeof e&&(e=JSON.parse(e));if(Array.isArray(e)){if(!e.length||Array.isArray(e[0]))return h(null,e,r);if("object"==typeof e[0])return h(i||Object.keys(e[0]),e,r)}else if("object"==typeof e)return"string"==typeof e.data&&(e.data=JSON.parse(e.data)),Array.isArray(e.data)&&(e.fields||(e.fields=e.meta&&e.meta.fields||i),e.fields||(e.fields=Array.isArray(e.data[0])?e.fields:"object"==typeof e.data[0]?Object.keys(e.data[0]):[]),Array.isArray(e.data[0])||"object"==typeof e.data[0]||(e.data=[e.data])),h(e.fields||[],e.data||[],r);throw new Error("Unable to serialize unrecognized input");function h(e,t,r){var i="";"string"==typeof e&&(e=JSON.parse(e)),"string"==typeof t&&(t=JSON.parse(t));var n=Array.isArray(e)&&0=this._config.preview;if(o)f.postMessage({results:n,workerId:b.WORKER_ID,finished:a});else if(J(this._config.chunk)&&!t){if(this._config.chunk(n,this._handle),this._handle.paused()||this._handle.aborted())return void(this._halted=!0);n=void 0,this._completeResults=void 0}return this._config.step||this._config.chunk||(this._completeResults.data=this._completeResults.data.concat(n.data),this._completeResults.errors=this._completeResults.errors.concat(n.errors),this._completeResults.meta=n.meta),this._completed||!a||!J(this._config.complete)||n&&n.meta.aborted||(this._config.complete(this._completeResults,this._input),this._completed=!0),a||n&&n.meta.paused||this._nextChunk(),n}this._halted=!0},this._sendError=function(e){J(this._config.error)?this._config.error(e):o&&this._config.error&&f.postMessage({workerId:b.WORKER_ID,error:e,finished:!1})}}function l(e){var i;(e=e||{}).chunkSize||(e.chunkSize=b.RemoteChunkSize),h.call(this,e),this._nextChunk=n?function(){this._readChunk(),this._chunkLoaded()}:function(){this._readChunk()},this.stream=function(e){this._input=e,this._nextChunk()},this._readChunk=function(){if(this._finished)this._chunkLoaded();else{if(i=new XMLHttpRequest,this._config.withCredentials&&(i.withCredentials=this._config.withCredentials),n||(i.onload=v(this._chunkLoaded,this),i.onerror=v(this._chunkError,this)),i.open(this._config.downloadRequestBody?"POST":"GET",this._input,!n),this._config.downloadRequestHeaders){var e=this._config.downloadRequestHeaders;for(var t in e)i.setRequestHeader(t,e[t])}if(this._config.chunkSize){var r=this._start+this._config.chunkSize-1;i.setRequestHeader("Range","bytes="+this._start+"-"+r)}try{i.send(this._config.downloadRequestBody)}catch(e){this._chunkError(e.message)}n&&0===i.status&&this._chunkError()}},this._chunkLoaded=function(){4===i.readyState&&(i.status<200||400<=i.status?this._chunkError():(this._start+=this._config.chunkSize?this._config.chunkSize:i.responseText.length,this._finished=!this._config.chunkSize||this._start>=function(e){var t=e.getResponseHeader("Content-Range");if(null===t)return-1;return parseInt(t.substring(t.lastIndexOf("/")+1))}(i),this.parseChunk(i.responseText)))},this._chunkError=function(e){var t=i.statusText||e;this._sendError(new Error(t))}}function c(e){var i,n;(e=e||{}).chunkSize||(e.chunkSize=b.LocalChunkSize),h.call(this,e);var s="undefined"!=typeof FileReader;this.stream=function(e){this._input=e,n=e.slice||e.webkitSlice||e.mozSlice,s?((i=new FileReader).onload=v(this._chunkLoaded,this),i.onerror=v(this._chunkError,this)):i=new FileReaderSync,this._nextChunk()},this._nextChunk=function(){this._finished||this._config.preview&&!(this._rowCount=this._input.size,this.parseChunk(e.target.result)},this._chunkError=function(){this._sendError(i.error)}}function p(e){var r;h.call(this,e=e||{}),this.stream=function(e){return r=e,this._nextChunk()},this._nextChunk=function(){if(!this._finished){var e,t=this._config.chunkSize;return t?(e=r.substring(0,t),r=r.substring(t)):(e=r,r=""),this._finished=!r,this.parseChunk(e)}}}function g(e){h.call(this,e=e||{});var t=[],r=!0,i=!1;this.pause=function(){h.prototype.pause.apply(this,arguments),this._input.pause()},this.resume=function(){h.prototype.resume.apply(this,arguments),this._input.resume()},this.stream=function(e){this._input=e,this._input.on("data",this._streamData),this._input.on("end",this._streamEnd),this._input.on("error",this._streamError)},this._checkIsFinished=function(){i&&1===t.length&&(this._finished=!0)},this._nextChunk=function(){this._checkIsFinished(),t.length?this.parseChunk(t.shift()):r=!0},this._streamData=v(function(e){try{t.push("string"==typeof e?e:e.toString(this._config.encoding)),r&&(r=!1,this._checkIsFinished(),this.parseChunk(t.shift()))}catch(e){this._streamError(e)}},this),this._streamError=v(function(e){this._streamCleanUp(),this._sendError(e)},this),this._streamEnd=v(function(){this._streamCleanUp(),i=!0,this._streamData("")},this),this._streamCleanUp=v(function(){this._input.removeListener("data",this._streamData),this._input.removeListener("end",this._streamEnd),this._input.removeListener("error",this._streamError)},this)}function r(m){var a,o,u,i=Math.pow(2,53),n=-i,s=/^\s*-?(\d+\.?|\.\d+|\d+\.\d+)([eE][-+]?\d+)?\s*$/,h=/^((\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/,t=this,r=0,f=0,d=!1,e=!1,l=[],c={data:[],errors:[],meta:{}};if(J(m.step)){var p=m.step;m.step=function(e){if(c=e,_())g();else{if(g(),0===c.data.length)return;r+=e.data.length,m.preview&&r>m.preview?o.abort():(c.data=c.data[0],p(c,t))}}}function y(e){return"greedy"===m.skipEmptyLines?""===e.join("").trim():1===e.length&&0===e[0].length}function g(){return c&&u&&(k("Delimiter","UndetectableDelimiter","Unable to auto-detect delimiting character; defaulted to '"+b.DefaultDelimiter+"'"),u=!1),m.skipEmptyLines&&(c.data=c.data.filter(function(e){return!y(e)})),_()&&function(){if(!c)return;function e(e,t){J(m.transformHeader)&&(e=m.transformHeader(e,t)),l.push(e)}if(Array.isArray(c.data[0])){for(var t=0;_()&&t=l.length?"__parsed_extra":l[r]),m.transform&&(s=m.transform(s,n)),s=v(n,s),"__parsed_extra"===n?(i[n]=i[n]||[],i[n].push(s)):i[n]=s}return m.header&&(r>l.length?k("FieldMismatch","TooManyFields","Too many fields: expected "+l.length+" fields but parsed "+r,f+t):r=i.length/2?"\r\n":"\r"}(e,i)),u=!1,m.delimiter)J(m.delimiter)&&(m.delimiter=m.delimiter(e),c.meta.delimiter=m.delimiter);else{var n=function(e,t,r,i,n){var s,a,o,u;n=n||[",","\t","|",";",b.RECORD_SEP,b.UNIT_SEP];for(var h=0;h=N)return L(!0)}else for(S=W,W++;;){if(-1===(S=i.indexOf(z,S+1)))return r||h.push({type:"Quotes",code:"MissingQuotes",message:"Quoted field unterminated",row:u.length,index:W}),T();if(S===n-1)return T(i.substring(W,S).replace(C,z));if(z!==K||i[S+1]!==K){if(z===K||0===S||i[S-1]!==K){-1!==w&&w=N)return L(!0);break}h.push({type:"Quotes",code:"InvalidQuotes",message:"Trailing quote on quoted field is malformed",row:u.length,index:W}),S++}}else S++}return T();function I(e){u.push(e),d=W}function A(e){var t=0;if(-1!==e){var r=i.substring(S+1,e);r&&""===r.trim()&&(t=r.length)}return t}function T(e){return r||(void 0===e&&(e=i.substring(W)),f.push(e),W=n,I(f),o&&F()),L()}function D(e){W=e,I(f),f=[],R=i.indexOf(P,W)}function L(e){return{data:u,errors:h,meta:{delimiter:M,linebreak:P,aborted:H,truncated:!!e,cursor:d+(t||0)}}}function F(){q(L()),u=[],h=[]}},this.abort=function(){H=!0},this.getCharIndex=function(){return W}}function _(e){var t=e.data,r=a[t.workerId],i=!1;if(t.error)r.userError(t.error,t.file);else if(t.results&&t.results.data){var n={abort:function(){i=!0,m(t.workerId,{data:[],errors:[],meta:{aborted:!0}})},pause:y,resume:y};if(J(r.userStep)){for(var s=0;s + +{% endblock %} + +{% block content %} +

Import data to create a table

+ +

Select a range of cells in Google Sheets / Excel / Numbers (or select-all), hit copy, then paste into this box to import that data and create a new table.

+ + + +
+ +

This feature requires JavaScript

+ + + +{% endblock %} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8924506..185697a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ classifiers=[ ] requires-python = ">=3.8" dependencies = [ - "datasette" + "datasette>=1.0a13" ] [project.urls] @@ -28,3 +28,6 @@ test = ["pytest", "pytest-asyncio"] [tool.pytest.ini_options] asyncio_mode = "strict" + +[tool.setuptools.package-data] +datasette_import = ["static/*", "templates/*"] \ No newline at end of file diff --git a/tests/test_import.py b/tests/test_import.py index 0b3d7c3..4f1bc0e 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -3,9 +3,23 @@ @pytest.mark.asyncio -async def test_plugin_is_installed(): - datasette = Datasette(memory=True) - response = await datasette.client.get("/-/plugins.json") - assert response.status_code == 200 - installed_plugins = {p["name"] for p in response.json()} - assert "datasette-import" in installed_plugins +async def test_create_table_permissions(): + datasette = Datasette() + datasette.add_memory_database("test") + anon_response = await datasette.client.get("/test") + assert anon_response.status_code == 200 + fragment = ">Create table with imported data" + assert fragment not in anon_response.text + root_response = await datasette.client.get( + "/test", cookies={"ds_actor": datasette.client.actor_cookie({"id": "root"})} + ) + assert root_response.status_code == 200 + assert fragment in root_response.text + # And check access to the /-/import page + anon_import_response = await datasette.client.get("/test/-/import") + assert anon_import_response.status_code == 403 + root_import_response = await datasette.client.get( + "/test/-/import", + cookies={"ds_actor": datasette.client.actor_cookie({"id": "root"})}, + ) + assert root_import_response.status_code == 200