diff --git a/src/display/node_stream.js b/src/display/node_stream.js index 3ffabffe0df13..d61b9a5850951 100644 --- a/src/display/node_stream.js +++ b/src/display/node_stream.js @@ -26,29 +26,20 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { ); } -const fileUriRegex = /^file:\/\/\/[a-zA-Z]:\//; +const urlRegex = /^[a-z][a-z0-9\-+.]+:/i; -function parseUrl(sourceUrl) { - const url = NodePackages.get("url"); - const parsedUrl = url.parse(sourceUrl); - if (parsedUrl.protocol === "file:" || parsedUrl.host) { - return parsedUrl; - } - // Prepending 'file:///' to Windows absolute path. - if (/^[a-z]:[/\\]/i.test(sourceUrl)) { - return url.parse(`file:///${sourceUrl}`); +function parseUrlOrPath(sourceUrl) { + if (urlRegex.test(sourceUrl)) { + return new URL(sourceUrl); } - // Changes protocol to 'file:' if url refers to filesystem. - if (!parsedUrl.host) { - parsedUrl.protocol = "file:"; - } - return parsedUrl; + const url = NodePackages.get("url"); + return new URL(url.pathToFileURL(sourceUrl)); } class PDFNodeStream { constructor(source) { this.source = source; - this.url = parseUrl(source.url); + this.url = parseUrlOrPath(source.url); this.isHttp = this.url.protocol === "http:" || this.url.protocol === "https:"; // Check if url refers to filesystem. @@ -287,18 +278,6 @@ class BaseRangeReader { } } -function createRequestOptions(parsedUrl, headers) { - return { - protocol: parsedUrl.protocol, - auth: parsedUrl.auth, - host: parsedUrl.hostname, - port: parsedUrl.port, - path: parsedUrl.path, - method: "GET", - headers, - }; -} - class PDFNodeStreamFullReader extends BaseFullReader { constructor(stream) { super(stream); @@ -337,13 +316,15 @@ class PDFNodeStreamFullReader extends BaseFullReader { if (this._url.protocol === "http:") { const http = NodePackages.get("http"); this._request = http.request( - createRequestOptions(this._url, stream.httpHeaders), + this._url, + { headers: stream.httpHeaders }, handleResponse ); } else { const https = NodePackages.get("https"); this._request = https.request( - createRequestOptions(this._url, stream.httpHeaders), + this._url, + { headers: stream.httpHeaders }, handleResponse ); } @@ -386,13 +367,15 @@ class PDFNodeStreamRangeReader extends BaseRangeReader { if (this._url.protocol === "http:") { const http = NodePackages.get("http"); this._request = http.request( - createRequestOptions(this._url, this._httpHeaders), + this._url, + { headers: this._httpHeaders }, handleResponse ); } else { const https = NodePackages.get("https"); this._request = https.request( - createRequestOptions(this._url, this._httpHeaders), + this._url, + { headers: this._httpHeaders }, handleResponse ); } @@ -408,25 +391,18 @@ class PDFNodeStreamFsFullReader extends BaseFullReader { constructor(stream) { super(stream); - let path = decodeURIComponent(this._url.path); - - // Remove the extra slash to get right path from url like `file:///C:/` - if (fileUriRegex.test(this._url.href)) { - path = path.replace(/^\//, ""); - } - const fs = NodePackages.get("fs"); - fs.promises.lstat(path).then( + fs.promises.lstat(this._url).then( stat => { // Setting right content length. this._contentLength = stat.size; - this._setReadableStream(fs.createReadStream(path)); + this._setReadableStream(fs.createReadStream(this._url)); this._headersCapability.resolve(); }, error => { if (error.code === "ENOENT") { - error = new MissingPDFException(`Missing PDF "${path}".`); + error = new MissingPDFException(`Missing PDF "${this._url}".`); } this._storedError = error; this._headersCapability.reject(error); @@ -439,15 +415,10 @@ class PDFNodeStreamFsRangeReader extends BaseRangeReader { constructor(stream, start, end) { super(stream); - let path = decodeURIComponent(this._url.path); - - // Remove the extra slash to get right path from url like `file:///C:/` - if (fileUriRegex.test(this._url.href)) { - path = path.replace(/^\//, ""); - } - const fs = NodePackages.get("fs"); - this._setReadableStream(fs.createReadStream(path, { start, end: end - 1 })); + this._setReadableStream( + fs.createReadStream(this._url, { start, end: end - 1 }) + ); } } diff --git a/test/test.mjs b/test/test.mjs index 7b77342d9c4a5..62a02b99979d4 100644 --- a/test/test.mjs +++ b/test/test.mjs @@ -26,7 +26,6 @@ import path from "path"; import puppeteer from "puppeteer"; import readline from "readline"; import { translateFont } from "./font/ttxdriver.mjs"; -import url from "url"; import { WebServer } from "./webserver.mjs"; import yargs from "yargs"; @@ -670,8 +669,7 @@ function checkRefTestResults(browser, id, results) { }); } -function refTestPostHandler(req, res) { - var parsedUrl = url.parse(req.url, true); +function refTestPostHandler(parsedUrl, req, res) { var pathname = parsedUrl.pathname; if ( pathname !== "/tellMeToQuit" && @@ -691,7 +689,7 @@ function refTestPostHandler(req, res) { var session; if (pathname === "/tellMeToQuit") { - session = getSession(parsedUrl.query.browser); + session = getSession(parsedUrl.searchParams.get("browser")); monitorBrowserTimeout(session, null); closeSession(session.name); return; @@ -821,8 +819,7 @@ async function startIntegrationTest() { await Promise.all(sessions.map(session => closeSession(session.name))); } -function unitTestPostHandler(req, res) { - var parsedUrl = url.parse(req.url); +function unitTestPostHandler(parsedUrl, req, res) { var pathname = parsedUrl.pathname; if ( pathname !== "/tellMeToQuit" && diff --git a/test/unit/node_stream_spec.js b/test/unit/node_stream_spec.js index f3a2daa754545..775131caa3f50 100644 --- a/test/unit/node_stream_spec.js +++ b/test/unit/node_stream_spec.js @@ -24,17 +24,13 @@ if (!isNodeJS) { ); } -const path = await __non_webpack_import__("path"); const url = await __non_webpack_import__("url"); describe("node_stream", function () { let tempServer = null; - const pdf = url.parse( - encodeURI( - "file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf") - ) - ).href; + const cwdURL = url.pathToFileURL(process.cwd()) + "/"; + const pdf = new URL("./test/pdfs/tracemonkey.pdf", cwdURL).href; const pdfLength = 1016315; beforeAll(function () { diff --git a/test/webserver.mjs b/test/webserver.mjs index d597aa2512d7d..e0295f3a2865e 100644 --- a/test/webserver.mjs +++ b/test/webserver.mjs @@ -21,6 +21,7 @@ import fs from "fs"; import fsPromises from "fs/promises"; import http from "http"; import path from "path"; +import { pathToFileURL } from "url"; const MIME_TYPES = { ".css": "text/css", @@ -42,7 +43,8 @@ const DEFAULT_MIME_TYPE = "application/octet-stream"; class WebServer { constructor({ root, host, port, cacheExpirationTime }) { - this.root = root || "."; + const cwdURL = pathToFileURL(process.cwd()) + "/"; + this.rootURL = new URL(`${root || "."}/`, cwdURL); this.host = host || "localhost"; this.port = port || 0; this.server = null; @@ -82,27 +84,10 @@ class WebServer { } async #handler(request, response) { - // Validate and parse the request URL. - const url = request.url.replaceAll("//", "/"); - const urlParts = /([^?]*)((?:\?(.*))?)/.exec(url); - let pathPart; - try { - // Guard against directory traversal attacks such as - // `/../../../../../../../etc/passwd`, which let you make GET requests - // for files outside of `this.root`. - pathPart = path.normalize(decodeURI(urlParts[1])); - // `path.normalize` returns a path on the basis of the current platform. - // Windows paths cause issues in `checkRequest` and underlying methods. - // Converting to a Unix path avoids platform checks in said functions. - pathPart = pathPart.replaceAll("\\", "/"); - } catch { - // If the URI cannot be decoded, a `URIError` is thrown. This happens for - // malformed URIs such as `http://localhost:8888/%s%s` and should be - // handled as a bad request. - response.writeHead(400); - response.end("Bad request", "utf8"); - return; - } + // URLs are normalized and automatically disallow directory traversal + // attacks. For example, http://HOST:PORT/../../../../../../../etc/passwd + // is equivalent to http://HOST:PORT/etc/passwd. + const url = new URL(`http://${this.host}:${this.port}${request.url}`); // Validate the request method and execute method hooks. const methodHooks = this.hooks[request.method]; @@ -111,24 +96,34 @@ class WebServer { response.end("Unsupported request method", "utf8"); return; } - const handled = methodHooks.some(hook => hook(request, response)); + const handled = methodHooks.some(hook => hook(url, request, response)); if (handled) { return; } // Check the request and serve the file/folder contents. - if (pathPart === "/favicon.ico") { - pathPart = "test/resources/favicon.ico"; + if (url.pathname === "/favicon.ico") { + url.pathname = "/test/resources/favicon.ico"; } - await this.#checkRequest(request, response, url, urlParts, pathPart); + await this.#checkRequest(request, response, url); } - async #checkRequest(request, response, url, urlParts, pathPart) { + async #checkRequest(request, response, url) { + const localURL = new URL(`.${url.pathname}`, this.rootURL); + // Check if the file/folder exists. - let filePath; try { - filePath = await fsPromises.realpath(path.join(this.root, pathPart)); - } catch { + await fsPromises.realpath(localURL); + } catch (e) { + if (e instanceof URIError) { + // If the URI cannot be decoded, a `URIError` is thrown. This happens + // for malformed URIs such as `http://localhost:8888/%s%s` and should be + // handled as a bad request. + response.writeHead(400); + response.end("Bad request", "utf8"); + return; + } + response.writeHead(404); response.end(); if (this.verbose) { @@ -140,7 +135,7 @@ class WebServer { // Get the properties of the file/folder. let stats; try { - stats = await fsPromises.stat(filePath); + stats = await fsPromises.stat(localURL); } catch { response.writeHead(500); response.end(); @@ -150,15 +145,14 @@ class WebServer { const isDir = stats.isDirectory(); // If a folder is requested, serve the directory listing. - if (isDir && !/\/$/.test(pathPart)) { - response.setHeader("Location", `${pathPart}/${urlParts[2]}`); + if (isDir && !/\/$/.test(url.pathname)) { + response.setHeader("Location", `${url.pathname}/${url.search}`); response.writeHead(301); response.end("Redirected", "utf8"); return; } if (isDir) { - const queryPart = urlParts[3]; - await this.#serveDirectoryIndex(response, pathPart, queryPart, filePath); + await this.#serveDirectoryIndex(response, url, localURL); return; } @@ -182,7 +176,7 @@ class WebServer { } this.#serveFileRange( response, - filePath, + localURL, fileSize, start, isNaN(end) ? fileSize : end + 1 @@ -194,19 +188,19 @@ class WebServer { if (this.verbose) { console.log(url); } - this.#serveFile(response, filePath, fileSize); + this.#serveFile(response, localURL, fileSize); } - async #serveDirectoryIndex(response, pathPart, queryPart, directory) { + async #serveDirectoryIndex(response, url, localUrl) { response.setHeader("Content-Type", "text/html"); response.writeHead(200); - if (queryPart === "frame") { + if (url.searchParams.has("frame")) { response.end( `
`, "utf8" @@ -216,7 +210,7 @@ class WebServer { let files; try { - files = await fsPromises.readdir(directory); + files = await fsPromises.readdir(localUrl); } catch { response.end(); return; @@ -228,13 +222,13 @@ class WebServer { -No files found
"); } - if (!all && queryPart !== "side") { + if (!all && !url.searchParams.has("side")) { response.write( '(only PDF files are shown, show all)
' ); @@ -292,8 +286,8 @@ class WebServer { response.end("