Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable downloading using curl #1055

Merged
merged 5 commits into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/run-tests/config/www/000-default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

<Directory /www>
Options Indexes FollowSymLinks
AllowOverride None
AllowOverride All
Require all granted
</Directory>
</VirtualHost>
Expand Down
3 changes: 2 additions & 1 deletion .github/actions/run-tests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ services:
- ./volumes/cookbook:/nextcloud/custom_apps/cookbook:ro

www:
image: php:apache
build: www
# ports:
# - 8888:80
volumes:
- ./volumes/www/:/www:ro
- ./config/www/000-default.conf:/etc/apache2/sites-available/000-default.conf:ro

2 changes: 1 addition & 1 deletion .github/actions/run-tests/run-locally.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ build_images() {
docker-compose build --pull --force-rm $PROGRESS \
--build-arg PHPVERSION=$PHP_VERSION \
dut occ php fpm
docker-compose build --pull --force-rm mysql
docker-compose build --pull --force-rm mysql www
echo 'Building images finished.'
}

Expand Down
2 changes: 2 additions & 0 deletions .github/actions/run-tests/www/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
FROM php:apache
RUN a2enmod headers
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ on:
paths-ignore:
- 'docs/**'
- 'l10n/**'
- 'CHANGELOG.md'
pull_request:

jobs:
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
### Changed
- Extracted user folder handling into its own helper class
[#1007](https://github.com/nextcloud/cookbook/pull/1007) @christianlupus
- Switched to cURL for downloading of external files
[#1055](https://github.com/nextcloud/cookbook/pull/1055) @christianlupus

### Fixed
- Fix visual regression in edit mode to prevent overflow of breadcrumbs
Expand Down
1 change: 1 addition & 0 deletions lib/Db/RecipeDb.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public function __construct(
) {
$this->db = $db;
$this->types = $polyfillTypes;
$this->l = $l;
}

/**
Expand Down
9 changes: 9 additions & 0 deletions lib/Exception/NoDownloadWasCarriedOutException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?php

namespace OCA\Cookbook\Exception;

class NoDownloadWasCarriedOutException extends \Exception {
public function __construct($message = null, $code = null, $previous = null) {
parent::__construct($message, $code, $previous);
}
}
145 changes: 145 additions & 0 deletions lib/Helper/DownloadHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
<?php

namespace OCA\Cookbook\Helper;

use OCA\Cookbook\Exception\NoDownloadWasCarriedOutException;
use OCP\IL10N;

/**
* This class is mainly a wrapper for cURL and its PHP extension to download files/content from the internet.
*/
class DownloadHelper {
/**
* Flag that indicates if a download has successfully carried out
*
* @var bool
*/
private $downloaded;
/**
* The content of the last download
*
* @var ?string
*/
private $content;
/**
* The array of the headers of the last download
*
* @var array
*/
private $headers;
/**
* The HTTP status of the last download
*
* @var ?int
*/
private $status;

/** @var IL10N */
private $l;

public function __construct(
IL10N $l
) {
$this->downloaded = false;
$this->l = $l;
$this->headers = [];
}

/**
* Download a file from the internet.
*
* The content of the file will be stored in RAM, so beware not to download huge files.
*
* @param string $url The URL of the file to fetch
* @param array $options Options to pass on for curl. This allows to fine-tune the transfer.
* @throws NoDownloadWasCarriedOutException if the download fails for some reason
*/
public function downloadFile(string $url, array $options = []): void {
$this->downloaded = false;

$ch = curl_init($url);

$hp = tmpfile();

curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_WRITEHEADER, $hp);
curl_setopt_array($ch, $options);

$ret = curl_exec($ch);
if ($ret === false) {
$ex = new NoDownloadWasCarriedOutException($this->l->t('Downloading of a file failed returned the following error message: %s', [curl_error($ch)]));
fclose($hp);
curl_close($ch);
throw $ex;
}

$this->content = $ret;
$this->status = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);

fseek($hp, 0);
$this->headers = [];
for ($buffer = fgets($hp); $buffer !== false; $buffer = fgets($hp)) {
$this->headers[] = $buffer;
}

fclose($hp);
curl_close($ch);
$this->downloaded = true;
}

/**
* Get the content downloaded in the last run.
*
* Note: You must first trigger the download using downloadFile method.
*
* @return string The content of the downloaded file
* @throws NoDownloadWasCarriedOutException if there was no successful download carried out before calling this method.
*/
public function getContent(): string {
if ($this->downloaded) {
return $this->content;
} else {
throw new NoDownloadWasCarriedOutException();
}
}

/**
* Get the Content-Type header from the last download run.
*
* Note: You must first trigger the download using downloadFile method.
*
* @return ?string The content of the Content-Type header or null if no Content-Type header was found
* @throws NoDownloadWasCarriedOutException if there was no successful download carried out before calling this method.
*/
public function getContentType(): ?string {
if (! $this->downloaded) {
throw new NoDownloadWasCarriedOutException();
}

foreach ($this->headers as $s) {
$parts = explode(':', $s, 2);
if (trim($parts[0]) === 'Content-Type') {
return trim($parts[1]);
}
}

return null;
}

/**
* Get the HTTP status code from the last download run.
*
* Note: You must first trigger the download using downloadFile method.
*
* @return int The HTTP status code
* @throws NoDownloadWasCarriedOutException if there was no successful download carried out before calling this method.
*/
public function getStatus(): int {
if (! $this->downloaded) {
throw new NoDownloadWasCarriedOutException();
}

return $this->status;
}
}
36 changes: 25 additions & 11 deletions lib/Service/HtmlDownloadService.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

use DOMDocument;
use OCA\Cookbook\Exception\ImportException;
use OCA\Cookbook\Exception\NoDownloadWasCarriedOutException;
use OCA\Cookbook\Helper\DownloadHelper;
use OCA\Cookbook\Helper\HTMLFilter\AbstractHtmlFilter;
use OCA\Cookbook\Helper\HTMLFilter\HtmlEntityDecodeFilter;
use OCA\Cookbook\Helper\HtmlToDomParser;
Expand Down Expand Up @@ -31,17 +33,26 @@ class HtmlDownloadService {
*/
private $htmlParser;

/** @var DownloadHelper */
private $downloadHelper;

/**
* @var DOMDocument
*/
private $dom;

public function __construct(HtmlEntityDecodeFilter $htmlEntityDecodeFilter,
ILogger $logger, IL10N $l10n, HtmlToDomParser $htmlParser) {
public function __construct(
HtmlEntityDecodeFilter $htmlEntityDecodeFilter,
ILogger $logger,
IL10N $l10n,
HtmlToDomParser $htmlParser,
DownloadHelper $downloadHelper
) {
$this->htmlFilters = [ $htmlEntityDecodeFilter ];
$this->logger = $logger;
$this->l = $l10n;
$this->htmlParser = $htmlParser;
$this->downloadHelper = $downloadHelper;
}

/**
Expand Down Expand Up @@ -89,21 +100,24 @@ private function fetchHtmlPage(string $url): string {
throw new ImportException($this->l->t('Could not parse URL'));
}

$opts = [
"http" => [
"method" => "GET",
"header" => "User-Agent: Nextcloud Cookbook App"
]
$opt = [
CURLOPT_USERAGENT => 'Nextcloud Cookbook App',
];

$context = stream_context_create($opts);
try {
$this->downloadHelper->downloadFile($url, $opt);
} catch (NoDownloadWasCarriedOutException $ex) {
throw new ImportException($this->l->t('Exception while downloading recipe from %s.', [$url]), null, $ex);
}

$html = file_get_contents($url, false, $context);
$status = $this->downloadHelper->getStatus();

if ($html === false) {
throw new ImportException($this->l->t('Could not parse HTML code for site {url}', $url));
if ($status < 200 || $status >= 300) {
throw new ImportException($this->l->t('Download from %s failed as HTTP status code %d is not in expected range.', [$url, $status]));
}

$html = $this->downloadHelper->getContent();

return $html;
}
}
106 changes: 106 additions & 0 deletions tests/Integration/Helper/DownloadHelper/DownloadHelperTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
<?php

namespace OCA\Cookbook\tests\Integration\Helper\DownloadHelper;

use OCA\Cookbook\Exception\NoDownloadWasCarriedOutException;
use OCA\Cookbook\Helper\DownloadHelper;
use OCP\IL10N;
use PHPUnit\Framework\MockObject\Stub;
use PHPUnit\Framework\TestCase;

/**
* @covers OCA\Cookbook\Helper\DownloadHelper
* @covers OCA\Cookbook\Exception\NoDownloadWasCarriedOutException
*/
class DownloadHelperTest extends TestCase {
/** @var DownloadHelper */
private $dut;

protected function setUp(): void {
/** @var IL10N|Stub */
$l = $this->createStub(IL10N::class);
$l->method('t')->willReturnArgument(0);

$this->dut = new DownloadHelper($l);

if (file_exists('/www/.htaccess')) {
unlink('/www/.htaccess');
}
}

protected function tearDown(): void {
if (file_exists('/www/.htaccess')) {
unlink('/www/.htaccess');
}
}

public function testGetContentNotDownloaded() {
$this->expectException(NoDownloadWasCarriedOutException::class);
$this->dut->getContent();
}

public function testGetContentTypeNotDownloaded() {
$this->expectException(NoDownloadWasCarriedOutException::class);
$this->dut->getContentType();
}

public function testGetStatusNotDownloaded() {
$this->expectException(NoDownloadWasCarriedOutException::class);
$this->dut->getStatus();
}

public function testDownload() {
$str = "This is the content of the file.\n";
file_put_contents('/www/test.txt', $str);
$this->dut->downloadFile('http://www/test.txt');
$this->assertEquals($str, $this->dut->getContent());
$this->assertEquals(200, $this->dut->getStatus());
}

public function dpContentType() {
return [
['text/html;charset=ascii'],
['text/html;charset=UTF-8'],
['image/jpeg'],
// ['text/html;charset=ascii'],
];
}

/**
* @dataProvider dpContentType
* @param mixed $contentType
*/
public function testContentType($contentType) {
copy(__DIR__ . '/res/content-type.php', '/www/test.php');
$this->dut->downloadFile('http://www/test.php?content=' . urlencode($contentType));
$this->assertEquals(trim($contentType), $this->dut->getContentType());
$this->assertEquals(200, $this->dut->getStatus());
}

public function testDownload404() {
if (file_exists('/www/test.txt')) {
unlink('/www/test.txt');
}
$this->dut->downloadFile('http://www/test.txt');
$this->assertEquals(404, $this->dut->getStatus());
}

/**
* @medium
*/
public function testFailedDownload() {
$this->expectException(NoDownloadWasCarriedOutException::class);
$opt = [
CURLOPT_TIMEOUT => 1,
CURLOPT_CONNECTTIMEOUT => 1,
];
$this->dut->downloadFile('http://www2/test.txt', $opt);
}

public function testNoContentType() {
copy(__DIR__ . '/res/htaccess', '/www/.htaccess');
// $this->expectException(NoDownloadWasCarriedOutException::class);
$this->dut->downloadFile('http://www/test.php');
$this->assertNull($this->dut->getContentType());
}
}
Loading