Skip to content

Commit

Permalink
Allow streaming directly to stdout with --stdout
Browse files Browse the repository at this point in the history
Fixes #65 and #83
  • Loading branch information
dantleech committed Jun 8, 2019
1 parent 5cd4d02 commit b49ee7e
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 34 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## develop

- Record referrer title if client encounters error while requesting URL, fixes #88
- Verbose mode streams report to STDOUT with `--stdout` #83 #65

## [0.8.0] 2019-04-07

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Options
- `--publisher=csv` Set the publisher (defaults to `json`); can be either
`json` or `csv`.
- `--rate` Set a maximum number of requests to make in a second.
- `--stdout` Stream to STDOUT directly, disables display and any specified outfile.

Examples
--------
Expand Down
23 changes: 14 additions & 9 deletions lib/Console/Command/CrawlCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class CrawlCommand extends Command
private const OPT_CLIENT_MAX_BODY_SIZE = 'client-max-body-size';
private const OPT_CLIENT_SECURITY_LEVEL = 'client-security-level';
private const OPT_DISPLAY = 'display';
private const OPT_STDOUT = 'stdout';

/**
* @var DispatcherBuilderFactory
Expand Down Expand Up @@ -106,6 +107,7 @@ protected function configure()
$this->addOption(self::OPT_RATE, null, InputOption::VALUE_REQUIRED, 'Set max request rate (as requests per second)', []);
$this->addOption(self::OPT_INCLUDE_LINK, null, InputOption::VALUE_REQUIRED|InputOption::VALUE_IS_ARRAY, 'Add an additional URL to the set of URLs under the base URL', []);
$this->addOption(self::OPT_DISPLAY, 'd', InputOption::VALUE_REQUIRED, 'Display specification, e.g. +memory', '');
$this->addOption(self::OPT_STDOUT, null, InputOption::VALUE_NONE, 'Stream directly to stdout (disable realtime display and out file)');
}

protected function execute(InputInterface $input, OutputInterface $output)
Expand All @@ -115,21 +117,20 @@ protected function execute(InputInterface $input, OutputInterface $output)
$dispatcher = $this->buildDispatcher($input);
$dispatcher->dispatch();

Loop::repeat($this->castToInt($input->getOption(self::OPT_REQUEST_INTERVAL)), function () use ($dispatcher) {
$dispatcher->dispatch();
});

$section1 = $output->section();

$display = $this->displayBuilder->build($this->castToString($input->getOption(self::OPT_DISPLAY)));

Loop::repeat(self::DISPLAY_POLL_TIME, function () use ($display, $section1, $dispatcher) {
if (false === $this->shuttingDown) {
Loop::repeat($this->castToInt($input->getOption(self::OPT_REQUEST_INTERVAL)), function () use ($dispatcher) {
$dispatcher->dispatch();
});

Loop::repeat(self::DISPLAY_POLL_TIME, function () use ($display, $section1, $dispatcher, $input) {
if (false === $input->getOption(self::OPT_STDOUT) && false === $this->shuttingDown) {
$section1->overwrite($display->render($section1->getFormatter(), $dispatcher->status()));
}

$status = $dispatcher->status();
if ($status->nbConcurrentRequests === 0 && $status->queueSize === 0) {
if ($dispatcher->status()->nbConcurrentRequests === 0 && $dispatcher->status()->queueSize === 0) {
Loop::stop();

if ($dispatcher->status()->nbFailures) {
Expand Down Expand Up @@ -159,7 +160,7 @@ protected function execute(InputInterface $input, OutputInterface $output)
private function buildDispatcher(InputInterface $input): Dispatcher
{
$urls = $this->castToArray($input->getArgument(self::ARG_URLS));

$maxConcurrency = $this->castToInt($input->getOption(self::OPT_CONCURRENCY));
$outfile = $input->getOption(self::OPT_OUTPUT);
$noDedupe = $this->castToBool($input->getOption(self::OPT_NO_DEDUPE));
Expand Down Expand Up @@ -208,6 +209,10 @@ private function buildDispatcher(InputInterface $input): Dispatcher
$builder->publishTo($this->castToString($outfile));
}

if ($input->getOption(self::OPT_STDOUT)) {
$builder->publishResource(STDOUT);
}

if ($cookieFile) {
$builder->loadCookies($this->castToString($cookieFile));
}
Expand Down
45 changes: 33 additions & 12 deletions lib/DispatcherBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@ class DispatcherBuilder
*/
private $clientSslSecurityLevel;

/**
* @var resource|null
*/
private $publishToResource;

public function __construct(Urls $baseUrls)
{
$this->baseUrls = $baseUrls;
Expand Down Expand Up @@ -190,6 +195,13 @@ public function publishTo(string $outfile): self
return $this;
}

/**
 * Publish the report to an already-open stream instead of a file path.
 *
 * The value is validated here so that a wrong argument fails at the
 * call site rather than later, when the publisher first tries to write.
 *
 * @param resource|null $resource Open stream to write to, or null to clear
 *
 * @throws RuntimeException When the given value is neither null nor a resource
 */
public function publishResource($resource): self
{
    // null is still accepted: it matches the property's "resource|null" type.
    if (null !== $resource && !is_resource($resource)) {
        throw new RuntimeException(sprintf(
            'Expected a stream resource to publish to, got "%s"',
            gettype($resource)
        ));
    }

    $this->publishToResource = $resource;

    return $this;
}

public function noPeerVerification(bool $value): self
{
$this->noPeerVerification = $value;
Expand Down Expand Up @@ -365,13 +377,13 @@ private function buildClient(): Client

private function buildPublisher()
{
if ($this->publishTo) {
if ($this->publishTo || $this->publishToResource) {
if ($this->publisherType === self::PUBLISHER_JSON) {
return $this->buildJsonPublisher();
}

if ($this->publisherType === self::PUBLISHER_CSV) {
return new CsvStreamPublisher($this->publishTo, true);
return new CsvStreamPublisher($this->buildPublishStream(), true);
}

throw new RuntimeException(sprintf(
Expand All @@ -386,16 +398,7 @@ private function buildPublisher()

private function buildJsonPublisher()
{
$resource = fopen($this->publishTo, 'w');

if (false === $resource) {
throw new RuntimeException(sprintf(
'Could not open file "%s"',
$this->publishTo
));
}

return new JsonStreamPublisher(new ResourceOutputStream($resource));
return new JsonStreamPublisher(new ResourceOutputStream($this->buildPublishStream()));
}

private function buildLimiter(): Limiter
Expand All @@ -410,4 +413,22 @@ private function buildLimiter(): Limiter

return new ChainLimiter($limiters);
}

/**
 * Resolve the stream handle that the publisher writes to.
 *
 * An explicitly supplied resource takes precedence; otherwise the
 * configured output path is opened in write mode.
 *
 * @return resource
 *
 * @throws RuntimeException When the output file cannot be opened
 */
private function buildPublishStream()
{
    if (!$this->publishToResource) {
        $handle = fopen($this->publishTo, 'w');

        if (false !== $handle) {
            return $handle;
        }

        throw new RuntimeException(sprintf(
            'Could not open file "%s"',
            $this->publishTo
        ));
    }

    return $this->publishToResource;
}
}
11 changes: 1 addition & 10 deletions lib/Model/Publisher/CsvStreamPublisher.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

use DTL\Extension\Fink\Model\Publisher;
use DTL\Extension\Fink\Model\Report;
use RuntimeException;

class CsvStreamPublisher implements Publisher
{
Expand All @@ -23,16 +22,8 @@ class CsvStreamPublisher implements Publisher
*/
private $stream;

public function __construct(string $path, bool $withHeaders)
public function __construct($stream, bool $withHeaders)
{
$stream = fopen($path, 'w');
if (false === $stream) {
throw new RuntimeException(sprintf(
'Could not open stream for writing at path "%s"',
$path
));
}

$this->stream = $stream;
$this->withHeaders = $withHeaders;
}
Expand Down
10 changes: 10 additions & 0 deletions tests/EndToEnd/Command/CrawlCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,16 @@ public function testAllowsDisplayCustomization()
$this->assertProcessSuccess($process);
}

/**
 * Running the crawler with --stdout must succeed, and the captured
 * process output must mention the crawled "blog.html" page.
 */
public function testStreamsToStdout()
{
    $arguments = [
        self::EXAMPLE_URL,
        '--stdout',
    ];
    $crawl = $this->execute($arguments);

    $this->assertProcessSuccess($crawl);
    $this->assertStringContainsString('blog.html', $crawl->getOutput());
}

private function assertStatus(array $results, int $code, string $target): void
{
$target = self::EXAMPLE_URL . '/'. $target;
Expand Down
3 changes: 1 addition & 2 deletions tests/EndToEnd/EndToEndTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,8 @@ protected function assertProcessSuccess(Process $process)

protected function finkProcess(array $args): Process
{
$fink = new Process(array_merge([
return new Process(array_merge([
'bin/fink'
], $args), __DIR__ . '/../..');
return $fink;
}
}
3 changes: 2 additions & 1 deletion tests/Integration/Model/Publisher/CsvStreamPublisherTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public function testPublishesToCsvFileWithHeaders()

private function create(bool $withHeaders = false): Publisher
{
return new CsvStreamPublisher($this->workspace()->path(self::EXAMPLE_FILEANME), $withHeaders);
$resource = fopen($this->workspace()->path(self::EXAMPLE_FILEANME), 'w');
return new CsvStreamPublisher($resource, $withHeaders);
}
}

0 comments on commit b49ee7e

Please sign in to comment.