Skip to content

Commit

Permalink
Merge pull request #1545 from hydephp/documentation-intelligence
Browse files Browse the repository at this point in the history
Add internal documentation intelligence tools
  • Loading branch information
caendesilva authored Feb 11, 2024
2 parents 29b7391 + 259a0b5 commit 4233563
Show file tree
Hide file tree
Showing 4 changed files with 383 additions and 0 deletions.
298 changes: 298 additions & 0 deletions monorepo/DocumentationIntelligence/DocumentationIntelligence.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
<?php

/**
* @internal This script is part of the internal monorepo tools.
*/

declare(strict_types=1);

use Illuminate\Support\Str;
use Hyde\Foundation\HydeKernel;
use Hyde\Markdown\Models\MarkdownDocument;

if (php_sapi_name() !== 'cli') {
// Run the file and proxy the dashboard page
exec('php '.realpath(__FILE__).' 2>&1', $output, $returnCode);

if ($returnCode !== 0) {
echo '<pre>'.implode("\n", $output).'</pre>';
exit(1);
}

return require_once __DIR__.'/results/dashboard.html';
}

require_once __DIR__.'/../../vendor/autoload.php';
require_once __DIR__.'/../../packages/hydefront/.github/scripts/minima.php';

define('TIME_START', microtime(true));
define('BASE_PATH', realpath(__DIR__.'/../../'));
define('OUTPUT_PATH', realpath(__DIR__.'/results'));

Command::main(function () {
/** @var Command $this */
$this->info('Generating documentation intelligence...');
$this->line();

$generator = new DocumentationIntelligence();

task('discover pages', fn () => $generator->discoverPages());
task('assemble model', fn () => $generator->assembleModel());
task('create pruned model', fn () => $generator->createPrunedModel());
task('generate model data', fn () => $generator->getModelStatistics());
task('create dashboard page', fn () => $generator->createDashboardPage());

$this->line();

$this->line(sprintf(<<<'EOF'
Full model details:
Model size: %s
Model words: %s
Model lines: %s
Pruned model details:
Model size: %s
Model words: %s
Model lines: %s
Pruned model compression: %s%%
EOF,
...array_values($generator->getModelStatistics())
));

$this->line();
$this->info(sprintf('Time taken: %s. Memory used: %s',
number_format((microtime(true) - TIME_START) * 1000, 2).'ms',
number_format(memory_get_peak_usage() / 1024 / 1024, 2).'MB'
));

return 0;
});

class DocumentationIntelligence
{
protected HydeKernel $kernel;

/** @var array<string, \Hyde\Markdown\Models\MarkdownDocument> */
protected array $pages = [];

public function __construct()
{
$this->kernel = new HydeKernel(BASE_PATH);

HydeKernel::setInstance($this->kernel);
}

public function discoverPages(): void
{
$files = glob(BASE_PATH.'/docs/**/*.md');

foreach ($files as $file) {
$filepath = str_replace(BASE_PATH.'/docs/', '', $file);
$this->pages[$filepath] = MarkdownDocument::parse($file);
}
}

public function assembleModel(): void
{
// This script generates a single .txt model of all HydePHP documentation

$model = sprintf("Start HydePHP Documentation (Framework version v%s)\n\n", HydeKernel::VERSION);

foreach ($this->pages as $path => $page) {
$model .= sprintf("--- %s ---\n\n%s\n\n", $path, $page->markdown);
}

$model = str_replace("\r\n", "\n", $model);

file_put_contents(OUTPUT_PATH.'/model.txt', $model);
}

public function createPrunedModel(): void
{
// Remove all code blocks from the full model

$model = file_get_contents(OUTPUT_PATH.'/model.txt');

// Remove all code blocks
$model = preg_replace('/```.*?```/s', '', $model);
$model = preg_replace('/<pre.*?<\/pre>/s', '', $model);

$model = explode("\n", $model);

// Normalizing pass
$needles = ['<!-- ', '[//]: # ', '[Blade]: ', '--- redirects/', '<meta http-equiv="refresh" ', 'Redirecting you to ['];
foreach ($model as $index => $line) {
// Remove non-informative lines
if (Str::startsWith($line, $needles)) {
unset($model[$index]);
continue;
} elseif (Str::startsWith($line, '{') && Str::endsWith($line, '}')) {
unset($model[$index]);
continue;
} elseif (Str::startsWith($line, '<div') && Str::endsWith($line, '>')) {
unset($model[$index]);
continue;
} elseif (Str::startsWith($line, '<a name') && Str::endsWith($line, '></a>')) {
unset($model[$index]);
continue;
} elseif (Str::startsWith($line, '|-') && Str::endsWith($line, '-|')) {
// Table dividers
unset($model[$index]);
continue;
} elseif (filled($line) && empty(str_replace(['|', ' '], '', $line))) {
// Empty table row
unset($model[$index]);
continue;
} elseif ($line === '</div>') {
unset($model[$index]);
continue;
} elseif ($line === '---') {
unset($model[$index]);
continue;
}

// Remove multiple spaces
$line = preg_replace('/\s+/', ' ', $line);

// Remove heading tags
$line = ltrim($line, '# ');

// Trim colored and normal blockquotes (['>danger', '>info', '>success', '>warning'])
$line = preg_replace('/^>\w+ /', '', $line);
$line = ltrim($line, '> ');

// Now we remove even more Markdown syntax to only keep the text

// Replace links with their text (but put the text within quotes)
$line = preg_replace('/\[(.*?)\]\(.*?\)/', "'\$1'", $line);

// Remove bold and italic
$line = preg_replace('/\*\*(.*?)\*\*/', '$1', $line);

// Remove italic (underscore syntax where there are spaces around the underscore, as it otherwise removes snake_case words)
$line = preg_replace('/\b_(.*?)_\b/', '$1', $line);

// Remove HTML tags (This does remove some examples, like `<identifier>`)
$line = strip_tags($line);

// Replace tables with comma-separated values
if (Str::startsWith($line, '|') && Str::endsWith($line, '|')) {
$line = str_replace(['| |'], '|', $line);
$line = trim(str_replace([' | '], ', ', $line), '| ');
$line = str_replace('.,', '.', $line);
}

// Simplify lists with removed text
if (Str::startsWith($line, '- \'') && Str::endsWith($line, '\'')) {
// Remove the quotes
$line = '- '.substr($line, 3, -1);
}

$model[$index] = rtrim($line);
}

// Uniqueness pass (we do this manually as array_unique removes empty lines)
$uniqueLines = [];
foreach ($model as $line) {
if (empty($line) || ! in_array($line, $uniqueLines, true)) {
$uniqueLines[] = $line;
}
}
$model = $uniqueLines;

$model = implode("\n", $model);

// Remove multiple newlines
$model = preg_replace('/\n{3,}/', "\n\n", $model);

file_put_contents(OUTPUT_PATH.'/model-pruned.txt', rtrim($model)."\n");
}

public function createDashboardPage(): void
{
$data = $this->getDashboardData();
$dashboard = file_get_contents(__DIR__.'/dashboard-template.php');

foreach ($data as $key => $value) {
$dashboard = str_replace('<?php echo $'.$key.'; ?>', $value, $dashboard);
}

file_put_contents(OUTPUT_PATH.'/dashboard.html', $dashboard);
}

/** @return array<string, mixed> */
protected function getDashboardData(): array
{
return [
'modelStatistics' => $this->makeModelStatisticsTable(),
'modelSections' => $this->makeModelSections(),
'modelRaw' => e(file_get_contents(OUTPUT_PATH.'/model.txt')),
];
}

protected function makeModelStatisticsTable(): string
{
$data = $this->getModelStatistics();

$table = ['<tr><th>Model</th><th>Size</th><th>Words</th><th>Lines</th></tr>'];

$table[] = sprintf('<tr><th>Full</th><td>%s</td><td>%s</td><td>%s</td></tr>',
$data['Model size'],
$data['Model words'],
$data['Model lines']
);

$table[] = sprintf('<tr><th>Pruned</th><td>%s</td><td>%s</td><td>%s</td></tr>',
$data['Pruned model size'],
$data['Pruned model words'],
$data['Pruned model lines']
);

$extraData = [
'Compression' => $data['Pruned model compression'],
'Reading time' => \Hyde\Support\ReadingTime::fromFile(OUTPUT_PATH.'/model-pruned.txt')->getFormatted('%d mins'),
];

foreach ($extraData as $key => $value) {
$table[] = sprintf('<tr><th>%s</th><td colspan="3" align="right">%s</td></tr>', $key, $value);
}

return implode("\n".str_repeat(' ', 20), $table);
}

protected function makeModelSections(): string
{
// Create textarea for each section
$sections = explode('--- ', file_get_contents(OUTPUT_PATH.'/model.txt'));

// Skip the first empty section
array_shift($sections);

$html = '';
foreach ($sections as $section) {
// Extract title from first line
$section = explode("\n", $section, 2);
$title = rtrim(array_shift($section), '- ');
$section = implode("\n", $section);

$html .= '<h4 class="mt-2">'.e($title).'</h4>';
$html .= '<textarea rows="10" cols="80" style="width: 100%; white-space: pre; font-family: monospace;">'.e($section).'</textarea>';
}

return $html;
}

public function getModelStatistics(): array
{
return $this->statistics ??= [
'Model size' => number_format(filesize(OUTPUT_PATH.'/model.txt') / 1024, 2).' KB',
'Model words' => number_format(str_word_count(file_get_contents(OUTPUT_PATH.'/model.txt'))),
'Model lines' => number_format(count(file(OUTPUT_PATH.'/model.txt')) + 1),

'Pruned model size' => number_format(filesize(OUTPUT_PATH.'/model-pruned.txt') / 1024, 2).' KB',
'Pruned model words' => number_format(str_word_count(file_get_contents(OUTPUT_PATH.'/model-pruned.txt'))),
'Pruned model lines' => number_format(count(file(OUTPUT_PATH.'/model-pruned.txt')) + 1),
'Pruned model compression' => number_format((1 - (filesize(OUTPUT_PATH.'/model-pruned.txt') / filesize(OUTPUT_PATH.'/model.txt'))) * 100, 2).'%',
];
}
}
13 changes: 13 additions & 0 deletions monorepo/DocumentationIntelligence/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Internal Documentation Intelligence

This internal monorepo module contains tools to analyse the documentation to improve its quality.
All code here is internal and should not be used outside the monorepo and no support will be provided for it.

## Usage

As the resulting data models and dashboards can get quite large, they are excluded from the repository.
To generate them, run the following command:

```bash
php monorepo/DocumentationIntelligence/DocumentationIntelligence.php
```
70 changes: 70 additions & 0 deletions monorepo/DocumentationIntelligence/dashboard-template.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>HydePHP Documentation Intelligence Dashboard</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-rbsA2VBKQhggwzxH7pPCaAqO46MgnOM80zW1RWuH61DGLwZJEdK2Kadq2F9CUG65" crossorigin="anonymous">
<style>
.w-fit {
width: fit-content;
}
</style>
</head>
<body>

<header class="container">
<div class="row">
<div class="col-12 py-4 text-center">
<h1>HydePHP Documentation Intelligence Dashboard</h1>
<p class="lead">
This internal monorepo module contains tools to analyse the documentation to improve its quality.
</p>
</div>
</div>
</header>

<main>
<div class="container">
<div class="row">
<div class="col-12">
<h2>Statistics</h2>
<table class="table table-bordered table-sm w-fit">
<caption>Model Statistics</caption>
<?php echo $modelStatistics; ?>
</table>
</div>
</div>
</div>

<hr>

<div class="container">
<div class="row">
<div class="col-12">
<h2>Model Data</h2>

<article>
<h3>Full model</h3>
<details>
<summary>Click to view the model</summary>

<textarea rows="30" cols="80" style="width: 100%; white-space: pre; font-family: monospace;"><?php echo $modelRaw; ?></textarea>

</details>
</article>
<br>
<article>
<h3>Sections</h3>
<details>
<summary>Click to view the sections</summary>
<?php echo $modelSections; ?>
</details>
</article>
</div>
</div>
</div>
</main>

</body>
</html>
2 changes: 2 additions & 0 deletions monorepo/DocumentationIntelligence/results/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!.gitignore

0 comments on commit 4233563

Please sign in to comment.