-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
keyword-extractor.php
53 lines (43 loc) · 1.32 KB
/
keyword-extractor.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
<?php
require 'vendor/autoload.php';

// Page whose content the keyword extraction is run against.
$targetUrl = 'https://en.wikipedia.org/wiki/Online_advertising';

// Create the scraper and load the target page.
$scraper = new \spekulatius\phpscraper();
$scraper->go($targetUrl);

// Read the keyword data once; the loops further down iterate it as
// keyword => score pairs.
$keywords = $scraper->contentKeywordsWithScores;

// Print the overall count before the filtered selections.
$keywordTotal = count($keywords);
echo "This page contains around " . $keywordTotal . " keywords/phrases.\nBelow are some selected keyword extractions.";
// First selection: keywords containing a standalone four-digit number
// (typically a year).
echo "\n\nSelected keywords with years:\n\n";
foreach ($keywords as $keyword => $score) {
    // PHP converts numeric-string array keys to integers, so a keyword that
    // consists only of digits (e.g. "2019") arrives here as an int. Cast it
    // back so the matching and formatting below always operate on a string.
    $phrase = (string) $keyword;

    // The lookarounds accept either whitespace or the start/end of the phrase
    // as the delimiter. A plain `\s` on both sides would skip a year that
    // opens or closes the keyword, such as "december 2019". No capture array
    // is requested because only the yes/no result is used.
    if (preg_match('/(?<!\S)[0-9]{4}(?!\S)/', $phrase) === 1) {
        printf(" - %s (%s)\n", $phrase, number_format($score, 1));
    }
}
// Keywords mentioning "content" whose score is neither above 100 nor below 5.
echo "\n\nSelected keywords with \"content\":\n\n";
foreach ($keywords as $keyword => $score) {
    // Score window check; evaluated first so the substring test only runs
    // for keywords that pass it.
    $outsideScoreRange = $score > 100 || $score < 5;

    // stripos() is case-insensitive and returns false when there is no match
    // (0 is a valid hit at the start of the string, hence the strict compare).
    if (!$outsideScoreRange && stripos($keyword, "content") !== false) {
        printf(" - %s (%s)\n", $keyword, number_format($score, 1));
    }
}
echo "\n\nLong Tail Keywords:\n\n";

// A run of four whitespace-separated words, each at least three word
// characters long, with whitespace required directly before and after the run.
// Only the run itself is captured (group 1).
$fourWordRun = '/\s(\w{3,}\s\w{3,}\s\w{3,}\s\w{3,})\s/';

foreach ($keywords as $keyword => $score) {
    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // anything other than a match is skipped.
    if (preg_match($fourWordRun, $keyword, $hit) !== 1) {
        continue;
    }

    // Print the captured four-word run rather than the full keyword.
    printf(" - %s (%s)\n", $hit[1], number_format($score, 1));
}