-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwikia-robots-txt.php
103 lines (89 loc) · 3.23 KB
/
wikia-robots-txt.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
<?php
// Entry point that renders robots.txt for the current wiki.

// Define MW_NO_SETUP only when nothing has defined it yet, then load the
// bootstrap files explicitly.
// NOTE(review): presumably MW_NO_SETUP stops WebStart.php from including
// Setup.php on its own, which is why Setup.php is required by hand - confirm.
defined( 'MW_NO_SETUP' ) || define( 'MW_NO_SETUP', 1 );

require_once __DIR__ . '/includes/WebStart.php';
require_once __DIR__ . '/includes/Setup.php';

// Builder that collects the rules and headers emitted at the end of the script.
$robots = new RobotsTxt();

// Crawling is allowed on the production environment; anywhere else it is
// allowed only when the request carries a truthy "forcerobots" value.
$allowRobots = ( $wgWikiaEnvironment === WIKIA_ENV_PROD );
if ( !$allowRobots ) {
	$allowRobots = (bool)$wgRequest->getBool( 'forcerobots' );
}
// Optional experimental rule set: when $wgExperimentalRobotsTxt names a file
// in robots.txt.d/, load its contents into $experimentalRobots. The variable
// stays null when the setting is empty, malformed, or the file is missing or
// unreadable.
$experimentalRobots = null;
// The name is spliced into a filesystem path, so the WHOLE value must be a
// lowercase slug. \A and \z anchor to the absolute start and end of the
// string. The previous pattern used ^...$ with the "m" modifier, which matches
// on any single line and therefore accepted values such as "ok\n../../secret".
if ( !empty( $wgExperimentalRobotsTxt ) && preg_match( '/\A[a-z0-9-]+\z/', $wgExperimentalRobotsTxt ) === 1 ) {
	$file = __DIR__ . '/robots.txt.d/' . $wgExperimentalRobotsTxt . '.txt';
	if ( is_file( $file ) && is_readable( $file ) ) {
		// file_get_contents() yields false on a failed read; like null, that
		// value is falsy for the truthiness check made later in this script.
		$experimentalRobots = file_get_contents( $file );
	}
}
// Pick one of three rule sets: block everything, the experimental section
// loaded from disk, or the default hand-maintained rules.
if ( !$allowRobots ) {
	// No crawling preview, verify, sandboxes, showcase, etc
	$robots->disallowPath( '/' );
} else {
	// Sitemap - advertised for both the experimental and the default rules
	if ( !empty( $wgEnableSpecialSitemapExt ) ) {
		$robots->setSitemap( sprintf( 'http://%s/sitemap-index.xml', $_SERVER['SERVER_NAME'] ) );
	}

	if ( $experimentalRobots ) {
		// Experimental content
		$robots->setExperimentalAllowDisallowSection( $experimentalRobots );
	} else {
		// Special pages
		$blockedNamespaces = array( NS_SPECIAL, NS_TEMPLATE, NS_TEMPLATE_TALK );
		foreach ( $blockedNamespaces as $blockedNamespace ) {
			$robots->disallowNamespace( $blockedNamespace );
		}

		$allowedSpecialPages = array(
			// 'Allpages', // TODO: SEO-64
			'CreateNewWiki',
			'Forum',
			'Sitemap',
			'Videos',
		);
		if ( !empty( $wgAllowSpecialImagesInRobots ) ) {
			$allowedSpecialPages[] = 'Images';
		}
		foreach ( $allowedSpecialPages as $allowedSpecialPage ) {
			$robots->allowSpecialPage( $allowedSpecialPage );
		}

		// Params
		$blockedParams = array( 'action', 'feed', 'oldid', 'printable', 'useskin', 'uselang' );
		foreach ( $blockedParams as $blockedParam ) {
			$robots->disallowParam( $blockedParam );
		}

		// Nasty robots
		$blockedAgents = array(
			'IsraBot',
			'Orthogaffe',
			'UbiCrawler',
			'DOC',
			'Zao',
			'sitecheck.internetseer.com',
			'Zealbot',
			'MSIECrawler',
			'SiteSnagger',
			'WebStripper',
			'WebCopier',
			'Fetch',
			'Offline Explorer',
			'Teleport',
			'TeleportPro',
			'WebZIP',
			'linko',
			'HTTrack',
			'Microsoft.URL.Control',
			'Xenu',
			'larbin',
			'libwww',
			'ZyBORG',
			'Download Ninja',
			'sitebot',
			'wget',
			'k2spider',
			'NPBot',
			'WebReaper',
		);
		foreach ( $blockedAgents as $blockedAgent ) {
			$robots->blockRobot( $blockedAgent );
		}

		// Deprecated items, probably we should delete them
		$blockedPaths = array( '/w/', '/trap/', '/dbdumps/', '/wikistats/' );
		foreach ( $blockedPaths as $blockedPath ) {
			$robots->disallowPath( $blockedPath );
		}
	}
}
// Send every header supplied by the builder, then print the collected lines
// separated by PHP_EOL and finished with a trailing PHP_EOL.
foreach ( $robots->getHeaders() as $httpHeader ) {
	header( $httpHeader );
}
echo implode( PHP_EOL, $robots->getContents() ), PHP_EOL;