Skip to content

Commit

Permalink
Allow setting obey_robots from a corpus's settings page (cf. #421)
Browse files Browse the repository at this point in the history
  • Loading branch information
boogheta committed Oct 22, 2021
1 parent e4b0bb3 commit 259e786
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion hyphe_backend/core.tac
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class Core(customJSONRPC):
self.corpora[corpus]['options']['proxy'].update(options.pop("proxy"))
if "obey_robots" in options and options["obey_robots"] != self.corpora[corpus]["options"]["obey_robots"]:
redeploy = True
- self.corpora[corpus]['options']['obey_robots'].update(options.pop("obey_robots"))
+ self.corpora[corpus]['options']['obey_robots'] = options.pop("obey_robots")
if 'phantom' in options and options['phantom'] != self.corpora[corpus]['options']['phantom']:
redeploy = True
self.corpora[corpus]["options"]["phantom"].update(options.pop("phantom"))
Expand Down
10 changes: 10 additions & 0 deletions hyphe_frontend/app/views/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ <h3 style="margin:0px">CRAWLING</h3>
<input type="number" min="0" max="{{maxmax_depth}}" ng-model="ed_max_depth" aria-label="editMaxDepth">
</md-input-container>

<dt>Should the crawler respect robots.txt rules if any?</dt>
<md-radio-group ng-model="ed_obey_robots" class="md-primary">
<md-radio-button value="1" class="md-primary">yes</md-radio-button>
<md-radio-button value="0" class="md-primary">no</md-radio-button>
</md-radio-group>

<div layout = "column">
<dt style="margin-bottom: 18px">What StartPages to use by default when crawling a new Web Entity?</dt>
<div flex="50">
Expand All @@ -111,6 +117,7 @@ <h3 style="margin:0px">CRAWLING</h3>
</md-input-container> most cited known pages
</div>
</div>

</dl>
</md-content>
</div>
Expand Down Expand Up @@ -277,6 +284,9 @@ <h3 style="margin:0px">CRAWLING</h3>
<dt>Maximum Depth</dt>
<dd>{{options.max_depth}}</dd>

<dt>Should the crawler respect robots.txt rules if any?</dt>
<dd>{{options.obey_robots ? 'yes' : 'no'}}</dd>

<dt>What Startpages to use by default when crawling a new Web Entity?</dt>
<dd>{{options.defaultStartpagesMode | arrayToString}}</dd>

Expand Down
2 changes: 2 additions & 0 deletions hyphe_frontend/app/views/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ angular.module('hyphe.settingsController', [])

var modifiedOptions = {
"max_depth": $scope.ed_max_depth,
"obey_robots": !!(parseInt($scope.ed_obey_robots)),
"defaultStartpagesMode": $scope.ed_defaultStartpagesMode,
"proxy": {
"port": $scope.ed_proxy_port,
Expand Down Expand Up @@ -197,6 +198,7 @@ angular.module('hyphe.settingsController', [])

$scope.setEditableOptions = function() {
$scope.ed_max_depth = $scope.options.max_depth
$scope.ed_obey_robots = !!$scope.options.obey_robots
$scope.ed_proxy_host = $scope.options.proxy.host + ""
$scope.ed_proxy_port = $scope.options.proxy.port + 0
$scope.ed_timeout = $scope.options.phantom.timeout + 0
Expand Down

0 comments on commit 259e786

Please sign in to comment.