Skip to content

Commit

Permalink
add button to export metadata on all known pages of a webentity (closes
Browse files Browse the repository at this point in the history
  • Loading branch information
boogheta committed Aug 25, 2023
1 parent 33efd2b commit e7eebcc
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 4 deletions.
9 changes: 7 additions & 2 deletions hyphe_frontend/app/views/webentity.html
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,8 @@ <h1 class="word-break">
layout="row"
style="height: 48px;"
>
<h3 style="padding: 8px; margin: 0px" ng-if="webentity.pages_total<=0">No known page</h3>
<h3 style="padding: 8px; margin: 0px" ng-if="webentity.pages_total>=1">
<h3 style="padding: 4px 8px; margin: 0px" ng-if="webentity.pages_total<=0">No known page</h3>
<h3 style="padding: 4px 8px; margin: 0px" ng-if="webentity.pages_total>=1">
{{webentity.pages_total|number}} Page{{webentity.pages_total|plural}}
<small ng-if="webentity.pages_crawled>0">({{webentity.pages_crawled|number}} crawled)</small>
</h3>
Expand All @@ -467,6 +467,7 @@ <h3 style="padding: 8px; margin: 0px" ng-if="webentity.pages_total>=1">
<md-icon>equalizer</md-icon>
Most cited
</md-button> -->

<md-button class="md-raised" ng-href="#/project/{{corpusId}}/webentityExplorer/{{webentity.id}}">
<md-tooltip md-direction="bottom">Explore the pages of the web entity as a tree of subdirectories</md-tooltip>
<md-icon>folder</md-icon>
Expand All @@ -489,6 +490,10 @@ <h3 style="padding: 8px; margin: 0px" ng-if="webentity.pages_total>=1">
>
</md-input-container>

<md-button class="md-raised md-mini md-fab md-secondary" ng-click="downloadPagesCSV()" ng-show="webentity.pages_total>=1" style="position: relative; left: 0px; top: -22px; background-color: lightgrey;">
<md-tooltip md-direction="top">Export all pages metadata as CSV</md-tooltip>
<md-icon>file_download</md-icon>
</md-button>
<md-button class="md-raised md-mini md-fab md-secondary" style="position: absolute; right: 16px; top: 225px; background-color: lightgrey" ng-click="pagesOnlyCrawled = !pagesOnlyCrawled">
<md-tooltip md-direction="left">Display only crawled pages</md-tooltip>
<md-icon ng-style="{'opacity': pagesOnlyCrawled ? 0.2 : 1}">opacity</md-icon>
Expand Down
96 changes: 94 additions & 2 deletions hyphe_frontend/app/views/webentity.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ angular.module('hyphe.webentityController', [])
$scope.crawls = []

$scope.pages = []
$scope.rawPages = []
$scope.pagesLoading = true
$scope.pagesToken = null
$scope.loadAllPages = false
Expand All @@ -53,6 +54,54 @@ angular.module('hyphe.webentityController', [])
$scope.ego = {}
})

// Columns of the pages CSV export, listed in output order: the export
// header is Object.keys($scope.CSVfields), so the order below is the
// column order. Each entry maps a column name to {type: ...}; the export
// reads `type` to decide how a value is formatted ('date' values are
// rendered as ISO strings).
$scope.CSVfields = [
  ['webentity_name', 'string'],
  ['webentity_id', 'string'],
  ['url', 'string'],
  ['lru', 'string'],
  ['crawled', 'boolean'],
  ['crawl_timestamp', 'date'],
  ['depth', 'number'],
  ['status', 'number'],
  ['error', 'string'],
  ['size', 'number'],
  ['content_type', 'string'],
  ['encoding', 'string'],
  ['archive_url', 'string'],
  ['archive_date_requested', 'string'],
  ['archive_date_obtained', 'string']
].reduce(function(fields, column){
  fields[column[0]] = {type: column[1]}
  return fields
}, {})


$scope.$watch('tagCategories', synchronizeTags, true)

Expand Down Expand Up @@ -166,7 +215,7 @@ angular.module('hyphe.webentityController', [])
}

// Functions
$scope.loadPages = function(){
$scope.loadPages = function(callback){
$scope.pagesLoading = true
if (!$scope.loadAllPages) {
$scope.status = {message: 'Loading pages'}
Expand All @@ -180,6 +229,7 @@ angular.module('hyphe.webentityController', [])
,token: $scope.pagesToken
}
,function(result){
$scope.rawPages = $scope.rawPages.concat(result.pages)
var pagesBatch = []
var required_fields = ["crawled", "archive_url", "archive_date_obtained", "archive_date_requested", "archive_permalink"]
result.pages.forEach(function(page){
Expand Down Expand Up @@ -208,10 +258,11 @@ angular.module('hyphe.webentityController', [])
if ($scope.loadAllPages && $scope.pagesToken) {
var percent = 99.5 * $scope.pages.length / $scope.webentity.pages_total
$scope.status = {message: 'Loading pages ' + Math.round(percent) + ' %', progress: percent}
$timeout($scope.loadPages, 0)
$timeout(function() { $scope.loadPages(callback) }, 0)
} else {
$scope.pagesLoading = false
$scope.status = {}
if (callback) callback();
}
}
,function(){
Expand All @@ -221,6 +272,47 @@ angular.module('hyphe.webentityController', [])
)
}

/**
 * Serialise every page of $scope.rawPages as CSV and pass it to saveAs().
 *
 * Columns are the keys of $scope.CSVfields, in declaration order. The
 * header line is written unquoted; every data cell is double-quoted with
 * embedded quotes doubled. `webentity_name` and `webentity_id` are taken
 * from $scope.webentity for every row. null/undefined cells are exported
 * as empty strings.
 *
 * Reads: $scope.CSVfields, $scope.rawPages, $scope.webentity, $scope.corpusName.
 * Side effect: calls saveAs(blob, "<corpusName>_webentity-<id>_pages.csv", true).
 */
function triggerPagesCSVDownload() {
  var fields = $scope.CSVfields
  // Build Headline
  var headline = Object.keys(fields)

  // Columns whose value is the same on every row. They are looked up here
  // instead of being written onto each page, so the export leaves the
  // objects held in $scope.rawPages untouched.
  var webentityColumns = {
    webentity_name: $scope.webentity.name,
    webentity_id: $scope.webentity.id
  }

  // Raw value of one column for one page, formatted according to its type.
  function cellValue(page, field) {
    var value = Object.prototype.hasOwnProperty.call(webentityColumns, field)
      ? webentityColumns[field]
      : page[field]
    var type = fields[field].type
    if (type === 'date' && value) {
      var date = new Date(+value)
      // toISOString() throws a RangeError on an invalid date; keep the raw
      // value in that case rather than aborting the whole export.
      if (!isNaN(date.getTime())) {
        value = date.toISOString()
      }
    } else if (type === 'array of string' && Array.isArray(value)) {
      // Sort a copy: Array.prototype.sort reorders its receiver in place.
      value = value.slice().sort().join(' ')
    }
    if (value === null || value === undefined) {
      return ''
    }
    return value
  }

  // Quote a cell for CSV: cast to string, double any embedded quote.
  function csvElement(value) {
    return '"' + String(value).replace(/"/g, '""') + '"'
  }

  // Build Table Content
  var lines = [headline.join(',')]
  $scope.rawPages.forEach(function(page){
    var row = headline.map(function(field){
      return csvElement(cellValue(page, field))
    })
    lines.push(row.join(','))
  })

  var blob = new Blob([lines.join('\n')], {'type': "text/csv;charset=utf-8"})
  saveAs(blob, $scope.corpusName + "_webentity-" + $scope.webentity.id + "_pages.csv", true)
}

// Export the pages of the current web entity as CSV.
// When fewer raw pages are held than webentity.pages_total announces, the
// remaining pages are requested first (loadAllPages is switched on) and the
// export runs as the loadPages completion callback; otherwise it runs now.
// NOTE(review): nothing here checks $scope.pagesLoading, so a click while a
// load is already running presumably starts a second load — confirm.
$scope.downloadPagesCSV = function(){
  var morePagesExpected = $scope.rawPages.length < $scope.webentity.pages_total
  if (!morePagesExpected) {
    triggerPagesCSVDownload();
    return
  }
  $scope.loadAllPages = true;
  $scope.loadPages(triggerPagesCSVDownload);
}

$scope.toggleStartPages = function(page){
var remove, msg1, msg2, func;
if (page.isStartPage){
Expand Down

0 comments on commit e7eebcc

Please sign in to comment.