forked from sundy-li/html2article
-
Notifications
You must be signed in to change notification settings - Fork 0
/
info.go
75 lines (64 loc) · 1.8 KB
/
info.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package html2article
import (
"math"
"golang.org/x/net/html"
)
// Info holds the per-node statistics that CalScore combines into a content
// score. The fields are populated elsewhere in the package; the meanings
// noted below are taken from how CalScore and getAvg use them, and anything
// beyond that is marked as an assumption.
type Info struct {
// TextCount is the node's total text count ("cn" in the scoring formula).
// Presumably a character count; confirm against the code that fills it.
TextCount int
// LinkTextCount is the part of TextCount attributed to links ("lcn").
LinkTextCount int
// TagCount is the number of tags counted for the node ("tn").
TagCount int
// LinkTagCount is not read by CalScore or getAvg; presumably the number
// of link tags under the node. TODO confirm.
LinkTagCount int
// LeafList is the list of integer samples whose spread getAvg measures.
// Presumably one text length per leaf node; verify against the caller.
LeafList []int
// Density is written by CalScore: the score before the LeafList spread
// and Pcount factors are applied.
Density float64
// Pcount feeds the log10(Pcount+2) factor of the score ("pn");
// presumably the number of <p> elements.
Pcount int
// InputCount is subtracted from TagCount in one of the tag-ratio factors
// ("in"); presumably the number of form input elements.
InputCount int
// ImageCount is subtracted from TagCount in one of the tag-ratio factors
// ("mn"); presumably the number of image elements.
ImageCount int
// Data is the text handed to countSn and countStopWords by CalScore.
Data string
// score is the final value written by CalScore.
score float64
// node is not used by CalScore or getAvg; presumably the HTML node these
// statistics describe. TODO confirm.
node *html.Node
}
// NewInfo allocates an Info whose fields all hold their zero values and
// returns a pointer to it.
func NewInfo() *Info {
	return new(Info)
}
// CalScore computes the content density and the final score for info and
// stores them in info.Density and info.score.
//
// sn_sum and swn_sum are the totals against which this node's countSn and
// countStopWords results are normalised (presumably page-wide sums; confirm
// against the caller). A total that is not positive contributes a ratio of 0
// instead of dividing by zero.
//
// With cn = TextCount, lcn = LinkTextCount, tn = TagCount, in = InputCount,
// mn = ImageCount, pn = Pcount, sn = countSn(Data), swn = countStopWords(Data)
// and avg = getAvg(), the result is
//
//	Density = ln((cn-lcn)/lcn) * (sn/sn_sum + 1) * (swn/swn_sum + 1)
//	          * |ln((cn+1)/(tn+1))| * (tn-in+1)/(tn+1) * (tn-mn+1)/(tn+1)
//	score   = ln(avg) * Density * log10(pn+2)
//
// Both cn-lcn and lcn are clamped to at least 1 before the ratio is taken.
func (info *Info) CalScore(sn_sum, swn_sum float64) {
	// share returns part/total, or 0 when total is not positive, so that an
	// empty normalisation total cannot inject +Inf or NaN into the score.
	share := func(part, total float64) float64 {
		if total <= 0 {
			return 0
		}
		return part / total
	}

	// Clamp both sides of the text/link ratio to at least 1 so the logarithm
	// below always receives a positive, finite argument.
	plainText := info.TextCount - info.LinkTextCount
	if plainText <= 0 {
		plainText = 1
	}
	linkText := info.LinkTextCount
	if linkText <= 0 {
		linkText = 1
	}

	snFactor := share(float64(countSn(info.Data)), sn_sum) + 1
	swnFactor := share(float64(countStopWords(info.Data)), swn_sum) + 1

	tags := float64(info.TagCount + 1)
	textPerTag := math.Abs(math.Log(float64(info.TextCount+1) / tags))
	nonInput := float64(info.TagCount-info.InputCount+1) / tags
	nonImage := float64(info.TagCount-info.ImageCount+1) / tags

	info.Density = math.Log(float64(plainText)/float64(linkText)) * snFactor * swnFactor * textPerTag * nonInput * nonImage

	// getAvg reports 0 when LeafList is empty, and math.Log(0) is -Inf, which
	// would turn the score into -Inf, +Inf or NaN depending on Density. Use 1
	// (the value getAvg yields for zero variance) so such a node scores 0.
	spread := info.getAvg()
	if spread <= 0 {
		spread = 1
	}
	info.score = math.Log(spread) * info.Density * math.Log10(float64(info.Pcount+2))
}
// getAvg measures how spread out the values in info.LeafList are. Despite its
// name it does not return the mean: the result is sqrt(v + 1), where v is the
// population variance of the list. An empty list yields 0; any non-empty list
// yields a value of at least 1.
func (info *Info) getAvg() float64 {
	n := len(info.LeafList)
	if n == 0 {
		return 0
	}

	// Sum in integer arithmetic first, then convert once for the mean.
	total := 0
	for i := 0; i < n; i++ {
		total += info.LeafList[i]
	}
	count := float64(n)
	mean := float64(total) / count

	// Accumulate the squared deviations from the mean.
	squares := 0.0
	for _, leaf := range info.LeafList {
		dev := mean - float64(leaf)
		squares += dev * dev
	}
	return math.Sqrt(squares/count + 1.0)
}