-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchanges.py
154 lines (145 loc) · 5.39 KB
/
changes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# -*- encoding: utf-8 -*-
import re
import difflib
from basic import Basic
class Changes(Basic):
    """Handle recent changes.

    Fetches recent changes through ``self.api``, gives every change a
    heuristic score (higher means more suspicious), appends the noteworthy
    scores to the ``User:SvipBot/evaluation`` wiki page and, for scores above
    the block threshold, deletes the page and blocks the user.

    The class relies on ``self.api`` and ``self.log`` which are not defined
    here -- presumably provided by ``Basic``; verify against that class.

    NOTE(review): the indentation of this class was reconstructed from a
    copy in which all leading whitespace had been lost.  Places where more
    than one nesting was plausible are marked with NOTE(review) below.
    """

    # Maximum number of recent changes requested from the API per run.
    maximum = 500

    # Third argument handed to ``api.get_recentchanges``.
    # Presumably a MediaWiki namespace filter -- TODO confirm against the API.
    _NAMESPACES = "0|4|14"

    # Score returned by detect_spam_creation() for a matching new page.
    _SPAM_CREATION_SCORE = 75000
    # Evaluations scoring above this value trigger page deletion + user block.
    _BLOCK_THRESHOLD = 50000

    # Patterns are compiled once here instead of on every diff line.
    # Raw strings avoid invalid-escape warnings for sequences such as "\[".
    _SPAM_TITLE_RE = re.compile(r".*?[^0-9][0-9]{1,2}$")
    # The dot before "jpg" is escaped so it only matches a literal ".".
    _SPAM_CONTENT_RE = re.compile(
        r"^\[\[(File|Image):.*?[0-9]{3,4}\.jpg\|thumb\|\]\]", re.I)
    # Bracketed external links; both http and https are recognised.
    _EXTLINK_RE = re.compile(
        r"(\[https?://[^ ]*?\]|\[https?://[^ ]*? .*?\])", re.I)
    _ELINK_CLINK_RE = re.compile(r"\{\{(elink|clink)\|[^\}]*\}\}", re.I)
    _E_TEMPLATE_RE = re.compile(r"\{\{e\|[^\}]*\}\}", re.I)
    _TABLE_RE = re.compile(r"(\{\||\|-|\|\})")
    # NOTE(review): this only matches "#REDIRECT:" (with the colon) at the
    # very start of the text; "#REDIRECT [[...]]" without a colon is not
    # matched.  Kept as in the original -- confirm whether that is intended.
    _REDIRECT_RE = re.compile(r"#REDIRECT:", re.I)

    def get_changes(self):
        """Load recent changes into ``self.changes``.

        When a timestamp was set through set_ts() the changes since that
        timestamp are requested; otherwise the latest ``self.maximum``
        changes are requested.  If neither is possible ``self.changes`` is
        reset to an empty list so that a previous batch is never reused.
        """
        # getattr() keeps this usable when set_ts() has not been called yet.
        ts = getattr(self, 'ts', None)
        if ts is not None:
            self.changes = self.api.get_recentchanges(
                self.maximum, ts, self._NAMESPACES)
            # Truthiness covers both an empty list and a None result.
            if not self.changes:
                self.log("No recent changes since %s; skipping..." % ts)
            else:
                self.log("Found %s recent changes since %s."
                         % (len(self.changes), ts))
        elif self.maximum > 0:
            self.log("No escape time defined, getting the previous %s recent changes..." % self.maximum)
            self.changes = self.api.get_recentchanges(
                self.maximum, None, self._NAMESPACES)
        else:
            self.log("No recent changes permitted; skipping...")
            self.changes = []

    def detect_spam_creation(self, content, title, anon):
        """Return a spam score for a newly created page.

        Returns 75000 when ``anon`` is true, ``title`` ends in one or two
        digits preceded by a non-digit, and ``content`` starts with a
        ``[[File:...NNN.jpg|thumb|]]`` / ``[[Image:...]]`` embed whose file
        name ends in three or four digits.  Returns 0 in every other case,
        including when ``content`` or ``title`` is empty or None.
        """
        if not anon or not content or not title:
            return 0
        if (self._SPAM_TITLE_RE.match(title)
                and self._SPAM_CONTENT_RE.match(content)):
            return self._SPAM_CREATION_SCORE
        return 0

    def handle_changes(self):
        """Fetch, score, record and act on the recent changes."""
        self.get_changes()
        if not self.changes:
            self.log("No recent changes found, skipping...")
            return
        evaluation = []
        for change in self.changes:
            self.log("Checking edit %s on `%s'..."
                     % (change['revid'], change['title']))
            result = self._evaluate_change(change)
            if result is not None:
                evaluation.append(result)
        self._write_evaluation(evaluation)
        self._enforce_evaluation(evaluation)

    def _evaluate_change(self, change):
        """Return the evaluation dict for one change, or None to skip it."""
        if change['type'] == 'edit':
            edit = self.api.get_edit(change['revid'], change['old_revid'])
            if (edit is None or edit[0]['content'] is None
                    or edit[1]['content'] is None):
                return None
            score = self._score_edit(edit)
            if score is None:
                return None
        elif change['type'] == 'new':
            edit = self.api.get_edit(change['revid'], None)
            # Mirror the 'edit' branch: a failed fetch skips the change
            # instead of raising on edit[1].
            if edit is None or edit[1] is None:
                return None
            # New pages get a flat 100 plus the spam-creation score.
            score = 100 + self.detect_spam_creation(
                edit[1]['content'], change['title'], edit[1]['anon'])
        else:
            self.log("Unknown edit type, `%s'..." % change['type'])
            return None
        self.log("Gave edit score: %s" % score)
        return {'revid': change['revid'], 'score': score,
                'title': change['title'], 'user': edit[1]['user']}

    def _score_edit(self, edit):
        """Return the score for an edit, or None when made by a sysop.

        ``edit[1]`` is read for the user, comment, size and content of the
        change and ``edit[0]`` for the size and content it is compared with
        (presumably new and previous revision respectively -- TODO confirm).
        """
        score = 0
        if edit[1]['anon']:
            score += 100
        else:
            user = self.api.get_user(edit[1]['user'])
            if 'sysop' in user['groups']:
                self.log("This is a sysop! ESCAPE!")
                return None
            if 'autoconfirmed' in user['groups']:
                score -= 200
            else:
                score += 200

        # NOTE(review): comment scoring is applied to anonymous and
        # registered users alike; the original nesting could not be
        # recovered with certainty.
        sizematters = True
        comment = edit[1]['comment'] or ''
        if comment:
            score -= 100
            for undo in ("undid", "undo", "reverted"):
                if undo in comment or undo.capitalize() in comment:
                    score -= 1000
                    # Reverts may legitimately change the size a lot.
                    sizematters = False

        if sizematters:
            sizediff = int(edit[1]['size']) - int(edit[0]['size'])
            # Floor division keeps the score an integer on Python 2 and 3.
            if sizediff < 0:
                score += abs(sizediff) // 2
            else:
                score += sizediff // 3

        old_content = edit[0]['content']
        new_content = edit[1]['content']
        diff_score, linesincommon = self._score_diff(old_content, new_content)
        score += diff_score

        if (self._REDIRECT_RE.match(old_content)
                or self._REDIRECT_RE.match(new_content)):
            score -= 1000
        elif linesincommon <= 2 and len(old_content.splitlines(True)) > 10:
            # Almost nothing of a page with more than ten lines survived.
            score += 2500 * (3 - linesincommon)
        return score

    def _score_diff(self, old_content, new_content):
        """Return ``(score, linesincommon)`` for a line diff of two texts."""
        score = 0
        links = 0
        linesincommon = 0
        preline = ''  # most recently removed line, stripped
        diff = difflib.ndiff(old_content.splitlines(True),
                             new_content.splitlines(True))
        for line in diff:
            prefix = line[0:2]
            if prefix == '  ':
                # ndiff marks lines common to both inputs with two spaces.
                linesincommon += 1
            elif prefix == '- ':
                preline = line[2:].strip()
            elif prefix == '+ ':
                added = line[2:].strip()
                # External links not already in the removed line; every
                # further new link costs 250 more than the one before.
                for ext in self._EXTLINK_RE.findall(added):
                    if ext not in preline:
                        score += 250 + 250 * links
                        links += 1
                # {{elink|...}} / {{clink|...}} templates lower the score.
                score -= 250 * len(self._ELINK_CLINK_RE.findall(added))
                # {{e|...}} templates lower the score as well.
                # NOTE(review): the extra 200 for a removed "{{elink|" is
                # applied once per {{e|...}} match; the original nesting
                # could not be recovered with certainty.
                per_e_template = 250
                if "{{elink|" in preline:
                    per_e_template += 200
                score -= per_e_template * len(
                    self._E_TEMPLATE_RE.findall(added))
                # Table markup ("{|", "|-", "|}") lowers the score.
                score -= 150 * len(self._TABLE_RE.findall(added))
            # '? ' hint lines (and anything else) are ignored.
        return score, linesincommon

    def _write_evaluation(self, evaluation):
        """Append the noteworthy scores to the evaluation wiki page."""
        self.log("Writing evaluation...")
        lines = []
        for e in evaluation:
            if -500 < e['score'] < 500:
                continue  # restrict uninteresting scores to not be written.
            # Scores beyond +/-1000 are wrapped in ''' on both sides.
            a = "'''" if (e['score'] > 1000 or e['score'] < -1000) else ''
            lines.append('* %s[http://theinfosphere.org/index.php?diff=prev&oldid=%s Edit %s on "%s"]: Score: %s%s\n' % (a, e['revid'], e['revid'], e['title'], e['score'], a))
        content = "%s\n%s" % (
            self.api.get_content('User:SvipBot/evaluation'), ''.join(lines))
        if self.api.edit_page('User:SvipBot/evaluation', content, 'Evaluation update.'):
            self.log("Evaluation written.")
        else:
            self.log("Failed writing evaluation.")

    def _enforce_evaluation(self, evaluation):
        """Delete the page and block the user for scores above the threshold.

        NOTE(review): this applies to every evaluation, so a sufficiently
        high-scoring edit to an existing page also gets that page deleted.
        """
        self.log("Handling evaluation of edits...")
        for e in evaluation:
            if e['score'] > self._BLOCK_THRESHOLD:
                self.log("Score above %s for %s on `%s'..."
                         % (self._BLOCK_THRESHOLD, e['revid'], e['title']))
                self.log("> Deleting page and blocking user...")
                if not self.api.delete_page(e['title'], "Bot assumed this edit to be bad, deleting it on grounds of vandalism."):
                    self.log(">> Delete unsuccessful.")
                if not self.api.block_user(e['user'], "1 year", "Bot caught this user vandalising..."):
                    self.log(">> Block unsuccessful.")

    def set_ts(self, ts):
        """Set the timestamp from which get_changes() requests changes."""
        self.ts = ts

    def __init__(self, debug, api, verbose):
        """Forward ``debug``, ``api`` and ``verbose`` to ``Basic.__init__``."""
        Basic.__init__(self, debug, api, verbose)