Skip to content

Commit feb8200

Browse files
kyrazzx and faretek1 authored
refactor: improve filter logic and efficiency (#484)
Co-authored-by: retek <[email protected]>
1 parent dba464a commit feb8200

File tree

4 files changed

+103
-98
lines changed

4 files changed

+103
-98
lines changed
Lines changed: 90 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,34 @@
11
"""FilterBot class"""
22
from __future__ import annotations
3-
43
from .message_events import MessageEvents
54
import time
5+
from collections import deque
66

77
class HardFilter:
8-
8+
99
def __init__(self, filter_name="UntitledFilter", *, equals=None, contains=None, author_name=None, project_id=None, profile=None, case_sensitive=False):
10-
self.equals=equals
11-
self.contains=contains
12-
self.author_name=author_name
13-
self.project_id=project_id
14-
self.profile=profile
15-
self.case_sensitive=case_sensitive
10+
self.equals = equals
11+
self.contains = contains
12+
self.author_name = author_name
13+
self.project_id = project_id
14+
self.profile = profile
15+
self.case_sensitive = case_sensitive
1616
self.filter_name = filter_name
1717

1818
def apply(self, content, author_name, source_id):
19-
if not self.case_sensitive:
20-
content = content.lower()
19+
text_to_check = content if self.case_sensitive else content.lower()
2120
if self.equals is not None:
22-
if self.case_sensitive:
23-
if self.equals == content:
24-
return True
25-
else:
26-
if self.equals.lower() == content:
27-
return True
21+
comparison_equals = self.equals if self.case_sensitive else self.equals.lower()
22+
if text_to_check == comparison_equals:
23+
return True
2824
if self.contains is not None:
29-
if self.case_sensitive:
30-
if self.contains.lower() in content:
31-
return True
32-
else:
33-
if self.contains in content:
34-
return True
35-
if self.author_name == author_name:
25+
comparison_contains = self.contains if self.case_sensitive else self.contains.lower()
26+
if comparison_contains in text_to_check:
27+
return True
28+
if self.author_name is not None and self.author_name == author_name:
3629
return True
37-
if self.project_id == source_id or self.profile == source_id:
30+
if (self.project_id is not None and self.project_id == source_id) or \
31+
(self.profile is not None and self.profile == source_id):
3832
return True
3933
return False
4034

@@ -45,20 +39,27 @@ def __init__(self, score:float, filter_name="UntitledFilter", *, equals=None, co
4539

4640
class SpamFilter(HardFilter):
4741
def __init__(self, filter_name="UntitledFilter", *, equals=None, contains=None, author_name=None, project_id=None, profile=None, case_sensitive=False):
48-
self.memory = []
4942
super().__init__(filter_name, equals=equals, contains=contains, author_name=author_name, project_id=project_id, profile=profile, case_sensitive=case_sensitive)
43+
self.memory = deque()
44+
self.retention_period = 300
5045

5146
def apply(self, content, author_name, source_id):
52-
applies = super().apply(content, author_name, source_id)
53-
if not applies:
47+
if not super().apply(content, author_name, source_id):
5448
return False
55-
self.memory.insert(0, {"content":content, "time":time.time()})
56-
print(content, self.memory)
57-
for comment in list(self.memory)[1:]:
58-
if comment["time"] < time.time() -300:
59-
self.memory.remove(comment)
60-
if comment["content"].lower() == content.lower():
49+
current_time = time.time()
50+
51+
# Prune old entries from memory
52+
while self.memory and self.memory[-1]["time"] < current_time - self.retention_period:
53+
self.memory.pop()
54+
55+
content_lower = content.lower()
56+
# Check for duplicates
57+
for comment in self.memory:
58+
if comment["content"].lower() == content_lower:
6159
return True
60+
61+
# Add new comment to memory
62+
self.memory.appendleft({"content": content, "time": current_time})
6263
return False
6364

6465
class Filterbot(MessageEvents):
@@ -75,10 +76,10 @@ def __init__(self, user, *, log_deletions=True):
7576
self.update_interval = 2
7677

7778
def add_filter(self, filter_obj):
78-
if isinstance(filter_obj, SoftFilter):
79-
self.soft_filters.append(filter_obj)
80-
elif isinstance(filter_obj, SpamFilter): # careful: SpamFilter is also HardFilter due to inheritence
79+
if isinstance(filter_obj, SpamFilter):
8180
self.spam_filters.append(filter_obj)
81+
elif isinstance(filter_obj, SoftFilter):
82+
self.soft_filters.append(filter_obj)
8283
elif isinstance(filter_obj, HardFilter):
8384
self.hard_filters.append(filter_obj)
8485

@@ -105,57 +106,58 @@ def add_genalpha_nonsense_filter(self):
105106
self.add_filter(HardFilter("(genalpha_nonsene_filter) 'fanum tax'", contains="fanum tax"))
106107

107108
def on_message(self, message):
108-
if message.type == "addcomment":
109-
delete = False
110-
content = message.comment_fragment
111-
112-
if message.comment_type == 0: # project comment
113-
source_id = message.comment_obj_id
114-
if self.user._session.connect_project(message.comment_obj_id).author_name != self.user.username:
115-
return # no permission to delete
116-
if message.comment_type == 1: # profile comment
117-
source_id = message.comment_obj_title
118-
if message.comment_obj_title != self.user.username:
119-
return # no permission to delete
120-
if message.comment_type == 2: # studio comment
121-
return # studio comments aren't handled
122-
123-
# Apply hard filters
124-
for hard_filter in self.hard_filters:
125-
if hard_filter.apply(content, message.actor_username, source_id):
126-
delete=True
127-
if self.log_deletions:
128-
print(f"DETECTED: #{message.comment_id} violates hard filter: {hard_filter.filter_name}")
109+
if message.type != "addcomment":
110+
return
111+
source_id = None
112+
content = message.comment_fragment
113+
if message.comment_type == 0: # project comment
114+
source_id = message.comment_obj_id
115+
if self.user._session.connect_project(message.comment_obj_id).author_name != self.user.username:
116+
return # no permission to delete comments that aren't on our own project
117+
elif message.comment_type == 1: # profile comment
118+
source_id = message.comment_obj_title
119+
if source_id != self.user.username:
120+
return # no permission to delete messages that are not on our profile
121+
elif message.comment_type == 2: # studio comment
122+
return # studio comments aren't handled
123+
else:
124+
return
125+
delete = False
126+
reason = ""
127+
128+
# Apply hard filters
129+
for hard_filter in self.hard_filters:
130+
if hard_filter.apply(content, message.actor_username, source_id):
131+
delete = True
132+
reason = f"hard filter: {hard_filter.filter_name}"
133+
break
134+
135+
# Apply spam filters
136+
if not delete:
137+
for spam_filter in self.spam_filters:
138+
if spam_filter.apply(content, message.actor_username, source_id):
139+
delete = True
140+
reason = f"spam filter: {spam_filter.filter_name}"
129141
break
130142

131-
# Apply spam filters
132-
if delete is False:
133-
for spam_filter in self.spam_filters:
134-
if spam_filter.apply(content, message.actor_username, source_id):
135-
delete=True
136-
if self.log_deletions:
137-
print(f"DETECTED: #{message.comment_id} violates spam filter: {spam_filter.filter_name}")
138-
break
139-
140-
# Apply soft filters
141-
if delete is False:
142-
score = 0
143-
violated_filers = []
144-
for soft_filter in self.soft_filters:
145-
if soft_filter.apply(content, message.actor_username, source_id):
146-
score += soft_filter.score
147-
violated_filers.append(soft_filter.name)
148-
if score >= 1:
149-
print(f"DETECTED: #{message.comment_id} violates too many soft filters: {violated_filers}")
150-
delete = True
151-
152-
if delete is True:
153-
try:
154-
message.target().delete()
155-
if self.log_deletions:
156-
print(f"DELETED: #{message.comment_id} by f{message.actor_username}: '{content}'")
157-
except Exception as e:
158-
if self.log_deletions:
159-
print(f"DELETION FAILED: #{message.comment_id} by f{message.actor_username}: '{content}'")
160-
161-
143+
# Apply soft filters
144+
if not delete:
145+
score = 0
146+
violated_filters = []
147+
for soft_filter in self.soft_filters:
148+
if soft_filter.apply(content, message.actor_username, source_id):
149+
score += soft_filter.score
150+
violated_filters.append(soft_filter.filter_name)
151+
if score >= 1:
152+
delete = True
153+
reason = f"too many soft filters: {violated_filters}"
154+
if delete:
155+
if self.log_deletions:
156+
print(f"DETECTED: #{message.comment_id} violates {reason}")
157+
try:
158+
resp = message.target().delete()
159+
if self.log_deletions:
160+
print(f"DELETED: #{message.comment_id} by {message.actor_username!r}: '{content}' with message {resp.content!r} & headers {resp.headers!r}")
161+
except Exception as e:
162+
if self.log_deletions:
163+
print(f"DELETION FAILED: #{message.comment_id} by {message.actor_username!r}: '{content}'; exception: {e}")

scratchattach/site/comment.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,15 +199,17 @@ def delete(self):
199199
"""
200200
self._assert_auth()
201201
if self.source == CommentSource.USER_PROFILE:
202-
user.User(username=self.source_id, _session=self._session).delete_comment(comment_id=self.id)
202+
return user.User(username=self.source_id, _session=self._session).delete_comment(comment_id=self.id)
203203

204204
elif self.source == CommentSource.PROJECT:
205205
p = project.Project(id=self.source_id, _session=self._session)
206206
p.update()
207-
p.delete_comment(comment_id=self.id)
207+
return p.delete_comment(comment_id=self.id)
208208

209209
elif self.source == CommentSource.STUDIO:
210-
studio.Studio(id=self.source_id, _session=self._session).delete_comment(comment_id=self.id)
210+
return studio.Studio(id=self.source_id, _session=self._session).delete_comment(comment_id=self.id)
211+
212+
return None # raise error?
211213

212214
def report(self):
213215
"""

scratchattach/site/project.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ def delete_comment(self, *, comment_id):
677677
f"https://api.scratch.mit.edu/proxy/comments/project/{self.id}/comment/{comment_id}/",
678678
headers=self._headers,
679679
cookies=self._cookies,
680-
).headers
680+
)
681681

682682
def report_comment(self, *, comment_id):
683683
"""

scratchattach/site/user.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -775,12 +775,13 @@ def delete_comment(self, *, comment_id):
775775
comment_id: The id of the comment that should be deleted
776776
"""
777777
self._assert_permission()
778-
return requests.post(
779-
f"https://scratch.mit.edu/site-api/comments/user/{self.username}/del/",
780-
headers = headers,
781-
cookies = self._cookies,
782-
data = json.dumps({"id":str(comment_id)})
783-
)
778+
with requests.no_error_handling():
779+
return requests.post(
780+
f"https://scratch.mit.edu/site-api/comments/user/{self.username}/del/",
781+
headers = headers,
782+
cookies = self._cookies,
783+
data = json.dumps({"id":str(comment_id)})
784+
)
784785

785786
def report_comment(self, *, comment_id):
786787
"""

0 commit comments

Comments
 (0)