Skip to content

Commit

Permalink
Merge pull request #207 from openSUSE/antispam
Browse files Browse the repository at this point in the history
Create a way to deal with spam more efficiently
  • Loading branch information
hellcp authored Sep 17, 2024
2 parents a64ec22 + 23e0fa2 commit bd8851b
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 12 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ gem 'font-awesome-sass'
# Support for s3 in activestorage
gem 'aws-sdk-s3', require: false

gem 'classifier-reborn'

# Use Redis adapter to run Action Cable in production
# gem 'redis', '~> 4.0'

Expand Down
5 changes: 5 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ GEM
rack-test (>= 0.6.3)
regexp_parser (>= 1.5, < 3.0)
xpath (~> 3.2)
classifier-reborn (2.3.0)
fast-stemmer (~> 1.0)
matrix (~> 0.4)
concurrent-ruby (1.3.1)
connection_pool (2.4.1)
crass (1.0.6)
Expand All @@ -139,6 +142,7 @@ GEM
faraday (>= 1, < 3)
faraday-net_http (3.1.0)
net-http
fast-stemmer (1.0.2)
ffi (1.16.3)
font-awesome-sass (6.5.2)
sassc (~> 2.0)
Expand Down Expand Up @@ -443,6 +447,7 @@ DEPENDENCIES
bootsnap
bootstrap
capybara
classifier-reborn
debug
faker
font-awesome-sass
Expand Down
40 changes: 38 additions & 2 deletions app/controllers/pastes_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# TODO: Implement authentication from the outside
class PastesController < ApplicationController
before_action :set_paste, only: %i[show destroy raw]
before_action :qualify_content, only: :create
after_action :verify_authorized, except: :index
after_action :verify_policy_scoped, only: :index
# Set up activestorage for development
Expand Down Expand Up @@ -57,6 +58,16 @@ def raw
redirect_to @paste.content.attachment.url, allow_other_host: true
end

# Bulk-marks the selected pastes with the chosen kind on behalf of the current user.
#
# The ids and the kind come from `pastes_params`. Redirects to the paste list when
# every selected paste was updated without errors; otherwise re-renders the list
# with a 422 status.
def spam
  authorize Paste.new

  marked = Paste.where(id: pastes_params[:ids])
                .update(marked_kind: pastes_params[:marked_kinds], marked_by: current_user)

  # Relation#update returns the array of records, which is always truthy, so success
  # has to be derived from the records themselves.
  if marked.all? { |paste| paste.errors.empty? }
    redirect_to pastes_url, notice: t(:paste_update)
  else
    # The index template iterates over @pastes.
    # NOTE(review): assumes the index action loads them via policy_scope — confirm.
    @pastes = policy_scope(Paste)
    render :index, status: :unprocessable_entity
  end
end

private

def set_paste
Expand All @@ -66,8 +77,33 @@ def set_paste
end

# Strong parameters for creating a paste.
#
# Permits the user-editable attributes and merges in server-side values: the kind
# assigned by the classifier (@marked_kind, when set) and the id of the signed-in
# user (when there is one). Nil values are dropped before merging.
def paste_params
  defaults = { marked_kind: @marked_kind }
  defaults[:user_id] = current_user.id if user_signed_in?
  params.require(:paste)
        .permit(:author, :title, :private, :remove_after, :content, :code, :auth_key)
        .merge(defaults.compact)
end

# Strong parameters for the bulk-marking form: the kind to apply and the list of
# selected paste ids.
def pastes_params
  bulk = params.require(:pastes)
  bulk.permit(:marked_kinds, ids: [])
end

# before_action for #create: classifies the submitted text and stores the
# lower-cased result in @marked_kind, which paste_params merges into the new paste.
#
# Does nothing when there is no text to classify.
def qualify_content
  # Fetch the text exactly once: text_content may read an uploaded file, and a
  # second read of the same IO would continue from where the first one stopped
  # and hand the classifier an empty string.
  text = text_content
  return unless text

  classifier = Rails.application.config.classifier
  @marked_kind = classifier.classify(text).downcase
end

# Whether the uploaded content is text, judged from its declared content type by
# Marcel first and MimeMagic second. False when nothing was uploaded.
def text?
  upload = paste_params[:content]
  return false if upload.blank?

  mime = upload.content_type
  by_marcel = Marcel::Magic.new(mime).text?
  by_marcel || MimeMagic.new(mime).text?
end

# Text to run through the spam classifier: the body of an uploaded text file
# (tagged as UTF-8), otherwise the pasted code, or nil when that is blank.
def text_content
  return paste_params[:code].presence unless text?

  upload = paste_params[:content]
  body = upload&.read
  # Put the read position back at the start so that whatever consumes the upload
  # next (e.g. attaching it to the paste) still sees the whole file.
  upload.rewind if upload.respond_to?(:rewind)
  body&.force_encoding('utf-8')
end
end
20 changes: 20 additions & 0 deletions app/models/paste.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

# The primary application model for storing pastes
class Paste < ApplicationRecord
self.implicit_order_column = 'created_at'

PERIODS = [[I18n.t(:months, count: 3), 3.months],
[I18n.t(:months, count: 1), 1.month],
[I18n.t(:weeks, count: 2), 2.weeks],
Expand All @@ -15,10 +17,15 @@ class Paste < ApplicationRecord

has_one_attached :content
belongs_to :user, optional: true
belongs_to :marked_by, class_name: 'User', optional: true

enum :marked_kind, %w[unclassified ham spam]

attribute :author, default: -> { Paste.default_author }
attribute :remove_at, default: -> { Time.zone.now + 7.days.seconds }

before_save :train_classifier
before_save :delete_spam
before_create :create_permalink
after_save :enqueue_removal

Expand Down Expand Up @@ -85,4 +92,17 @@ def create_permalink
# after_save hook: enqueues PastesCleanupJob for this paste's id, asking the job
# backend to wait until the paste's remove_at time.
def enqueue_removal
  cleanup = PastesCleanupJob.set(wait_until: remove_at)
  cleanup.perform_later(id)
end

# before_save hook: feeds the paste's content to the spam classifier as an example
# of its marked kind.
#
# Only runs when this save changes the kind or the marker, the kind is not
# 'unclassified', a user did the marking, and there is content to learn from.
def train_classifier
  # Inside before_save the pending changes are reported by will_save_change_to_*;
  # saved_change_to_* still describes the previous save.
  return unless will_save_change_to_marked_kind? || will_save_change_to_marked_by_id?
  return if marked_kind == 'unclassified'
  return if marked_by.nil?
  return unless content.attached?

  sample = content.attachment.open(&:read).force_encoding('utf-8')
  Rails.application.config.classifier.train(marked_kind, sample)
end

# before_save hook: destroys the paste once a user has marked it as spam.
def delete_spam
  return unless marked_by.present? && marked_kind == 'spam'

  destroy!
end
end
10 changes: 7 additions & 3 deletions app/policies/paste_policy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ class PastePolicy < ApplicationPolicy
class Scope < Scope
# Pastes visible to the current user, newest first.
#
# * moderators see every paste;
# * signed-in users see their own pastes plus public pastes marked as ham;
# * anonymous visitors see only public pastes marked as ham.
def resolve
  visible =
    if user&.mod?
      scope.all
    elsif user
      # Both sides of #or must be structurally compatible, so the ordering is
      # applied to the combined relation below rather than to one side only.
      scope.where(user:).or(scope.where(private: false, marked_kind: 'ham'))
    else
      scope.where(private: false, marked_kind: 'ham')
    end

  visible.order('created_at DESC')
end
end
Expand All @@ -31,4 +31,8 @@ def destroy?
# Pastes may have a nil user, so we have to check for that here
(!record.user.nil? && record.user == user) || user&.mod?
end

# Marking pastes as spam or ham is limited to moderators; nil when nobody is signed in.
def spam?
  return if user.nil?

  user.mod?
end
end
3 changes: 2 additions & 1 deletion app/views/pastes/_title.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
expires_in = tag.b distance_of_time_in_words(paste.remove_after),
title: paste.remove_at
private = paste.private ? badge(t(:private)) : ''
= t(:paste_title_html, title:, private:, author:, expires_in:)
marked = policy(paste).spam? ? badge(t(paste.marked_kind)) : ''
= t(:paste_title_html, title:, private:, marked:, author:, expires_in:)
24 changes: 20 additions & 4 deletions app/views/pastes/index.html.haml
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
%h1= t(:pastes_list)
.list-group
- @pastes.each do |paste|
= link_to paste, class: 'list-group-item' do
= render 'pastes/title', paste:
- if policy(Paste.new).spam?
= form_for(:pastes, url: spam_pastes_path, method: :post) do |f|
%nav.bg-body-tertiary.rounded.border.mb-3.sticky-top
.input-group.p-3
= f.select :marked_kinds,
Paste.marked_kinds.keys.index_by { |k| t(k) },
{}, class: 'form-select'
= f.submit 'Set', class: 'btn btn-primary'
.list-group
= f.collection_check_boxes(:ids, @pastes, :id, :title) do |c|
.list-group-item
= c.check_box class: 'form-check-input', checked: false
= c.label class: 'form-check-label' do
= link_to c.object, class: 'text-decoration-none' do
= render 'pastes/title', paste: c.object
- else
.list-group
- @pastes.each do |paste|
= link_to paste, class: 'list-group-item' do
= render 'pastes/title', paste:
4 changes: 4 additions & 0 deletions config/initializers/classifier.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Build the ham/spam Bayes classifier once at boot and expose it through the
# application config. A Redis backend is passed only when REDIS_URL is set;
# otherwise no backend option is given.
classifier_options = {}
if ENV['REDIS_URL']
  classifier_options[:backend] = ClassifierReborn::BayesRedisBackend.new(url: ENV.fetch('REDIS_URL'))
end
Rails.application.config.classifier = ClassifierReborn::Bayes.new('ham', 'spam', classifier_options)
5 changes: 4 additions & 1 deletion config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ en:
minutes:
one: a minute
other: '%{count} minutes'
paste_title_html: "%{title} %{private} created by %{author} will expire in %{expires_in}"
paste_title_html: "%{title} %{private} %{marked} created by %{author} will expire in %{expires_in}"
adjective_animal: '%{adjective} %{animal}'
user_paste: "%{user}'s paste"
anonymous_paste: Anonymous paste
Expand Down Expand Up @@ -101,6 +101,9 @@ en:
contract: Contract
remove: Remove
not_authorized: You are not authorized to perform this action.
spam: Spam
ham: Not Spam
unclassified: Unknown if spam
helpers:
label:
auth:
Expand Down
5 changes: 5 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
resources :pastes, only: %w[new create index show destroy], param: :permalink do
  get :raw

  # Bulk spam/ham marking acts on the whole collection (POST /pastes/spam,
  # helper spam_pastes_path). Declaring it here instead of through a separate
  # singular `resource :pastes` avoids generating extra show/edit/update/destroy
  # routes on /pastes.
  collection do
    post :spam
  end
end


# Define your application routes per the DSL in https://guides.rubyonrails.org/routing.html

get '/up' => 'rails/health#show'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Adds the spam-triage columns to pastes: a marked_by reference with a foreign key
# to users, and a non-null integer marked_kind that defaults to 0.
class AddMarkedByAndMarkedKindToPaste < ActiveRecord::Migration[7.1]
  def change
    add_reference(:pastes, :marked_by, foreign_key: { to_table: :users })
    add_column(:pastes, :marked_kind, :integer, null: false, default: 0)
  end
end
6 changes: 5 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added vendor/cache/classifier-reborn-2.3.0.gem
Binary file not shown.
Binary file added vendor/cache/fast-stemmer-1.0.2.gem
Binary file not shown.

0 comments on commit bd8851b

Please sign in to comment.