Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix tag normalization and migration not removing duplicate tags #11441

Merged
merged 1 commit into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions app/models/tag.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def history

class << self
def find_or_create_by_names(name_or_names)
Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name|
Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name|
tag = matching_name(normalized_name).first || create(name: normalized_name)

yield tag if block_given?
Expand All @@ -77,7 +77,7 @@ def find_or_create_by_names(name_or_names)
def search_for(term, limit = 5, offset = 0)
pattern = sanitize_sql_like(normalize(term.strip)) + '%'

Tag.where(arel_table[:name].lower.matches(pattern.downcase))
Tag.where(arel_table[:name].lower.matches(pattern.mb_chars.downcase.to_s))
.order(:name)
.limit(limit)
.offset(offset)
Expand All @@ -92,7 +92,7 @@ def find_normalized!(name)
end

def matching_name(name_or_names)
names = Array(name_or_names).map { |name| normalize(name).downcase }
names = Array(name_or_names).map { |name| normalize(name).mb_chars.downcase.to_s }

if names.size == 1
where(arel_table[:name].lower.eq(names.first))
Expand All @@ -104,7 +104,7 @@ def matching_name(name_or_names)
private

def normalize(str)
str.gsub(/\A#/, '').mb_chars.to_s
str.gsub(/\A#/, '')
end
end

Expand Down
13 changes: 13 additions & 0 deletions db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@ class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2]
disable_ddl_transaction!

def up
Tag.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM tags GROUP BY lower(name) HAVING count(*) > 1').to_hash.each do |row|
canonical_tag_id = row['ids'].split(',').first
redundant_tag_ids = row['ids'].split(',')[1..-1]

safety_assured do
execute "UPDATE accounts_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM accounts_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
execute "UPDATE statuses_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM statuses_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.status_id = t0.status_id)"
execute "UPDATE featured_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM featured_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
end

Tag.where(id: redundant_tag_ids).in_batches.delete_all
end

safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' }
remove_index :tags, name: 'index_tags_on_name'
remove_index :tags, name: 'hashtag_search_index'
Expand Down
34 changes: 34 additions & 0 deletions spec/models/tag_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,40 @@
end
end

describe '.find_normalized' do
it 'returns tag for a multibyte case-insensitive name' do
upcase_string = 'abcABCabcABCやゆよ'
downcase_string = 'abcabcabcabcやゆよ';

tag = Fabricate(:tag, name: downcase_string)
expect(Tag.find_normalized(upcase_string)).to eq tag
end
end

describe '.matching_name' do
it 'returns tags for multibyte case-insensitive names' do
upcase_string = 'abcABCabcABCやゆよ'
downcase_string = 'abcabcabcabcやゆよ';

tag = Fabricate(:tag, name: downcase_string)
expect(Tag.matching_name(upcase_string)).to eq [tag]
end
end

describe '.find_or_create_by_names' do
it 'runs a passed block once per tag regardless of duplicates' do
upcase_string = 'abcABCabcABCやゆよ'
downcase_string = 'abcabcabcabcやゆよ';
count = 0

Tag.find_or_create_by_names([upcase_string, downcase_string]) do |tag|
count += 1
end

expect(count).to eq 1
end
end

describe '.search_for' do
it 'finds tag records with matching names' do
tag = Fabricate(:tag, name: "match")
Expand Down