tootlab-mastodon/app/models/glitch/keyword_mute.rb

68 lines
1.6 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
# == Schema Information
#
# Table name: glitch_keyword_mutes
#
# id :integer not null, primary key
# account_id :integer not null
# keyword :string not null
# whole_word :boolean default(TRUE), not null
# created_at :datetime not null
# updated_at :datetime not null
#
class Glitch::KeywordMute < ApplicationRecord
  # A keyword muted by an account. After every committed change to a record,
  # the account's cached matcher regex is deleted so that the next Matcher
  # built for that account regenerates it.

  belongs_to :account, required: true

  validates_presence_of :keyword

  after_commit :invalidate_cached_matcher

  # Builds a Matcher covering all keyword mutes of the given account.
  #
  # @param account_id [Integer] id of the account whose keywords are matched
  # @return [Glitch::KeywordMute::Matcher]
  def self.matcher_for(account_id)
    Matcher.new(account_id)
  end

  private

  # Deletes the cached regex for this record's account. The key must stay
  # identical to the one Matcher#initialize fetches with.
  def invalidate_cached_matcher
    Rails.cache.delete("keyword_mutes:regex:#{account_id}")
  end

  # Matches text against every keyword muted by one account, using a single
  # case-insensitive regex that is cached per account in Rails.cache.
  class Matcher
    attr_reader :account_id
    attr_reader :regex

    # Loads the account's regex from the cache, building it on a cache miss.
    #
    # @param account_id [Integer] id of the account whose keywords are matched
    def initialize(account_id)
      @account_id = account_id
      @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account }
    end

    # Relation over the account's keyword mutes, restricted to the columns
    # needed to build the regex (:id is selected alongside them because
    # find_each is used to iterate the relation).
    def keywords
      Glitch::KeywordMute.
        where(account_id: account_id).
        select(:keyword, :id, :whole_word)
    end

    # Builds one case-insensitive alternation out of all of the account's
    # keywords. Whole-word keywords get word-boundary anchors; the others are
    # matched as plain escaped substrings.
    #
    # @return [Regexp, nil] nil when the account has no keywords
    def regex_for_account
      # Fragments are collected in an array and joined: under MRI 2.4.x this
      # was measured to allocate fewer objects than appending to one string.
      fragments = []

      keywords.find_each do |kw|
        fragments << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword))
      end

      re_text = fragments.join('|')
      /#{re_text}/i unless re_text.empty?
    end

    # Escapes a keyword and wraps it in \b anchors for whole-word matching.
    #
    # An anchor is only added on a side where the keyword itself starts or
    # ends with a word character; next to a non-word character (e.g. a
    # leading "#") a \b would demand a word character on the other side.
    #
    # @param keyword [String]
    # @return [String] regex source fragment
    def boundary_regex_for_keyword(keyword)
      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
      # \z rather than \Z: \Z also matches just before a trailing newline,
      # which would treat "foo\n" as ending in a word character and append a
      # \b after the newline.
      eb = keyword =~ /[[:word:]]\z/ ? '\b' : ''

      "#{sb}#{Regexp.escape(keyword)}#{eb}"
    end

    # Tests a string against the account's keywords.
    #
    # @param str [String] text to test
    # @return [Integer, nil, false] the result of Regexp#=~ (match offset or
    #   nil), or false when the account has no keywords and so no regex
    def =~(str)
      regex ? regex =~ str : false
    end
  end
end