# encoding: utf-8
#
# Builds a custom Ferret analyzer that rewrites accented Latin characters to
# plain ASCII, registers it for the :name field of a PerFieldAnalyzer, and then
# repeatedly asks that analyzer for a token stream.

require 'rubygems'
require 'ferret'
include Ferret::Analysis

# Replacement table passed to MappingFilter by NewAnalyzer#token_stream.
# Each key is a single string or an array of strings; the value is the text
# that replaces any of them.
CHARACTER_MAPPINGS = {
  # "a with ogonek" belongs with the other a-variants (it was mapped to 'q').
  ['à','á','â','ã','ä','å','ā','ă','ą']     => 'a',
  'æ'                                       => 'ae',
  ['ď','đ']                                 => 'd',
  ['ç','ć','č','ĉ','ċ']                     => 'c',
  ['è','é','ê','ë','ē','ę','ě','ĕ','ė']     => 'e',
  ['ƒ']                                     => 'f',
  ['ĝ','ğ','ġ','ģ']                         => 'g',
  ['ĥ','ħ']                                 => 'h',
  # "i with ogonek" and dotless i are i-variants (they were mapped to 'j').
  ['ì','í','î','ï','ī','ĩ','ĭ','į','ı']     => 'i',
  # The key is the single ij ligature character, expanded to its two letters.
  # A plain two-letter "ij" key would rewrite every ordinary "ij" in the text.
  'ĳ'                                       => 'ij',
  'ĵ'                                       => 'j',
  ['ķ','ĸ']                                 => 'k',
  ['ł','ľ','ĺ','ļ','ŀ']                     => 'l',
  ['ñ','ń','ň','ņ','ʼn','ŋ']                 => 'n',
  ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ']     => 'o',
  'œ'                                       => 'oe',
  ['ŕ','ř','ŗ']                             => 'r',
  ['ś','š','ş','ŝ','ș']                     => 's',
  ['ť','ţ','ŧ','ț']                         => 't',
  ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
  'ŵ'                                       => 'w',
  ['ý','ÿ','ŷ']                             => 'y',
  ['ž','ż','ź']                             => 'z'
}

module Ferret::Analysis
  # Analyzer that tokenizes with StandardTokenizer and then applies
  # LowerCaseFilter, HyphenFilter and MappingFilter, in that order.
  class NewAnalyzer
    # Builds the token stream for one piece of text.
    #
    # field - field name; accepted for the analyzer interface but not used.
    # data  - the text handed to StandardTokenizer.
    #
    # Returns the outermost filter (a MappingFilter using CHARACTER_MAPPINGS).
    def token_stream(field, data)
      tokens     = StandardTokenizer.new(data)
      lowercased = LowerCaseFilter.new(tokens)
      hyphenated = HyphenFilter.new(lowercased)
      MappingFilter.new(hyphenated, CHARACTER_MAPPINGS)
    end
  end
end

# Every field uses StandardAnalyzer except :name, which uses NewAnalyzer.
pfa = PerFieldAnalyzer.new(StandardAnalyzer.new)
pfa[:name] = NewAnalyzer.new

# Request a token stream 1000 times, pausing 0.1 s after each request.
# The returned stream is discarded without being read.
1000.times do
  pfa.token_stream(:name, "this is a query to be analyzed")
  sleep 0.1
end