1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
require 'rubygems'
require 'benchmark'
require 'iconv'
require 'ffi'

# Benchmark inputs: the same nine-byte unit repeated many times.
# `page` carries the raw byte \321 in the middle of every unit; `page2` is
# plain ASCII text of the same length.
repeat_count = 100_000
page  = "test\321test" * repeat_count
page2 = "test test" * repeat_count

# Thin FFI binding around the C iconv API plus a lenient converter
# (FFIIconv.conv) that drops input bytes iconv cannot convert instead of
# failing on them.
module FFIIconv
  # BUFFER_SIZE = 8_192 * 4

  # Smallest output buffer conv will allocate, in bytes. Keeping it well above
  # the size of any single encoded character guarantees that an "output buffer
  # full" (E2BIG) result can always be resolved by draining the buffer, even
  # when the input is tiny and the conversion expands it.
  MIN_OUTPUT_BUFFER = 4_096

  # When fewer than this many bytes of output space remain, the output buffer
  # is drained into the result string before the next iconv call.
  FLUSH_THRESHOLD = 100

  # Raised when the conversion descriptor cannot be opened or when iconv
  # reports an error that cannot be handled by skipping input or draining
  # output.
  class IconvError < StandardError; end

  # Raw bindings to iconv_open / iconv / iconv_close.
  module LibIconv
    extend FFI::Library
    # NOTE(review): 'libm' is loaded alongside 'libiconv'; nothing in this
    # module calls into it - confirm whether it is still required.
    ffi_lib 'libiconv', 'libm'

    attach_function :iconv_open, [:string, :string], :pointer
    # The C function returns size_t; it is bound as :long so that the
    # (size_t)-1 error value reads back as -1.
    attach_function :iconv, [:pointer, :pointer, :pointer, :pointer, :pointer], :long
    attach_function :iconv_close, [:pointer], :int
  end

  class << self
    # Points the output cursor back at the start of +outbuf+ and restores the
    # free-space counter to +str_len+ bytes.
    #
    # outbuf    - output buffer
    # outptr    - pointer-sized cell holding the current output position
    # outlength - cell holding the number of free output bytes
    # str_len   - capacity of +outbuf+ in bytes
    def reset_outptr(outbuf, outptr, outlength, str_len)
      outptr.write_pointer(outbuf)
      outlength.write_long(str_len)
    end

    # Allocates the native buffers one iconv conversion needs, yields them,
    # and frees them again when the block finishes (normally or by raising).
    #
    # str     - input String; its bytes are copied into native memory
    # str_len - capacity, in bytes, of the output buffer to allocate
    #
    # Yields a single Array [outbuf, inptr, outptr, inlength, outlength] and
    # returns the value of the block.
    #
    # The length cells are written/read with write_long/read_long.
    # NOTE(review): that assumes long and size_t have the same width on the
    # target platform - confirm.
    def with_buffers(str, str_len)
      inbuf = FFI::MemoryPointer.from_string(str)
      outbuf = FFI::MemoryPointer.new(:char, str_len)

      inptr  = FFI::MemoryPointer.new(:pointer)
      outptr = FFI::MemoryPointer.new(:pointer)

      inlength  = FFI::MemoryPointer.new(:size_t)
      outlength = FFI::MemoryPointer.new(:size_t)

      inptr.write_pointer(inbuf)
      # iconv counts bytes, not characters.
      inlength.write_long(str.bytesize)
      reset_outptr(outbuf, outptr, outlength, str_len)

      yield [outbuf, inptr, outptr, inlength, outlength]
    ensure
      inbuf.free if inbuf
      outbuf.free if outbuf
      inptr.free if inptr
      outptr.free if outptr
      inlength.free if inlength
      outlength.free if outlength
    end

    # Converts +str+ from encoding +from+ to encoding +to+, silently dropping
    # every input byte at which iconv reports an illegal (EILSEQ) or
    # incomplete (EINVAL) sequence.
    #
    # to   - target encoding name, as understood by iconv_open
    # from - source encoding name, as understood by iconv_open
    # str  - String to convert
    #
    # Returns the converted String.
    # Raises IconvError if the descriptor cannot be opened or iconv fails with
    # an errno other than EILSEQ, EINVAL or E2BIG.
    def conv(to, from, str)
      cd = LibIconv.iconv_open(to, from)

      # iconv_open signals failure with (iconv_t)-1.
      if cd == FFI::Pointer.new(-1)
        cd = nil # nothing to close in the ensure clause
        raise IconvError, "invalid encodings"
      end

      out = ''

      in_bytes_left = str.bytesize
      buf_len = [in_bytes_left, MIN_OUTPUT_BUFFER].max

      with_buffers(str, buf_len) do |outbuf, inptr, outptr, inlength, outlength|
        while in_bytes_left > 0
          res = LibIconv.iconv(cd, inptr, inlength, outptr, outlength)
          errno = FFI.errno # only meaningful when res == -1
          in_bytes_left = inlength.read_long
          out_of_room = false

          if res == -1
            case errno
            when Errno::EILSEQ::Errno, Errno::EINVAL::Errno
              # Step over the offending byte and carry on with the rest.
              inptr.write_pointer(inptr.read_pointer + 1)
              in_bytes_left -= 1
              inlength.write_long(in_bytes_left)
            when Errno::E2BIG::Errno
              out_of_room = true
            else
              # Retrying cannot make progress on any other error.
              raise IconvError, "iconv failed with errno #{errno}"
            end
          end

          if out_of_room || outlength.read_long < FLUSH_THRESHOLD || in_bytes_left == 0
            drain_output(out, outbuf, outptr, outlength, buf_len)
          end
        end

        # A NULL input asks iconv to emit whatever is needed to return a
        # stateful target encoding to its initial shift state.
        LibIconv.iconv(cd, nil, nil, outptr, outlength)
        drain_output(out, outbuf, outptr, outlength, buf_len)
      end

      out
    ensure
      LibIconv.iconv_close(cd) if cd
    end

    private

    # Appends the bytes written to +outbuf+ so far to +out+, then rewinds the
    # output cursor so the full +capacity+ is available again.
    def drain_output(out, outbuf, outptr, outlength, capacity)
      written = capacity - outlength.read_long
      out << outbuf.read_string(written) if written > 0
      reset_outptr(outbuf, outptr, outlength, capacity)
    end
  end
end


# Converts +page+ with an Iconv utf-8 -> utf-8 converter, recovering from
# conversion failures: whenever Iconv raises IllegalSequence or
# InvalidCharacter, the part converted so far is kept, the first element of
# the failed remainder is dropped, and conversion resumes from there.
#
# Returns the accumulated converted String.
def a(page)
  converted = ''
  pending = page

  Iconv.open("utf-8", "utf-8") do |converter|
    finished = false
    until finished
      begin
        converted << converter.iconv(pending)
        finished = true
      rescue Iconv::IllegalSequence, Iconv::InvalidCharacter => failure
        converted << failure.success
        # Skip the offending position and try again with what follows it.
        pending = failure.failed[1..-1]
      end
    end
  end

  converted
end


# Disabled call with made-up encoding names:
# FFIIconv.conv('dfadsfa', 'fadsfas', page)

# Both implementations must agree on each input before they are timed.
[page, page2].each do |input|
  raise 'fail' unless FFIIconv.conv('utf-8', 'utf-8', input) == a(input)
end

# Label / workload pairs, in the order they are reported.
benchmark_cases = [
  ['rescue. incorrect utf8', lambda { a(page) }],
  ['rescue. correct utf8',   lambda { a(page2) }],
  ['ffi. incorrect utf8',    lambda { FFIIconv.conv('utf-8', 'utf-8', page) }],
  ['ffi. correct utf8',      lambda { FFIIconv.conv('utf-8', 'utf-8', page2) }]
]

Benchmark.bm do |bm|
  benchmark_cases.each do |label, work|
    # Collect garbage first so one case's leftovers are not charged to the next.
    GC.start
    bm.report(label, &work)
  end
  nil # the block's value is not meant to contribute extra report rows
end

output

1
2
3
4
5
     user     system      total        real
rescue. incorrect utf8  1.780000   0.020000   1.800000 (  1.992797)
rescue. correct utf8  0.010000   0.000000   0.010000 (  0.016532)
ffi. incorrect utf8  0.520000   0.000000   0.520000 (  0.576972)
ffi. correct utf8  0.010000   0.000000   0.010000 (  0.009533)