class CharDet::UTF8Prober

Public Class Methods

new() click to toggle source
Calls superclass method CharDet::CharSetProber::new
# File lib/rchardet/utf8prober.rb, line 33
# Sets up a new prober: runs the superclass initializer, creates the
# coding state machine from UTF8SMModel, and then resets the prober.
def initialize
  super()
  # The state machine must exist before reset is called, because reset
  # calls @codingSM.reset.
  @codingSM = CodingStateMachine.new(UTF8SMModel)
  reset
end

Public Instance Methods

feed(aBuf) click to toggle source
# File lib/rchardet/utf8prober.rb, line 49
# Runs every byte of +aBuf+ through the coding state machine and updates
# the prober state accordingly.
#
# Each byte is converted to a one-character string and handed to
# @codingSM.next_state:
# * EError  -> @state becomes ENotMe and scanning stops.
# * EItsMe  -> @state becomes EFoundIt and scanning stops.
# * EStart  -> @numOfMBChar is incremented when the state machine reports
#   a current character length of 2 or more.
# Any other state machine result is ignored.
#
# After scanning, a prober that is still EDetecting is promoted to
# EFoundIt when get_confidence exceeds SHORTCUT_THRESHOLD.
#
# aBuf:: an object responding to each_byte (presumably a String -- confirm
#        against callers).
#
# Returns the value of get_state after processing.
def feed(aBuf)
  aBuf.each_byte do |byte|
    sm_state = @codingSM.next_state(byte.chr)

    if sm_state == EError
      @state = ENotMe
      break
    end

    if sm_state == EItsMe
      @state = EFoundIt
      break
    end

    next unless sm_state == EStart

    # Only characters of length 2+ are counted (presumably multi-byte
    # sequences -- the meaning of the length is defined by the state machine).
    @numOfMBChar += 1 if @codingSM.get_current_charlen >= 2
  end

  # Shortcut: stop detecting as soon as the confidence is high enough.
  @state = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

  get_state
end
get_charset_name() click to toggle source
# File lib/rchardet/utf8prober.rb, line 45
# Returns the name of the charset this prober reports: "utf-8".
def get_charset_name
  "utf-8"
end
get_confidence() click to toggle source
# File lib/rchardet/utf8prober.rb, line 75
# Computes the confidence value from the number of counted characters
# (@numOfMBChar).
#
# With 6 or more counted characters the result is the fixed value 0.99.
# With fewer, the starting value 0.99 is multiplied by ONE_CHAR_PROB once
# per counted character, and the result is 1.0 minus that product.
#
# Returns a Float.
def get_confidence
  return 0.99 unless @numOfMBChar < 6

  # Multiply step by step (rather than using a power) so the floating-point
  # result is the same as repeated multiplication.
  doubt = (0...@numOfMBChar).inject(0.99) { |acc, _| acc * ONE_CHAR_PROB }
  1.0 - doubt
end
reset() click to toggle source
Calls superclass method CharDet::CharSetProber#reset
# File lib/rchardet/utf8prober.rb, line 39
# Returns the prober to its initial condition: runs the superclass reset,
# resets the coding state machine, and zeroes the character counter that
# feed increments.
def reset
  super()
  @codingSM.reset
  @numOfMBChar = 0
end