class CharDet::UTF8Prober
Public Class Methods
new()
click to toggle source
Calls superclass method
CharDet::CharSetProber::new
# File lib/rchardet/utf8prober.rb, line 33
# Sets up a new prober: runs the superclass initializer, creates the
# CodingStateMachine driven by UTF8SMModel, and then calls #reset so the
# prober starts from its initial counters.
def initialize
  super()
  @codingSM = CodingStateMachine.new(UTF8SMModel)
  reset
end
Public Instance Methods
feed(aBuf)
click to toggle source
# File lib/rchardet/utf8prober.rb, line 49
# Pushes every byte of +aBuf+ through the coding state machine and updates
# the prober state from the results.
#
# aBuf:: object responding to +each_byte+ (presumably a String - confirm
#        against callers).
#
# Returns whatever +get_state+ reports once the buffer has been consumed.
def feed(aBuf)
  aBuf.each_byte do |byte|
    # The state machine is handed a one-character string, not the Integer.
    outcome = @codingSM.next_state(byte.chr)

    if outcome == EError
      # The state machine rejected the input: stop scanning.
      @state = ENotMe
      break
    end

    if outcome == EItsMe
      # The state machine gave a positive verdict: stop scanning.
      @state = EFoundIt
      break
    end

    next unless outcome == EStart

    # Count only characters the state machine reports as 2+ bytes long.
    @numOfMBChar += 1 if @codingSM.get_current_charlen >= 2
  end

  # Still undecided after the scan: promote to "found" once the confidence
  # is above the shortcut threshold.
  @state = EFoundIt if get_state == EDetecting && get_confidence > SHORTCUT_THRESHOLD

  get_state
end
get_charset_name()
click to toggle source
# File lib/rchardet/utf8prober.rb, line 45
# Name of the charset this prober reports; always the string "utf-8".
def get_charset_name
  "utf-8"
end
get_confidence()
click to toggle source
# File lib/rchardet/utf8prober.rb, line 75
# Confidence derived from @numOfMBChar, the multi-byte character count.
#
# With six or more counted characters the result is the fixed value 0.99.
# Below that, 0.99 is multiplied by ONE_CHAR_PROB once per counted
# character and the product is subtracted from 1.0.
def get_confidence
  unlike = 0.99
  return unlike unless @numOfMBChar < 6

  @numOfMBChar.times { unlike *= ONE_CHAR_PROB }
  1.0 - unlike
end
reset()
click to toggle source
Calls superclass method
CharDet::CharSetProber#reset
# File lib/rchardet/utf8prober.rb, line 39
# Returns the prober to its starting condition: runs the superclass reset,
# resets the coding state machine, and zeroes the multi-byte character
# counter.
def reset
  super()
  @codingSM.reset
  @numOfMBChar = 0
end