presage 0.9.2~beta
reverseTokenizer.cpp
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#include "reverseTokenizer.h"
26
28 const std::string blanks,
29 const std::string separs)
30 : Tokenizer(stream, blanks, separs)
31{
32 offset = offend;
33 //assert( stream.good());
34 //assert(!stream.fail());
35 //assert(!stream.bad() );
36 //assert(!stream.eof() );
37 // stream clearing needed because offset is positioned at end
38 stream.clear();
39
40 //std::cerr << "ReverseTokenizer::ReverseTokenizer() offbeg: " << offbeg
41 // << " offset: " << offset << " offend: " << offend << std::endl;
42}
43
// Destructor body: empty, no statements run here.
// The signature line is absent from this listing; the member index at the
// end of the page names it `virtual ~ReverseTokenizer()`.
45{}
46
// Body of countTokens() (declared `virtual int countTokens()` per the member
// index at the end of the page; the signature line is absent from this listing).
//
// Counts the tokens by setting `offset` to `offend`, calling nextToken()
// for as long as hasMoreTokens() is true, and then putting `offset` back
// to the value it had on entry.
//
// Returns: the number of nextToken() calls made during that walk.
//
// NOTE(review): listing line 49 is not present in this dump, so a statement
// may be missing right after the opening brace -- check the real source.
48{
50
51 // store current seek pointer position
52 std::streamoff curroff = offset;
53
54 // position get pointer at end of stream
55 offset = offend;
56
57 int count = 0;
// every nextToken() call is counted, whatever string it returns
58 while (hasMoreTokens()) {
59 nextToken();
60 count++;
61 }
62
63 // reposition seek get pointer to original position
64 offset = curroff;
65
66 return count;
67}
68
70{
71 //std::cerr << "ReverseTokenizer::hasMoreTokens() offbeg: " << offbeg
72 // << " offset: " << offset << " offend: " << offend << std::endl;
73 if (offbeg < offset) {
74 return true;
75 } else {
76 return false;
77 }
78}
79
// Body of nextToken() (declared `virtual std::string nextToken()` per the
// member index at the end of the page; the signature line is absent here).
//
// Reads the stream backwards starting at `offset` and returns one token:
//   * if the stream is not good(), or `offset` is already at `offbeg`,
//     the returned string is empty;
//   * if `offset` is at `offend` and the last character is a separator or
//     a blankspace, `offset` is moved back by one and an empty string is
//     returned;
//   * otherwise any run of blankspace/separator characters is skipped,
//     then characters are collected up to the next blankspace/separator
//     (or until `offset` reaches `offbeg`).
// Collected characters are lowercased when lowercaseMode() is true.
// `offset` is decremented once per character consumed.
//
// NOTE(review): listing line 82 is not present in this dump, so a statement
// may be missing right after the opening brace -- check the real source.
81{
83
84 int current;
85 std::string str;
86
87 if (stream.good()) {
// keep going until a non-empty token was built or the beginning is reached
88 while (offbeg < offset
89 && str.empty()) {
// look at the character just before `offset` without consuming it
90 stream.seekg(offset - 1);
91 current = stream.peek();
92
// a trailing separator/blank at the very end yields an empty token
93 if (offset == offend &&
94 (isSeparator(current) || isBlankspace(current))) {
95 offset--;
96 return str;
97 }
98
// skip backwards over blankspace and separator characters
// NOTE(review): after offset-- brings offset down to offbeg, seekg(offset - 1)
// still runs before the loop condition is re-tested; if offbeg is 0 that asks
// for position -1 -- confirm what state this leaves the stream in.
99 while ((isBlankspace(current) || isSeparator(current))
100 && offbeg < offset ) {
101 offset--;
102 stream.seekg(offset - 1);
103 current = stream.peek();
104 }
105
// collect the token characters, still moving backwards
106 while (!isBlankspace(current)
107 && !isSeparator(current)
108 && offbeg < offset) {
109
110 if( lowercaseMode() ) {
111 current = tolower( current );
112 }
113
114 // since the token is read backwards, the string
115 // needs to be reversed by inserting the char at
116 // the front
117 str.insert(str.begin(), current);
118
// same seek-before-recheck pattern as in the skip loop above
119 offset--;
120 stream.seekg(offset - 1);
121 current = stream.peek();
122 }
123 }
124 }
125
// The commented-out block below is an alternative do/while implementation
// that is not compiled.
126// if (stream.good()) {
127// do {
128// do {
129// current = stream.peek();
130// offset--;
131// stream.seekg(offset);
132//
133// // handle case where last character is a separator by
134// // returning an empty token
135// if (offset == offend - 2
136// && isSeparator(current)) {
137// return "";
138// }
139//
140// //std::cerr << "[DEBUG] read: "
141// // << static_cast<char>(current)
142// // << std::endl;
143//
144// if (!isBlankspace(current)
145// && !isSeparator(current)
146// && offset >= offbeg - 1) {
147//
148// if( lowercaseMode() ) {
149// current = tolower( current );
150// }
151//
152// // since the token is read backwards, the string
153// // needs to be reversed by inserting the char at
154// // the front
155// str.insert(str.begin(), current);
156//
157// //std::cerr << "[DEBUG] pushed: "
158// // << static_cast<char>(current)
159// // << std::endl;
160// //std::cerr << "[DEBUG] partial string: "
161// // << str << std::endl;
162// }
163// } while (!isBlankspace(current)
164// && !isSeparator(current)
165// && (offset >= offbeg));
166// } while (str.empty() && (offset >= offbeg));
167// }
168
169 //std::cerr << "[DEBUG] token: " << str << std::endl;
170
171 return str;
172}
173
175{
176 return static_cast<double>(offend - offset) / (offend - offbeg);
177}
virtual double progress() const
virtual bool hasMoreTokens() const
ReverseTokenizer(std::istream &stream, const std::string blanks, const std::string separs)
virtual int countTokens()
virtual std::string nextToken()
virtual ~ReverseTokenizer()
std::istream & stream
Definition: tokenizer.h:144
std::streamoff offend
Definition: tokenizer.h:147
std::streamoff offbeg
Definition: tokenizer.h:146
bool isSeparator(const int character) const
Definition: tokenizer.cpp:101
std::streamoff offset
Definition: tokenizer.h:148
bool lowercaseMode() const
Definition: tokenizer.cpp:86
bool isBlankspace(const int character) const
Definition: tokenizer.cpp:91