cprover
character_refine_preprocess.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Preprocess a goto-programs so that calls to the java Character
4 library are replaced by simple expressions.
5
6Author: Romain Brenguier
7
8Date: March 2017
9
10\*******************************************************************/
11
15
17
18#include <util/arith_tools.h>
19#include <util/bitvector_expr.h>
20#include <util/std_expr.h>
21
26 exprt (*expr_function)(const exprt &chr, const typet &type),
27 conversion_inputt &target)
28{
29 const code_function_callt &function_call=target;
30 assert(function_call.arguments().size()==1);
31 const exprt &arg=function_call.arguments()[0];
32 const exprt &result=function_call.lhs();
33 const typet &type=result.type();
34 return code_assignt(result, expr_function(arg, type));
35}
36
44 const exprt &chr,
45 const mp_integer &lower_bound,
46 const mp_integer &upper_bound)
47{
48 return and_exprt(
49 binary_relation_exprt(chr, ID_ge, from_integer(lower_bound, chr.type())),
50 binary_relation_exprt(chr, ID_le, from_integer(upper_bound, chr.type())));
51}
52
59 const exprt &chr, const std::list<mp_integer> &list)
60{
62 for(const auto &i : list)
63 ops.push_back(equal_exprt(chr, from_integer(i, chr.type())));
64 return disjunction(ops);
65}
66
73 const exprt &chr, const typet &type)
74{
75 exprt u010000=from_integer(0x010000, type);
76 binary_relation_exprt small(chr, ID_lt, u010000);
77 return if_exprt(small, from_integer(1, type), from_integer(2, type));
78}
79
84 conversion_inputt &target)
85{
88}
89
93 const exprt &chr, const typet &type)
94{
95 return typecast_exprt(chr, type);
96}
97
102 conversion_inputt &target)
103{
106}
107
112{
113 const code_function_callt &function_call=target;
114 assert(function_call.arguments().size()==2);
115 const exprt &char1=function_call.arguments()[0];
116 const exprt &char2=function_call.arguments()[1];
117 const exprt &result=function_call.lhs();
118 const typet &type=result.type();
119 binary_relation_exprt smaller(char1, ID_lt, char2);
120 binary_relation_exprt greater(char1, ID_gt, char2);
121 if_exprt expr(
122 smaller,
123 from_integer(-1, type),
124 if_exprt(greater, from_integer(1, type), from_integer(0, type)));
125
126 return code_assignt(result, expr);
127}
128
137 conversion_inputt &target)
138{
139 const code_function_callt &function_call=target;
140 const std::size_t nb_args=function_call.arguments().size();
141 PRECONDITION(nb_args==1 || nb_args==2);
142 const exprt &arg=function_call.arguments()[0];
143 // If there is no radix argument we set it to 36 which is the maximum possible
144 const exprt &radix=
145 nb_args>1?function_call.arguments()[1]:from_integer(36, arg.type());
146 const exprt &result=function_call.lhs();
147 const typet &type=result.type();
148
149 // TODO: If the radix is not in the range MIN_RADIX <= radix <= MAX_RADIX or
150 // if the value of ch is not a valid digit in the specified radix,
151 // -1 is returned.
152
153 // Case 1: The method isDigit is true of the character and the Unicode
154 // decimal digit value of the character (or its single-character
155 // decomposition) is less than the specified radix.
156 exprt invalid=from_integer(-1, arg.type());
157 exprt c0=from_integer('0', arg.type());
158 exprt latin_digit=in_interval_expr(arg, '0', '9');
159 minus_exprt value1(arg, c0);
160 // TODO: this is only valid for latin digits
161 if_exprt case1(
162 binary_relation_exprt(value1, ID_lt, radix), value1, invalid);
163
164 // Case 2: The character is one of the uppercase Latin letters 'A'
165 // through 'Z' and its code is less than radix + 'A' - 10,
166 // then ch - 'A' + 10 is returned.
167 exprt cA=from_integer('A', arg.type());
168 exprt i10=from_integer(10, arg.type());
169 exprt upper_case=in_interval_expr(arg, 'A', 'Z');
170 plus_exprt value2(minus_exprt(arg, cA), i10);
171 if_exprt case2(
172 binary_relation_exprt(value2, ID_lt, radix), value2, invalid);
173
174 // The character is one of the lowercase Latin letters 'a' through 'z' and
175 // its code is less than radix + 'a' - 10, then ch - 'a' + 10 is returned.
176 exprt ca=from_integer('a', arg.type());
177 exprt lower_case=in_interval_expr(arg, 'a', 'z');
178 plus_exprt value3(minus_exprt(arg, ca), i10);
179 if_exprt case3(
180 binary_relation_exprt(value3, ID_lt, radix), value3, invalid);
181
182
183 // The character is one of the fullwidth uppercase Latin letters A ('\uFF21')
184 // through Z ('\uFF3A') and its code is less than radix + '\uFF21' - 10.
185 // In this case, ch - '\uFF21' + 10 is returned.
186 exprt uFF21=from_integer(0xFF21, arg.type());
187 exprt fullwidth_upper_case=in_interval_expr(arg, 0xFF21, 0xFF3A);
188 plus_exprt value4(minus_exprt(arg, uFF21), i10);
189 if_exprt case4(
190 binary_relation_exprt(value4, ID_lt, radix), value4, invalid);
191
192 // The character is one of the fullwidth lowercase Latin letters a ('\uFF41')
193 // through z ('\uFF5A') and its code is less than radix + '\uFF41' - 10.
194 // In this case, ch - '\uFF41' + 10 is returned.
195 exprt uFF41=from_integer(0xFF41, arg.type());
196 plus_exprt value5(minus_exprt(arg, uFF41), i10);
197 if_exprt case5(
198 binary_relation_exprt(value5, ID_lt, radix), value5, invalid);
199
200 if_exprt fullwidth_cases(fullwidth_upper_case, case4, case5);
201 if_exprt expr(
202 latin_digit,
203 case1,
204 if_exprt(upper_case, case2, if_exprt(lower_case, case3, fullwidth_cases)));
205 typecast_exprt tc_expr(expr, type);
206
207 return code_assignt(result, tc_expr);
208}
209
214{
215 return convert_digit_char(target);
216}
217
224{
225 const code_function_callt &function_call=target;
226 assert(function_call.arguments().size()==2);
227 const exprt &digit=function_call.arguments()[0];
228 const exprt &result=function_call.lhs();
229 const typet &type=result.type();
230 typecast_exprt casted_digit(digit, type);
231
232 exprt d10=from_integer(10, type);
233 binary_relation_exprt small(casted_digit, ID_le, d10);
234 plus_exprt value1(casted_digit, from_integer('0', type));
235 plus_exprt value2(minus_exprt(casted_digit, d10), from_integer('a', type));
236 return code_assignt(result, if_exprt(small, value1, value2));
237}
238
243 conversion_inputt &target)
244{
245 // TODO: This is unimplemented for now as it requires analyzing
246 // the UnicodeData file to find characters directionality.
247 return target;
248}
249
254 conversion_inputt &target)
255{
256 return convert_get_directionality_char(target);
257}
258
264 conversion_inputt &target)
265{
266 return convert_digit_char(target);
267}
268
275 conversion_inputt &target)
276{
277 return convert_digit_int(target);
278}
279
284 conversion_inputt &target)
285{
286 // TODO: This is unimplemented for now as it requires analyzing
287 // the UnicodeData file to categorize characters.
288 return target;
289}
290
295 conversion_inputt &target)
296{
297 return convert_get_type_char(target);
298}
299
304{
305 return convert_char_value(target);
306}
307
315 const exprt &chr, const typet &type)
316{
317 exprt u10000=from_integer(0x010000, type);
318 exprt uD800=from_integer(0xD800, type);
319 exprt u400=from_integer(0x0400, type);
320
321 plus_exprt high_surrogate(uD800, div_exprt(minus_exprt(chr, u10000), u400));
322 return std::move(high_surrogate);
323}
324
330 const exprt &chr, const typet &type)
331{
332 (void)type; // unused parameter
333 return in_interval_expr(chr, 'a', 'z');
334}
335
341 const exprt &chr, const typet &type)
342{
343 (void)type; // unused parameter
344 return in_interval_expr(chr, 'A', 'Z');
345}
346
356 const exprt &chr, const typet &type)
357{
358 return or_exprt(
360 expr_of_is_ascii_lower_case(chr, type));
361}
362
374 const exprt &chr, const typet &type)
375{
376 return expr_of_is_letter(chr, type);
377}
378
383 conversion_inputt &target)
384{
387}
388
396 const exprt &chr, const typet &type)
397{
398 (void)type; // unused parameter
399 return and_exprt(
400 binary_relation_exprt(chr, ID_le, from_integer(0xFFFF, chr.type())),
401 binary_relation_exprt(chr, ID_ge, from_integer(0, chr.type())));
402}
403
408 conversion_inputt &target)
409{
412}
413
419 const exprt &chr, const typet &type)
420{
421 (void)type; // unused parameter
422 // The following intervals are undefined in unicode, according to
423 // the Unicode Character Database: http://www.unicode.org/Public/UCD/latest/
424 exprt::operandst intervals;
425 intervals.push_back(in_interval_expr(chr, 0x0750, 0x077F));
426 intervals.push_back(in_interval_expr(chr, 0x07C0, 0x08FF));
427 intervals.push_back(in_interval_expr(chr, 0x1380, 0x139F));
428 intervals.push_back(in_interval_expr(chr, 0x18B0, 0x18FF));
429 intervals.push_back(in_interval_expr(chr, 0x1980, 0x19DF));
430 intervals.push_back(in_interval_expr(chr, 0x1A00, 0x1CFF));
431 intervals.push_back(in_interval_expr(chr, 0x1D80, 0x1DFF));
432 intervals.push_back(in_interval_expr(chr, 0x2C00, 0x2E7F));
433 intervals.push_back(in_interval_expr(chr, 0x2FE0, 0x2FEF));
434 intervals.push_back(in_interval_expr(chr, 0x31C0, 0x31EF));
435 intervals.push_back(in_interval_expr(chr, 0x9FB0, 0x9FFF));
436 intervals.push_back(in_interval_expr(chr, 0xA4D0, 0xABFF));
437 intervals.push_back(in_interval_expr(chr, 0xD7B0, 0xD7FF));
438 intervals.push_back(in_interval_expr(chr, 0xFE10, 0xFE1F));
439 intervals.push_back(in_interval_expr(chr, 0x10140, 0x102FF));
440 intervals.push_back(in_interval_expr(chr, 0x104B0, 0x107FF));
441 intervals.push_back(in_interval_expr(chr, 0x10840, 0x1CFFF));
442 intervals.push_back(in_interval_expr(chr, 0x1D200, 0x1D2FF));
443 intervals.push_back(in_interval_expr(chr, 0x1D360, 0x1D3FF));
444 intervals.push_back(
445 binary_relation_exprt(chr, ID_ge, from_integer(0x1D800, chr.type())));
446
447 return not_exprt(disjunction(intervals));
448}
449
454 conversion_inputt &target)
455{
458}
459
464 conversion_inputt &target)
465{
466 return convert_is_defined_char(target);
467}
468
484 const exprt &chr, const typet &type)
485{
486 (void)type; // unused parameter
487 exprt latin_digit=in_interval_expr(chr, '0', '9');
488 exprt arabic_indic_digit=in_interval_expr(chr, 0x660, 0x669);
489 exprt extended_digit=in_interval_expr(chr, 0x6F0, 0x6F9);
490 exprt devanagari_digit=in_interval_expr(chr, 0x966, 0x96F);
491 exprt fullwidth_digit=in_interval_expr(chr, 0xFF10, 0xFF19);
492 or_exprt digit(
493 or_exprt(latin_digit, or_exprt(arabic_indic_digit, extended_digit)),
494 or_exprt(devanagari_digit, fullwidth_digit));
495 return std::move(digit);
496}
497
502 conversion_inputt &target)
503{
506}
507
512 conversion_inputt &target)
513{
514 return convert_is_digit_char(target);
515}
516
523 const exprt &chr, const typet &type)
524{
525 (void)type; // unused parameter
526 return in_interval_expr(chr, 0xD800, 0xDBFF);
527}
528
533 conversion_inputt &target)
534{
537}
538
548 const exprt &chr, const typet &type)
549{
550 (void)type; // unused parameter
551 or_exprt ignorable(
552 in_interval_expr(chr, 0x0000, 0x0008),
553 or_exprt(
554 in_interval_expr(chr, 0x000E, 0x001B),
555 in_interval_expr(chr, 0x007F, 0x009F)));
556 return std::move(ignorable);
557}
558
565 conversion_inputt &target)
566{
569}
570
577 conversion_inputt &target)
578{
580}
581
586 conversion_inputt &target)
587{
588 const code_function_callt &function_call=target;
589 assert(function_call.arguments().size()==1);
590 const exprt &arg=function_call.arguments()[0];
591 const exprt &result=function_call.lhs();
592 exprt is_ideograph=in_interval_expr(arg, 0x4E00, 0x9FFF);
593 return code_assignt(result, is_ideograph);
594}
595
600 conversion_inputt &target)
601{
602 const code_function_callt &function_call=target;
603 assert(function_call.arguments().size()==1);
604 const exprt &arg=function_call.arguments()[0];
605 const exprt &result=function_call.lhs();
606 or_exprt iso(
607 in_interval_expr(arg, 0x00, 0x1F), in_interval_expr(arg, 0x7F, 0x9F));
608 return code_assignt(result, iso);
609}
610
615 conversion_inputt &target)
616{
617 return convert_is_ISO_control_char(target);
618}
619
627 conversion_inputt &target)
628{
631}
632
637 conversion_inputt &target)
638{
640}
641
650 conversion_inputt &target)
651{
654}
655
660 conversion_inputt &target)
661{
663}
664
669 conversion_inputt &target)
670{
672}
673
678 conversion_inputt &target)
679{
681}
682
687 conversion_inputt &target)
688{
691}
692
697 conversion_inputt &target)
698{
699 return convert_is_letter_char(target);
700}
701
707 const exprt &chr, const typet &type)
708{
709 return or_exprt(expr_of_is_letter(chr, type), expr_of_is_digit(chr, type));
710}
711
716 conversion_inputt &target)
717{
720}
721
726 conversion_inputt &target)
727{
728 return convert_is_letter_or_digit_char(target);
729}
730
737 conversion_inputt &target)
738{
741}
742
749 conversion_inputt &target)
750{
751 return convert_is_lower_case_char(target);
752}
753
758 conversion_inputt &target)
759{
760 const code_function_callt &function_call=target;
761 assert(function_call.arguments().size()==1);
762 const exprt &arg=function_call.arguments()[0];
763 const exprt &result=function_call.lhs();
764 exprt is_low_surrogate=in_interval_expr(arg, 0xDC00, 0xDFFF);
765 return code_assignt(result, is_low_surrogate);
766}
767
776 const exprt &chr, const typet &type)
777{
778 (void)type; // unused parameter
779 return in_list_expr(chr, {0x28, 0x29, 0x3C, 0x3E, 0x5B, 0x5D, 0x7B, 0x7D});
780}
781
788 conversion_inputt &target)
789{
792}
793
800 conversion_inputt &target)
801{
802 return convert_is_mirrored_char(target);
803}
804
809{
810 return convert_is_whitespace_char(target);
811}
812
819 const exprt &chr, const typet &type)
820{
821 (void)type; // unused parameter
822 std::list<mp_integer> space_characters=
823 {0x20, 0x00A0, 0x1680, 0x202F, 0x205F, 0x3000, 0x2028, 0x2029};
824 exprt condition0=in_list_expr(chr, space_characters);
825 exprt condition1=in_interval_expr(chr, 0x2000, 0x200A);
826 return or_exprt(condition0, condition1);
827}
828
833 conversion_inputt &target)
834{
837}
838
843 conversion_inputt &target)
844{
845 return convert_is_space_char(target);
846}
847
854 const exprt &chr, const typet &type)
855{
856 (void)type; // unused parameter
857 return binary_relation_exprt(chr, ID_gt, from_integer(0xFFFF, chr.type()));
858}
859
864 conversion_inputt &target)
865{
868}
869
875 const exprt &chr, const typet &type)
876{
877 (void)type; // unused parameter
878 return in_interval_expr(chr, 0xD800, 0xDFFF);
879}
880
885 conversion_inputt &target)
886{
889}
890
895 conversion_inputt &target)
896{
897 const code_function_callt &function_call=target;
898 assert(function_call.arguments().size()==2);
899 const exprt &arg0=function_call.arguments()[0];
900 const exprt &arg1=function_call.arguments()[1];
901 const exprt &result=function_call.lhs();
902 exprt is_low_surrogate=in_interval_expr(arg1, 0xDC00, 0xDFFF);
903 exprt is_high_surrogate=in_interval_expr(arg0, 0xD800, 0xDBFF);
905}
906
912 const exprt &chr, const typet &type)
913{
914 (void)type; // unused parameter
915 std::list<mp_integer>title_case_chars=
916 {0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x1FBC, 0x1FCC, 0x1FFC};
917 exprt::operandst conditions;
918 conditions.push_back(in_list_expr(chr, title_case_chars));
919 conditions.push_back(in_interval_expr(chr, 0x1F88, 0x1F8F));
920 conditions.push_back(in_interval_expr(chr, 0x1F98, 0x1F9F));
921 conditions.push_back(in_interval_expr(chr, 0x1FA8, 0x1FAF));
922 return disjunction(conditions);
923}
924
929 conversion_inputt &target)
930{
933}
934
939 conversion_inputt &target)
940{
941 return convert_is_title_case_char(target);
942}
943
950 const exprt &chr, const typet &type)
951{
952 (void)type; // unused parameter
953 // The following set of characters is the general category "Nl" in the
954 // Unicode specification.
955 exprt cond0=in_interval_expr(chr, 0x16EE, 0x16F0);
956 exprt cond1=in_interval_expr(chr, 0x2160, 0x2188);
957 exprt cond2=in_interval_expr(chr, 0x3021, 0x3029);
958 exprt cond3=in_interval_expr(chr, 0x3038, 0x303A);
959 exprt cond4=in_interval_expr(chr, 0xA6E6, 0xA6EF);
960 exprt cond5=in_interval_expr(chr, 0x10140, 0x10174);
961 exprt cond6=in_interval_expr(chr, 0x103D1, 0x103D5);
962 exprt cond7=in_interval_expr(chr, 0x12400, 0x1246E);
963 exprt cond8=in_list_expr(chr, {0x3007, 0x10341, 0x1034A});
964 return or_exprt(
965 or_exprt(or_exprt(cond0, cond1), or_exprt(cond2, cond3)),
966 or_exprt(or_exprt(cond4, cond5), or_exprt(cond6, or_exprt(cond7, cond8))));
967}
968
969
978 const exprt &chr, const typet &type)
979{
980 exprt::operandst conditions;
981 conditions.push_back(expr_of_is_unicode_identifier_start(chr, type));
982 conditions.push_back(expr_of_is_digit(chr, type));
983 conditions.push_back(expr_of_is_identifier_ignorable(chr, type));
984 return disjunction(conditions);
985}
986
991 conversion_inputt &target)
992{
995}
996
1001 conversion_inputt &target)
1002{
1004}
1005
1012 const exprt &chr, const typet &type)
1013{
1014 return or_exprt(
1015 expr_of_is_letter(chr, type), expr_of_is_letter_number(chr, type));
1016}
1017
1022 conversion_inputt &target)
1023{
1024 return convert_char_function(
1026}
1027
1032 conversion_inputt &target)
1033{
1035}
1036
1043 conversion_inputt &target)
1044{
1045 return convert_char_function(
1047}
1048
1053 conversion_inputt &target)
1054{
1055 return convert_is_upper_case_char(target);
1056}
1057
1064 const exprt &chr, const typet &type)
1065{
1066 (void)type; // unused parameter
1067 return binary_relation_exprt(chr, ID_le, from_integer(0x10FFFF, chr.type()));
1068}
1069
1074 conversion_inputt &target)
1075{
1076 return convert_char_function(
1078}
1079
1089 const exprt &chr, const typet &type)
1090{
1091 (void)type; // unused parameter
1092 exprt::operandst conditions;
1093 std::list<mp_integer> space_characters=
1094 {0x20, 0x1680, 0x205F, 0x3000, 0x2028, 0x2029};
1095 conditions.push_back(in_list_expr(chr, space_characters));
1096 conditions.push_back(in_interval_expr(chr, 0x2000, 0x2006));
1097 conditions.push_back(in_interval_expr(chr, 0x2008, 0x200A));
1098 conditions.push_back(in_interval_expr(chr, 0x09, 0x0D));
1099 conditions.push_back(in_interval_expr(chr, 0x1C, 0x1F));
1100 return disjunction(conditions);
1101}
1102
1107 conversion_inputt &target)
1108{
1109 return convert_char_function(
1111}
1112
1117 conversion_inputt &target)
1118{
1119 return convert_is_whitespace_char(target);
1120}
1121
1129 const exprt &chr, const typet &type)
1130{
1131 exprt uDC00=from_integer(0xDC00, type);
1132 exprt u0400=from_integer(0x0400, type);
1133 return plus_exprt(uDC00, mod_exprt(chr, u0400));
1134}
1135
1142 const exprt &chr, const typet &type)
1143{
1144 shl_exprt first_byte(chr, from_integer(8, type));
1145 lshr_exprt second_byte(chr, from_integer(8, type));
1146 return plus_exprt(first_byte, second_byte);
1147}
1148
1153 conversion_inputt &target)
1154{
1155 return convert_char_function(
1157}
1158
1168 const exprt &chr, const typet &type)
1169{
1170 minus_exprt transformed(
1171 plus_exprt(chr, from_integer('a', type)), from_integer('A', type));
1172
1173 return if_exprt(expr_of_is_ascii_upper_case(chr, type), transformed, chr);
1174}
1175
1180 conversion_inputt &target)
1181{
1182 return convert_char_function(
1184}
1185
1190 conversion_inputt &target)
1191{
1192 return convert_to_lower_case_char(target);
1193}
1194
1200 const exprt &chr, const typet &type)
1201{
1202 std::list<mp_integer> increment_list={0x01C4, 0x01C7, 0x01CA, 0x01F1};
1203 std::list<mp_integer> decrement_list={0x01C6, 0x01C9, 0x01CC, 0x01F3};
1204 exprt plus_8_interval1=in_interval_expr(chr, 0x1F80, 0x1F87);
1205 exprt plus_8_interval2=in_interval_expr(chr, 0x1F90, 0x1F97);
1206 exprt plus_8_interval3=in_interval_expr(chr, 0x1FA0, 0x1FA7);
1207 std::list<mp_integer> plus_9_list={0x1FB3, 0x1FC3, 0x1FF3};
1208 minus_exprt minus_1(chr, from_integer(1, type));
1209 plus_exprt plus_1(chr, from_integer(1, type));
1210 plus_exprt plus_8(chr, from_integer(8, type));
1211 plus_exprt plus_9(chr, from_integer(9, type));
1212 or_exprt plus_8_set(
1213 plus_8_interval1, or_exprt(plus_8_interval2, plus_8_interval3));
1214
1215 return if_exprt(
1216 in_list_expr(chr, increment_list),
1217 plus_1,
1218 if_exprt(
1219 in_list_expr(chr, decrement_list),
1220 minus_1,
1221 if_exprt(
1222 plus_8_set,
1223 plus_8,
1224 if_exprt(in_list_expr(chr, plus_9_list), plus_9, chr))));
1225}
1226
1231 conversion_inputt &target)
1232{
1233 return convert_char_function(
1235}
1236
1241 conversion_inputt &target)
1242{
1243 return convert_to_title_case_char(target);
1244}
1245
1255 const exprt &chr, const typet &type)
1256{
1257 minus_exprt transformed(
1258 plus_exprt(chr, from_integer('A', type)), from_integer('a', type));
1259
1260 return if_exprt(expr_of_is_ascii_lower_case(chr, type), transformed, chr);
1261}
1262
1267 conversion_inputt &target)
1268{
1269 return convert_char_function(
1271}
1272
1277 conversion_inputt &target)
1278{
1279 return convert_to_upper_case_char(target);
1280}
1281
1289 const code_function_callt &code) const
1290{
1291 if(code.function().id()==ID_symbol)
1292 {
1293 const irep_idt &function_id=
1295 auto it=conversion_table.find(function_id);
1296 if(it!=conversion_table.end())
1297 return (it->second)(code);
1298 }
1299
1300 return code;
1301}
1302
1305{
1306 // All methods are listed here in alphabetic order
1307 // The ones that are not supported by this module (though they may be
1308 // supported by the string solver) have no entry in the conversion
1309 // table and are marked in this way:
1310 // Not supported "java::java.lang.Character.<init>()"
1311
1312 conversion_table["java::java.lang.Character.charCount:(I)I"]=
1314 conversion_table["java::java.lang.Character.charValue:()C"]=
1316
1317 // Not supported "java::java.lang.Character.codePointAt:([CI)I
1318 // Not supported "java::java.lang.Character.codePointAt:([CII)I"
1319 // Not supported "java::java.lang.Character.codePointAt:"
1320 // "(Ljava.lang.CharSequence;I)I"
1321 // Not supported "java::java.lang.Character.codePointBefore:([CI)I"
1322 // Not supported "java::java.lang.Character.codePointBefore:([CII)I"
1323 // Not supported "java::java.lang.Character.codePointBefore:"
1324 // "(Ljava.lang.CharSequence;I)I"
1325 // Not supported "java::java.lang.Character.codePointCount:([CII)I"
1326 // Not supported "java::java.lang.Character.codePointCount:"
1327 // "(Ljava.lang.CharSequence;II)I"
1328 // Not supported "java::java.lang.Character.compareTo:"
1329 // "(Ljava.lang.Character;)I"
1330
1331 conversion_table["java::java.lang.Character.compare:(CC)I"]=
1333 conversion_table["java::java.lang.Character.digit:(CI)I"]=
1335 conversion_table["java::java.lang.Character.digit:(II)I"]=
1337
1338 // Not supported "java::java.lang.Character.equals:(Ljava.lang.Object;)Z"
1339
1340 conversion_table["java::java.lang.Character.forDigit:(II)C"]=
1342 conversion_table["java::java.lang.Character.getDirectionality:(C)B"]=
1344 conversion_table["java::java.lang.Character.getDirectionality:(I)B"]=
1346
1347 // Not supported "java::java.lang.Character.getName:(I)Ljava.lang.String;"
1348
1349 conversion_table["java::java.lang.Character.getNumericValue:(C)I"]=
1351 conversion_table["java::java.lang.Character.getNumericValue:(I)I"]=
1353 conversion_table["java::java.lang.Character.getType:(C)I"]=
1355 conversion_table["java::java.lang.Character.getType:(I)I"]=
1357 conversion_table["java::java.lang.Character.hashCode:()I"]=
1359 conversion_table["java::java.lang.Character.isAlphabetic:(I)Z"]=
1361 conversion_table["java::java.lang.Character.isBmpCodePoint:(I)Z"]=
1363 conversion_table["java::java.lang.Character.isDefined:(C)Z"]=
1365 conversion_table["java::java.lang.Character.isDefined:(I)Z"]=
1367 conversion_table["java::java.lang.Character.isDigit:(C)Z"]=
1369 conversion_table["java::java.lang.Character.isDigit:(I)Z"]=
1371 conversion_table["java::java.lang.Character.isHighSurrogate:(C)Z"]=
1373 conversion_table["java::java.lang.Character.isIdentifierIgnorable:(C)Z"]=
1375 conversion_table["java::java.lang.Character.isIdentifierIgnorable:(I)Z"]=
1377 conversion_table["java::java.lang.Character.isIdeographic:(I)Z"]=
1379 conversion_table["java::java.lang.Character.isISOControl:(C)Z"]=
1381 conversion_table["java::java.lang.Character.isISOControl:(I)Z"]=
1383 conversion_table["java::java.lang.Character.isJavaIdentifierPart:(C)Z"]=
1385 conversion_table["java::java.lang.Character.isJavaIdentifierPart:(I)Z"]=
1387 conversion_table["java::java.lang.Character.isJavaIdentifierStart:(C)Z"]=
1389 conversion_table["java::java.lang.Character.isJavaIdentifierStart:(I)Z"]=
1391 conversion_table["java::java.lang.Character.isJavaLetter:(C)Z"]=
1393 conversion_table["java::java.lang.Character.isJavaLetterOrDigit:(C)Z"]=
1395 conversion_table["java::java.lang.Character.isLetter:(C)Z"]=
1397 conversion_table["java::java.lang.Character.isLetter:(I)Z"]=
1399 conversion_table["java::java.lang.Character.isLetterOrDigit:(C)Z"]=
1401 conversion_table["java::java.lang.Character.isLetterOrDigit:(I)Z"]=
1403 conversion_table["java::java.lang.Character.isLowerCase:(C)Z"]=
1405 conversion_table["java::java.lang.Character.isLowerCase:(I)Z"]=
1407 conversion_table["java::java.lang.Character.isLowSurrogate:(C)Z"]=
1409 conversion_table["java::java.lang.Character.isMirrored:(C)Z"]=
1411 conversion_table["java::java.lang.Character.isMirrored:(I)Z"]=
1413 conversion_table["java::java.lang.Character.isSpace:(C)Z"]=
1415 conversion_table["java::java.lang.Character.isSpaceChar:(C)Z"]=
1417 conversion_table["java::java.lang.Character.isSpaceChar:(I)Z"]=
1419 conversion_table["java::java.lang.Character.isSupplementaryCodePoint:(I)Z"]=
1421 conversion_table["java::java.lang.Character.isSurrogate:(C)Z"]=
1423 conversion_table["java::java.lang.Character.isSurrogatePair:(CC)Z"]=
1425 conversion_table["java::java.lang.Character.isTitleCase:(C)Z"]=
1427 conversion_table["java::java.lang.Character.isTitleCase:(I)Z"]=
1429 conversion_table["java::java.lang.Character.isUnicodeIdentifierPart:(C)Z"]=
1431 conversion_table["java::java.lang.Character.isUnicodeIdentifierPart:(I)Z"]=
1433 conversion_table["java::java.lang.Character.isUnicodeIdentifierStart:(C)Z"]=
1435 conversion_table["java::java.lang.Character.isUnicodeIdentifierStart:(I)Z"]=
1437 conversion_table["java::java.lang.Character.isUpperCase:(C)Z"]=
1439 conversion_table["java::java.lang.Character.isUpperCase:(I)Z"]=
1441 conversion_table["java::java.lang.Character.isValidCodePoint:(I)Z"]=
1443 conversion_table["java::java.lang.Character.isWhitespace:(C)Z"]=
1445 conversion_table["java::java.lang.Character.isWhitespace:(I)Z"]=
1447
1448 // Not supported "java::java.lang.Character.offsetByCodePoints:([CIIII)I"
1449 // Not supported "java::java.lang.Character.offsetByCodePoints:"
1450 // "(Ljava.lang.CharacterSequence;II)I"
1451
1452 conversion_table["java::java.lang.Character.reverseBytes:(C)C"]=
1454
1455 // Not supported "java::java.lang.Character.toChars:(I[CI)I"
1456
1457 conversion_table["java::java.lang.Character.toLowerCase:(C)C"]=
1459 conversion_table["java::java.lang.Character.toLowerCase:(I)I"]=
1461
1462 // Not supported "java::java.lang.Character.toString:()Ljava.lang.String;"
1463 // Not supported "java::java.lang.Character.toString:(C)Ljava.lang.String;"
1464
1465 conversion_table["java::java.lang.Character.toTitleCase:(C)C"]=
1467 conversion_table["java::java.lang.Character.toTitleCase:(I)I"]=
1469 conversion_table["java::java.lang.Character.toUpperCase:(C)C"]=
1471 conversion_table["java::java.lang.Character.toUpperCase:(I)I"]=
1473
1474 // Not supported "java::java.lang.Character.valueOf:(C)Ljava.lang.Character;"
1475}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
API to expression classes for bitvectors.
Preprocess a goto-programs so that calls to the java Character library are replaced by simple express...
Boolean AND.
Definition: std_expr.h:1974
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
static codet convert_is_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_high_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_function(exprt(*expr_function)(const exprt &chr, const typet &type), conversion_inputt &target)
converts based on a function on expressions
static codet convert_is_whitespace_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter_number(const exprt &chr, const typet &type)
Determines if the specified character is in the LETTER_NUMBER category of Unicode.
static exprt expr_of_is_alphabetic(const exprt &chr, const typet &type)
Determines if the specified character (Unicode code point) is alphabetic.
static codet convert_is_ISO_control_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_numeric_value_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_identifier_ignorable(const exprt &chr, const typet &type)
Determines if the character is one of ignorable in a Java identifier, that is, it is in one of these ...
static codet convert_is_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_upper_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII uppercase character.
static exprt expr_of_is_defined(const exprt &chr, const typet &type)
Determines if a character is defined in Unicode.
static exprt expr_of_is_unicode_identifier_start(const exprt &chr, const typet &type)
Determines if the specified character is permissible as the first character in a Unicode identifier.
std::unordered_map< irep_idt, conversion_functiont > conversion_table
static codet convert_is_surrogate_pair(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_compare(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_count(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_value(const exprt &chr, const typet &type)
Casts the given expression to the given type.
static exprt expr_of_is_high_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode high-surrogate code unit (also known as leading-surro...
static codet convert_is_space(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_valid_code_point(const exprt &chr, const typet &type)
Determines whether the specified code point is a valid Unicode code point value.
static exprt in_interval_expr(const exprt &chr, const mp_integer &lower_bound, const mp_integer &upper_bound)
The returned expression is true when the first argument is in the interval defined by the lower and u...
void initialize_conversion_table()
fill maps with correspondance from java method names to conversion functions
static exprt expr_of_is_letter_or_digit(const exprt &chr, const typet &type)
Determines if the specified character is a letter or digit.
static codet convert_is_unicode_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_mirrored(const exprt &chr, const typet &type)
Determines whether the character is mirrored according to the Unicode specification.
static codet convert_to_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_letter(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_defined_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_identifier_ignorable_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_whitespace(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Java.
codet replace_character_call(const code_function_callt &call) const
replace function calls to functions of the Character by an affectation if possible,...
static codet convert_is_letter_or_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_unicode_identifier_part(const exprt &chr, const typet &type)
Determines if the character may be part of a Unicode identifier.
static codet convert_is_ideographic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_lower_case(const exprt &chr, const typet &type)
Converts the character argument to lowercase.
static codet convert_is_bmp_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_supplementary_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the supplementary character ran...
static exprt expr_of_reverse_bytes(const exprt &chr, const typet &type)
Returns the value obtained by reversing the order of the bytes in the specified char value.
static codet convert_is_defined_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_title_case(const exprt &chr, const typet &type)
Converts the character argument to titlecase.
static codet convert_is_supplementary_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_value(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_low_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_lower_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII lowercase character.
static codet convert_hash_code(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt in_list_expr(const exprt &chr, const std::list< mp_integer > &list)
The returned expression is true when the given character is equal to one of the element in the list.
static codet convert_is_java_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_for_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_high_surrogate(const exprt &chr, const typet &type)
Returns the leading surrogate (a high surrogate code unit) of the surrogate pair representing the spe...
static codet convert_is_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_is_identifier_ignorable_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_ISO_control_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_count(const exprt &chr, const typet &type)
Determines the number of char values needed to represent the specified character (Unicode code point)...
static codet convert_is_java_letter_or_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaLette...
static exprt expr_of_is_space_char(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Unicode (SPACE_SEPARATOR,...
static exprt expr_of_is_digit(const exprt &chr, const typet &type)
Determines if the specified character is a digit.
static exprt expr_of_is_bmp_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the Basic Multilingual Plane (B...
static codet convert_reverse_bytes(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode surrogate code unit.
static codet convert_get_numeric_value_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_or_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_whitespace_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter(const exprt &chr, const typet &type)
Determines if the specified character is a letter.
static exprt expr_of_is_title_case(const exprt &chr, const typet &type)
Determines if the specified character is a titlecase character.
static codet convert_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_upper_case(const exprt &chr, const typet &type)
Converts the character argument to uppercase.
static codet convert_is_valid_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_alphabetic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static exprt expr_of_low_surrogate(const exprt &chr, const typet &type)
Returns the trailing surrogate (a low surrogate code unit) of the surrogate pair representing the spe...
A codet representing an assignment in the program.
codet representation of a function call statement.
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
Division.
Definition: std_expr.h:1064
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
Equality.
Definition: std_expr.h:1225
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
typet & type()
Return the type of the expression.
Definition: expr.h:82
The trinary if-then-else operator.
Definition: std_expr.h:2226
const irep_idt & id() const
Definition: irep.h:396
Logical right shift.
Binary minus.
Definition: std_expr.h:973
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition: std_expr.h:1135
Boolean negation.
Definition: std_expr.h:2181
Boolean OR.
Definition: std_expr.h:2082
The plus expression Associativity is not specified.
Definition: std_expr.h:914
Left shift.
const irep_idt & get_identifier() const
Definition: std_expr.h:109
Semantic type conversion.
Definition: std_expr.h:1920
The type of an expression, extends irept.
Definition: type.h:29
BigInt mp_integer
Definition: smt_terms.h:12
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:22
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....