cprover
convert_string_literal.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: C/C++ Language Conversion
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
13
14#include <util/arith_tools.h>
15#include <util/c_types.h>
16#include <util/unicode.h>
18
19#include "unescape_string.h"
20
21std::basic_string<unsigned int> convert_one_string_literal(
22 const std::string &src)
23{
24 assert(src.size()>=2);
25
26 if(src[0]=='u' && src[1]=='8')
27 {
28 assert(src[src.size()-1]=='"');
29 assert(src[2]=='"');
30
31 std::basic_string<unsigned int> value=
32 unescape_wide_string(std::string(src, 3, src.size()-4));
33
34 // turn into utf-8
35 const std::string utf8_value = utf32_native_endian_to_utf8(value);
36
37 // pad into wide string
38 value.resize(utf8_value.size());
39 for(std::size_t i=0; i<utf8_value.size(); i++)
40 value[i]=utf8_value[i];
41
42 return value;
43 }
44 else if(src[0]=='L' || src[0]=='u' || src[0]=='U')
45 {
46 assert(src[src.size()-1]=='"');
47 assert(src[1]=='"');
48
49 return unescape_wide_string(std::string(src, 2, src.size()-3));
50 }
51 else
52 {
53 assert(src[0]=='"');
54 assert(src[src.size()-1]=='"');
55
56 std::string char_value=
57 unescape_string(std::string(src, 1, src.size()-2));
58
59 // pad into wide string
60 std::basic_string<unsigned int> value;
61 value.resize(char_value.size());
62 for(std::size_t i=0; i<char_value.size(); i++)
63 value[i]=char_value[i];
64
65 return value;
66 }
67}
68
69exprt convert_string_literal(const std::string &src)
70{
71 // note that 'src' could be a concatenation of string literals,
72 // e.g., something like "asd" "xyz".
73 // GCC allows "asd" L"xyz"!
74
75 std::basic_string<unsigned int> value;
76
77 char wide=0;
78
79 for(std::size_t i=0; i<src.size(); i++)
80 {
81 char ch=src[i];
82
83 // skip whitespace/newline
84 if(ch!='L' && ch!='u' && ch!='U' && ch!='"')
85 continue;
86
87 if(ch=='L')
88 wide=ch;
89 if((ch=='u' || ch=='U') && i+1<src.size() && src[i+1]=='"')
90 wide=ch;
91
92 // find start of sequence
93 std::size_t j=src.find('"', i);
94 CHECK_RETURN(j != std::string::npos);
95
96 // find end of sequence, considering escaping
97 for(++j; j<src.size() && src[j]!='"'; ++j)
98 if(src[j]=='\\') // skip next character
99 ++j;
100
101 INVARIANT(j < src.size(), "non-terminated string constant '" + src + "'");
102
103 std::string tmp_src=std::string(src, i, j-i+1);
104 std::basic_string<unsigned int> tmp_value=
106 value.append(tmp_value);
107 i=j;
108 }
109
110 if(wide!=0)
111 {
112 // add implicit trailing zero
113 value.push_back(0);
114
115 // L is wchar_t, u is char16_t, U is char32_t.
116 typet subtype;
117
118 switch(wide)
119 {
120 case 'L': subtype=wchar_t_type(); break;
121 case 'u': subtype=char16_t_type(); break;
122 case 'U': subtype=char32_t_type(); break;
123 default: assert(false);
124 }
125
126 exprt result=exprt(ID_array);
127 result.set(ID_C_string_constant, true);
128 result.type()=typet(ID_array);
129 result.type().subtype()=subtype;
130 result.type().set(ID_size, from_integer(value.size(), c_index_type()));
131
132 result.operands().resize(value.size());
133 for(std::size_t i=0; i<value.size(); i++)
134 result.operands()[i]=from_integer(value[i], subtype);
135
136 return result;
137 }
138 else
139 {
140 std::string char_value;
141
142 char_value.resize(value.size());
143
144 for(std::size_t i=0; i<value.size(); i++)
145 {
146 // Loss of data here if value[i]>255.
147 // gcc issues a warning in this case.
148 char_value[i]=value[i];
149 }
150
151 return string_constantt(char_value);
152 }
153}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
unsignedbv_typet char32_t_type()
Definition: c_types.cpp:185
bitvector_typet wchar_t_type()
Definition: c_types.cpp:159
bitvector_typet c_index_type()
Definition: c_types.cpp:16
unsignedbv_typet char16_t_type()
Definition: c_types.cpp:175
Base class for all expressions.
Definition: expr.h:54
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
const irept & find(const irep_idt &name) const
Definition: irep.cpp:106
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:420
The type of an expression, extends irept.
Definition: type.h:29
const typet & subtype() const
Definition: type.h:48
std::basic_string< unsigned int > convert_one_string_literal(const std::string &src)
exprt convert_string_literal(const std::string &src)
C/C++ Language Conversion.
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
std::basic_string< unsigned int > unescape_wide_string(const std::string &src)
std::string unescape_string(const std::string &src)
ANSI-C Language Conversion.
std::string utf32_native_endian_to_utf8(const std::basic_string< unsigned int > &s)
Definition: unicode.cpp:136