cprover
Toggle main menu visibility
Loading...
Searching...
No Matches
convert_string_literal.cpp
Go to the documentation of this file.
1
/*******************************************************************\
2
3
Module: C/C++ Language Conversion
4
5
Author: Daniel Kroening, kroening@kroening.com
6
7
\*******************************************************************/
8
11
12
#include "
convert_string_literal.h
"
13
14
#include <
util/arith_tools.h
>
15
#include <
util/c_types.h
>
16
#include <
util/unicode.h
>
17
#include <
util/string_constant.h
>
18
19
#include "
unescape_string.h
"
20
21
std::basic_string<char32_t>
convert_one_string_literal
(
const
std::string &src)
22
{
23
PRECONDITION
(src.size() >= 2);
24
25
if
(src[0]==
'u'
&& src[1]==
'8'
)
26
{
27
PRECONDITION
(src[src.size() - 1] ==
'"'
);
28
PRECONDITION
(src[2] ==
'"'
);
29
30
std::basic_string<char32_t> value =
31
unescape_wide_string
(std::string(src, 3, src.size() - 4));
32
33
// turn into utf-8
34
const
std::string utf8_value =
utf32_native_endian_to_utf8
(value);
35
36
// pad into wide string
37
value.resize(utf8_value.size());
38
for
(std::size_t i=0; i<utf8_value.size(); i++)
39
value[i]=utf8_value[i];
40
41
return
value;
42
}
43
else
if
(src[0]==
'L'
|| src[0]==
'u'
|| src[0]==
'U'
)
44
{
45
PRECONDITION
(src[src.size() - 1] ==
'"'
);
46
PRECONDITION
(src[1] ==
'"'
);
47
48
return
unescape_wide_string
(std::string(src, 2, src.size()-3));
49
}
50
else
51
{
52
PRECONDITION
(src[0] ==
'"'
);
53
PRECONDITION
(src[src.size() - 1] ==
'"'
);
54
55
std::string char_value=
56
unescape_string
(std::string(src, 1, src.size()-2));
57
58
// pad into wide string
59
std::basic_string<char32_t> value;
60
value.resize(char_value.size());
61
for
(std::size_t i=0; i<char_value.size(); i++)
62
value[i]=char_value[i];
63
64
return
value;
65
}
66
}
67
68
/// Build the expression for a string literal, which may be a concatenation
/// of several adjacent literals.
///
/// Each literal found in \p src is converted with
/// convert_one_string_literal and the results are appended in order. If any
/// piece carries an `L`, `u` or `U` prefix, the result is an array
/// expression over the corresponding wide character type with a trailing
/// zero element; the last such prefix seen selects the element type.
/// Otherwise a string_constantt is produced.
/// \param src: source text of the literal(s), prefixes and quotes included
/// \return an ID_array expression flagged as string constant for wide
///   strings, or a string_constantt for narrow strings
exprt convert_string_literal(const std::string &src)
{
  // 'src' may hold several adjacent literals, e.g., "asd" "xyz".
  // GCC even accepts a mix of narrow and wide pieces such as "asd" L"xyz".
  std::basic_string<char32_t> code_units;

  // most recent wide prefix seen ('L', 'u' or 'U'); 0 while none was seen
  char prefix = 0;

  std::size_t pos = 0;
  while(pos < src.size())
  {
    const char current = src[pos];

    // characters that cannot begin a literal (whitespace/newline) are skipped
    const bool starts_literal =
      current == 'L' || current == 'u' || current == 'U' || current == '"';
    if(!starts_literal)
    {
      ++pos;
      continue;
    }

    // 'u' and 'U' only act as wide prefixes when the quote follows directly,
    // which leaves "u8" literals narrow
    const bool quote_is_next = pos + 1 < src.size() && src[pos + 1] == '"';
    if(current == 'L' || ((current == 'u' || current == 'U') && quote_is_next))
      prefix = current;

    // find start of sequence
    const std::size_t open_quote = src.find('"', pos);
    CHECK_RETURN(open_quote != std::string::npos);

    // find end of sequence, considering escaping
    std::size_t close_quote = open_quote + 1;
    while(close_quote < src.size() && src[close_quote] != '"')
    {
      if(src[close_quote] == '\\')
        ++close_quote; // skip the escaped character
      ++close_quote;
    }

    INVARIANT(
      close_quote < src.size(),
      "non-terminated string constant '" + src + "'");

    // convert prefix, quotes and contents of this one literal
    code_units.append(convert_one_string_literal(
      std::string(src, pos, close_quote - pos + 1)));

    pos = close_quote + 1;
  }

  if(prefix == 0)
  {
    // Narrow string: each element is cut down to a char.
    // Loss of data here if an element is >255;
    // gcc issues a warning in this case.
    std::string narrow;
    narrow.reserve(code_units.size());
    for(const char32_t unit : code_units)
      narrow.push_back(static_cast<char>(unit));

    return string_constantt(narrow);
  }

  // add implicit trailing zero
  code_units.push_back(0);

  // L is wchar_t, u is char16_t, U is char32_t.
  typet element_type;

  switch(prefix)
  {
  case 'L':
    element_type = wchar_t_type();
    break;

  case 'u':
    element_type = char16_t_type();
    break;

  case 'U':
    element_type = char32_t_type();
    break;

  default:
    UNREACHABLE;
  }

  exprt result(ID_array);
  result.set(ID_C_string_constant, true);
  result.type() =
    array_typet(element_type, from_integer(code_units.size(), c_index_type()));

  // one constant operand per code unit, trailing zero included
  auto &elements = result.operands();
  elements.reserve(code_units.size());
  for(const char32_t unit : code_units)
    elements.push_back(from_integer(unit, element_type));

  return result;
}
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition
arith_tools.cpp:99
arith_tools.h
char32_t_type
unsignedbv_typet char32_t_type()
Definition
c_types.cpp:167
wchar_t_type
bitvector_typet wchar_t_type()
Definition
c_types.cpp:141
c_index_type
bitvector_typet c_index_type()
Definition
c_types.cpp:16
char16_t_type
unsignedbv_typet char16_t_type()
Definition
c_types.cpp:157
c_types.h
array_typet
Arrays with given size.
Definition
std_types.h:807
exprt
Base class for all expressions.
Definition
expr.h:57
exprt::type
typet & type()
Return the type of the expression.
Definition
expr.h:85
exprt::operands
operandst & operands()
Definition
expr.h:95
irept::set
void set(const irep_idt &name, const irep_idt &value)
Definition
irep.h:412
string_constantt
Definition
string_constant.h:15
typet
The type of an expression, extends irept.
Definition
type.h:29
convert_one_string_literal
std::basic_string< char32_t > convert_one_string_literal(const std::string &src)
Definition
convert_string_literal.cpp:21
convert_string_literal
exprt convert_string_literal(const std::string &src)
Definition
convert_string_literal.cpp:68
convert_string_literal.h
C/C++ Language Conversion.
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition
invariant.h:495
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition
invariant.h:525
PRECONDITION
#define PRECONDITION(CONDITION)
Definition
invariant.h:463
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition
invariant.h:423
string_constant.h
unescape_wide_string
std::basic_string< char32_t > unescape_wide_string(const std::string &src)
Definition
unescape_string.cpp:155
unescape_string
std::string unescape_string(const std::string &src)
Definition
unescape_string.cpp:150
unescape_string.h
ANSI-C Language Conversion.
utf32_native_endian_to_utf8
std::string utf32_native_endian_to_utf8(const std::basic_string< char32_t > &s)
Definition
unicode.cpp:136
unicode.h
ansi-c
literals
convert_string_literal.cpp
Generated by
1.17.0