cprover
Toggle main menu visibility
Loading...
Searching...
No Matches
unescape_string.cpp
Go to the documentation of this file.
1
/*******************************************************************\

Module: ANSI-C Language Conversion

Author: Daniel Kroening, kroening@kroening.com

\*******************************************************************/
8
11
12
#include "
unescape_string.h
"
13
14
#include <cctype>
15
16
#include <
util/invariant.h
>
17
#include <
util/unicode.h
>
18
19
static
void
append_universal_char
(
20
unsigned
int
value,
21
std::string &dest)
22
{
23
std::basic_string<char32_t> value_str(1, value);
24
25
// turn into utf-8
26
const
std::string utf8_value =
utf32_native_endian_to_utf8
(value_str);
27
28
dest.append(utf8_value);
29
}
30
31
/// Append a single universal character to a char32_t string.
/// No re-encoding is needed here: the value is stored as one element.
/// \param value: the code point to append
/// \param dest: the string that receives the character
static void
append_universal_char(unsigned int value, std::basic_string<char32_t> &dest)
{
  dest += static_cast<char32_t>(value);
}
36
37
template
<
typename
T>
38
std::basic_string<T>
unescape_string_templ
(
const
std::string &src)
39
{
40
std::basic_string<T> dest;
41
42
dest.reserve(src.size());
// about that long, but may be shorter
43
44
for
(
unsigned
i=0; i<src.size(); i++)
45
{
46
T ch=(
unsigned
char)src[i];
47
48
if
(ch==
'\\'
)
// escape?
49
{
50
// go to next character
51
i++;
52
INVARIANT
(i < src.size(),
"backslash can't be last character"
);
53
54
ch=(
unsigned
char)src[i];
55
switch
(ch)
56
{
57
case
'\\'
: dest.push_back(ch);
break
;
58
case
'n'
: dest.push_back(
'\n'
);
break
;
/* NL (0x0a) */
59
case
't'
: dest.push_back(
'\t'
);
break
;
/* HT (0x09) */
60
case
'v'
: dest.push_back(
'\v'
);
break
;
/* VT (0x0b) */
61
case
'b'
: dest.push_back(
'\b'
);
break
;
/* BS (0x08) */
62
case
'r'
: dest.push_back(
'\r'
);
break
;
/* CR (0x0d) */
63
case
'f'
: dest.push_back(
'\f'
);
break
;
/* FF (0x0c) */
64
case
'a'
: dest.push_back(
'\a'
);
break
;
/* BEL (0x07) */
65
case
'"'
: dest.push_back(
'"'
);
break
;
66
case
'\''
: dest.push_back(
'\''
);
break
;
67
68
case
'u'
:
// universal character
69
case
'U'
:
// universal character
70
i++;
71
72
{
73
std::string hex;
74
75
const
unsigned
digits = (ch ==
'u'
) ? 4u : 8u;
76
hex.reserve(digits);
77
78
for
(
unsigned
count=digits;
79
count!=0 && i<src.size();
80
i++, count--)
81
hex+=src[i];
82
83
// go back
84
i--;
85
86
unsigned
int
result=
hex_to_unsigned
(hex.c_str(), hex.size());
87
88
append_universal_char
(result, dest);
89
}
90
91
break
;
92
93
case
'x'
:
// hex
94
i++;
95
96
{
97
std::string hex;
98
99
while
(i<src.size() && isxdigit(src[i]))
100
{
101
hex+=src[i];
102
i++;
103
}
104
105
// go back
106
i--;
107
108
ch=
hex_to_unsigned
(hex.c_str(), hex.size());
109
}
110
111
// if T isn't sufficiently wide to hold unsigned values
112
// the following might truncate; but then
113
// universal characters in non-wide strings don't
114
// really work; gcc just issues a warning.
115
dest.push_back(ch);
116
break
;
117
118
default
:
119
if
(isdigit(ch))
// octal
120
{
121
std::string octal;
122
123
while
(i<src.size() && isdigit(src[i]))
124
{
125
octal+=src[i];
126
i++;
127
}
128
129
// go back
130
i--;
131
132
ch=
octal_to_unsigned
(octal.c_str(), octal.size());
133
dest.push_back(ch);
134
}
135
else
136
{
137
// Unknown escape sequence.
138
// Both GCC and CL turn \% into %.
139
dest.push_back(ch);
140
}
141
}
142
}
143
else
144
dest.push_back(ch);
145
}
146
147
return
dest;
148
}
149
150
std::string
unescape_string
(
const
std::string &src)
151
{
152
return
unescape_string_templ<char>
(src);
153
}
154
155
std::basic_string<char32_t>
unescape_wide_string
(
const
std::string &src)
156
{
157
return
unescape_string_templ<char32_t>
(src);
158
}
159
160
/// Convert a run of hexadecimal digits to its numeric value.
/// Every character read shifts the result left by four bits; a character
/// that is not a hexadecimal digit contributes zero bits.
/// \param hex: pointer to the first character; reading stops early at a
///   NUL byte
/// \param digits: maximum number of characters to read
/// \return the accumulated value
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
{
  unsigned value = 0;

  for(; digits != 0; digits--, hex++)
  {
    // The <cctype> functions require an argument representable as
    // unsigned char (or EOF); a plain char may be negative.
    const unsigned char ch = static_cast<unsigned char>(*hex);

    if(ch == 0)
      break;

    value <<= 4;

    if(isdigit(ch))
      value |= ch - '0';
    else if(isxdigit(ch))
      value |= 10 + tolower(ch) - 'a';
  }

  return value;
}
181
182
/// Convert a run of octal digits to its numeric value.
/// Every character read shifts the result left by three bits; a character
/// outside '0'..'7' contributes zero bits, in the same way that
/// hex_to_unsigned treats characters that are not hexadecimal digits.
/// \param octal: pointer to the first character; reading stops early at a
///   NUL byte
/// \param digits: maximum number of characters to read
/// \return the accumulated value
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
{
  unsigned value = 0;

  for(; digits != 0; digits--, octal++)
  {
    const char ch = *octal;

    if(ch == 0)
      break;

    value <<= 3;

    // Only '0'..'7' fit into the three-bit slot; '8' and '9' would spill
    // into the bits of the previous digit. A plain range comparison also
    // avoids calling a <cctype> classifier with a negative char.
    if(ch >= '0' && ch <= '7')
      value |= static_cast<unsigned>(ch - '0');
  }

  return value;
}
invariant.h
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition
invariant.h:423
unescape_string_templ
std::basic_string< T > unescape_string_templ(const std::string &src)
Definition
unescape_string.cpp:38
octal_to_unsigned
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
Definition
unescape_string.cpp:182
unescape_wide_string
std::basic_string< char32_t > unescape_wide_string(const std::string &src)
Definition
unescape_string.cpp:155
unescape_string
std::string unescape_string(const std::string &src)
Definition
unescape_string.cpp:150
append_universal_char
static void append_universal_char(unsigned int value, std::string &dest)
Definition
unescape_string.cpp:19
hex_to_unsigned
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
Definition
unescape_string.cpp:160
unescape_string.h
ANSI-C Language Conversion.
utf32_native_endian_to_utf8
std::string utf32_native_endian_to_utf8(const std::basic_string< char32_t > &s)
Definition
unicode.cpp:136
unicode.h
ansi-c
literals
unescape_string.cpp
Generated by
1.17.0