Ada 3.4.3
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_aggregator-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_AGGREGATOR_INL_H
6#define ADA_URL_AGGREGATOR_INL_H
7
9#include "ada/helpers.h"
10#include "ada/unicode-inl.h"
11#include "ada/url_aggregator.h"
12#include "ada/url_components.h"
13#include "ada/scheme.h"
14#include "ada/log.h"
15
16#include <charconv>
17#include <ostream>
18#include <string_view>
19
20namespace ada {
21
22inline void url_aggregator::update_base_authority(
23 std::string_view base_buffer, const ada::url_components &base) {
24 std::string_view input = base_buffer.substr(
25 base.protocol_end, base.host_start - base.protocol_end);
26 ada_log("url_aggregator::update_base_authority ", input);
27
28 bool input_starts_with_dash = input.starts_with("//");
29 uint32_t diff = components.host_start - components.protocol_end;
30
31 buffer.erase(components.protocol_end,
32 components.host_start - components.protocol_end);
33 components.username_end = components.protocol_end;
34
35 if (input_starts_with_dash) {
36 input.remove_prefix(2);
37 diff += 2; // add "//"
38 buffer.insert(components.protocol_end, "//");
39 components.username_end += 2;
40 }
41
42 size_t password_delimiter = input.find(':');
43
44 // Check if input contains both username and password by checking the
45 // delimiter: ":" A typical input that contains authority would be "user:pass"
46 if (password_delimiter != std::string_view::npos) {
47 // Insert both username and password
48 std::string_view username = input.substr(0, password_delimiter);
49 std::string_view password = input.substr(password_delimiter + 1);
50
51 buffer.insert(components.protocol_end + diff, username);
52 diff += uint32_t(username.size());
53 buffer.insert(components.protocol_end + diff, ":");
54 components.username_end = components.protocol_end + diff;
55 buffer.insert(components.protocol_end + diff + 1, password);
56 diff += uint32_t(password.size()) + 1;
57 } else if (!input.empty()) {
58 // Insert only username
59 buffer.insert(components.protocol_end + diff, input);
60 components.username_end =
61 components.protocol_end + diff + uint32_t(input.size());
62 diff += uint32_t(input.size());
63 }
64
65 components.host_start += diff;
66
67 if (buffer.size() > base.host_start && buffer[base.host_start] != '@') {
68 buffer.insert(components.host_start, "@");
69 diff++;
70 }
71 components.host_end += diff;
72 components.pathname_start += diff;
73 if (components.search_start != url_components::omitted) {
74 components.search_start += diff;
75 }
76 if (components.hash_start != url_components::omitted) {
77 components.hash_start += diff;
78 }
79}
80
// Replaces the fragment of the URL with `input`, percent-encoding it when
// needed. Any existing fragment is dropped by truncating the buffer at
// hash_start; a fresh '#' is then appended and hash_start points at it.
// NOTE(review): the argument line of the percent_encode<true>( call below is
// absent from this listing - restore it from the original file before building.
81inline void url_aggregator::update_unencoded_base_hash(std::string_view input) {
82 ada_log("url_aggregator::update_unencoded_base_hash ", input, " [",
83 input.size(), " bytes], buffer is '", buffer, "' [", buffer.size(),
84 " bytes] components.hash_start = ", components.hash_start);
// The input must not alias the buffer that is about to be modified.
86 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
87 if (components.hash_start != url_components::omitted) {
// Drop the previous fragment, including its '#'.
88 buffer.resize(components.hash_start);
89 }
90 components.hash_start = uint32_t(buffer.size());
91 buffer += "#";
92 bool encoding_required = unicode::percent_encode<true>(
94 // When encoding_required is false, then buffer is left unchanged, and percent
95 // encoding was not deemed required.
96 if (!encoding_required) {
// Nothing was written by the encoder, so append the input verbatim.
97 buffer.append(input);
98 }
99 ada_log("url_aggregator::update_unencoded_base_hash final buffer is '",
100 buffer, "' [", buffer.size(), " bytes]");
102}
103
104ada_really_inline uint32_t url_aggregator::replace_and_resize(
105 uint32_t start, uint32_t end, std::string_view input) {
106 uint32_t current_length = end - start;
107 uint32_t input_size = uint32_t(input.size());
108 uint32_t new_difference = input_size - current_length;
109
110 if (current_length == 0) {
111 buffer.insert(start, input);
112 } else if (input_size == current_length) {
113 buffer.replace(start, input_size, input);
114 } else if (input_size < current_length) {
115 buffer.erase(start, current_length - input_size);
116 buffer.replace(start, input_size, input);
117 } else {
118 buffer.replace(start, current_length, input.substr(0, current_length));
119 buffer.insert(start + current_length, input.substr(current_length));
120 }
121
122 return new_difference;
123}
124
125inline void url_aggregator::update_base_hostname(const std::string_view input) {
126 ada_log("url_aggregator::update_base_hostname ", input, " [", input.size(),
127 " bytes], buffer is '", buffer, "' [", buffer.size(), " bytes]");
129 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
130
131 // This next line is required for when parsing a URL like `foo://`
132 add_authority_slashes_if_needed();
133
134 bool has_credentials = components.protocol_end + 2 < components.host_start;
135 uint32_t new_difference =
136 replace_and_resize(components.host_start, components.host_end, input);
137
138 if (has_credentials) {
139 buffer.insert(components.host_start, "@");
140 new_difference++;
141 }
142 components.host_end += new_difference;
143 components.pathname_start += new_difference;
144 if (components.search_start != url_components::omitted) {
145 components.search_start += new_difference;
146 }
147 if (components.hash_start != url_components::omitted) {
148 components.hash_start += new_difference;
149 }
151}
152
// Returns the number of bytes occupied by the pathname: the distance from
// pathname_start to the start of the query, else to the start of the
// fragment, else to the end of the buffer.
// NOTE(review): the declarator line (function name and parameter list) is
// absent from this listing; the log message names get_pathname_length.
153[[nodiscard]] ada_really_inline uint32_t
155 ada_log("url_aggregator::get_pathname_length");
// Default: the pathname runs to the end of the buffer.
156 uint32_t ending_index = uint32_t(buffer.size());
157 if (components.search_start != url_components::omitted) {
158 ending_index = components.search_start;
159 } else if (components.hash_start != url_components::omitted) {
160 ending_index = components.hash_start;
161 }
162 return ending_index - components.pathname_start;
163}
164
165[[nodiscard]] ada_really_inline bool url_aggregator::is_at_path()
166 const noexcept {
167 return buffer.size() == components.pathname_start;
168}
169
170inline void url_aggregator::update_base_search(std::string_view input) {
171 ada_log("url_aggregator::update_base_search ", input);
173 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
174 if (input.empty()) {
175 clear_search();
176 return;
177 }
178
179 if (input[0] == '?') {
180 input.remove_prefix(1);
181 }
182
183 if (components.hash_start == url_components::omitted) {
184 if (components.search_start == url_components::omitted) {
185 components.search_start = uint32_t(buffer.size());
186 buffer += "?";
187 } else {
188 buffer.resize(components.search_start + 1);
189 }
190
191 buffer.append(input);
192 } else {
193 if (components.search_start == url_components::omitted) {
194 components.search_start = components.hash_start;
195 } else {
196 buffer.erase(components.search_start,
197 components.hash_start - components.search_start);
198 components.hash_start = components.search_start;
199 }
200
201 buffer.insert(components.search_start, "?");
202 buffer.insert(components.search_start + 1, input);
203 components.hash_start += uint32_t(input.size() + 1); // Do not forget `?`
204 }
205
207}
208
209inline void url_aggregator::update_base_search(
210 std::string_view input, const uint8_t query_percent_encode_set[]) {
211 ada_log("url_aggregator::update_base_search ", input,
212 " with encoding parameter ", to_string(), "\n", to_diagram());
214 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
215
216 if (components.hash_start == url_components::omitted) {
217 if (components.search_start == url_components::omitted) {
218 components.search_start = uint32_t(buffer.size());
219 buffer += "?";
220 } else {
221 buffer.resize(components.search_start + 1);
222 }
223
224 bool encoding_required =
225 unicode::percent_encode<true>(input, query_percent_encode_set, buffer);
226 // When encoding_required is false, then buffer is left unchanged, and
227 // percent encoding was not deemed required.
228 if (!encoding_required) {
229 buffer.append(input);
230 }
231 } else {
232 if (components.search_start == url_components::omitted) {
233 components.search_start = components.hash_start;
234 } else {
235 buffer.erase(components.search_start,
236 components.hash_start - components.search_start);
237 components.hash_start = components.search_start;
238 }
239
240 buffer.insert(components.search_start, "?");
241 size_t idx =
242 ada::unicode::percent_encode_index(input, query_percent_encode_set);
243 if (idx == input.size()) {
244 buffer.insert(components.search_start + 1, input);
245 components.hash_start += uint32_t(input.size() + 1); // Do not forget `?`
246 } else {
247 buffer.insert(components.search_start + 1, input, 0, idx);
248 input.remove_prefix(idx);
249 // We only create a temporary string if we need percent encoding and
250 // we attempt to create as small a temporary string as we can.
251 std::string encoded =
252 ada::unicode::percent_encode(input, query_percent_encode_set);
253 buffer.insert(components.search_start + idx + 1, encoded);
254 components.hash_start +=
255 uint32_t(encoded.size() + idx + 1); // Do not forget `?`
256 }
257 }
258
260}
261
262inline void url_aggregator::update_base_pathname(const std::string_view input) {
263 ada_log("url_aggregator::update_base_pathname '", input, "' [", input.size(),
264 " bytes] \n", to_diagram());
265 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
267
268 const bool begins_with_dashdash = input.starts_with("//");
269 if (!begins_with_dashdash && has_dash_dot()) {
270 // We must delete the ./
271 delete_dash_dot();
272 }
273
274 if (begins_with_dashdash && !has_opaque_path && !has_authority() &&
275 !has_dash_dot()) {
276 // If url's host is null, url does not have an opaque path, url's path's
277 // size is greater than 1, then append U+002F (/) followed by U+002E (.) to
278 // output.
279 buffer.insert(components.pathname_start, "/.");
280 components.pathname_start += 2;
281 if (components.search_start != url_components::omitted) {
282 components.search_start += 2;
283 }
284 if (components.hash_start != url_components::omitted) {
285 components.hash_start += 2;
286 }
287 }
288
289 uint32_t difference = replace_and_resize(
290 components.pathname_start,
291 components.pathname_start + get_pathname_length(), input);
292 if (components.search_start != url_components::omitted) {
293 components.search_start += difference;
294 }
295 if (components.hash_start != url_components::omitted) {
296 components.hash_start += difference;
297 }
299}
300
// Appends `input` to the end of the current pathname, i.e. right before the
// query, else right before the fragment, else at the end of the buffer, and
// shifts the query/fragment offsets by input.size().
// NOTE(review): the line that opens the development-check assertion (between
// the path_after declaration and its arguments) is absent from this listing.
301inline void url_aggregator::append_base_pathname(const std::string_view input) {
302 ada_log("url_aggregator::append_base_pathname ", input, " ", to_string(),
303 "\n", to_diagram());
305 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
306#if ADA_DEVELOPMENT_CHECKS
307 // computing the expected pathname.
308 std::string path_expected(get_pathname());
309 path_expected.append(input);
310#endif // ADA_DEVELOPMENT_CHECKS
// Locate the end of the pathname.
311 uint32_t ending_index = uint32_t(buffer.size());
312 if (components.search_start != url_components::omitted) {
313 ending_index = components.search_start;
314 } else if (components.hash_start != url_components::omitted) {
315 ending_index = components.hash_start;
316 }
317 buffer.insert(ending_index, input);
318
319 if (components.search_start != url_components::omitted) {
320 components.search_start += uint32_t(input.size());
321 }
322 if (components.hash_start != url_components::omitted) {
323 components.hash_start += uint32_t(input.size());
324 }
325#if ADA_DEVELOPMENT_CHECKS
// The pathname read back must equal the old pathname followed by input.
326 std::string path_after = std::string(get_pathname());
328 path_expected, path_after,
329 "append_base_pathname problem after inserting " + std::string(input));
330#endif // ADA_DEVELOPMENT_CHECKS
332}
333
334inline void url_aggregator::update_base_username(const std::string_view input) {
335 ada_log("url_aggregator::update_base_username '", input, "' ", to_string(),
336 "\n", to_diagram());
338 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
339
340 add_authority_slashes_if_needed();
341
343 bool host_starts_with_at = buffer.size() > components.host_start &&
344 buffer[components.host_start] == '@';
345 uint32_t diff = replace_and_resize(components.protocol_end + 2,
346 components.username_end, input);
347
348 components.username_end += diff;
349 components.host_start += diff;
350
351 if (!input.empty() && !host_starts_with_at) {
352 buffer.insert(components.host_start, "@");
353 diff++;
354 } else if (input.empty() && host_starts_with_at && !has_password) {
355 // Input is empty, there is no password, and we need to remove "@" from
356 // hostname
357 buffer.erase(components.host_start, 1);
358 diff--;
359 }
360
361 components.host_end += diff;
362 components.pathname_start += diff;
363 if (components.search_start != url_components::omitted) {
364 components.search_start += diff;
365 }
366 if (components.hash_start != url_components::omitted) {
367 components.hash_start += diff;
368 }
370}
371
// Appends `input` to the current username (inserted at username_end) and
// shifts all later offsets. An "@" is inserted before the host when the host
// is non-empty and does not already start with one. Empty input is a no-op
// apart from ensuring the "//" authority prefix exists.
// NOTE(review): the line that opens the development-check assertion (between
// the username_after declaration and its arguments) is absent from this
// listing.
372inline void url_aggregator::append_base_username(const std::string_view input) {
373 ada_log("url_aggregator::append_base_username ", input);
375 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
376#if ADA_DEVELOPMENT_CHECKS
377 // computing the expected username.
378 std::string username_expected(get_username());
379 username_expected.append(input);
380#endif // ADA_DEVELOPMENT_CHECKS
381 add_authority_slashes_if_needed();
382
383 // If input is empty, do nothing.
384 if (input.empty()) {
385 return;
386 }
387
388 uint32_t difference = uint32_t(input.size());
389 buffer.insert(components.username_end, input);
390 components.username_end += difference;
391 components.host_start += difference;
392
// Add the "@" separator only when a host follows and it is not there yet.
393 if (buffer[components.host_start] != '@' &&
394 components.host_start != components.host_end) {
395 buffer.insert(components.host_start, "@");
396 difference++;
397 }
398
399 components.host_end += difference;
400 components.pathname_start += difference;
401 if (components.search_start != url_components::omitted) {
402 components.search_start += difference;
403 }
404 if (components.hash_start != url_components::omitted) {
405 components.hash_start += difference;
406 }
407#if ADA_DEVELOPMENT_CHECKS
// The username read back must equal the old username followed by input.
408 std::string username_after(get_username());
410 username_expected, username_after,
411 "append_base_username problem after inserting " + std::string(input));
412#endif // ADA_DEVELOPMENT_CHECKS
414}
415
416constexpr void url_aggregator::clear_password() {
417 ada_log("url_aggregator::clear_password ", to_string());
419 if (!has_password()) {
420 return;
421 }
422
423 uint32_t diff = components.host_start - components.username_end;
424 buffer.erase(components.username_end, diff);
425 components.host_start -= diff;
426 components.host_end -= diff;
427 components.pathname_start -= diff;
428 if (components.search_start != url_components::omitted) {
429 components.search_start -= diff;
430 }
431 if (components.hash_start != url_components::omitted) {
432 components.hash_start -= diff;
433 }
434}
435
436inline void url_aggregator::update_base_password(const std::string_view input) {
437 ada_log("url_aggregator::update_base_password ", input);
439 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
440
441 add_authority_slashes_if_needed();
442
443 // TODO: Optimization opportunity. Merge the following removal functions.
444 if (input.empty()) {
445 clear_password();
446
447 // Remove username too, if it is empty.
448 if (!has_non_empty_username()) {
449 update_base_username("");
450 }
451
452 return;
453 }
454
455 bool password_exists = has_password();
456 uint32_t difference = uint32_t(input.size());
457
458 if (password_exists) {
459 uint32_t current_length =
460 components.host_start - components.username_end - 1;
461 buffer.erase(components.username_end + 1, current_length);
462 difference -= current_length;
463 } else {
464 buffer.insert(components.username_end, ":");
465 difference++;
466 }
467
468 buffer.insert(components.username_end + 1, input);
469 components.host_start += difference;
470
471 // The following line is required to add "@" to hostname. When updating
472 // password if hostname does not start with "@", it is "update_base_password"s
473 // responsibility to set it.
474 if (buffer[components.host_start] != '@') {
475 buffer.insert(components.host_start, "@");
476 difference++;
477 }
478
479 components.host_end += difference;
480 components.pathname_start += difference;
481 if (components.search_start != url_components::omitted) {
482 components.search_start += difference;
483 }
484 if (components.hash_start != url_components::omitted) {
485 components.hash_start += difference;
486 }
488}
489
// Appends `input` to the current password. When a password already exists,
// the text is inserted at host_start (just after the old password); otherwise
// ":" followed by the text is inserted at username_end. An "@" is added
// before the host if it is missing, and all later offsets are shifted.
// NOTE(review): the line that opens the development-check assertion (between
// the password_after declaration and its arguments) is absent from this
// listing.
490inline void url_aggregator::append_base_password(const std::string_view input) {
491 ada_log("url_aggregator::append_base_password ", input, " ", to_string(),
492 "\n", to_diagram());
494 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
495#if ADA_DEVELOPMENT_CHECKS
496 // computing the expected password.
497 std::string password_expected = std::string(get_password());
498 password_expected.append(input);
499#endif // ADA_DEVELOPMENT_CHECKS
500 add_authority_slashes_if_needed();
501
502 // If input is empty, do nothing.
503 if (input.empty()) {
504 return;
505 }
506
507 uint32_t difference = uint32_t(input.size());
508 if (has_password()) {
509 buffer.insert(components.host_start, input);
510 } else {
511 difference++; // Increment for ":"
512 buffer.insert(components.username_end, ":");
513 buffer.insert(components.username_end + 1, input);
514 }
515 components.host_start += difference;
516
517 // The following line is required to add "@" to hostname. When updating
518 // password if hostname does not start with "@", it is "append_base_password"s
519 // responsibility to set it.
520 if (buffer[components.host_start] != '@') {
521 buffer.insert(components.host_start, "@");
522 difference++;
523 }
524
525 components.host_end += difference;
526 components.pathname_start += difference;
527 if (components.search_start != url_components::omitted) {
528 components.search_start += difference;
529 }
530 if (components.hash_start != url_components::omitted) {
531 components.hash_start += difference;
532 }
533#if ADA_DEVELOPMENT_CHECKS
// The password read back must equal the old password followed by input.
534 std::string password_after(get_password());
536 password_expected, password_after,
537 "append_base_password problem after inserting " + std::string(input));
538#endif // ADA_DEVELOPMENT_CHECKS
540}
541
542inline void url_aggregator::update_base_port(uint32_t input) {
543 ada_log("url_aggregator::update_base_port");
545 if (input == url_components::omitted) {
546 clear_port();
547 return;
548 }
549 // calling std::to_string(input.value()) is unfortunate given that the port
550 // value is probably already available as a string.
551 std::string value = helpers::concat(":", std::to_string(input));
552 uint32_t difference = uint32_t(value.size());
553
554 if (components.port != url_components::omitted) {
555 difference -= components.pathname_start - components.host_end;
556 buffer.erase(components.host_end,
557 components.pathname_start - components.host_end);
558 }
559
560 buffer.insert(components.host_end, value);
561 components.pathname_start += difference;
562 if (components.search_start != url_components::omitted) {
563 components.search_start += difference;
564 }
565 if (components.hash_start != url_components::omitted) {
566 components.hash_start += difference;
567 }
568 components.port = input;
570}
571
// Removes the port: erases the text between host_end and pathname_start,
// moves the later offsets back by that length and marks the port as omitted.
// Does nothing when no port is set.
// NOTE(review): the signature line of this function is absent from this
// listing; the log message names clear_port.
573 ada_log("url_aggregator::clear_port");
575 if (components.port == url_components::omitted) {
576 return;
577 }
// Length of the port text stored between host_end and pathname_start.
578 uint32_t length = components.pathname_start - components.host_end;
579 buffer.erase(components.host_end, length);
580 components.pathname_start -= length;
581 if (components.search_start != url_components::omitted) {
582 components.search_start -= length;
583 }
584 if (components.hash_start != url_components::omitted) {
585 components.hash_start -= length;
586 }
587 components.port = url_components::omitted;
589}
590
591[[nodiscard]] inline uint32_t url_aggregator::retrieve_base_port() const {
592 ada_log("url_aggregator::retrieve_base_port");
593 return components.port;
594}
595
// Removes the query, including its '?'. Without a fragment the buffer is
// truncated at search_start; with a fragment the query text is erased and
// hash_start takes over the old search_start. search_start is then marked as
// omitted. Does nothing when no query is present.
// NOTE(review): the signature line and the line opening the development-check
// assertion are absent from this listing; the log message names clear_search.
597 ada_log("url_aggregator::clear_search");
599 if (components.search_start == url_components::omitted) {
600 return;
601 }
602
603 if (components.hash_start == url_components::omitted) {
604 buffer.resize(components.search_start);
605 } else {
606 buffer.erase(components.search_start,
607 components.hash_start - components.search_start);
608 components.hash_start = components.search_start;
609 }
610
611 components.search_start = url_components::omitted;
612
613#if ADA_DEVELOPMENT_CHECKS
615 "search should have been cleared on buffer=" + buffer +
616 " with " + components.to_string() + "\n" + to_diagram());
617#endif
619}
620
// Removes the fragment, including its '#', by truncating the buffer at
// hash_start and marking hash_start as omitted. Does nothing when no fragment
// is present.
// NOTE(review): the signature line and the line opening the development-check
// assertion are absent from this listing; the log message names clear_hash.
622 ada_log("url_aggregator::clear_hash");
624 if (components.hash_start == url_components::omitted) {
625 return;
626 }
627 buffer.resize(components.hash_start);
628 components.hash_start = url_components::omitted;
629
630#if ADA_DEVELOPMENT_CHECKS
632 "hash should have been cleared on buffer=" + buffer +
633 " with " + components.to_string() + "\n" + to_diagram());
634#endif
636}
637
// Erases the pathname from the buffer and moves the query/fragment offsets
// back accordingly. When a "/." pair sits between host_end and
// pathname_start, it is erased as well and pathname_start moves back by two.
// NOTE(review): the line opening the development-check assertion is absent
// from this listing.
638constexpr void url_aggregator::clear_pathname() {
639 ada_log("url_aggregator::clear_pathname");
// The pathname ends at the query, else at the fragment, else at the end of
// the buffer.
641 uint32_t ending_index = uint32_t(buffer.size());
642 if (components.search_start != url_components::omitted) {
643 ending_index = components.search_start;
644 } else if (components.hash_start != url_components::omitted) {
645 ending_index = components.hash_start;
646 }
647 uint32_t pathname_length = ending_index - components.pathname_start;
648 buffer.erase(components.pathname_start, pathname_length);
649 uint32_t difference = pathname_length;
// Also drop a "/." pair located right after the host.
650 if (components.pathname_start == components.host_end + 2 &&
651 buffer[components.host_end] == '/' &&
652 buffer[components.host_end + 1] == '.') {
653 components.pathname_start -= 2;
654 buffer.erase(components.host_end, 2);
655 difference += 2;
656 }
657 if (components.search_start != url_components::omitted) {
658 components.search_start -= difference;
659 }
660 if (components.hash_start != url_components::omitted) {
661 components.hash_start -= difference;
662 }
663 ada_log("url_aggregator::clear_pathname completed, running checks...");
664#if ADA_DEVELOPMENT_CHECKS
666 "pathname should have been cleared on buffer=" + buffer +
667 " with " + components.to_string() + "\n" + to_diagram());
668#endif
670 ada_log("url_aggregator::clear_pathname completed, running checks... ok");
671}
672
// Erases the host text from the buffer while keeping the authority ("//")
// and a leading "@" if the host span starts with one. host_end is set to the
// position where the host text began and the later offsets move back by the
// erased length. Does nothing when the URL has no authority.
// NOTE(review): the lines opening the two equality assertions near the end
// are absent from this listing.
673constexpr void url_aggregator::clear_hostname() {
674 ada_log("url_aggregator::clear_hostname");
676 if (!has_authority()) {
677 return;
678 }
679 ADA_ASSERT_TRUE(has_authority());
680
681 uint32_t hostname_length = components.host_end - components.host_start;
682 uint32_t start = components.host_start;
683
684 // If hostname starts with "@", we should not remove that character.
685 if (hostname_length > 0 && buffer[start] == '@') {
686 start++;
687 hostname_length--;
688 }
689 buffer.erase(start, hostname_length);
690 components.host_end = start;
691 components.pathname_start -= hostname_length;
692 if (components.search_start != url_components::omitted) {
693 components.search_start -= hostname_length;
694 }
695 if (components.hash_start != url_components::omitted) {
696 components.hash_start -= hostname_length;
697 }
698#if ADA_DEVELOPMENT_CHECKS
700 "hostname should have been cleared on buffer=" + buffer +
701 " with " + components.to_string() + "\n" + to_diagram());
702#endif
// The authority itself must survive the removal of the host text.
703 ADA_ASSERT_TRUE(has_authority());
705 "hostname should have been cleared on buffer=" + buffer +
706 " with " + components.to_string() + "\n" + to_diagram());
708}
709
710[[nodiscard]] constexpr bool url_aggregator::has_hash() const noexcept {
711 ada_log("url_aggregator::has_hash");
712 return components.hash_start != url_components::omitted;
713}
714
715[[nodiscard]] constexpr bool url_aggregator::has_search() const noexcept {
716 ada_log("url_aggregator::has_search");
717 return components.search_start != url_components::omitted;
718}
719
720constexpr bool url_aggregator::has_credentials() const noexcept {
721 ada_log("url_aggregator::has_credentials");
723}
724
725constexpr bool url_aggregator::cannot_have_credentials_or_port() const {
726 ada_log("url_aggregator::cannot_have_credentials_or_port");
727 return type == ada::scheme::type::FILE ||
728 components.host_start == components.host_end;
729}
730
// Returns a const reference to the component offsets of this URL.
// NOTE(review): the declarator line (function name and qualifiers) is absent
// from this listing.
731[[nodiscard]] ada_really_inline const ada::url_components &
733 return components;
734}
735
736[[nodiscard]] constexpr bool ada::url_aggregator::has_authority()
737 const noexcept {
738 ada_log("url_aggregator::has_authority");
739 // Performance: instead of doing this potentially expensive check, we could
740 // have a boolean in the struct.
741 return components.protocol_end + 2 <= components.host_start &&
742 helpers::substring(buffer, components.protocol_end,
743 components.protocol_end + 2) == "//";
744}
745
746inline void ada::url_aggregator::add_authority_slashes_if_needed() {
747 ada_log("url_aggregator::add_authority_slashes_if_needed");
748 ADA_ASSERT_TRUE(validate());
749 // Protocol setter will insert `http:` to the URL. It is up to hostname setter
750 // to insert
751 // `//` initially to the buffer, since it depends on the hostname existence.
752 if (has_authority()) {
753 return;
754 }
755 // Performance: the common case is components.protocol_end == buffer.size()
756 // Optimization opportunity: in many cases, the "//" is part of the input and
757 // the insert could be fused with another insert.
758 buffer.insert(components.protocol_end, "//");
759 components.username_end += 2;
760 components.host_start += 2;
761 components.host_end += 2;
762 components.pathname_start += 2;
763 if (components.search_start != url_components::omitted) {
764 components.search_start += 2;
765 }
766 if (components.hash_start != url_components::omitted) {
767 components.hash_start += 2;
768 }
769 ADA_ASSERT_TRUE(validate());
770}
771
772constexpr void ada::url_aggregator::reserve(uint32_t capacity) {
773 buffer.reserve(capacity);
774}
775
776constexpr bool url_aggregator::has_non_empty_username() const noexcept {
777 ada_log("url_aggregator::has_non_empty_username");
778 return components.protocol_end + 2 < components.username_end;
779}
780
781constexpr bool url_aggregator::has_non_empty_password() const noexcept {
782 ada_log("url_aggregator::has_non_empty_password");
783 return components.host_start > components.username_end;
784}
785
786constexpr bool url_aggregator::has_password() const noexcept {
787 ada_log("url_aggregator::has_password");
788 // This function does not care about the length of the password
789 return components.host_start > components.username_end &&
790 buffer[components.username_end] == ':';
791}
792
793constexpr bool url_aggregator::has_empty_hostname() const noexcept {
794 if (!has_hostname()) {
795 return false;
796 }
797 if (components.host_start == components.host_end) {
798 return true;
799 }
800 if (components.host_end > components.host_start + 1) {
801 return false;
802 }
803 return components.username_end != components.host_start;
804}
805
806constexpr bool url_aggregator::has_hostname() const noexcept {
807 return has_authority();
808}
809
810constexpr bool url_aggregator::has_port() const noexcept {
811 ada_log("url_aggregator::has_port");
812 // A URL cannot have a username/password/port if its host is null or the empty
813 // string, or its scheme is "file".
814 return has_hostname() && components.pathname_start != components.host_end;
815}
816
817[[nodiscard]] constexpr bool url_aggregator::has_dash_dot() const noexcept {
818 // If url's host is null, url does not have an opaque path, url's path's size
819 // is greater than 1, and url's path[0] is the empty string, then append
820 // U+002F (/) followed by U+002E (.) to output.
821 ada_log("url_aggregator::has_dash_dot");
822#if ADA_DEVELOPMENT_CHECKS
823 // If pathname_start and host_end are exactly two characters apart, then we
824 // either have a one-digit port such as http://test.com:5?param=1 or else we
825 // have a /.: sequence such as "non-spec:/.//". We test that this is the case.
826 if (components.pathname_start == components.host_end + 2) {
827 ADA_ASSERT_TRUE((buffer[components.host_end] == '/' &&
828 buffer[components.host_end + 1] == '.') ||
829 (buffer[components.host_end] == ':' &&
830 checkers::is_digit(buffer[components.host_end + 1])));
831 }
832 if (components.pathname_start == components.host_end + 2 &&
833 buffer[components.host_end] == '/' &&
834 buffer[components.host_end + 1] == '.') {
835 ADA_ASSERT_TRUE(components.pathname_start + 1 < buffer.size());
836 ADA_ASSERT_TRUE(buffer[components.pathname_start] == '/');
837 ADA_ASSERT_TRUE(buffer[components.pathname_start + 1] == '/');
838 }
839#endif
840 // Performance: it should be uncommon for components.pathname_start ==
841 // components.host_end + 2 to be true. So we put this check first in the
842 // sequence. Most times, we do not have an opaque path. Checking for '/.' is
843 // more expensive, but should be uncommon.
844 return components.pathname_start == components.host_end + 2 &&
845 !has_opaque_path && buffer[components.host_end] == '/' &&
846 buffer[components.host_end + 1] == '.';
847}
848
849[[nodiscard]] constexpr std::string_view url_aggregator::get_href()
850 const noexcept ada_lifetime_bound {
851 ada_log("url_aggregator::get_href");
852 return buffer;
853}
854
856url_aggregator::parse_port(std::string_view view, bool check_trailing_content) {
857 ada_log("url_aggregator::parse_port('", view, "') ", view.size());
858 if (!view.empty() && view[0] == '-') {
859 ada_log("parse_port: view[0] == '0' && view.size() > 1");
860 is_valid = false;
861 return 0;
862 }
863 uint16_t parsed_port{};
864 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
865 if (r.ec == std::errc::result_out_of_range) {
866 ada_log("parse_port: r.ec == std::errc::result_out_of_range");
867 is_valid = false;
868 return 0;
869 }
870 ada_log("parse_port: ", parsed_port);
871 const size_t consumed = size_t(r.ptr - view.data());
872 ada_log("parse_port: consumed ", consumed);
873 if (check_trailing_content) {
874 is_valid &=
875 (consumed == view.size() || view[consumed] == '/' ||
876 view[consumed] == '?' || (is_special() && view[consumed] == '\\'));
877 }
878 ada_log("parse_port: is_valid = ", is_valid);
879 if (is_valid) {
880 ada_log("parse_port", r.ec == std::errc());
881 // scheme_default_port can return 0, and we should allow 0 as a base port.
882 auto default_port = scheme_default_port();
883 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
884 (default_port != parsed_port);
885 if (r.ec == std::errc() && is_port_valid) {
886 update_base_port(parsed_port);
887 } else {
888 clear_port();
889 }
890 }
891 return consumed;
892}
893
894constexpr void url_aggregator::set_protocol_as_file() {
895 ada_log("url_aggregator::set_protocol_as_file ");
898 // next line could overflow but unsigned arithmetic has well-defined
899 // overflows.
900 uint32_t new_difference = 5 - components.protocol_end;
901
902 if (buffer.empty()) {
903 buffer.append("file:");
904 } else {
905 buffer.erase(0, components.protocol_end);
906 buffer.insert(0, "file:");
907 }
908 components.protocol_end = 5;
909
910 // Update the rest of the components.
911 components.username_end += new_difference;
912 components.host_start += new_difference;
913 components.host_end += new_difference;
914 components.pathname_start += new_difference;
915 if (components.search_start != url_components::omitted) {
916 components.search_start += new_difference;
917 }
918 if (components.hash_start != url_components::omitted) {
919 components.hash_start += new_difference;
920 }
922}
923
924[[nodiscard]] constexpr bool url_aggregator::validate() const noexcept {
925 if (!is_valid) {
926 return true;
927 }
928 if (!components.check_offset_consistency()) {
929 ada_log("url_aggregator::validate inconsistent components \n",
930 to_diagram());
931 return false;
932 }
933 // We have a credible components struct, but let us investivate more
934 // carefully:
947 if (components.protocol_end == url_components::omitted) {
948 ada_log("url_aggregator::validate omitted protocol_end \n", to_diagram());
949 return false;
950 }
951 if (components.username_end == url_components::omitted) {
952 ada_log("url_aggregator::validate omitted username_end \n", to_diagram());
953 return false;
954 }
955 if (components.host_start == url_components::omitted) {
956 ada_log("url_aggregator::validate omitted host_start \n", to_diagram());
957 return false;
958 }
959 if (components.host_end == url_components::omitted) {
960 ada_log("url_aggregator::validate omitted host_end \n", to_diagram());
961 return false;
962 }
963 if (components.pathname_start == url_components::omitted) {
964 ada_log("url_aggregator::validate omitted pathname_start \n", to_diagram());
965 return false;
966 }
967
968 if (components.protocol_end > buffer.size()) {
969 ada_log("url_aggregator::validate protocol_end overflow \n", to_diagram());
970 return false;
971 }
972 if (components.username_end > buffer.size()) {
973 ada_log("url_aggregator::validate username_end overflow \n", to_diagram());
974 return false;
975 }
976 if (components.host_start > buffer.size()) {
977 ada_log("url_aggregator::validate host_start overflow \n", to_diagram());
978 return false;
979 }
980 if (components.host_end > buffer.size()) {
981 ada_log("url_aggregator::validate host_end overflow \n", to_diagram());
982 return false;
983 }
984 if (components.pathname_start > buffer.size()) {
985 ada_log("url_aggregator::validate pathname_start overflow \n",
986 to_diagram());
987 return false;
988 }
989
990 if (components.protocol_end > 0) {
991 if (buffer[components.protocol_end - 1] != ':') {
992 ada_log(
993 "url_aggregator::validate missing : at the end of the protocol \n",
994 to_diagram());
995 return false;
996 }
997 }
998
999 if (components.username_end != buffer.size() &&
1000 components.username_end > components.protocol_end + 2) {
1001 if (buffer[components.username_end] != ':' &&
1002 buffer[components.username_end] != '@') {
1003 ada_log(
1004 "url_aggregator::validate missing : or @ at the end of the username "
1005 "\n",
1006 to_diagram());
1007 return false;
1008 }
1009 }
1010
1011 if (components.host_start != buffer.size()) {
1012 if (components.host_start > components.username_end) {
1013 if (buffer[components.host_start] != '@') {
1014 ada_log(
1015 "url_aggregator::validate missing @ at the end of the password \n",
1016 to_diagram());
1017 return false;
1018 }
1019 } else if (components.host_start == components.username_end &&
1020 components.host_end > components.host_start) {
1021 if (components.host_start == components.protocol_end + 2) {
1022 if (buffer[components.protocol_end] != '/' ||
1023 buffer[components.protocol_end + 1] != '/') {
1024 ada_log(
1025 "url_aggregator::validate missing // between protocol and host "
1026 "\n",
1027 to_diagram());
1028 return false;
1029 }
1030 } else {
1031 if (components.host_start > components.protocol_end &&
1032 buffer[components.host_start] != '@') {
1033 ada_log(
1034 "url_aggregator::validate missing @ at the end of the username "
1035 "\n",
1036 to_diagram());
1037 return false;
1038 }
1039 }
1040 } else {
1041 if (components.host_end != components.host_start) {
1042 ada_log("url_aggregator::validate expected omitted host \n",
1043 to_diagram());
1044 return false;
1045 }
1046 }
1047 }
1048 if (components.host_end != buffer.size() &&
1049 components.pathname_start > components.host_end) {
1050 if (components.pathname_start == components.host_end + 2 &&
1051 buffer[components.host_end] == '/' &&
1052 buffer[components.host_end + 1] == '.') {
1053 if (components.pathname_start + 1 >= buffer.size() ||
1054 buffer[components.pathname_start] != '/' ||
1055 buffer[components.pathname_start + 1] != '/') {
1056 ada_log(
1057 "url_aggregator::validate expected the path to begin with // \n",
1058 to_diagram());
1059 return false;
1060 }
1061 } else if (buffer[components.host_end] != ':') {
1062 ada_log("url_aggregator::validate missing : at the port \n",
1063 to_diagram());
1064 return false;
1065 }
1066 }
1067 if (components.pathname_start != buffer.size() &&
1068 components.pathname_start < components.search_start &&
1069 components.pathname_start < components.hash_start && !has_opaque_path) {
1070 if (buffer[components.pathname_start] != '/') {
1071 ada_log("url_aggregator::validate missing / at the path \n",
1072 to_diagram());
1073 return false;
1074 }
1075 }
1076 if (components.search_start != url_components::omitted) {
1077 if (buffer[components.search_start] != '?') {
1078 ada_log("url_aggregator::validate missing ? at the search \n",
1079 to_diagram());
1080 return false;
1081 }
1082 }
1083 if (components.hash_start != url_components::omitted) {
1084 if (buffer[components.hash_start] != '#') {
1085 ada_log("url_aggregator::validate missing # at the hash \n",
1086 to_diagram());
1087 return false;
1088 }
1089 }
1090
1091 return true;
1092}
1093
1094[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const
1096 ada_log("url_aggregator::get_pathname pathname_start = ",
1097 components.pathname_start, " buffer.size() = ", buffer.size(),
1098 " components.search_start = ", components.search_start,
1099 " components.hash_start = ", components.hash_start);
1100 auto ending_index = uint32_t(buffer.size());
1101 if (components.search_start != url_components::omitted) {
1102 ending_index = components.search_start;
1103 } else if (components.hash_start != url_components::omitted) {
1104 ending_index = components.hash_start;
1105 }
1106 return helpers::substring(buffer, components.pathname_start, ending_index);
1107}
1108
1109inline std::ostream &operator<<(std::ostream &out,
1110 const ada::url_aggregator &u) {
1111 return out << u.to_string();
1112}
1113
1114void url_aggregator::update_host_to_base_host(const std::string_view input) {
1115 ada_log("url_aggregator::update_host_to_base_host ", input);
1117 ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
1118 if (type != ada::scheme::type::FILE) {
1119 // Let host be the result of host parsing host_view with url is not special.
1120 if (input.empty() && !is_special()) {
1121 if (has_hostname()) {
1122 clear_hostname();
1123 } else if (has_dash_dot()) {
1124 add_authority_slashes_if_needed();
1125 delete_dash_dot();
1126 }
1127 return;
1128 }
1129 }
1130 update_base_hostname(input);
1132 return;
1133}
1134} // namespace ada
1135
1136#endif // ADA_URL_AGGREGATOR_INL_H
Definitions of the character sets used by unicode functions.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
#define ADA_ASSERT_EQUAL(LHS, RHS, MESSAGE)
#define ada_really_inline
Definition common_defs.h:85
Definitions for helper functions used within Ada.
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool is_digit(char x) noexcept
constexpr int32_t base
ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[])
Definition unicode-inl.h:19
Definition ada_idna.h:13
std::ostream & operator<<(std::ostream &out, const ada::url &u)
Definition url-inl.h:38
URL scheme type definitions and utilities.
Memory-efficient URL representation using a single buffer.
ada_really_inline const url_components & get_components() const noexcept
constexpr bool has_non_empty_password() const noexcept
constexpr bool validate() const noexcept
void clear_search() override
std::string_view get_search() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string to_string() const override
std::string_view get_password() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string to_diagram() const
constexpr bool has_hostname() const noexcept
constexpr bool has_search() const noexcept override
constexpr std::string_view get_href() const noexcept ada_lifetime_bound
constexpr bool has_empty_hostname() const noexcept
constexpr bool has_password() const noexcept
ada_really_inline uint32_t get_pathname_length() const noexcept
constexpr bool has_hash() const noexcept override
constexpr std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_hostname() const ada_lifetime_bound
constexpr bool has_port() const noexcept
ada_really_inline constexpr bool has_credentials() const noexcept
constexpr bool has_non_empty_username() const noexcept
ada_really_inline constexpr bool is_special() const noexcept
bool is_valid
Definition url_base.h:56
bool has_opaque_path
Definition url_base.h:62
Stores byte offsets for URL components within a buffer.
static constexpr uint32_t omitted
Definitions for unicode operations.
Declaration for the ada::url_aggregator class.
Declaration for the URL Components.