forked from mirrors/gecko-dev
This patch provides functions to extract the fragment directive from a url / a hash into an array of `TextDirective`s as well as to create a fragment directive string from given text directives. The algorithms are implemented as a rust crate. Interface functions and data structures which are accessible from C++ are provided in `lib.rs`. The actual implementation (using pure rust types) lives in `fragment_directive_impl.rs`, tests live in `test.rs`. The implementation currently only supports text directives. Other future directive types are not considered and will be ignored. The main function, `parse_fragment_directive()` takes a url / a url hash as parameter and returns (as out parameter) a struct which contains the stripped input url, the fragment directive string, and an array of parsed text directive objects. Additionally, there are functions that create a full fragment directive string from a list of text directives as well as a function that creates a single text directive string from a text directive. The `TextDirective` class, which is shared with C++, contains four string elements for the prefix, start, end and suffix elements. These strings are percent-decoded and do not contain identifiers (like the `-` that indicates it being a prefix or suffix). All elements besides `start` can be empty. The implemented algorithms are used in the following patches. Differential Revision: https://phabricator.services.mozilla.com/D195685
599 lines
21 KiB
Rust
599 lines
21 KiB
Rust
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use crate::fragment_directive_impl::{
|
|
create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
|
|
TextDirective,
|
|
};
|
|
|
|
/// Verifies that every valid combination of `[prefix-,]start[,end][,-suffix]`
/// is parsed into exactly one text directive carrying the expected tokens.
///
/// Each case pairs an input hash with the expected `(prefix, start, end, suffix)`
/// values, where `None` means that the token must be absent.
#[test]
fn test_parse_fragment_directive_with_one_text_directive() {
    let test_cases = [
        ("#:~:text=start", (None, Some("start"), None, None)),
        (
            "#:~:text=start,end",
            (None, Some("start"), Some("end"), None),
        ),
        (
            "#:~:text=prefix-,start",
            (Some("prefix"), Some("start"), None, None),
        ),
        (
            "#:~:text=prefix-,start,end",
            (Some("prefix"), Some("start"), Some("end"), None),
        ),
        (
            "#:~:text=prefix-,start,end,-suffix",
            (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
        ),
        (
            "#:~:text=start,-suffix",
            (None, Some("start"), None, Some("suffix")),
        ),
        (
            "#:~:text=start,end,-suffix",
            (None, Some("start"), Some("end"), Some("suffix")),
        ),
        ("#:~:text=text=", (None, Some("text="), None, None)),
    ];
    for (url, (prefix, start, end, suffix)) in test_cases {
        let (stripped_url, fragment_directive, result) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        // Every input starts with the 4-byte delimiter `#:~:`;
        // everything after it is the raw fragment directive.
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string \
             should be unsanitized and therefore match the input string."
        );
        assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
        assert_eq!(
            stripped_url, "",
            "The fragment directive must be removed from the url hash."
        );
        let text_directive = result.first().unwrap();

        match prefix {
            None => assert!(
                text_directive.prefix().is_none(),
                "There must be no `prefix` token (test case `{}`).",
                url
            ),
            Some(expected) => assert_eq!(
                text_directive
                    .prefix()
                    .as_ref()
                    .expect("There must be a `prefix` token.")
                    .value(),
                expected,
                "Wrong value for `prefix` (test case `{}`).",
                url
            ),
        }
        match start {
            None => assert!(
                text_directive.start().is_none(),
                "There must be no `start` token (test case `{}`).",
                url
            ),
            Some(expected) => assert_eq!(
                text_directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.")
                    .value(),
                expected,
                "Wrong value for `start` (test case `{}`).",
                url
            ),
        }
        match end {
            None => assert!(
                text_directive.end().is_none(),
                "There must be no `end` token (test case `{}`).",
                url
            ),
            Some(expected) => assert_eq!(
                text_directive
                    .end()
                    .as_ref()
                    .expect("There must be a `end` token.")
                    .value(),
                expected,
                "Wrong value for `end` (test case `{}`).",
                url
            ),
        }
        match suffix {
            None => assert!(
                text_directive.suffix().is_none(),
                "There must be no `suffix` token (test case `{}`).",
                url
            ),
            Some(expected) => assert_eq!(
                text_directive
                    .suffix()
                    .as_ref()
                    .expect("There must be a `suffix` token.")
                    .value(),
                expected,
                "Wrong value for `suffix` (test case `{}`).",
                url
            ),
        }
    }
}
|
|
|
|
/// Checks that the fragment directive is split off complete URLs.
///
/// The cases cover a bare origin, a URL with path and query, a URL with a
/// regular fragment in front of the directive, and a URL whose query contains
/// `:~:` (which must stay part of the stripped URL).
#[test]
fn test_parse_full_url() {
    let cases = [
        ("https://example.com#:~:text=foo", "https://example.com"),
        (
            "https://example.com/some/page.html?query=answer#:~:text=foo",
            "https://example.com/some/page.html?query=answer",
        ),
        (
            "https://example.com/some/page.html?query=answer#fragment:~:text=foo",
            "https://example.com/some/page.html?query=answer#fragment",
        ),
        (
            "http://example.com/page.html?query=irrelevant:~:#bar:~:text=foo",
            "http://example.com/page.html?query=irrelevant:~:#bar",
        ),
    ];
    for (input, expected_stripped) in cases {
        let (stripped, directive, _) = parse_fragment_directive_and_remove_it_from_hash(input)
            .expect("The parser must find a result");
        assert_eq!(stripped, expected_stripped, "The stripped url is not correct.");
        assert_eq!(directive, "text=foo");
    }
}
|
|
|
|
/// Checks that a text directive is found when the hash carries a regular
/// fragment in front of it (i.e. `#foo:~:text=bar`), and that only the regular
/// fragment remains once the directive has been stripped.
#[test]
fn test_parse_text_fragment_after_fragments() {
    let input = "#foo:~:text=start";
    let (remaining_hash, directive_string, directives) =
        parse_fragment_directive_and_remove_it_from_hash(input)
            .expect("The parser must find a result.");

    assert_eq!(
        directives.len(),
        1,
        "There must be exactly one parsed text fragment."
    );
    assert_eq!(
        remaining_hash, "#foo",
        "The fragment directive was not removed correctly."
    );
    assert_eq!(
        directive_string, "text=start",
        "The fragment directive was not extracted correctly."
    );

    // Only `start` is present in the input; all other tokens must be absent.
    let directive = directives.first().unwrap();
    assert!(directive.prefix().is_none(), "There is no `prefix` token.");
    assert_eq!(
        directive
            .start()
            .as_ref()
            .expect("There must be a `start` token.")
            .value(),
        "start"
    );
    assert!(directive.end().is_none(), "There is no `end` token.");
    assert!(directive.suffix().is_none(), "There is no `suffix` token.");
}
|
|
|
|
/// Ensures that multiple text directives, separated by `&`, are all parsed
/// and returned in the order in which they appear in the input.
#[test]
fn test_parse_multiple_text_fragments() {
    let url = "#:~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
    let (_, _, text_directives) = parse_fragment_directive_and_remove_it_from_hash(url)
        .expect("The parser must find a result.");
    // The input contains three `text=` directives.
    assert_eq!(
        text_directives.len(),
        3,
        "There must be exactly three parsed text fragments."
    );

    // First directive: `prefix-,start,-suffix`.
    let first_text_directive = &text_directives[0];
    assert_eq!(
        first_text_directive
            .prefix()
            .as_ref()
            .expect("There must be a `prefix` token.")
            .value(),
        "prefix"
    );
    assert_eq!(
        first_text_directive
            .start()
            .as_ref()
            .expect("There must be a `start` token.")
            .value(),
        "start"
    );
    assert!(
        first_text_directive.end().is_none(),
        "There is no `end` token."
    );
    assert_eq!(
        first_text_directive
            .suffix()
            .as_ref()
            .expect("There must be a `suffix` token.")
            .value(),
        "suffix"
    );

    // Second directive: `foo`.
    let second_text_directive = &text_directives[1];
    assert!(
        second_text_directive.prefix().is_none(),
        "There is no `prefix` token."
    );
    assert_eq!(
        second_text_directive
            .start()
            .as_ref()
            .expect("There must be a `start` token.")
            .value(),
        "foo"
    );
    assert!(
        second_text_directive.end().is_none(),
        "There is no `end` token."
    );
    assert!(
        second_text_directive.suffix().is_none(),
        "There is no `suffix` token."
    );

    // Third directive: `bar,-suffix`.
    let third_text_directive = &text_directives[2];
    assert!(
        third_text_directive.prefix().is_none(),
        "There is no `prefix` token."
    );
    assert_eq!(
        third_text_directive
            .start()
            .as_ref()
            .expect("There must be a `start` token.")
            .value(),
        "bar"
    );
    assert!(
        third_text_directive.end().is_none(),
        "There is no `end` token."
    );
    assert_eq!(
        third_text_directive
            .suffix()
            .as_ref()
            .expect("There must be a `suffix` token.")
            .value(),
        "suffix"
    );
}
|
|
|
|
/// Multiple text directives must be parsed correctly
/// if they are surrounded or separated by unknown directives.
///
/// The unknown directive (`foo`) does not yield a text directive, but it must
/// still be present in the raw fragment directive string.
#[test]
fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
    for url in [
        "#:~:foo&text=start1&text=start2",
        "#:~:text=start1&foo&text=start2",
        "#:~:text=start1&text=start2&foo",
    ] {
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        // Every input starts with the 4-byte delimiter `#:~:`.
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string is unsanitized \
             and should contain the unknown directive."
        );
        assert_eq!(
            text_directives.len(),
            2,
            "There must be exactly two parsed text fragments."
        );
        let first_text_directive = &text_directives[0];
        assert_eq!(
            first_text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "start1"
        );
        let second_text_directive = &text_directives[1];
        assert_eq!(
            second_text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` token.")
                .value(),
            "start2"
        );
    }
}
|
|
|
|
/// Input without a usable text directive must not produce a result.
///
/// The inputs cover hashes without the `:~:` delimiter, a delimiter without
/// a `text=` directive, a `text=` directive without the delimiter, and a hash
/// in which `:~:` occurs more than once.
/// For all of them the parser is expected to return `None`.
#[test]
fn test_parse_invalid_or_unknown_fragment_directive() {
    let inputs = [
        "#foo",
        "#foo:",
        "#foo:~:",
        "#foo:~:bar",
        "text=prefix-,start",
        "#:~:text=foo-,bar,-baz:~:text=foo",
    ];
    for url in inputs {
        let outcome = parse_fragment_directive_and_remove_it_from_hash(url);
        assert!(
            outcome.is_none(),
            "The fragment `{}` does not contain a valid or known fragment directive.",
            url
        );
    }
}
|
|
|
|
/// Ill-formed text directives inside an otherwise well-formed fragment
/// directive (i.e. one starting with `:~:text=`) must be rejected.
///
/// The inputs contain too many `start`/`end` tokens, misplaced or repeated
/// `prefix`/`suffix` tokens, stray `,` separators and an empty directive.
/// For all of them the parser is expected to return `None`.
#[test]
fn test_parse_invalid_text_fragments() {
    let inputs = [
        "#:~:text=start,start,start",
        "#:~:text=prefix-,prefix-",
        "#:~:text=prefix-,-suffix",
        "#:~:text=prefix-,start,start,start",
        "#:~:text=prefix-,start,start,start,-suffix",
        "#:~:text=start,start,start,-suffix",
        "#:~:text=prefix-,start,end,-suffix,foo",
        "#:~:text=foo,prefix-,start",
        "#:~:text=prefix-,,start,",
        "#:~:text=,prefix,start",
        "#:~:text=",
    ];
    for url in inputs {
        let outcome = parse_fragment_directive_and_remove_it_from_hash(url);
        assert!(
            outcome.is_none(),
            "The fragment directive `{}` does not contain a valid text directive.",
            url
        );
    }
}
|
|
|
|
/// Ensures that out of multiple text directives only the invalid ones are
/// ignored while the valid ones are still returned.
///
/// Parsing of multiple text directives and of several forms of invalid text
/// directives is already covered by `test_parse_multiple_text_fragments()` and
/// `test_parse_invalid_text_fragments()`, so it is enough to check fragment
/// directives that hold two text directives, one of them being invalid.
#[test]
fn test_valid_and_invalid_text_directives() {
    for url in [
        "#:~:text=start&text=,foo,",
        "#:~:text=foo,foo,foo&text=start",
    ] {
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        // Every input starts with the 4-byte delimiter `#:~:`.
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string is unsanitized \
             and should contain invalid text directives."
        );
        assert_eq!(
            text_directives.len(),
            1,
            "There must be exactly one parsed text fragment."
        );
        let text_directive = text_directives.first().unwrap();
        assert_eq!(
            text_directive
                .start()
                .as_ref()
                .expect("There must be a `start` value.")
                .value(),
            "start",
            "The `start` value of the text directive has the wrong value."
        );
    }
}
|
|
|
|
/// Ensures that percent-encoded characters in a text directive are decoded
/// in the parsed tokens.
///
/// The input encodes `&` (`%26`), a space (`%20`), `,` (`%2C`), `#` (`%23`)
/// and `-` (`%2D`). The raw fragment directive string must keep the encoded
/// form, while each parsed token must hold the decoded characters.
#[test]
fn test_parse_percent_encoding_tokens() {
    let url = "#:~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
    let (_, fragment_directive, text_directives) =
        parse_fragment_directive_and_remove_it_from_hash(url)
            .expect("The parser must find a result.");
    // The input starts with the 4-byte delimiter `#:~:`.
    assert_eq!(
        fragment_directive,
        &url[4..],
        "The extracted fragment directive string is unsanitized \
         and should contain the original, percent-encoded string."
    );
    let text_directive = text_directives.first().unwrap();
    assert_eq!(
        text_directive
            .prefix()
            .as_ref()
            .expect("There must be a prefix.")
            .value(),
        "prefix&",
        "The `prefix` token was not percent-decoded correctly."
    );
    assert_eq!(
        text_directive
            .start()
            .as_ref()
            .expect("There must be a start.")
            .value(),
        "start and,",
        "The `start` token was not percent-decoded correctly."
    );
    assert_eq!(
        text_directive
            .end()
            .as_ref()
            .expect("There must be an end.")
            .value(),
        "end#",
        "The `end` token was not percent-decoded correctly."
    );
    assert_eq!(
        text_directive
            .suffix()
            .as_ref()
            .expect("There must be a suffix.")
            .value(),
        "&suffix-",
        "The `suffix` token was not percent-decoded correctly."
    );
}
|
|
|
|
/// Ensures that a fragment directive string is created correctly
/// from a single text directive.
///
/// The cases cover several combinations of `prefix`, `start`, `end` and
/// `suffix`, as well as token values containing characters that must be
/// percent-encoded in the output (`-`, a space, `,`, `&`, `#`, `:` and `~`).
#[test]
fn test_create_fragment_directive() {
    // Builds a text directive from its four parts; an empty string stands for
    // a part that is not set.
    let directive = |prefix: &str, start: &str, end: &str, suffix: &str| {
        TextDirective::from_parts(
            String::from(prefix),
            String::from(start),
            String::from(end),
            String::from(suffix),
        )
        .unwrap()
    };
    let cases = [
        (directive("", "start", "", ""), ":~:text=start"),
        (directive("", "start", "end", ""), ":~:text=start,end"),
        (
            directive("prefix", "start", "end", ""),
            ":~:text=prefix-,start,end",
        ),
        (
            directive("prefix", "start", "end", "suffix"),
            ":~:text=prefix-,start,end,-suffix",
        ),
        (
            directive("", "start", "end", "suffix"),
            ":~:text=start,end,-suffix",
        ),
        (
            directive("prefix", "start", "", "suffix"),
            ":~:text=prefix-,start,-suffix",
        ),
        (
            directive("prefix-", "start and,", "&end", "#:~:suffix"),
            ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
        ),
    ];
    for (text_directive, expected_fragment_directive) in cases {
        let fragment_directive = create_fragment_directive_string(&vec![text_directive])
            .expect("The given input must produce a valid fragment directive.");
        assert_eq!(fragment_directive, expected_fragment_directive);
    }
}
|
|
|
|
/// Ensures that a fragment directive string is created correctly
/// from several text directives.
///
/// The expected output starts with `:~:`, and the individual text directives
/// are joined with `&`, each one introduced by `text=`.
#[test]
fn test_create_fragment_directive_from_multiple_text_directives() {
    // Three directives that only carry a `start` value.
    let text_directives: Vec<TextDirective> = ["start1", "start2", "start3"]
        .iter()
        .map(|&start| {
            TextDirective::from_parts(
                String::new(),
                String::from(start),
                String::new(),
                String::new(),
            )
            .unwrap()
        })
        .collect();
    let fragment_directive = create_fragment_directive_string(&text_directives)
        .expect("The given input must produce a valid fragment directive.");
    assert_eq!(
        fragment_directive, ":~:text=start1&text=start2&text=start3",
        "The created fragment directive is wrong for multiple fragments."
    );
}
|
|
}
|