Bug 1867939, part 2: Implement Fragment Directive parser. r=hsivonen,dom-core

This patch provides functions to extract the fragment directive from a url / a hash
into an array of `TextDirective`s
as well as to create a fragment directive string from given text directives.

The algorithms are implemented as a rust crate.
Interface functions and data structures which are accessible from C++
are provided in `lib.rs`.
The actual implementation (using pure rust types)
lives in `fragment_directive_impl.rs`, tests live in `test.rs`.

The implementation currently only supports text directives.
Other future directive types are not considered and will be ignored.

The main function, `parse_fragment_directive()` takes a url / a url hash
as parameter and returns (as out parameter) a struct which contains the stripped input url,
the fragment directive string, and an array of parsed text directive objects.

Additionally, there are functions that create a full fragment directive string
from a list of text directives as well as a function that creates
a single text directive string from a text directive.

The `TextDirective` struct, which is shared with C++, contains four
string members for the prefix, start, end and suffix elements.
These strings are percent-decoded and do not contain the syntactic
markers (such as the `-` that indicates a prefix or suffix).
All elements besides `start` can be empty.

The implemented algorithms are used in the following patches.

Differential Revision: https://phabricator.services.mozilla.com/D195685
This commit is contained in:
Jan-Niklas Jaeschke 2024-04-02 13:44:24 +00:00
parent 48c61f0bee
commit 55e8f7f971
9 changed files with 1145 additions and 0 deletions

10
Cargo.lock generated
View file

@ -1494,6 +1494,15 @@ dependencies = [
"bitflags 2.4.1",
]
[[package]]
name = "dom_fragmentdirectives"
version = "0.1.0"
dependencies = [
"nsstring",
"percent-encoding",
"thin-vec",
]
[[package]]
name = "dtoa"
version = "0.4.8"
@ -2255,6 +2264,7 @@ dependencies = [
"data_storage",
"detect_win32k_conflicts",
"dom",
"dom_fragmentdirectives",
"encoding_glue",
"fallible_collections",
"fluent",

View file

@ -0,0 +1,13 @@
[package]
name = "dom_fragmentdirectives"
version = "0.1.0"
authors = ["Jan Jaeschke <jjaschke@mozilla.com>"]
edition = "2021"
license = "MPL-2.0"
[dependencies]
nsstring = { path = "../../../xpcom/rust/nsstring/" }
thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
percent-encoding = { version = "2.3.1" }
[lib]
path = "lib.rs"

View file

@ -0,0 +1,15 @@
header = """/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */"""
autogen_warning = """/* DO NOT MODIFY THIS MANUALLY! This file was generated using cbindgen. See RunCbindgen.py */
"""
include_version = true
braces = "SameLine"
line_length = 100
tab_width = 2
language = "C++"
include_guard = "fragmentdirectives_ffi_generated_h"
includes = ["nsStringFwd.h", "nsTArrayForwardDeclare.h"]
[export.rename]
"ThinVec" = "nsTArray"

View file

@ -0,0 +1,342 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use percent_encoding::{percent_decode, percent_encode, NON_ALPHANUMERIC};
use std::str;
/// The `TextDirectiveParameter` represents one of
/// `[prefix-,]start[,end][,-suffix]` without any surrounding `-` or `,`.
///
/// The token is stored as percent-decoded string.
/// Therefore, interfaces exist to
/// - create a `TextDirectiveParameter` from a percent-encoded string.
///   This function will determine from occurrence and position of a dash
///   if the token represents a `prefix`, `suffix` or either `start` or `end`.
/// - create a percent-encoded string from the value the token holds.
pub enum TextDirectiveParameter {
    Prefix(String),
    StartOrEnd(String),
    Suffix(String),
}

impl TextDirectiveParameter {
    /// Creates a token from a percent-encoded string.
    /// Based on position of a dash the correct token type is determined.
    /// Returns `None` in case of an ill-formed token:
    /// - starts and ends with a dash (i.e. `-token-`)
    /// - only consists of a dash (i.e. `-`) or is empty
    /// - conversion from percent-encoded string to utf8 fails.
    pub fn from_percent_encoded(token: &[u8]) -> Option<Self> {
        if token.is_empty() {
            return None;
        }
        let starts_with_dash = token.first() == Some(&b'-');
        let ends_with_dash = token.last() == Some(&b'-');
        if starts_with_dash && ends_with_dash {
            // `-token-` is not valid.
            return None;
        }
        if token.len() == 1 && starts_with_dash {
            // `-` is not valid.
            return None;
        }
        // Percent-decodes the given bytes and trims surrounding whitespace.
        // Returns `None` if the decoded bytes are not valid utf8.
        // Note: Trimming of the raw strings is currently not mentioned in the spec.
        // However, it looks as it is implicitly expected.
        let decode_trimmed = |bytes: &[u8]| -> Option<String> {
            percent_decode(bytes)
                .decode_utf8()
                .ok()
                .map(|decoded| String::from(decoded.trim()))
        };
        if starts_with_dash {
            // `-suffix`: strip the leading dash before decoding.
            return decode_trimmed(&token[1..]).map(TextDirectiveParameter::Suffix);
        }
        if ends_with_dash {
            // `prefix-`: strip the trailing dash before decoding.
            return decode_trimmed(&token[..token.len() - 1]).map(TextDirectiveParameter::Prefix);
        }
        decode_trimmed(token).map(TextDirectiveParameter::StartOrEnd)
    }

    /// Returns the value of the token as percent-decoded `String`.
    pub fn value(&self) -> &String {
        match self {
            TextDirectiveParameter::Prefix(value)
            | TextDirectiveParameter::StartOrEnd(value)
            | TextDirectiveParameter::Suffix(value) => value,
        }
    }

    /// Creates a percent-encoded string of the token's value.
    /// This includes placing a dash appropriately
    /// to indicate whether this token is prefix, suffix or start/end.
    ///
    /// This method always returns a new object.
    pub fn to_percent_encoded_string(&self) -> String {
        let encode = |text: &String| percent_encode(text.as_bytes(), NON_ALPHANUMERIC).to_string();
        match self {
            Self::Prefix(text) => encode(text) + "-",
            Self::StartOrEnd(text) => encode(text),
            Self::Suffix(text) => format!("-{}", encode(text)),
        }
    }
}
/// This struct represents one parsed text directive using Rust types.
///
/// A text fragment is encoded into a URL fragment like this:
/// `text=[prefix-,]start[,end][,-suffix]`
///
/// The text directive is considered valid if at least `start` is not None.
/// (see `Self::is_valid()`).
#[derive(Default)]
pub struct TextDirective {
    prefix: Option<TextDirectiveParameter>,
    start: Option<TextDirectiveParameter>,
    end: Option<TextDirectiveParameter>,
    suffix: Option<TextDirectiveParameter>,
}

impl TextDirective {
    /// Creates an instance from string parts.
    /// This function is intended to be used when a fragment directive string should be created.
    /// Returns `None` if `start` is empty.
    pub fn from_parts(prefix: String, start: String, end: String, suffix: String) -> Option<Self> {
        if start.is_empty() {
            return None;
        }
        // Empty optional parts are represented as `None`.
        // All values are trimmed to be consistent with the parsing functions.
        let non_empty = |part: String, make: fn(String) -> TextDirectiveParameter| {
            if part.is_empty() {
                None
            } else {
                Some(make(part.trim().into()))
            }
        };
        Some(Self {
            prefix: non_empty(prefix, TextDirectiveParameter::Prefix),
            start: Some(TextDirectiveParameter::StartOrEnd(start.trim().into())),
            end: non_empty(end, TextDirectiveParameter::StartOrEnd),
            suffix: non_empty(suffix, TextDirectiveParameter::Suffix),
        })
    }

    /// Creates an instance from a percent-encoded string
    /// that originates from a fragment directive.
    ///
    /// `text_directive` is supposed to have this format:
    /// ```text
    /// text=[prefix-,]start[,end][,-suffix]
    /// ```
    /// This function returns `None` if `text_directive`
    /// does not start with `text=`, if it contains no parameters,
    /// or if prefix/start/end/suffix occur too often or in the wrong order.
    /// It also returns `None` if any of the tokens fails to parse.
    pub fn from_percent_encoded_string(text_directive: &str) -> Option<Self> {
        // The string must start with `text=` and contain at least one more character.
        if text_directive.len() < 6 || !text_directive.starts_with("text=") {
            return None;
        }
        let mut parsed = Self::default();
        // Parse the comma-separated substrings one by one into
        // `TextDirectiveParameter`s. Given that the parameters are sorted by
        // their position in the source, the validity of the text directive can
        // be determined while inserting them.
        for token in text_directive[5..].split(',') {
            let parameter = TextDirectiveParameter::from_percent_encoded(token.as_bytes());
            match parameter {
                Some(TextDirectiveParameter::Prefix(..)) => {
                    // `prefix-` must be the first parameter.
                    if !parsed.is_empty() {
                        return None;
                    }
                    parsed.prefix = parameter;
                }
                Some(TextDirectiveParameter::StartOrEnd(..)) => {
                    // `start` or `end` must come before `-suffix`.
                    if parsed.suffix.is_some() {
                        return None;
                    }
                    if parsed.start.is_none() {
                        parsed.start = parameter;
                    } else if parsed.end.is_none() {
                        parsed.end = parameter;
                    } else {
                        // If `start` and `end` are already filled,
                        // this is invalid as well.
                        return None;
                    }
                }
                Some(TextDirectiveParameter::Suffix(..)) => {
                    // `start` must be present and `-suffix` must not be
                    // present. `end` may be present.
                    if parsed.start.is_none() || parsed.suffix.is_some() {
                        return None;
                    }
                    parsed.suffix = parameter;
                }
                // An empty or invalid token renders the whole text directive invalid.
                None => return None,
            }
        }
        Some(parsed)
    }

    /// Creates a percent-encoded string for the current `TextDirective`.
    /// In the unlikely case that the `TextDirective` is invalid (i.e. `start` is None),
    /// which should have been caught earlier, this method returns an empty string.
    pub fn to_percent_encoded_string(&self) -> String {
        if !self.is_valid() {
            return String::default();
        }
        String::from("text=")
            + &[&self.prefix, &self.start, &self.end, &self.suffix]
                .iter()
                .filter_map(|&token| token.as_ref())
                .map(|token| token.to_percent_encoded_string())
                .collect::<Vec<_>>()
                .join(",")
    }

    /// Returns the parsed `start` parameter, if present.
    pub fn start(&self) -> &Option<TextDirectiveParameter> {
        &self.start
    }

    /// Returns the parsed `end` parameter, if present.
    pub fn end(&self) -> &Option<TextDirectiveParameter> {
        &self.end
    }

    /// Returns the parsed `prefix` parameter, if present.
    pub fn prefix(&self) -> &Option<TextDirectiveParameter> {
        &self.prefix
    }

    /// Returns the parsed `suffix` parameter, if present.
    pub fn suffix(&self) -> &Option<TextDirectiveParameter> {
        &self.suffix
    }

    /// Returns true if no parameter has been populated yet.
    fn is_empty(&self) -> bool {
        self.prefix.is_none() && self.start.is_none() && self.end.is_none() && self.suffix.is_none()
    }

    /// A `TextDirective` object is valid if it contains the `start` token.
    /// All other tokens are optional.
    fn is_valid(&self) -> bool {
        self.start.is_some()
    }
}
/// Parses a fragment directive into a list of `TextDirective` objects and removes
/// the fragment directive from the input url.
///
/// If the hash does not contain a fragment directive, `url` is not modified
/// and this function returns `None`.
/// Otherwise, the fragment directive is removed from `url` and parsed.
/// If parsing fails, this function returns `None`.
pub fn parse_fragment_directive_and_remove_it_from_hash(
    url: &str,
) -> Option<(&str, &str, Vec<TextDirective>)> {
    // The fragment directive is preceded by a `:~:`,
    // which is only allowed to appear in the hash once.
    // However (even if unlikely), it might appear outside of the hash,
    // so this code only considers it when it is after the `#`.
    // If there is no `#` in `url`, the whole input is considered to be the hash
    // (and not a full url).
    let first_hash_pos = url.find('#').unwrap_or_default();
    let mut fragment_directive_iter = url[first_hash_pos..].split(":~:");
    // Everything up to (but not including) the `:~:` stays in the url.
    let url_with_stripped_fragment_directive =
        &url[..first_hash_pos + fragment_directive_iter.next().unwrap_or_default().len()];
    // If there is no `:~:` at all, there is no fragment directive.
    let fragment_directive = fragment_directive_iter.next()?;
    if fragment_directive_iter.next().is_some() {
        // There are multiple occurrences of `:~:`, which is not allowed.
        return None;
    }
    // - directives are separated by `&`.
    // - if a directive does not start with `text=`, it is not a text directive
    //   and will be ignored.
    // - if parsing of a text directive fails (for whatever reason), it will be ignored.
    let text_directives: Vec<_> = fragment_directive
        .split('&')
        .filter_map(TextDirective::from_percent_encoded_string)
        .collect();
    if text_directives.is_empty() {
        return None;
    }
    Some((
        // A now-empty hash is removed entirely (`foo#:~:text=bar` -> `foo`).
        url_with_stripped_fragment_directive
            .strip_suffix('#')
            .unwrap_or(url_with_stripped_fragment_directive),
        fragment_directive,
        text_directives,
    ))
}
/// Creates a percent-encoded text fragment string.
///
/// The returned string starts with `:~:`, so that it can be appended
/// to a normal fragment.
/// Text directives which are not valid (i.e., they are missing the `start` parameter)
/// are skipped.
///
/// Returns `None` if `text_directives` contains no valid text directive.
pub fn create_fragment_directive_string(text_directives: &[TextDirective]) -> Option<String> {
    // Note: taking `&[TextDirective]` instead of `&Vec<TextDirective>` is
    // backward compatible, since `&Vec<_>` arguments coerce to slices.
    let encoded_fragment_directives: Vec<_> = text_directives
        .iter()
        .filter(|directive| directive.is_valid())
        .map(|directive| directive.to_percent_encoded_string())
        // `to_percent_encoded_string()` returns an empty string for invalid
        // directives; those must not end up in the output.
        .filter(|directive_string| !directive_string.is_empty())
        .collect();
    if encoded_fragment_directives.is_empty() {
        return None;
    }
    Some(String::from(":~:") + &encoded_fragment_directives.join("&"))
}
/// Creates the percent-encoded text directive string for a single text directive.
/// Returns `None` if the given text directive is invalid
/// (i.e. it is missing the `start` token).
pub fn create_text_directive_string(text_directive: &TextDirective) -> Option<String> {
    text_directive
        .is_valid()
        .then(|| text_directive.to_percent_encoded_string())
}

View file

@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use nsstring::{nsCString, nsString};
use thin_vec::ThinVec;
pub mod fragment_directive_impl;
mod test;
/// This struct contains the percent-decoded parts of a text directive.
/// All parts besides `start` are optional (which is indicated by an empty string).
///
/// This struct uses Gecko String types, whereas the parser internally uses Rust types.
/// Therefore, conversion functions are provided.
#[repr(C)]
pub struct TextDirective {
    prefix: nsString,
    start: nsString,
    end: nsString,
    suffix: nsString,
}

impl TextDirective {
    /// Creates a `TextDirective` object (which uses Gecko string types) from a
    /// `fragment_directive_impl::TextDirective` (which uses Rust string types).
    fn from_rust_type(element: &fragment_directive_impl::TextDirective) -> Self {
        // Converts an optional parameter into its string value,
        // mapping an absent parameter to an empty string.
        let to_ns_string =
            |parameter: &Option<fragment_directive_impl::TextDirectiveParameter>| {
                parameter
                    .as_ref()
                    .map_or_else(nsString::new, |token| nsString::from(token.value()))
            };
        Self {
            prefix: to_ns_string(element.prefix()),
            start: to_ns_string(element.start()),
            end: to_ns_string(element.end()),
            suffix: to_ns_string(element.suffix()),
        }
    }

    /// Converts the contents of this object into Rust types.
    /// Returns `None` if the given fragment is not valid.
    /// The only invalid condition is a fragment that is missing the `start` token.
    fn to_rust_type(&self) -> Option<fragment_directive_impl::TextDirective> {
        fragment_directive_impl::TextDirective::from_parts(
            self.prefix.to_string(),
            self.start.to_string(),
            self.end.to_string(),
            self.suffix.to_string(),
        )
    }
}
/// Result of the `parse_fragment_directive()` function.
///
/// The result contains the original given URL without the fragment directive,
/// an unsanitized string version of the extracted fragment directive,
/// and an array of the parsed text directives.
#[repr(C)]
pub struct ParsedFragmentDirectiveResult {
    // The input URL with the fragment directive (`:~:...`) removed.
    url_without_fragment_directive: nsCString,
    // The raw fragment directive string (everything after `:~:`), not sanitized.
    fragment_directive: nsCString,
    // All successfully parsed text directives, percent-decoded.
    text_directives: ThinVec<TextDirective>,
}
/// Parses the fragment directive from a given URL.
///
/// This function writes the result data into `result`.
/// The result consists of
/// - the input url without the fragment directive,
/// - the fragment directive as unparsed string,
/// - a list of the parsed and percent-decoded text directives.
///
/// Directives which are unknown will be ignored.
/// If new directive types are added in the future, they should also be considered here.
/// This function returns false if no fragment directive is found, or it could not be parsed.
#[no_mangle]
pub extern "C" fn parse_fragment_directive(
    url: &nsCString,
    result: &mut ParsedFragmentDirectiveResult,
) -> bool {
    // Reset all out-parameter fields before doing anything else.
    result.url_without_fragment_directive = nsCString::new();
    result.fragment_directive = nsCString::new();
    result.text_directives.clear();
    let url_utf8 = url.to_utf8();
    match fragment_directive_impl::parse_fragment_directive_and_remove_it_from_hash(&url_utf8) {
        Some((stripped_url, fragment_directive, text_directives)) => {
            result.url_without_fragment_directive.assign(&stripped_url);
            result.fragment_directive.assign(&fragment_directive);
            result
                .text_directives
                .extend(text_directives.iter().map(TextDirective::from_rust_type));
            true
        }
        None => false,
    }
}
/// Creates a percent-encoded fragment directive string from a given list of `TextDirective`s.
///
/// The returned string has this form:
/// `:~:text=[prefix1-,]start1[,end1][,-suffix1]&text=[prefix2-,]start2[,end2][,-suffix2]`
///
/// Invalid `TextDirective`s are ignored, where "invalid" means that no `start` token is provided.
/// If there are no valid `TextDirective`s, `fragment_directive` is left empty
/// and this function returns false.
#[no_mangle]
pub extern "C" fn create_fragment_directive(
    text_directives: &ThinVec<TextDirective>,
    fragment_directive: &mut nsCString,
) -> bool {
    // Convert the FFI directives into their Rust representation;
    // `to_rust_type()` drops any directive without a `start` token.
    let directives_rust: Vec<_> = text_directives
        .iter()
        .filter_map(|directive| directive.to_rust_type())
        .collect();
    if let Some(fragment_directive_rust) =
        fragment_directive_impl::create_fragment_directive_string(&directives_rust)
    {
        fragment_directive.assign(&fragment_directive_rust);
        return true;
    }
    false
}
/// Creates a percent-encoded text directive string for a single text directive.
/// The returned string has the form `text=[prefix-,]start[,end][,-suffix]`.
/// If the provided `TextDirective` is invalid (i.e. it has no `start` attribute),
/// the outparam `directive_string` is empty and the function returns false.
#[no_mangle]
pub extern "C" fn create_text_directive(
    text_directive: &TextDirective,
    directive_string: &mut nsCString,
) -> bool {
    let maybe_directive_string = text_directive.to_rust_type().and_then(|directive_rust| {
        fragment_directive_impl::create_text_directive_string(&directive_rust)
    });
    match maybe_directive_string {
        Some(directive_string_rust) => {
            directive_string.assign(&directive_string_rust);
            true
        }
        None => false,
    }
}

View file

@ -0,0 +1,599 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#[cfg(test)]
mod test {
use crate::fragment_directive_impl::{
create_fragment_directive_string, parse_fragment_directive_and_remove_it_from_hash,
TextDirective,
};
/// This test verifies that valid combinations of [prefix-,]start[,end][,-suffix] are parsed correctly.
#[test]
fn test_parse_fragment_directive_with_one_text_directive() {
    // Compares one optional parsed token against its expected value.
    // This replaces four copy-pasted if/else cascades in the loop below.
    fn assert_token(
        url: &str,
        name: &str,
        actual: &Option<crate::fragment_directive_impl::TextDirectiveParameter>,
        expected: Option<&str>,
    ) {
        match expected {
            None => assert!(
                actual.is_none(),
                "There must be no `{}` token (test case `{}`).",
                name,
                url
            ),
            Some(expected_value) => assert_eq!(
                actual
                    .as_ref()
                    .unwrap_or_else(|| panic!("There must be a `{}` token.", name))
                    .value(),
                expected_value,
                "Wrong value for `{}` (test case `{}`).",
                name,
                url
            ),
        }
    }
    let test_cases = vec![
        ("#:~:text=start", (None, Some("start"), None, None)),
        (
            "#:~:text=start,end",
            (None, Some("start"), Some("end"), None),
        ),
        (
            "#:~:text=prefix-,start",
            (Some("prefix"), Some("start"), None, None),
        ),
        (
            "#:~:text=prefix-,start,end",
            (Some("prefix"), Some("start"), Some("end"), None),
        ),
        (
            "#:~:text=prefix-,start,end,-suffix",
            (Some("prefix"), Some("start"), Some("end"), Some("suffix")),
        ),
        (
            "#:~:text=start,-suffix",
            (None, Some("start"), None, Some("suffix")),
        ),
        (
            "#:~:text=start,end,-suffix",
            (None, Some("start"), Some("end"), Some("suffix")),
        ),
        ("#:~:text=text=", (None, Some("text="), None, None)),
    ];
    for (url, (prefix, start, end, suffix)) in test_cases {
        let (stripped_url, fragment_directive, result) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string
should be unsanitized and therefore match the input string."
        );
        assert_eq!(result.len(), 1, "There must be one parsed text fragment.");
        assert_eq!(
            stripped_url, "",
            "The fragment directive must be removed from the url hash."
        );
        let text_directive = result.first().unwrap();
        assert_token(url, "prefix", text_directive.prefix(), prefix);
        assert_token(url, "start", text_directive.start(), start);
        assert_token(url, "end", text_directive.end(), end);
        assert_token(url, "suffix", text_directive.suffix(), suffix);
    }
}
/// Verifies that the fragment directive is stripped correctly from full URLs,
/// including URLs with queries, ordinary fragments, and a `:~:` outside of the hash.
#[test]
fn test_parse_full_url() {
    let test_cases = [
        ("https://example.com#:~:text=foo", "https://example.com"),
        (
            "https://example.com/some/page.html?query=answer#:~:text=foo",
            "https://example.com/some/page.html?query=answer",
        ),
        (
            "https://example.com/some/page.html?query=answer#fragment:~:text=foo",
            "https://example.com/some/page.html?query=answer#fragment",
        ),
        (
            "http://example.com/page.html?query=irrelevant:~:#bar:~:text=foo",
            "http://example.com/page.html?query=irrelevant:~:#bar",
        ),
    ];
    for (url, expected_stripped_url) in test_cases {
        let (stripped_url, fragment_directive, _) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result");
        assert_eq!(
            stripped_url, expected_stripped_url,
            "The stripped url is not correct."
        );
        assert_eq!(fragment_directive, "text=foo");
    }
}
/// This test verifies that a text fragment is parsed correctly if it is preceded
/// by a normal fragment (i.e. `#foo:~:text=bar`).
/// (Note: the doc used to claim "preceded or followed", but only the preceded
/// case is exercised here.)
#[test]
fn test_parse_text_fragment_after_fragments() {
    let url = "#foo:~:text=start";
    let (stripped_url, fragment_directive, result) =
        parse_fragment_directive_and_remove_it_from_hash(url)
            .expect("The parser must find a result.");
    assert_eq!(
        result.len(),
        1,
        "There must be exactly one parsed text fragment."
    );
    assert_eq!(
        stripped_url, "#foo",
        "The fragment directive was not removed correctly."
    );
    assert_eq!(
        fragment_directive, "text=start",
        "The fragment directive was not extracted correctly."
    );
    let text_directive = result.first().unwrap();
    assert!(
        text_directive.prefix().is_none(),
        "There is no `prefix` token."
    );
    assert_eq!(
        text_directive
            .start()
            .as_ref()
            .expect("There must be a `start` token.")
            .value(),
        "start"
    );
    assert!(text_directive.end().is_none(), "There is no `end` token.");
    assert!(
        text_directive.suffix().is_none(),
        "There is no `suffix` token."
    );
}
/// Ensure that multiple text fragments are parsed correctly.
#[test]
fn test_parse_multiple_text_fragments() {
    // Compares one optional parsed token against its expected value.
    fn assert_value(
        actual: &Option<crate::fragment_directive_impl::TextDirectiveParameter>,
        expected: Option<&str>,
        name: &str,
    ) {
        match expected {
            None => assert!(actual.is_none(), "There is no `{}` token.", name),
            Some(expected_value) => assert_eq!(
                actual
                    .as_ref()
                    .unwrap_or_else(|| panic!("There must be a `{}` token.", name))
                    .value(),
                expected_value
            ),
        }
    }
    let url = "#:~:text=prefix-,start,-suffix&text=foo&text=bar,-suffix";
    let (_, _, text_directives) = parse_fragment_directive_and_remove_it_from_hash(url)
        .expect("The parser must find a result.");
    // Bug fix: the message used to say "exactly two" while asserting three.
    assert_eq!(
        text_directives.len(),
        3,
        "There must be exactly three parsed text fragments."
    );
    // Expected (prefix, start, end, suffix) for each of the three directives.
    let expected = [
        (Some("prefix"), Some("start"), None, Some("suffix")),
        (None, Some("foo"), None, None),
        (None, Some("bar"), None, Some("suffix")),
    ];
    for (directive, (prefix, start, end, suffix)) in text_directives.iter().zip(expected) {
        assert_value(directive.prefix(), prefix, "prefix");
        assert_value(directive.start(), start, "start");
        assert_value(directive.end(), end, "end");
        assert_value(directive.suffix(), suffix, "suffix");
    }
}
/// Multiple text directives should be parsed correctly
/// if they are surrounded or separated by unknown directives.
#[test]
fn test_parse_multiple_text_directives_with_unknown_directive_in_between() {
    let urls = [
        "#:~:foo&text=start1&text=start2",
        "#:~:text=start1&foo&text=start2",
        "#:~:text=start1&text=start2&foo",
    ];
    for url in urls {
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string is unsanitized
and should contain the unknown directive."
        );
        assert_eq!(
            text_directives.len(),
            2,
            "There must be exactly two parsed text fragments."
        );
        for (directive, expected_start) in text_directives.iter().zip(["start1", "start2"]) {
            assert_eq!(
                directive
                    .start()
                    .as_ref()
                    .expect("There must be a `start` token.")
                    .value(),
                expected_start
            );
        }
    }
}
/// Ensures that input that doesn't contain a text fragment does not produce a result.
/// This includes the use of partial identifying tokens necessary for a text fragment
/// (e.g. `:~:` without `text=`, `text=foo` without the `:~:` or multiple occurrences of `:~:`).
/// In these cases, the parser must return `None` to indicate that there are no valid text fragments.
#[test]
fn test_parse_invalid_or_unknown_fragment_directive() {
    let invalid_urls = [
        "#foo",
        "#foo:",
        "#foo:~:",
        "#foo:~:bar",
        "text=prefix-,start",
        "#:~:text=foo-,bar,-baz:~:text=foo",
    ];
    for url in invalid_urls {
        assert!(
            parse_fragment_directive_and_remove_it_from_hash(url).is_none(),
            "The fragment `{}` does not contain a valid or known fragment directive.",
            url
        );
    }
}
/// Ensures that ill-formed text directives (but valid fragment directives)
/// (starting correctly with `:~:text=`) are not parsed.
/// Instead `None` must be returned.
/// Test cases include invalid combinations of `prefix`/`suffix`es,
/// additional `,`s, too many `start`/`end` tokens, or empty text fragments.
#[test]
fn test_parse_invalid_text_fragments() {
    let invalid_urls = [
        "#:~:text=start,start,start",
        "#:~:text=prefix-,prefix-",
        "#:~:text=prefix-,-suffix",
        "#:~:text=prefix-,start,start,start",
        "#:~:text=prefix-,start,start,start,-suffix",
        "#:~:text=start,start,start,-suffix",
        "#:~:text=prefix-,start,end,-suffix,foo",
        "#:~:text=foo,prefix-,start",
        "#:~:text=prefix-,,start,",
        "#:~:text=,prefix,start",
        "#:~:text=",
    ];
    for url in invalid_urls {
        assert!(
            parse_fragment_directive_and_remove_it_from_hash(url).is_none(),
            "The fragment directive `{}` does not contain a valid text directive.",
            url
        );
    }
}
/// Ensure that out of multiple text fragments only the invalid ones are ignored
/// while valid text fragments are still returned.
/// Since correct parsing of multiple text fragments as well as
/// several forms of invalid text fragments are already tested in
/// `test_parse_multiple_text_fragments` and `test_parse_invalid_text_fragments()`,
/// it should be enough to test this with only one fragment directive
/// that contains two text fragments, one of them being invalid.
#[test]
fn test_valid_and_invalid_text_directives() {
    let urls = [
        "#:~:text=start&text=,foo,",
        "#:~:text=foo,foo,foo&text=start",
    ];
    for url in urls {
        let (_, fragment_directive, text_directives) =
            parse_fragment_directive_and_remove_it_from_hash(url)
                .expect("The parser must find a result.");
        assert_eq!(
            fragment_directive,
            &url[4..],
            "The extracted fragment directive string is unsanitized
and should contain invalid text directives."
        );
        assert_eq!(
            text_directives.len(),
            1,
            "There must be exactly one parsed text fragment."
        );
        let only_directive = text_directives.first().unwrap();
        assert_eq!(
            only_directive
                .start()
                .as_ref()
                .expect("There must be a `start` value.")
                .value(),
            "start",
            "The `start` value of the text directive has the wrong value."
        );
    }
}
/// Ensures that a fragment directive that contains percent-encoded characters
/// is decoded correctly. This explicitly includes characters which are used
/// for identifying text fragments, i.e. `#`, `,`, `&`, `:`, `~` and `-`.
#[test]
fn test_parse_percent_encoding_tokens() {
    let url = "#:~:text=prefix%26-,start%20and%2C,end%23,-%26suffix%2D";
    let (_, fragment_directive, text_directives) =
        parse_fragment_directive_and_remove_it_from_hash(url)
            .expect("The parser must find a result.");
    assert_eq!(
        fragment_directive,
        &url[4..],
        "The extracted fragment directive string is unsanitized
and should contain the original and percent-decoded string."
    );
    let text_directive = text_directives.first().unwrap();
    // Bug fix: the `expect()` messages below used to say "prefix" for all four
    // tokens; also, the useless empty-string message arguments were removed.
    assert_eq!(
        text_directive
            .prefix()
            .as_ref()
            .expect("There must be a prefix.")
            .value(),
        "prefix&"
    );
    assert_eq!(
        text_directive
            .start()
            .as_ref()
            .expect("There must be a start.")
            .value(),
        "start and,"
    );
    assert_eq!(
        text_directive
            .end()
            .as_ref()
            .expect("There must be an end.")
            .value(),
        "end#"
    );
    assert_eq!(
        text_directive
            .suffix()
            .as_ref()
            .expect("There must be a suffix.")
            .value(),
        "&suffix-"
    );
}
/// Ensures that a text fragment is created correctly,
/// based on a given combination of tokens.
/// This includes all sorts of combinations of
/// `prefix`, `suffix`, `start` and `end`,
/// as well as values for these tokens which contain
/// characters that need to be encoded because they are
/// identifiers for text fragments
/// (`#`, `,`, `&`, `:`, `~` and `-`).
#[test]
fn test_create_fragment_directive() {
    for ((prefix, start, end, suffix), expected_fragment_directive) in [
        (("", "start", "", ""), ":~:text=start"),
        (("", "start", "end", ""), ":~:text=start,end"),
        (("prefix", "start", "end", ""), ":~:text=prefix-,start,end"),
        (
            ("prefix", "start", "end", "suffix"),
            ":~:text=prefix-,start,end,-suffix",
        ),
        (("", "start", "end", "suffix"), ":~:text=start,end,-suffix"),
        (
            ("prefix", "start", "", "suffix"),
            ":~:text=prefix-,start,-suffix",
        ),
        (
            ("prefix-", "start and,", "&end", "#:~:suffix"),
            ":~:text=prefix%2D-,start%20and%2C,%26end,-%23%3A%7E%3Asuffix",
        ),
    ] {
        let text_directive = TextDirective::from_parts(
            prefix.to_string(),
            start.to_string(),
            end.to_string(),
            suffix.to_string(),
        )
        .unwrap();
        let fragment_directive = create_fragment_directive_string(&vec![text_directive])
            .expect("The given input must produce a valid fragment directive.");
        assert_eq!(fragment_directive, expected_fragment_directive);
    }
}
/// Ensures that a fragment directive is created correctly if multiple text fragments are given.
/// The resulting fragment must start with `:~:`
/// and each text fragment must be separated using `&text=`.
#[test]
fn test_create_fragment_directive_from_multiple_text_directives() {
    let text_directives: Vec<_> = ["start1", "start2", "start3"]
        .iter()
        .map(|start| {
            TextDirective::from_parts(
                String::new(),
                start.to_string(),
                String::new(),
                String::new(),
            )
            .unwrap()
        })
        .collect();
    let fragment_directive = create_fragment_directive_string(&text_directives)
        .expect("The given input must produce a valid fragment directive.");
    assert_eq!(
        fragment_directive, ":~:text=start1&text=start2&text=start3",
        "The created fragment directive is wrong for multiple fragments."
    );
}
}

View file

@ -298,6 +298,7 @@ if CONFIG["FUZZING"]:
if CONFIG["COMPILE_ENVIRONMENT"]:
EXPORTS.mozilla.dom += [
"!fragmentdirectives_ffi_generated.h",
"!GeneratedElementDocumentState.h",
"RustTypes.h",
]
@ -307,6 +308,11 @@ if CONFIG["COMPILE_ENVIRONMENT"]:
inputs=["rust"],
)
CbindgenHeader(
"fragmentdirectives_ffi_generated.h",
inputs=["fragmentdirectives"],
)
UNIFIED_SOURCES += [
"!UseCounterMetrics.cpp",
"AbstractRange.cpp",

View file

@ -67,6 +67,7 @@ mozannotation_server = { path = "../../../crashreporter/mozannotation_server",
gecko-profiler = { path = "../../../../tools/profiler/rust-api"}
midir_impl = { path = "../../../../dom/midi/midir_impl", optional = true }
dom = { path = "../../../../dom/base/rust" }
dom_fragmentdirectives = { path="../../../../dom/base/fragmentdirectives" }
origin-trials-ffi = { path = "../../../../dom/origin-trials/ffi" }
jog = { path = "../../../components/glean/bindings/jog" }
dap_ffi = { path = "../../../components/telemetry/dap/ffi" }

View file

@ -24,6 +24,7 @@ extern crate cubeb_coreaudio;
#[cfg(feature = "cubeb_pulse_rust")]
extern crate cubeb_pulse;
extern crate data_storage;
extern crate dom_fragmentdirectives;
extern crate encoding_glue;
extern crate fog_control;
extern crate gecko_profiler;