/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef DOM_TEXTDIRECTIVECREATOR_H_ #define DOM_TEXTDIRECTIVECREATOR_H_ #include #include "RangeBoundary.h" #include "mozilla/dom/fragmentdirectives_ffi_generated.h" #include "TextDirectiveUtil.h" #include "nsStringFwd.h" #include "mozilla/RefPtr.h" #include "mozilla/Result.h" class nsRange; namespace mozilla { class ErrorResult; } namespace mozilla::dom { class Document; /** * @brief Helper class to create a text directive string from a given `Range`. * * The class provides a public static creator function which encapsulates all * necessary logic. * This class serves as a base class that defines the main algorithm, and is * subclassed twice for exact and range-based matching. */ class TextDirectiveCreator { public: /** * @brief Static creator function. Takes a `Range` and creates a text * directive string, if possible. * * @param aDocument The document in which `aInputRange` lives. * @param aInputRange The input range. This range will not be modified. * @param aWatchdog A watchdog to ensure the operation does not run * longer than the predefined timeout. * * @return Returns a percent-encoded text directive string on success, an * empty string if it's not possible to create a text fragment for the * given range, or an error code. */ static Result CreateTextDirectiveFromRange( Document* aDocument, AbstractRange* aInputRange, const TimeoutWatchdog* aWatchdog); virtual ~TextDirectiveCreator() = default; protected: TextDirectiveCreator(Document* aDocument, AbstractRange* aRange, const TimeoutWatchdog* aWatchdog); /** * @brief Ensures the boundary points of the range point to word boundaries. * * This function always returns a new range. */ static Result, ErrorResult> ExtendRangeToWordBoundaries( AbstractRange* aRange); /** * @brief Determines whether exact or range-based matching should be used. * * This function searches for a block boundary in `aRange`, which requires * range-based matching. If there is no block boundary, but the range content * is longer than a threshold, range-based matching is used as well. * This threshold is defined by the pref * `dom.text_fragments.create_text_fragment.exact_match_max_length`. * */ static Result MustUseRangeBasedMatching( AbstractRange* aRange); /** * @brief Creates an instance either for exact or range-based matching. */ static Result, ErrorResult> CreateInstance( Document* aDocument, AbstractRange* aRange, const TimeoutWatchdog* aWatchdog); /** * @brief Collects text content surrounding the target range. * * The context terms are then stored both in normal and fold case form. */ virtual Result CollectContextTerms() = 0; /** * @brief Common helper which collects the prefix term of the target range. */ Result CollectPrefixContextTerm(); /** * @brief Common helper which collects the suffix term of the target range. */ Result CollectSuffixContextTerm(); /** * @brief Collect the word begin / word end distances for the context terms. * * For start (for range-based matching) and suffix terms, the search direction * is left-to-right. Therefore, the distances are based off the beginning of * the context terms and use the word end boundary. * * For prefix and end (for range-based matching), the search direction is * right-to-left. Therefore, the distances are based off the end of the * context terms and use the word start boundary. * * The distances are always sorted, so that the first entry points to the * nearest word boundary in search direction. * * This method returns false if collecting context term word boundary * distances failed in a way that it's not considered a failure, but rather * it's not possible to create a text directive for the target range. * This can happen if the target range is too long for exact matching, but * does not contain a word boundary. */ virtual bool CollectContextTermWordBoundaryDistances() = 0; /** * @brief Searches the document for other occurrences of the target range and * converts the results into a comparable format. * * This method searches the partial document from the beginning up to the * target range for occurrences of the target range content. * This needs to be done differently based on whether matching is exact or * range-based. For exact matching, the whole text content of the target range * is searched for. For range-based matching, two search runs are required: * One for the minimal `start` term (ie., the first word), which ends at the * beginning of the target range. And one for the minimal `end` term (ie., the * last word), which starts at the beginning of the target range and ends * _before_ its end. * The resulting lists of matching ranges do not exclude the target range. */ virtual Result FindAllMatchingCandidates() = 0; /** * @brief Find all occurrences of `aSearchQuery` in the partial document. * * This method uses `nsFind` to perform a case-insensitive search for * `aSearchQuery` in the partial document from `aSearchStart` to `aSearchEnd`. * * @return List of `Range`s which have the case-insensitive-same content as * `aSearchQuery`. */ Result>, ErrorResult> FindAllMatchingRanges( const nsString& aSearchQuery, const RangeBoundary& aSearchStart, const RangeBoundary& aSearchEnd); /** * @brief Creates the shortest possible text directive. * * @return A percent-encoded string containing a text directive. Returns empty * string in cases where it's not possible to create a text directive. */ Result CreateTextDirective(); /** * @brief Creates unique substring length arrays which are extended to the * nearest word boundary. */ static std::tuple, nsTArray> ExtendSubstringLengthsToWordBoundaries( const nsTArray>& aExactSubstringLengths, const Span& aFirstWordPositions, const Span& aSecondWordPositions); /** * @brief Test all combinations to identify the shortest text directive. */ virtual Maybe FindShortestCombination() const = 0; /** * @brief Perform a brute-force optimization run to find the shortest * combination of a combination of two context terms. * * Each combination of the extended values is compared against all exact * values. It is only considered valid if at least one value is longer than * the exact lengths. * * @param aExactWordLengths Array of tuples containing the exact * common sub string lengths of this * combination. * @param aFirstExtendedToWordBoundaries All valid substring lengths for the * first context term, extended to its * next word boundary in reading * direction. * @param aSecondExtendedToWordBoundaries All valid substring lengths for the * second context term, extended to its * next word boundary in reading * direction. * @return A tuple of sub string lengths extended to word boundaries, which is * the shortest allowed combination to eliminate all matches. * Returns `Nothing` if it's not possible to eliminate all matches. */ static Maybe> CheckAllCombinations( const nsTArray>& aExactWordLengths, const nsTArray& aFirstExtendedToWordBoundaries, const nsTArray& aSecondExtendedToWordBoundaries); nsString mPrefixContent; nsString mPrefixFoldCaseContent; nsTArray mPrefixWordBeginDistances; nsString mStartContent; nsString mSuffixContent; nsString mSuffixFoldCaseContent; nsTArray mSuffixWordEndDistances; NotNull> mDocument; NotNull> mRange; /** * The watchdog ensures that the algorithm exits after a defined time * duration, to ensure that the main thread is not blocked for too long. * * The duration is defined by the pref * `dom.text_fragments.create_text_fragment.timeout`. */ RefPtr mWatchdog; nsContentUtils::NodeIndexCache mNodeIndexCache; }; /** * @brief Creator class which creates a range-based text directive. * */ class RangeBasedTextDirectiveCreator : public TextDirectiveCreator { private: using TextDirectiveCreator::TextDirectiveCreator; Result CollectContextTerms() override; bool CollectContextTermWordBoundaryDistances() override; Result FindAllMatchingCandidates() override; void FindStartMatchCommonSubstringLengths( const nsTArray>& aMatchRanges); void FindEndMatchCommonSubstringLengths( const nsTArray>& aMatchRanges); Maybe FindShortestCombination() const override; nsString mEndContent; // The fold case contents for start and end terms don't include the first/last // word of the start and end terms, because they are only used for finding the // common lengths for other matches. nsString mStartFoldCaseContent; nsString mEndFoldCaseContent; // These values are only passed into nsFind, therefore fold case is not // required. nsString mFirstWordOfStartContent; nsString mLastWordOfEndContent; // The lengths of the first/last word of the start and end terms, including // whitespace to the next word. // Therefore, these values are equal to // `m[Start|End]Content.Length() - m[Start|End]FoldCaseContent.Length()`. uint32_t mStartFirstWordLengthIncludingWhitespace = 0; uint32_t mEndLastWordLengthIncludingWhitespace = 0; // The distances are bound to the Fold Case Content strings, which do not // include the first/last word of the start and end terms. nsTArray mStartWordEndDistances; nsTArray mEndWordBeginDistances; nsTArray> mStartMatchCommonSubstringLengths; nsTArray> mEndMatchCommonSubstringLengths; }; /** * @brief Creator class which creates an exact match text directive. * */ class ExactMatchTextDirectiveCreator : public TextDirectiveCreator { private: using TextDirectiveCreator::TextDirectiveCreator; Result CollectContextTerms() override; bool CollectContextTermWordBoundaryDistances() override; Result FindAllMatchingCandidates() override; void FindCommonSubstringLengths( const nsTArray>& aMatchRanges); Maybe FindShortestCombination() const override; nsTArray> mCommonSubstringLengths; }; } // namespace mozilla::dom #endif