Bug 855184 part 1: Add BOUNDARY_CLUSTER so a11y can query grapheme clusters, AKA user-perceived characters. r=eeejay

Most OS APIs want a cluster when they ask for a "character", except ATK.
Rather than altering BOUNDARY_CHAR, I added a new BOUNDARY_CLUSTER.
Aside from being less risky and causing less churn, there are cases internally where we want to move a TextLeafPoint by character; e.g. to explicitly move to the next/previous Accessible or to move to the next/previous character in an abstract way without worrying about Accessible boundaries.
Calculating clusters is more expensive, so it doesn't make sense to move by cluster in those cases.

Differential Revision: https://phabricator.services.mozilla.com/D212517
This commit is contained in:
James Teh 2024-06-04 20:36:42 +00:00
parent caa1123fd9
commit 3b3a1815ea
5 changed files with 97 additions and 0 deletions

View file

@ -1126,6 +1126,9 @@ TextLeafPoint TextLeafPoint::FindBoundary(AccessibleTextBoundary aBoundaryType,
boundary = searchFrom.FindParagraphSameAcc(aDirection, includeOrigin,
ignoreListItemMarker);
break;
case nsIAccessibleText::BOUNDARY_CLUSTER:
boundary = searchFrom.FindClusterSameAcc(aDirection, includeOrigin);
break;
default:
MOZ_ASSERT_UNREACHABLE();
break;
@ -1369,6 +1372,62 @@ TextLeafPoint TextLeafPoint::FindParagraphSameAcc(
return TextLeafPoint();
}
/**
 * Locate the nearest grapheme cluster boundary in aDirection without leaving
 * mAcc. When aIncludeOrigin is true and this point already sits on a
 * boundary, this point itself is returned. A default-constructed
 * TextLeafPoint is returned when no boundary can be found in this Accessible.
 *
 * Clusters which cross nodes are not supported. We can live with that because
 * editor doesn't seem to fully support this either.
 */
TextLeafPoint TextLeafPoint::FindClusterSameAcc(nsDirection aDirection,
                                                bool aIncludeOrigin) const {
  const bool movingBackward = aDirection == eDirPrevious;

  // Cheap answers that don't require fetching the text. Because we never
  // cross nodes, offset 0 is always the start of a cluster.
  if (mOffset == 0) {
    if (aIncludeOrigin) {
      return *this;
    }
    if (movingBackward) {
      // Nothing precedes offset 0 in this Accessible.
      return TextLeafPoint();
    }
  } else if (mOffset == 1 && movingBackward && !aIncludeOrigin) {
    // The only boundary before offset 1 is offset 0. This shortcut is not
    // valid when the origin is included: offset 1 might itself start a
    // cluster, and we can't know that without segmenting the text.
    return TextLeafPoint(mAcc, 0);
  }

  nsAutoString leafText;
  mAcc->AppendTextTo(leafText);
  if (leafText.IsEmpty()) {
    return TextLeafPoint();
  }
  const int32_t leafLength = static_cast<int32_t>(leafText.Length());
  if (aDirection == eDirNext && mOffset == leafLength) {
    // Already at the end; there is nothing after us in this Accessible.
    return TextLeafPoint();
  }

  // GraphemeClusterBreakReverseIteratorUtf16 exists, but it "doesn't handle
  // conjoining Jamo and emoji", so the forward iterator is used for both
  // directions. Its Seek() always restarts from the beginning and calls
  // Next() repeatedly regardless of the seek offset, so the best we can do is
  // walk forward with Next() ourselves until we reach the offset we need.
  intl::GraphemeClusterBreakIteratorUtf16 breaks(leafText);
  // The most recent boundary strictly before the one being examined. Offset 0
  // always begins a cluster, so that is where we start.
  int32_t lastBoundary = 0;
  for (Maybe<uint32_t> found = breaks.Next(); found.isSome();
       found = breaks.Next()) {
    const int32_t boundary = static_cast<int32_t>(*found);
    if (boundary == mOffset && aIncludeOrigin) {
      return *this;
    }
    if (!movingBackward) {
      if (boundary > mOffset) {
        MOZ_ASSERT(aDirection == eDirNext);
        return TextLeafPoint(mAcc, boundary);
      }
      continue;
    }
    if (boundary >= mOffset) {
      // We've reached or passed the origin, so the boundary we saw just
      // before this one is the answer.
      return TextLeafPoint(mAcc, lastBoundary);
    }
    lastBoundary = boundary;
  }
  return TextLeafPoint();
}
bool TextLeafPoint::IsInSpellingError() const {
if (LocalAccessible* acc = mAcc->AsLocal()) {
auto domRanges = FindDOMSpellingErrors(acc, mOffset, mOffset + 1);

View file

@ -228,6 +228,9 @@ class TextLeafPoint final {
bool aIncludeOrigin,
bool aIgnoreListItemMarker = false) const;
/**
 * Find the grapheme cluster boundary before or after this point (according
 * to aDirection) within this point's own Accessible. Clusters which cross
 * nodes are not supported. If aIncludeOrigin is true and this point is
 * already at a cluster boundary, this point is returned. Returns a
 * default-constructed TextLeafPoint if no boundary is found in this
 * Accessible.
 */
TextLeafPoint FindClusterSameAcc(nsDirection aDirection,
bool aIncludeOrigin) const;
bool IsInSpellingError() const;
/**

View file

@ -22,6 +22,8 @@ interface nsIAccessibleText : nsISupports
const int32_t TEXT_OFFSET_END_OF_TEXT = -1;
const int32_t TEXT_OFFSET_CARET = -2;
// A single Unicode character. For a user-perceived character, see
// BOUNDARY_CLUSTER.
const AccessibleTextBoundary BOUNDARY_CHAR = 0;
const AccessibleTextBoundary BOUNDARY_WORD_START = 1;
const AccessibleTextBoundary BOUNDARY_WORD_END = 2;
@ -30,6 +32,10 @@ interface nsIAccessibleText : nsISupports
const AccessibleTextBoundary BOUNDARY_LINE_START = 5;
const AccessibleTextBoundary BOUNDARY_LINE_END = 6;
const AccessibleTextBoundary BOUNDARY_PARAGRAPH = 7;
// A grapheme cluster, AKA user-perceived character. This might consist of
// multiple Unicode characters, but a user will perceive this as a single
// character and it is treated as such by the caret, selection, etc.
const AccessibleTextBoundary BOUNDARY_CLUSTER = 8;
/**
* The current caret offset.

View file

@ -333,3 +333,31 @@ addAccessibleTask(
remoteIframe: true,
}
);
/**
 * Test cluster offsets.
 *
 * The markup deliberately mixes single-unit characters with multi-unit
 * grapheme clusters. Code points that are invisible or easily lost by editors
 * and tooling are written as escapes so the string always matches the
 * expected offsets below (26 UTF-16 code units in total):
 *   - "A" + U+0300 (combining grave): 2 units, offsets 0-1
 *   - "2": offset 2
 *   - U+1F926 U+200D U+2642 U+FE0F (man facepalming): 5 units, offsets 3-7
 *   - U+1F926 U+1F3FC U+200D U+2642 U+FE0F (with skin tone): 7 units,
 *     offsets 8-14
 *   - "5": offset 15
 *   - "x" followed by six combining marks: 7 units, offsets 16-22
 *   - "7": offset 23
 *   - "E" + U+0300 (combining grave): 2 units, offsets 24-25
 */
addAccessibleTask(
  `<p id="clusters">A\u03002\u{1F926}\u200D\u2642\uFE0F\u{1F926}\u{1F3FC}\u200D\u2642\uFE0F5x͇͕̦̍͂͒7E\u0300</p>`,
  async function testCluster(browser, docAcc) {
    const clusters = findAccessibleChildByID(docAcc, "clusters");
    testCharacterCount(clusters, 26);
    // Every offset inside a cluster must report the whole cluster.
    testTextAtOffset(clusters, BOUNDARY_CLUSTER, [
      [0, 1, "À", 0, 2],
      [2, 2, "2", 2, 3],
      [3, 7, "🤦‍♂️", 3, 8],
      [8, 14, "🤦🏼‍♂️", 8, 15],
      [15, 15, "5", 15, 16],
      [16, 22, "x͇͕̦̍͂͒", 16, 23],
      [23, 23, "7", 23, 24],
      [24, 25, "È", 24, 26],
      [26, 26, "", 26, 26],
    ]);
    // Ensure that BOUNDARY_CHAR returns single Unicode characters.
    testTextAtOffset(clusters, BOUNDARY_CHAR, [
      [0, 0, "A", 0, 1],
      [1, 1, "̀", 1, 2],
    ]);
  },
  { chrome: true, topLevel: true }
);

View file

@ -9,6 +9,7 @@ const BOUNDARY_WORD_END = nsIAccessibleText.BOUNDARY_WORD_END;
const BOUNDARY_LINE_START = nsIAccessibleText.BOUNDARY_LINE_START;
const BOUNDARY_LINE_END = nsIAccessibleText.BOUNDARY_LINE_END;
const BOUNDARY_PARAGRAPH = nsIAccessibleText.BOUNDARY_PARAGRAPH;
const BOUNDARY_CLUSTER = nsIAccessibleText.BOUNDARY_CLUSTER;
const kTextEndOffset = nsIAccessibleText.TEXT_OFFSET_END_OF_TEXT;
const kCaretOffset = nsIAccessibleText.TEXT_OFFSET_CARET;