Commit 86d6d937 authored by Jonathan Kew
Browse files

Bug 1554998 - Fix the handling of numeric characters by word-break:break-all. r=masayuki

Differential Revision: https://phabricator.services.mozilla.com/D32969

--HG--
extra : moz-landing-system : lando
parent a283135e
Loading
Loading
Loading
Loading
+5 −5
Original line number | Original line | Diff line number | Diff line
@@ -405,7 +405,7 @@ static int8_t GetClass(uint32_t u, LineBreaker::Strictness aLevel,
      /* INFIX_NUMERIC = 16,                [IS] */ CLASS_CHARACTER,
      /* INFIX_NUMERIC = 16,                [IS] */ CLASS_CHARACTER,
      /* LINE_FEED = 17,                    [LF] */ CLASS_BREAKABLE,
      /* LINE_FEED = 17,                    [LF] */ CLASS_BREAKABLE,
      /* NONSTARTER = 18,                   [NS] */ CLASS_CLOSE_LIKE_CHARACTER,
      /* NONSTARTER = 18,                   [NS] */ CLASS_CLOSE_LIKE_CHARACTER,
      /* NUMERIC = 19,                      [NU] */ CLASS_CHARACTER,
      /* NUMERIC = 19,                      [NU] */ CLASS_NUMERIC,
      /* OPEN_PUNCTUATION = 20,             [OP] */ CLASS_CHARACTER,
      /* OPEN_PUNCTUATION = 20,             [OP] */ CLASS_CHARACTER,
      /* POSTFIX_NUMERIC = 21,              [PO] */ CLASS_CHARACTER,
      /* POSTFIX_NUMERIC = 21,              [PO] */ CLASS_CHARACTER,
      /* PREFIX_NUMERIC = 22,               [PR] */ CLASS_CHARACTER,
      /* PREFIX_NUMERIC = 22,               [PR] */ CLASS_CHARACTER,
@@ -990,12 +990,12 @@ void LineBreaker::GetJISx4051Breaks(const char16_t* aChars, uint32_t aLength,


    // To implement word-break:break-all, we overwrite the line-break class of
    // To implement word-break:break-all, we overwrite the line-break class of
    // alphanumeric characters so they are treated the same as ideographic.
    // alphanumeric characters so they are treated the same as ideographic.
    // The relevant characters will have been assigned CLASS_CHARACTER or
    // The relevant characters will have been assigned CLASS_CHARACTER, _CLOSE,
    // CLASS_CLOSE by GetClass(), but those classes also include others that
    // or _NUMERIC by GetClass(), but those classes also include others that
    // we don't want to touch here, so we re-check the Unicode line-break class
    // we don't want to touch here, so we re-check the Unicode line-break class
    // to determine which ones to modify.
    // to determine which ones to modify.
    if (aWordBreak == WordBreak::BreakAll &&
    if (aWordBreak == WordBreak::BreakAll &&
        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
      auto cls = GetLineBreakClass(ch);
      auto cls = GetLineBreakClass(ch);
      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
          cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT ||
          cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT ||
@@ -1086,7 +1086,7 @@ void LineBreaker::GetJISx4051Breaks(const uint8_t* aChars, uint32_t aLength,
      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
    }
    }
    if (aWordBreak == WordBreak::BreakAll &&
    if (aWordBreak == WordBreak::BreakAll &&
        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
      auto cls = GetLineBreakClass(ch);
      auto cls = GetLineBreakClass(ch);
      // Don't need to check additional Japanese/Korean classes in 8-bit
      // Don't need to check additional Japanese/Korean classes in 8-bit
      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
      if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||