// char-code-definitions.cjs (7.0 KB)
  'use strict';

  // Numeric value used to signal "end of input" in place of a real code point
  // (isValidEscape() treats a second code point equal to EOF as not escapable).
  const EOF = 0;

  // https://drafts.csswg.org/css-syntax-3/
  // § 4.2. Definitions
  // digit
  // A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isDigit(code) {
      return code >= 0x0030 && code <= 0x0039;
  }
  // hex digit
  // A digit, or a code point between U+0041 LATIN CAPITAL LETTER A (A) and U+0046 LATIN CAPITAL LETTER F (F),
  // or a code point between U+0061 LATIN SMALL LETTER A (a) and U+0066 LATIN SMALL LETTER F (f).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isHexDigit(code) {
      return (
          isDigit(code) || // 0 .. 9
          (code >= 0x0041 && code <= 0x0046) || // A .. F
          (code >= 0x0061 && code <= 0x0066) // a .. f
      );
  }
  // uppercase letter
  // A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isUppercaseLetter(code) {
      return code >= 0x0041 && code <= 0x005A;
  }
  // lowercase letter
  // A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isLowercaseLetter(code) {
      return code >= 0x0061 && code <= 0x007A;
  }
  // letter
  // An uppercase letter or a lowercase letter.
  //
  // `code` is a numeric code point; the result is a boolean.
  function isLetter(code) {
      return isUppercaseLetter(code) || isLowercaseLetter(code);
  }
  // non-ASCII code point
  // A code point with a value equal to or greater than U+0080 <control>.
  //
  // 2024-09-02: The latest spec narrows the range for non-ASCII characters (see https://github.com/csstree/csstree/issues/188).
  // However, all modern browsers support a wider range, and strictly following the latest spec could result
  // in some CSS being parsed incorrectly, even though it works in the browser. Therefore, this function adheres
  // to the previous, broader definition of non-ASCII characters.
  //
  // `code` is a numeric code point; the result is a boolean.
  function isNonAscii(code) {
      return code >= 0x0080;
  }
  // name-start code point
  // A letter, a non-ASCII code point, or U+005F LOW LINE (_).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isNameStart(code) {
      return isLetter(code) || isNonAscii(code) || code === 0x005F;
  }
  // name code point
  // A name-start code point, a digit, or U+002D HYPHEN-MINUS (-).
  //
  // `code` is a numeric code point; the result is a boolean.
  function isName(code) {
      return isNameStart(code) || isDigit(code) || code === 0x002D;
  }
  // non-printable code point
  // A code point between U+0000 NULL and U+0008 BACKSPACE, or U+000B LINE TABULATION,
  // or a code point between U+000E SHIFT OUT and U+001F INFORMATION SEPARATOR ONE, or U+007F DELETE.
  //
  // `code` is a numeric code point; the result is a boolean.
  function isNonPrintable(code) {
      return (
          (code >= 0x0000 && code <= 0x0008) ||
          (code === 0x000B) ||
          (code >= 0x000E && code <= 0x001F) ||
          (code === 0x007F)
      );
  }
  // newline
  // U+000A LINE FEED. Note that U+000D CARRIAGE RETURN and U+000C FORM FEED are not included in this definition,
  // as they are converted to U+000A LINE FEED during preprocessing.
  // TODO: we don't do the preprocessing step, so U+000D CARRIAGE RETURN and U+000C FORM FEED
  // are checked for here as well.
  //
  // `code` is a numeric code point; the result is a boolean.
  function isNewline(code) {
      return code === 0x000A || code === 0x000D || code === 0x000C;
  }
  // whitespace
  // A newline, U+0009 CHARACTER TABULATION, or U+0020 SPACE.
  //
  // `code` is a numeric code point; the result is a boolean.
  function isWhiteSpace(code) {
      return isNewline(code) || code === 0x0020 || code === 0x0009;
  }
  // § 4.3.8. Check if two code points are a valid escape
  //
  // `first` and `second` are numeric code points; the result is a boolean.
  function isValidEscape(first, second) {
      // If the first code point is not U+005C REVERSE SOLIDUS (\), return false.
      if (first !== 0x005C) {
          return false;
      }

      // Otherwise, if the second code point is a newline or EOF, return false.
      if (isNewline(second) || second === EOF) {
          return false;
      }

      // Otherwise, return true.
      return true;
  }
  // § 4.3.9. Check if three code points would start an identifier
  //
  // `first`, `second` and `third` are consecutive numeric code points;
  // the result is a boolean.
  function isIdentifierStart(first, second, third) {
      // Look at the first code point:

      // U+002D HYPHEN-MINUS
      if (first === 0x002D) {
          // If the second code point is a name-start code point or a U+002D HYPHEN-MINUS,
          // or the second and third code points are a valid escape, return true. Otherwise, return false.
          return (
              isNameStart(second) ||
              second === 0x002D ||
              isValidEscape(second, third)
          );
      }

      // name-start code point
      if (isNameStart(first)) {
          // Return true.
          return true;
      }

      // U+005C REVERSE SOLIDUS (\)
      if (first === 0x005C) {
          // If the first and second code points are a valid escape, return true. Otherwise, return false.
          return isValidEscape(first, second);
      }

      // anything else
      // Return false.
      return false;
  }
  // § 4.3.10. Check if three code points would start a number
  //
  // `first`, `second` and `third` are consecutive numeric code points.
  // Where the spec answers true/false, this function returns an integer:
  // the count of leading code points that had to match (1, 2 or 3) when
  // a number starts here, or 0 when it does not.
  function isNumberStart(first, second, third) {
      // Look at the first code point:

      // U+002B PLUS SIGN (+)
      // U+002D HYPHEN-MINUS (-)
      if (first === 0x002B || first === 0x002D) {
          // If the second code point is a digit, the answer is "true": sign + digit => 2.
          if (isDigit(second)) {
              return 2;
          }

          // Otherwise, if the second code point is a U+002E FULL STOP (.)
          // and the third code point is a digit, the answer is "true": sign + dot + digit => 3.
          // Otherwise, the answer is "false" => 0.
          return second === 0x002E && isDigit(third) ? 3 : 0;
      }

      // U+002E FULL STOP (.)
      if (first === 0x002E) {
          // If the second code point is a digit, the answer is "true" (dot + digit => 2). Otherwise, 0.
          return isDigit(second) ? 2 : 0;
      }

      // digit
      if (isDigit(first)) {
          // The answer is "true": a single digit => 1.
          return 1;
      }

      // anything else
      // The answer is "false" => 0.
      return 0;
  }
  //
  // Misc
  //

  // detect BOM (https://en.wikipedia.org/wiki/Byte_order_mark)
  //
  // `code` is a numeric char code. Returns 1 when it equals 0xFEFF or 0xFFFE,
  // otherwise 0 (integers rather than booleans).
  function isBOM(code) {
      // UTF-16BE
      if (code === 0xFEFF) {
          return 1;
      }

      // UTF-16LE
      if (code === 0xFFFE) {
          return 1;
      }

      return 0;
  }
  // Fast code category
  // Only ASCII code points have a special meaning, that's why the map is defined for codes 0..127 only
  const CATEGORY = new Array(0x80);
  const EofCategory = 0x80;
  const WhiteSpaceCategory = 0x82;
  const DigitCategory = 0x83;
  const NameStartCategory = 0x84;
  const NonPrintableCategory = 0x85;

  // Every category value is >= 0x80, so it cannot collide with an ASCII code:
  // a code that falls into none of the categories is stored as itself.
  // NOTE: isNonPrintable(0) is true, so index 0 receives NonPrintableCategory and
  // the trailing `|| EofCategory` fallback is never taken inside this loop.
  for (let i = 0; i < CATEGORY.length; i++) {
      CATEGORY[i] =
          isWhiteSpace(i) && WhiteSpaceCategory ||
          isDigit(i) && DigitCategory ||
          isNameStart(i) && NameStartCategory ||
          isNonPrintable(i) && NonPrintableCategory ||
          i || EofCategory;
  }
  // Returns the category for a char code: codes below 0x80 are looked up in CATEGORY,
  // anything else (every code >= 0x80) is reported as NameStartCategory.
  function charCodeCategory(code) {
      return code < 0x80 ? CATEGORY[code] : NameStartCategory;
  }
  // Public API of the module (CommonJS named exports).
  // Kept as plain `exports.name = name` assignments, one per line.
  exports.DigitCategory = DigitCategory;
  exports.EofCategory = EofCategory;
  exports.NameStartCategory = NameStartCategory;
  exports.NonPrintableCategory = NonPrintableCategory;
  exports.WhiteSpaceCategory = WhiteSpaceCategory;
  exports.charCodeCategory = charCodeCategory;
  exports.isBOM = isBOM;
  exports.isDigit = isDigit;
  exports.isHexDigit = isHexDigit;
  exports.isIdentifierStart = isIdentifierStart;
  exports.isLetter = isLetter;
  exports.isLowercaseLetter = isLowercaseLetter;
  exports.isName = isName;
  exports.isNameStart = isNameStart;
  exports.isNewline = isNewline;
  exports.isNonAscii = isNonAscii;
  exports.isNonPrintable = isNonPrintable;
  exports.isNumberStart = isNumberStart;
  exports.isUppercaseLetter = isUppercaseLetter;
  exports.isValidEscape = isValidEscape;
  exports.isWhiteSpace = isWhiteSpace;