base32.js 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import { assertU8 } from './_utils.js'
  2. import { nativeEncoder, nativeDecoder, isHermes } from './platform.js'
  3. import { encodeAscii, decodeAscii } from './latin1.js'
  4. // See https://datatracker.ietf.org/doc/html/rfc4648
  5. const BASE32 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'] // RFC 4648, #6
  6. const BASE32HEX = [...'0123456789ABCDEFGHIJKLMNOPQRSTUV'] // RFC 4648, #7
  7. const BASE32_HELPERS = {}
  8. const BASE32HEX_HELPERS = {}
  9. export const E_CHAR = 'Invalid character in base32 input'
  10. export const E_PADDING = 'Invalid base32 padding'
  11. export const E_LENGTH = 'Invalid base32 length'
  12. export const E_LAST = 'Invalid last chunk'
  13. const useTemplates = isHermes // Faster on Hermes and JSC, but we use it only on Hermes
  14. // We construct output by concatenating chars, this seems to be fine enough on modern JS engines
  15. export function toBase32(arr, isBase32Hex, padding) {
  16. assertU8(arr)
  17. const fullChunks = Math.floor(arr.length / 5)
  18. const fullChunksBytes = fullChunks * 5
  19. let o = ''
  20. let i = 0
  21. const alphabet = isBase32Hex ? BASE32HEX : BASE32
  22. const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
  23. if (!helpers.pairs) {
  24. helpers.pairs = []
  25. if (nativeDecoder) {
  26. // Lazy to save memory in case if this is not needed
  27. helpers.codepairs = new Uint16Array(32 * 32)
  28. const u16 = helpers.codepairs
  29. const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
  30. for (let i = 0; i < 32; i++) {
  31. const ic = alphabet[i].charCodeAt(0)
  32. for (let j = 0; j < 32; j++) u8[(i << 6) | (j << 1)] = u8[(j << 6) | ((i << 1) + 1)] = ic
  33. }
  34. } else {
  35. const p = helpers.pairs
  36. for (let i = 0; i < 32; i++) {
  37. for (let j = 0; j < 32; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
  38. }
  39. }
  40. }
  41. const { pairs, codepairs } = helpers
  42. // Fast path for complete blocks
  43. // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
  44. if (nativeDecoder) {
  45. const oa = new Uint16Array(fullChunks * 4)
  46. for (let j = 0; i < fullChunksBytes; i += 5) {
  47. const a = arr[i]
  48. const b = arr[i + 1]
  49. const c = arr[i + 2]
  50. const d = arr[i + 3]
  51. const e = arr[i + 4]
  52. const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
  53. const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
  54. const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
  55. const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
  56. oa[j] = codepairs[x0]
  57. oa[j + 1] = codepairs[x1]
  58. oa[j + 2] = codepairs[x2]
  59. oa[j + 3] = codepairs[x3]
  60. j += 4
  61. }
  62. o = decodeAscii(oa)
  63. } else if (useTemplates) {
  64. // Templates are faster only on Hermes and JSC. Browsers have TextDecoder anyway
  65. for (; i < fullChunksBytes; i += 5) {
  66. const a = arr[i]
  67. const b = arr[i + 1]
  68. const c = arr[i + 2]
  69. const d = arr[i + 3]
  70. const e = arr[i + 4]
  71. const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
  72. const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
  73. const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
  74. const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
  75. o += `${pairs[x0]}${pairs[x1]}${pairs[x2]}${pairs[x3]}`
  76. }
  77. } else {
  78. for (; i < fullChunksBytes; i += 5) {
  79. const a = arr[i]
  80. const b = arr[i + 1]
  81. const c = arr[i + 2]
  82. const d = arr[i + 3]
  83. const e = arr[i + 4]
  84. const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
  85. const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
  86. const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
  87. const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
  88. o += pairs[x0]
  89. o += pairs[x1]
  90. o += pairs[x2]
  91. o += pairs[x3]
  92. }
  93. }
  94. // If we have something left, process it with a full algo
  95. let carry = 0
  96. let shift = 3 // First byte needs to be shifted by 3 to get 5 bits
  97. for (; i < arr.length; i++) {
  98. const x = arr[i]
  99. o += alphabet[carry | (x >> shift)] // shift >= 3, so this fits
  100. if (shift >= 5) {
  101. shift -= 5
  102. o += alphabet[(x >> shift) & 0x1f]
  103. }
  104. carry = (x << (5 - shift)) & 0x1f
  105. shift += 3 // Each byte prints 5 bits and leaves 3 bits
  106. }
  107. if (shift !== 3) o += alphabet[carry] // shift 3 means we have no carry left
  108. if (padding) o += ['', '======', '====', '===', '='][arr.length - fullChunksBytes]
  109. return o
  110. }
  111. // TODO: can this be optimized? This only affects non-Hermes barebone engines though
  112. const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array
  113. export function fromBase32(str, isBase32Hex) {
  114. let inputLength = str.length
  115. while (str[inputLength - 1] === '=') inputLength--
  116. const paddingLength = str.length - inputLength
  117. const tailLength = inputLength % 8
  118. const mainLength = inputLength - tailLength // multiples of 8
  119. if (![0, 2, 4, 5, 7].includes(tailLength)) throw new SyntaxError(E_LENGTH) // fast verification
  120. if (paddingLength > 7 || (paddingLength !== 0 && str.length % 8 !== 0)) {
  121. throw new SyntaxError(E_PADDING)
  122. }
  123. const alphabet = isBase32Hex ? BASE32HEX : BASE32
  124. const helpers = isBase32Hex ? BASE32HEX_HELPERS : BASE32_HELPERS
  125. if (!helpers.fromMap) {
  126. helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
  127. alphabet.forEach((c, i) => {
  128. helpers.fromMap[c.charCodeAt(0)] = helpers.fromMap[c.toLowerCase().charCodeAt(0)] = i
  129. })
  130. }
  131. const m = helpers.fromMap
  132. const arr = new Uint8Array(Math.floor((inputLength * 5) / 8))
  133. let at = 0
  134. let i = 0
  135. if (nativeEncoder) {
  136. const codes = encodeAscii(str, E_CHAR)
  137. for (; i < mainLength; i += 8) {
  138. // each 5 bits, grouped 5 * 4 = 20
  139. const x0 = codes[i]
  140. const x1 = codes[i + 1]
  141. const x2 = codes[i + 2]
  142. const x3 = codes[i + 3]
  143. const x4 = codes[i + 4]
  144. const x5 = codes[i + 5]
  145. const x6 = codes[i + 6]
  146. const x7 = codes[i + 7]
  147. const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
  148. const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
  149. if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
  150. arr[at] = a >> 12
  151. arr[at + 1] = (a >> 4) & 0xff
  152. arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
  153. arr[at + 3] = (b >> 8) & 0xff
  154. arr[at + 4] = b & 0xff
  155. at += 5
  156. }
  157. } else {
  158. for (; i < mainLength; i += 8) {
  159. // each 5 bits, grouped 5 * 4 = 20
  160. const x0 = str.charCodeAt(i)
  161. const x1 = str.charCodeAt(i + 1)
  162. const x2 = str.charCodeAt(i + 2)
  163. const x3 = str.charCodeAt(i + 3)
  164. const x4 = str.charCodeAt(i + 4)
  165. const x5 = str.charCodeAt(i + 5)
  166. const x6 = str.charCodeAt(i + 6)
  167. const x7 = str.charCodeAt(i + 7)
  168. const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
  169. const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
  170. if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
  171. arr[at] = a >> 12
  172. arr[at + 1] = (a >> 4) & 0xff
  173. arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
  174. arr[at + 3] = (b >> 8) & 0xff
  175. arr[at + 4] = b & 0xff
  176. at += 5
  177. }
  178. }
  179. // Last block, valid tailLength: 0 2 4 5 7, checked already
  180. // We check last chunk to be strict
  181. if (tailLength < 2) return arr
  182. const ab = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  183. if (ab < 0) throw new SyntaxError(E_CHAR)
  184. arr[at++] = ab >> 2
  185. if (tailLength < 4) {
  186. if (ab & 0x3) throw new SyntaxError(E_LAST)
  187. return arr
  188. }
  189. const cd = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  190. if (cd < 0) throw new SyntaxError(E_CHAR)
  191. arr[at++] = ((ab << 6) & 0xff) | (cd >> 4)
  192. if (tailLength < 5) {
  193. if (cd & 0xf) throw new SyntaxError(E_LAST)
  194. return arr
  195. }
  196. const e = m[str.charCodeAt(i++)]
  197. if (e < 0) throw new SyntaxError(E_CHAR)
  198. arr[at++] = ((cd << 4) & 0xff) | (e >> 1) // 4 + 4
  199. if (tailLength < 7) {
  200. if (e & 0x1) throw new SyntaxError(E_LAST)
  201. return arr
  202. }
  203. const fg = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  204. if (fg < 0) throw new SyntaxError(E_CHAR)
  205. arr[at++] = ((e << 7) & 0xff) | (fg >> 3) // 1 + 5 + 2
  206. // Can't be 8, so no h
  207. if (fg & 0x7) throw new SyntaxError(E_LAST)
  208. return arr
  209. }