base64.nim 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module implements a base64 encoder and decoder.
  10. ##
  11. ## Unstable API.
  12. ##
  13. ## Base64 is an encoding and decoding technique used to convert binary
  14. ## data to an ASCII string format.
  15. ## Each Base64 digit represents exactly 6 bits of data. Three 8-bit
  16. ## bytes (i.e., a total of 24 bits) can therefore be represented by
  17. ## four 6-bit Base64 digits.
  18. ##[
  19. # Basic usage
  20. ## Encoding data
  21. ]##
  22. runnableExamples:
  23. let encoded = encode("Hello World")
  24. assert encoded == "SGVsbG8gV29ybGQ="
  25. ##
  26. ## Apart from strings you can also encode lists of integers or characters:
  27. ##
  28. runnableExamples:
  29. let encodedInts = encode([1'u8,2,3])
  30. assert encodedInts == "AQID"
  31. let encodedChars = encode(['h','e','y'])
  32. assert encodedChars == "aGV5"
  33. ##[
  34. ## Decoding data
  35. ]##
  36. runnableExamples:
  37. let decoded = decode("SGVsbG8gV29ybGQ=")
  38. assert decoded == "Hello World"
  39. ##[
  40. ## URL Safe Base64
  41. ]##
  42. runnableExamples:
  43. assert encode("c\xf7>", safe = true) == "Y_c-"
  44. assert encode("c\xf7>", safe = false) == "Y/c+"
  45. ## See also
  46. ## ========
  47. ##
  48. ## * `hashes module<hashes.html>`_ for efficient computations of hash values for diverse Nim types
  49. ## * `md5 module<md5.html>`_ for the MD5 checksum algorithm
  50. ## * `sha1 module<sha1.html>`_ for the SHA-1 checksum algorithm
  51. template cbBase(a, b): untyped = [
  52. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  53. 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  54. 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  55. 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  56. '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', a, b]
  57. let
  58. cb64 = cbBase('+', '/')
  59. cb64safe = cbBase('-', '_')
  60. const
  61. cb64VM = cbBase('+', '/')
  62. cb64safeVM = cbBase('-', '_')
  63. const
  64. invalidChar = 255
  65. template encodeSize(size: int): int = (size * 4 div 3) + 6
  66. template encodeInternal(s, alphabet: typed): untyped =
  67. ## encodes `s` into base64 representation.
  68. result.setLen(encodeSize(s.len))
  69. let
  70. padding = s.len mod 3
  71. inputEnds = s.len - padding
  72. var
  73. inputIndex = 0
  74. outputIndex = 0
  75. n: uint32
  76. b: uint32
  77. template inputByte(exp: untyped) =
  78. b = uint32(s[inputIndex])
  79. n = exp
  80. inc inputIndex
  81. template outputChar(x: typed) =
  82. result[outputIndex] = alphabet[x and 63]
  83. inc outputIndex
  84. template outputChar(c: char) =
  85. result[outputIndex] = c
  86. inc outputIndex
  87. while inputIndex != inputEnds:
  88. inputByte(b shl 16)
  89. inputByte(n or b shl 8)
  90. inputByte(n or b shl 0)
  91. outputChar(n shr 18)
  92. outputChar(n shr 12)
  93. outputChar(n shr 6)
  94. outputChar(n shr 0)
  95. if padding == 1:
  96. inputByte(b shl 16)
  97. outputChar(n shr 18)
  98. outputChar(n shr 12)
  99. outputChar('=')
  100. outputChar('=')
  101. elif padding == 2:
  102. inputByte(b shl 16)
  103. inputByte(n or b shl 8)
  104. outputChar(n shr 18)
  105. outputChar(n shr 12)
  106. outputChar(n shr 6)
  107. outputChar('=')
  108. result.setLen(outputIndex)
  109. template encodeImpl() {.dirty.} =
  110. when nimvm:
  111. block:
  112. let lookupTableVM = if safe: cb64safeVM else: cb64VM
  113. encodeInternal(s, lookupTableVM)
  114. else:
  115. block:
  116. let lookupTable = if safe: unsafeAddr(cb64safe) else: unsafeAddr(cb64)
  117. encodeInternal(s, lookupTable)
  118. proc encode*[T: byte|char](s: openArray[T], safe = false): string =
  119. ## Encodes `s` into base64 representation.
  120. ##
  121. ## If `safe` is `true` then it will encode using the
  122. ## URL-Safe and Filesystem-safe standard alphabet characters,
  123. ## which substitutes `-` instead of `+` and `_` instead of `/`.
  124. ## * https://en.wikipedia.org/wiki/Base64#URL_applications
  125. ## * https://tools.ietf.org/html/rfc4648#page-7
  126. ##
  127. ## **See also:**
  128. ## * `decode proc<#decode,string>`_ for decoding a string
  129. runnableExamples:
  130. assert encode("Hello World") == "SGVsbG8gV29ybGQ="
  131. assert encode(['n', 'i', 'm']) == "bmlt"
  132. assert encode(@['n', 'i', 'm']) == "bmlt"
  133. assert encode([1'u8, 2, 3, 4, 5]) == "AQIDBAU="
  134. encodeImpl()
  135. proc encode*[T: SomeInteger and not byte](s: openArray[T], safe = false): string
  136. {.deprecated: "use `byte` or `char` instead".} =
  137. encodeImpl()
  138. proc encodeMime*(s: string, lineLen = 75.Positive, newLine = "\r\n",
  139. safe = false): string =
  140. ## Encodes `s` into base64 representation as lines.
  141. ## Used in email MIME format, use `lineLen` and `newline`.
  142. ##
  143. ## This procedure encodes a string according to MIME spec.
  144. ##
  145. ## If `safe` is `true` then it will encode using the
  146. ## URL-Safe and Filesystem-safe standard alphabet characters,
  147. ## which substitutes `-` instead of `+` and `_` instead of `/`.
  148. ## * https://en.wikipedia.org/wiki/Base64#URL_applications
  149. ## * https://tools.ietf.org/html/rfc4648#page-7
  150. ##
  151. ## **See also:**
  152. ## * `encode proc<#encode,openArray[T]>`_ for encoding an openArray
  153. ## * `decode proc<#decode,string>`_ for decoding a string
  154. runnableExamples:
  155. assert encodeMime("Hello World", 4, "\n") == "SGVs\nbG8g\nV29y\nbGQ="
  156. template cpy(l, src, idx) =
  157. b = l
  158. while i < b:
  159. result[i] = src[idx]
  160. inc i
  161. inc idx
  162. if s.len == 0: return
  163. let e = encode(s, safe)
  164. if e.len <= lineLen or newLine.len == 0:
  165. return e
  166. result = newString(e.len + newLine.len * ((e.len div lineLen) - int(e.len mod lineLen == 0)))
  167. var i, j, k, b: int
  168. let nd = e.len - lineLen
  169. while j < nd:
  170. cpy(i + lineLen, e, j)
  171. cpy(i + newLine.len, newLine, k)
  172. k = 0
  173. cpy(result.len, e, j)
  174. proc initDecodeTable*(): array[256, char] =
  175. # computes a decode table at compile time
  176. for i in 0 ..< 256:
  177. let ch = char(i)
  178. var code = invalidChar
  179. if ch >= 'A' and ch <= 'Z': code = i - 0x00000041
  180. if ch >= 'a' and ch <= 'z': code = i - 0x00000047
  181. if ch >= '0' and ch <= '9': code = i + 0x00000004
  182. if ch == '+' or ch == '-': code = 0x0000003E
  183. if ch == '/' or ch == '_': code = 0x0000003F
  184. result[i] = char(code)
  185. const
  186. decodeTable = initDecodeTable()
  187. proc decode*(s: string): string =
  188. ## Decodes string `s` in base64 representation back into its original form.
  189. ## The initial whitespace is skipped.
  190. ##
  191. ## **See also:**
  192. ## * `encode proc<#encode,openArray[T]>`_ for encoding an openarray
  193. runnableExamples:
  194. assert decode("SGVsbG8gV29ybGQ=") == "Hello World"
  195. assert decode(" SGVsbG8gV29ybGQ=") == "Hello World"
  196. if s.len == 0: return
  197. proc decodeSize(size: int): int =
  198. return (size * 3 div 4) + 6
  199. template inputChar(x: untyped) =
  200. let x = int decodeTable[ord(s[inputIndex])]
  201. if x == invalidChar:
  202. raise newException(ValueError,
  203. "Invalid base64 format character `" & s[inputIndex] &
  204. "` (ord " & $s[inputIndex].ord & ") at location " & $inputIndex & ".")
  205. inc inputIndex
  206. template outputChar(x: untyped) =
  207. result[outputIndex] = char(x and 255)
  208. inc outputIndex
  209. # pre allocate output string once
  210. result.setLen(decodeSize(s.len))
  211. var
  212. inputIndex = 0
  213. outputIndex = 0
  214. inputLen = s.len
  215. inputEnds = 0
  216. # strip trailing characters
  217. while s[inputLen - 1] in {'\n', '\r', ' ', '='}:
  218. dec inputLen
  219. # hot loop: read 4 characters at at time
  220. inputEnds = inputLen - 4
  221. while inputIndex <= inputEnds:
  222. while s[inputIndex] in {'\n', '\r', ' '}:
  223. inc inputIndex
  224. inputChar(a)
  225. inputChar(b)
  226. inputChar(c)
  227. inputChar(d)
  228. outputChar(a shl 2 or b shr 4)
  229. outputChar(b shl 4 or c shr 2)
  230. outputChar(c shl 6 or d shr 0)
  231. # do the last 2 or 3 characters
  232. var leftLen = abs((inputIndex - inputLen) mod 4)
  233. if leftLen == 2:
  234. inputChar(a)
  235. inputChar(b)
  236. outputChar(a shl 2 or b shr 4)
  237. elif leftLen == 3:
  238. inputChar(a)
  239. inputChar(b)
  240. inputChar(c)
  241. outputChar(a shl 2 or b shr 4)
  242. outputChar(b shl 4 or c shr 2)
  243. result.setLen(outputIndex)