pcre.nim 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # The current PCRE version information.
  10. const
  11. PCRE_MAJOR* = 8
  12. PCRE_MINOR* = 36
  13. PCRE_PRERELEASE* = true
  14. PCRE_DATE* = "2014-09-26"
  15. # When an application links to a PCRE DLL in Windows, the symbols that are
  16. # imported have to be identified as such. When building PCRE, the appropriate
  17. # export setting is defined in pcre_internal.h, which includes this file. So we
  18. # don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL.
  19. # By default, we use the standard "extern" declarations.
  20. # Allow for C++ users
  21. # Public options. Some are compile-time only, some are run-time only, and some
  22. # are both. Most of the compile-time options are saved with the compiled regex
  23. # so that they can be inspected during studying (and therefore JIT compiling).
  24. # Note that pcre_study() has its own set of options. Originally, all the options
  25. # defined here used distinct bits. However, almost all the bits in a 32-bit word
  26. # are now used, so in order to conserve them, option bits that were previously
  27. # only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
  28. # also be used for compile-time options that affect only compiling and are not
  29. # relevant for studying or JIT compiling.
  30. #
  31. # Some options for pcre_compile() change its behaviour but do not affect the
  32. # behaviour of the execution functions. Other options are passed through to the
  33. # execution functions and affect their behaviour, with or without affecting the
  34. # behaviour of pcre_compile().
  35. #
  36. # Options that can be passed to pcre_compile() are tagged Cx below, with these
  37. # variants:
  38. #
  39. # C1 Affects compile only
  40. # C2 Does not affect compile; affects exec, dfa_exec
  41. # C3 Affects compile, exec, dfa_exec
  42. # C4 Affects compile, exec, dfa_exec, study
  43. # C5 Affects compile, exec, study
  44. #
  45. # Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged
  46. # with E and D, respectively. They take precedence over C3, C4, and C5 settings
  47. # passed from pcre_compile(). Those that are compatible with JIT execution are
  48. # flagged with J.
  49. const
  50. CASELESS* = 0x00000001 # C1
  51. MULTILINE* = 0x00000002 # C1
  52. DOTALL* = 0x00000004 # C1
  53. EXTENDED* = 0x00000008 # C1
  54. ANCHORED* = 0x00000010 # C4 E D
  55. DOLLAR_ENDONLY* = 0x00000020 # C2
  56. EXTRA* = 0x00000040 # C1
  57. NOTBOL* = 0x00000080 # E D J
  58. NOTEOL* = 0x00000100 # E D J
  59. UNGREEDY* = 0x00000200 # C1
  60. NOTEMPTY* = 0x00000400 # E D J
  61. UTF8* = 0x00000800 # C4 )
  62. UTF16* = 0x00000800 # C4 ) Synonyms
  63. UTF32* = 0x00000800 # C4 )
  64. NO_AUTO_CAPTURE* = 0x00001000 # C1
  65. NO_UTF8_CHECK* = 0x00002000 # C1 E D J )
  66. NO_UTF16_CHECK* = 0x00002000 # C1 E D J ) Synonyms
  67. NO_UTF32_CHECK* = 0x00002000 # C1 E D J )
  68. AUTO_CALLOUT* = 0x00004000 # C1
  69. PARTIAL_SOFT* = 0x00008000 # E D J ) Synonyms
  70. PARTIAL* = 0x00008000 # E D J )
  71. # This pair use the same bit.
  72. const
  73. NEVER_UTF* = 0x00010000 # C1 ) Overlaid
  74. DFA_SHORTEST* = 0x00010000 # D ) Overlaid
  75. # This pair use the same bit.
  76. const
  77. NO_AUTO_POSSESS* = 0x00020000 # C1 ) Overlaid
  78. DFA_RESTART* = 0x00020000 # D ) Overlaid
  79. const
  80. FIRSTLINE* = 0x00040000 # C3
  81. DUPNAMES* = 0x00080000 # C1
  82. NEWLINE_CR* = 0x00100000 # C3 E D
  83. NEWLINE_LF* = 0x00200000 # C3 E D
  84. NEWLINE_CRLF* = 0x00300000 # C3 E D
  85. NEWLINE_ANY* = 0x00400000 # C3 E D
  86. NEWLINE_ANYCRLF* = 0x00500000 # C3 E D
  87. BSR_ANYCRLF* = 0x00800000 # C3 E D
  88. BSR_UNICODE* = 0x01000000 # C3 E D
  89. JAVASCRIPT_COMPAT* = 0x02000000 # C5
  90. NO_START_OPTIMIZE* = 0x04000000 # C2 E D ) Synonyms
  91. NO_START_OPTIMISE* = 0x04000000 # C2 E D )
  92. PARTIAL_HARD* = 0x08000000 # E D J
  93. NOTEMPTY_ATSTART* = 0x10000000 # E D J
  94. UCP* = 0x20000000 # C3
  95. # Exec-time and get/set-time error codes
  96. const
  97. ERROR_NOMATCH* = -1
  98. ERROR_NULL* = -2
  99. ERROR_BADOPTION* = -3
  100. ERROR_BADMAGIC* = -4
  101. ERROR_UNKNOWN_OPCODE* = -5
  102. ERROR_UNKNOWN_NODE* = -5 ## For backward compatibility
  103. ERROR_NOMEMORY* = -6
  104. ERROR_NOSUBSTRING* = -7
  105. ERROR_MATCHLIMIT* = -8
  106. ERROR_CALLOUT* = -9 ## Never used by PCRE itself
  107. ERROR_BADUTF8* = -10 ## Same for 8/16/32
  108. ERROR_BADUTF16* = -10 ## Same for 8/16/32
  109. ERROR_BADUTF32* = -10 ## Same for 8/16/32
  110. ERROR_BADUTF8_OFFSET* = -11 ## Same for 8/16
  111. ERROR_BADUTF16_OFFSET* = -11 ## Same for 8/16
  112. ERROR_PARTIAL* = -12
  113. ERROR_BADPARTIAL* = -13
  114. ERROR_INTERNAL* = -14
  115. ERROR_BADCOUNT* = -15
  116. ERROR_DFA_UITEM* = -16
  117. ERROR_DFA_UCOND* = -17
  118. ERROR_DFA_UMLIMIT* = -18
  119. ERROR_DFA_WSSIZE* = -19
  120. ERROR_DFA_RECURSE* = -20
  121. ERROR_RECURSIONLIMIT* = -21
  122. ERROR_NULLWSLIMIT* = -22 ## No longer actually used
  123. ERROR_BADNEWLINE* = -23
  124. ERROR_BADOFFSET* = -24
  125. ERROR_SHORTUTF8* = -25
  126. ERROR_SHORTUTF16* = -25 ## Same for 8/16
  127. ERROR_RECURSELOOP* = -26
  128. ERROR_JIT_STACKLIMIT* = -27
  129. ERROR_BADMODE* = -28
  130. ERROR_BADENDIANNESS* = -29
  131. ERROR_DFA_BADRESTART* = -30
  132. ERROR_JIT_BADOPTION* = -31
  133. ERROR_BADLENGTH* = -32
  134. ERROR_UNSET* = -33
  135. # Specific error codes for UTF-8 validity checks
  136. const
  137. UTF8_ERR0* = 0
  138. UTF8_ERR1* = 1
  139. UTF8_ERR2* = 2
  140. UTF8_ERR3* = 3
  141. UTF8_ERR4* = 4
  142. UTF8_ERR5* = 5
  143. UTF8_ERR6* = 6
  144. UTF8_ERR7* = 7
  145. UTF8_ERR8* = 8
  146. UTF8_ERR9* = 9
  147. UTF8_ERR10* = 10
  148. UTF8_ERR11* = 11
  149. UTF8_ERR12* = 12
  150. UTF8_ERR13* = 13
  151. UTF8_ERR14* = 14
  152. UTF8_ERR15* = 15
  153. UTF8_ERR16* = 16
  154. UTF8_ERR17* = 17
  155. UTF8_ERR18* = 18
  156. UTF8_ERR19* = 19
  157. UTF8_ERR20* = 20
  158. UTF8_ERR21* = 21
  159. UTF8_ERR22* = 22 # Unused (was non-character)
  160. # Specific error codes for UTF-16 validity checks
  161. const
  162. UTF16_ERR0* = 0
  163. UTF16_ERR1* = 1
  164. UTF16_ERR2* = 2
  165. UTF16_ERR3* = 3
  166. UTF16_ERR4* = 4 # Unused (was non-character)
  167. # Specific error codes for UTF-32 validity checks
  168. const
  169. UTF32_ERR0* = 0
  170. UTF32_ERR1* = 1
  171. UTF32_ERR2* = 2 # Unused (was non-character)
  172. UTF32_ERR3* = 3
  173. # Request types for pcre_fullinfo()
  174. const
  175. INFO_OPTIONS* = 0
  176. INFO_SIZE* = 1
  177. INFO_CAPTURECOUNT* = 2
  178. INFO_BACKREFMAX* = 3
  179. INFO_FIRSTBYTE* = 4
  180. INFO_FIRSTCHAR* = 4 ## For backwards compatibility
  181. INFO_FIRSTTABLE* = 5
  182. INFO_LASTLITERAL* = 6
  183. INFO_NAMEENTRYSIZE* = 7
  184. INFO_NAMECOUNT* = 8
  185. INFO_NAMETABLE* = 9
  186. INFO_STUDYSIZE* = 10
  187. INFO_DEFAULT_TABLES* = 11
  188. INFO_OKPARTIAL* = 12
  189. INFO_JCHANGED* = 13
  190. INFO_HASCRORLF* = 14
  191. INFO_MINLENGTH* = 15
  192. INFO_JIT* = 16
  193. INFO_JITSIZE* = 17
  194. INFO_MAXLOOKBEHIND* = 18
  195. INFO_FIRSTCHARACTER* = 19
  196. INFO_FIRSTCHARACTERFLAGS* = 20
  197. INFO_REQUIREDCHAR* = 21
  198. INFO_REQUIREDCHARFLAGS* = 22
  199. INFO_MATCHLIMIT* = 23
  200. INFO_RECURSIONLIMIT* = 24
  201. INFO_MATCH_EMPTY* = 25
  202. # Request types for pcre_config(). Do not re-arrange, in order to remain
  203. # compatible.
  204. const
  205. CONFIG_UTF8* = 0
  206. CONFIG_NEWLINE* = 1
  207. CONFIG_LINK_SIZE* = 2
  208. CONFIG_POSIX_MALLOC_THRESHOLD* = 3
  209. CONFIG_MATCH_LIMIT* = 4
  210. CONFIG_STACKRECURSE* = 5
  211. CONFIG_UNICODE_PROPERTIES* = 6
  212. CONFIG_MATCH_LIMIT_RECURSION* = 7
  213. CONFIG_BSR* = 8
  214. CONFIG_JIT* = 9
  215. CONFIG_UTF16* = 10
  216. CONFIG_JITTARGET* = 11
  217. CONFIG_UTF32* = 12
  218. CONFIG_PARENS_LIMIT* = 13
  219. # Request types for pcre_study(). Do not re-arrange, in order to remain
  220. # compatible.
  221. const
  222. STUDY_JIT_COMPILE* = 0x0001
  223. STUDY_JIT_PARTIAL_SOFT_COMPILE* = 0x0002
  224. STUDY_JIT_PARTIAL_HARD_COMPILE* = 0x0004
  225. STUDY_EXTRA_NEEDED* = 0x0008
  226. # Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
  227. # these bits, just add new ones on the end, in order to remain compatible.
  228. const
  229. EXTRA_STUDY_DATA* = 0x0001
  230. EXTRA_MATCH_LIMIT* = 0x0002
  231. EXTRA_CALLOUT_DATA* = 0x0004
  232. EXTRA_TABLES* = 0x0008
  233. EXTRA_MATCH_LIMIT_RECURSION* = 0x0010
  234. EXTRA_MARK* = 0x0020
  235. EXTRA_EXECUTABLE_JIT* = 0x0040
  236. # Types
  237. type
  238. Pcre* = object
  239. Pcre16* = object
  240. Pcre32* = object
  241. JitStack* = object
  242. JitStack16* = object
  243. JitStack32* = object
  244. when defined(nimHasStyleChecks):
  245. {.push styleChecks: off.}
  246. # The structure for passing additional data to pcre_exec(). This is defined in
  247. # such as way as to be extensible. Always add new fields at the end, in order
  248. # to remain compatible.
  249. type
  250. ExtraData* = object
  251. flags*: clong ## Bits for which fields are set
  252. study_data*: pointer ## Opaque data from pcre_study()
  253. match_limit*: clong ## Maximum number of calls to match()
  254. callout_data*: pointer ## Data passed back in callouts
  255. tables*: pointer ## Pointer to character tables
  256. match_limit_recursion*: clong ## Max recursive calls to match()
  257. mark*: pointer ## For passing back a mark pointer
  258. executable_jit*: pointer ## Contains a pointer to a compiled jit code
  259. # The structure for passing out data via the pcre_callout_function. We use a
  260. # structure so that new fields can be added on the end in future versions,
  261. # without changing the API of the function, thereby allowing old clients to
  262. # work without modification.
  263. type
  264. CalloutBlock* = object
  265. version* : cint ## Identifies version of block
  266. # ------------------------ Version 0 -------------------------------
  267. callout_number* : cint ## Number compiled into pattern
  268. offset_vector* : ptr cint ## The offset vector
  269. subject* : cstring ## The subject being matched
  270. subject_length* : cint ## The length of the subject
  271. start_match* : cint ## Offset to start of this match attempt
  272. current_position*: cint ## Where we currently are in the subject
  273. capture_top* : cint ## Max current capture
  274. capture_last* : cint ## Most recently closed capture
  275. callout_data* : pointer ## Data passed in with the call
  276. # ------------------- Added for Version 1 --------------------------
  277. pattern_position*: cint ## Offset to next item in the pattern
  278. next_item_length*: cint ## Length of next item in the pattern
  279. # ------------------- Added for Version 2 --------------------------
  280. mark* : pointer ## Pointer to current mark or NULL
  281. # ------------------------------------------------------------------
  282. when defined(nimHasStyleChecks):
  283. {.pop.}
  284. # User defined callback which provides a stack just before the match starts.
  285. type
  286. JitCallback* = proc (a: pointer): ptr JitStack {.cdecl.}
  287. when not defined(usePcreHeader):
  288. when hostOS == "windows":
  289. when defined(nimOldDlls):
  290. const pcreDll = "pcre.dll"
  291. elif defined(cpu64):
  292. const pcreDll = "pcre64.dll"
  293. else:
  294. const pcreDll = "pcre32.dll"
  295. elif hostOS == "macosx":
  296. const pcreDll = "libpcre(.3|.1|).dylib"
  297. else:
  298. const pcreDll = "libpcre.so(.3|.1|)"
  299. {.push dynlib: pcreDll.}
  300. else:
  301. {.push header: "<pcre.h>".}
  302. {.push cdecl, importc: "pcre_$1".}
  303. # Exported PCRE functions
  304. proc compile*(pattern: cstring,
  305. options: cint,
  306. errptr: ptr cstring,
  307. erroffset: ptr cint,
  308. tableptr: pointer): ptr Pcre
  309. proc compile2*(pattern: cstring,
  310. options: cint,
  311. errorcodeptr: ptr cint,
  312. errptr: ptr cstring,
  313. erroffset: ptr cint,
  314. tableptr: pointer): ptr Pcre
  315. proc config*(what: cint,
  316. where: pointer): cint
  317. proc copy_named_substring*(code: ptr Pcre,
  318. subject: cstring,
  319. ovector: ptr cint,
  320. stringcount: cint,
  321. stringname: cstring,
  322. buffer: cstring,
  323. buffersize: cint): cint
  324. proc copy_substring*(subject: cstring,
  325. ovector: ptr cint,
  326. stringcount: cint,
  327. stringnumber: cint,
  328. buffer: cstring,
  329. buffersize: cint): cint
  330. proc dfa_exec*(code: ptr Pcre,
  331. extra: ptr ExtraData,
  332. subject: cstring,
  333. length: cint,
  334. startoffset: cint,
  335. options: cint,
  336. ovector: ptr cint,
  337. ovecsize: cint,
  338. workspace: ptr cint,
  339. wscount: cint): cint
  340. proc exec*(code: ptr Pcre,
  341. extra: ptr ExtraData,
  342. subject: cstring,
  343. length: cint,
  344. startoffset: cint,
  345. options: cint,
  346. ovector: ptr cint,
  347. ovecsize: cint): cint
  348. proc jit_exec*(code: ptr Pcre,
  349. extra: ptr ExtraData,
  350. subject: cstring,
  351. length: cint,
  352. startoffset: cint,
  353. options: cint,
  354. ovector: ptr cint,
  355. ovecsize: cint,
  356. jstack: ptr JitStack): cint
  357. proc free_substring*(stringptr: cstring)
  358. proc free_substring_list*(stringptr: cstringArray)
  359. proc fullinfo*(code: ptr Pcre,
  360. extra: ptr ExtraData,
  361. what: cint,
  362. where: pointer): cint
  363. proc get_named_substring*(code: ptr Pcre,
  364. subject: cstring,
  365. ovector: ptr cint,
  366. stringcount: cint,
  367. stringname: cstring,
  368. stringptr: cstringArray): cint
  369. proc get_stringnumber*(code: ptr Pcre,
  370. name: cstring): cint
  371. proc get_stringtable_entries*(code: ptr Pcre,
  372. name: cstring,
  373. first: cstringArray,
  374. last: cstringArray): cint
  375. proc get_substring*(subject: cstring,
  376. ovector: ptr cint,
  377. stringcount: cint,
  378. stringnumber: cint,
  379. stringptr: cstringArray): cint
  380. proc get_substring_list*(subject: cstring,
  381. ovector: ptr cint,
  382. stringcount: cint,
  383. listptr: ptr cstringArray): cint
  384. proc maketables*(): pointer
  385. proc refcount*(code: ptr Pcre,
  386. adjust: cint): cint
  387. proc study*(code: ptr Pcre,
  388. options: cint,
  389. errptr: ptr cstring): ptr ExtraData
  390. proc free_study*(extra: ptr ExtraData)
  391. proc version*(): cstring
  392. # Utility functions for byte order swaps.
  393. proc pattern_to_host_byte_order*(code: ptr Pcre,
  394. extra: ptr ExtraData,
  395. tables: pointer): cint
  396. # JIT compiler related functions.
  397. proc jit_stack_alloc*(startsize: cint,
  398. maxsize: cint): ptr JitStack
  399. proc jit_stack_free*(stack: ptr JitStack)
  400. proc assign_jit_stack*(extra: ptr ExtraData,
  401. callback: JitCallback,
  402. data: pointer)
  403. proc jit_free_unused_memory*()
  404. # There was an odd function with `var cstring` instead of `ptr`
  405. proc study*(code: ptr Pcre,
  406. options: cint,
  407. errptr: var cstring): ptr ExtraData {.deprecated.}
  408. {.pop.}
  409. {.pop.}
  410. type
  411. PPcre* {.deprecated.} = ptr Pcre
  412. PJitStack* {.deprecated.} = ptr JitStack