pageident.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
# -*- coding: utf-8 -*-
#
# cms.py - simple WSGI/Python based CMS script
#
# Copyright (C) 2011-2019 Michael Buesch <m@bues.ch>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. #from cms.cython_support cimport * #@cy
  20. from cms.exception import *
  21. from cms.util import * #+cimport
  22. import re
  23. import os
  24. __all__ = [
  25. "CMSPageIdent",
  26. ]
  27. class CMSPageIdent(object):
  28. # Page identifier.
  29. __slots__ = (
  30. # Path components.
  31. # List of str.
  32. "__elements",
  33. # Boolean.
  34. # True, if all __elements have been validated.
  35. "__allValidated",
  36. )
  37. __pageFileName_re = re.compile(
  38. r'^(.*)((?:\.html?)|(?:\.py)|(?:\.php))$', re.DOTALL)
  39. __indexPages = {"", "index"}
  40. # Parse a page identifier from a string.
  41. # That string may contain malicious components such as backwards
  42. # traversals (".." in the file path). This class takes care to reject
  43. # such page identifiers before use as filesystem path.
  44. @classmethod
  45. def parse(cls, path, maxPathLen=512, maxIdentDepth=32):
  46. if len(path) > maxPathLen:
  47. raise CMSException(400, "Invalid URL")
  48. pageIdent = cls()
  49. # Strip whitespace and slashes
  50. path = path.strip(' \t/')
  51. # Remove page file extensions like .html and such.
  52. m = cls.__pageFileName_re.match(path)
  53. if m:
  54. path = m.group(1)
  55. # Use the ident elements, if this is not the root page.
  56. if path not in cls.__indexPages:
  57. pageIdent.extend(path.split("/"))
  58. if len(pageIdent.__elements) > maxIdentDepth:
  59. raise CMSException(400, "Invalid URL")
  60. return pageIdent
  61. __pathSep = os.path.sep
  62. __validPathChars = LOWERCASE + UPPERCASE + NUMBERS + "-_."
  63. # Validate a path component. Avoid any directory change.
  64. # Raises CMSException on failure.
  65. @classmethod
  66. def validateSafePathComponent(cls, pcomp):
  67. if pcomp.startswith('.'):
  68. # No ".", ".." and hidden files.
  69. raise CMSException(404, "Invalid page path")
  70. if [ c for c in pcomp if c not in cls.__validPathChars ]:
  71. raise CMSException(404, "Invalid page path")
  72. return pcomp
  73. # Validate a path. Avoid going back in the hierarchy (. and ..)
  74. # Raises CMSException on failure.
  75. @classmethod
  76. def validateSafePath(cls, path):
  77. for pcomp in path.split(cls.__pathSep):
  78. cls.validateSafePathComponent(pcomp)
  79. return path
  80. # Validate a page name.
  81. # Raises CMSException on failure.
  82. # If allowSysNames is True, system names starting with "__" are allowed.
  83. @classmethod
  84. def validateName(cls, name, allowSysNames=False):
  85. if name.startswith("__") and not allowSysNames:
  86. # Page names with __ are system folders.
  87. raise CMSException(404, "Invalid page name")
  88. return cls.validateSafePathComponent(name)
  89. # Initialize this page identifier.
  90. def __init__(self, initialElements=None):
  91. self.__elements = []
  92. self.extend(initialElements)
  93. self.__allValidated = False
  94. # Add a list of path elements to this identifier.
  95. def extend(self, other):
  96. if other is not None:
  97. self.__allValidated = False
  98. if isinstance(other, self.__class__):
  99. self.__elements.extend(other.__elements)
  100. elif isiterable(other):
  101. self.__elements.extend(other)
  102. else:
  103. raise CMSException(500, "Invalid 'other' in CMSPageIdent.extend()")
  104. return self
  105. # Add a list of path elements to this identifier.
  106. def __iadd__(self, other):
  107. return self.extend(other)
  108. # Create a new page identifier from 'self' and add 'other'.
  109. def __add__(self, other):
  110. return self.__class__(self).extend(other)
  111. # Get the number of path components in this path identifier.
  112. def __len__(self):
  113. return len(self.__elements)
  114. # Validate all page identifier name components.
  115. # (Do not allow system name components)
  116. def __validateAll(self):
  117. if not self.__allValidated:
  118. for pcomp in self.__elements:
  119. self.validateName(pcomp)
  120. # Remember that we validated.
  121. # (This flag must be reset to false, if components are added.)
  122. self.__allValidated = True
  123. # Get one page identifier component by index.
  124. def get(self, index, default=None, allowSysNames=False):
  125. try:
  126. return self.validateName(self.__elements[index],
  127. allowSysNames)
  128. except IndexError:
  129. return default
  130. # Get the page identifier as URL.
  131. def getUrl(self, protocol=None, domain=None,
  132. urlBase=None, pageSuffix=".html"):
  133. self.__validateAll()
  134. url = []
  135. if protocol:
  136. url.append(protocol + ":/")
  137. if domain:
  138. url.append(domain)
  139. if urlBase:
  140. url.append(urlBase.strip("/"))
  141. localPath = [elem for elem in self.__elements if elem]
  142. url.extend(localPath)
  143. if not protocol and not domain:
  144. url.insert(0, "")
  145. urlStr = "/".join(url)
  146. if localPath and pageSuffix:
  147. urlStr += pageSuffix
  148. return urlStr
  149. # Get the page identifier as filesystem path.
  150. def getFilesystemPath(self, rstrip=0):
  151. self.__validateAll()
  152. if self.__elements:
  153. if rstrip > 0:
  154. pcomps = self.__elements[ : 0 - rstrip]
  155. if pcomps:
  156. return fs.mkpath(*pcomps)
  157. return ""
  158. return fs.mkpath(*(self.__elements))
  159. return ""
  160. # Test if this identifier starts with the same elements
  161. # as another one.
  162. def startswith(self, other):
  163. return other is not None and\
  164. len(self.__elements) >= len(other.__elements) and\
  165. self.__elements[ : len(other.__elements)] == other.__elements
  166. def __hash__(self):
  167. #@cy cdef Py_ssize_t h
  168. #@cy cdef list elements
  169. #@cy cdef str element
  170. h = 0
  171. elements = self.__elements
  172. for element in elements:
  173. h ^= hash(element)
  174. return h
  175. def __eq__(self, other):
  176. return (isinstance(other, self.__class__) and
  177. self.__elements == other.__elements)