123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211 |
- # -*- coding: utf-8 -*-
- #
- # cms.py - simple WSGI/Python based CMS script
- #
- # Copyright (C) 2011-2019 Michael Buesch <m@bues.ch>
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 2 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- #from cms.cython_support cimport * #@cy
- from cms.exception import *
- from cms.util import * #+cimport
- import re
- import os
- __all__ = [
- "CMSPageIdent",
- ]
- class CMSPageIdent(object):
- # Page identifier.
- __slots__ = (
- # Path components.
- # List of str.
- "__elements",
- # Boolean.
- # True, if all __elements have been validated.
- "__allValidated",
- )
- __pageFileName_re = re.compile(
- r'^(.*)((?:\.html?)|(?:\.py)|(?:\.php))$', re.DOTALL)
- __indexPages = {"", "index"}
- # Parse a page identifier from a string.
- # That string may contain malicious components such as backwards
- # traversals (".." in the file path). This class takes care to reject
- # such page identifiers before use as filesystem path.
- @classmethod
- def parse(cls, path, maxPathLen=512, maxIdentDepth=32):
- if len(path) > maxPathLen:
- raise CMSException(400, "Invalid URL")
- pageIdent = cls()
- # Strip whitespace and slashes
- path = path.strip(' \t/')
- # Remove page file extensions like .html and such.
- m = cls.__pageFileName_re.match(path)
- if m:
- path = m.group(1)
- # Use the ident elements, if this is not the root page.
- if path not in cls.__indexPages:
- pageIdent.extend(path.split("/"))
- if len(pageIdent.__elements) > maxIdentDepth:
- raise CMSException(400, "Invalid URL")
- return pageIdent
- __pathSep = os.path.sep
- __validPathChars = LOWERCASE + UPPERCASE + NUMBERS + "-_."
- # Validate a path component. Avoid any directory change.
- # Raises CMSException on failure.
- @classmethod
- def validateSafePathComponent(cls, pcomp):
- if pcomp.startswith('.'):
- # No ".", ".." and hidden files.
- raise CMSException(404, "Invalid page path")
- if [ c for c in pcomp if c not in cls.__validPathChars ]:
- raise CMSException(404, "Invalid page path")
- return pcomp
- # Validate a path. Avoid going back in the hierarchy (. and ..)
- # Raises CMSException on failure.
- @classmethod
- def validateSafePath(cls, path):
- for pcomp in path.split(cls.__pathSep):
- cls.validateSafePathComponent(pcomp)
- return path
- # Validate a page name.
- # Raises CMSException on failure.
- # If allowSysNames is True, system names starting with "__" are allowed.
- @classmethod
- def validateName(cls, name, allowSysNames=False):
- if name.startswith("__") and not allowSysNames:
- # Page names with __ are system folders.
- raise CMSException(404, "Invalid page name")
- return cls.validateSafePathComponent(name)
- # Initialize this page identifier.
- def __init__(self, initialElements=None):
- self.__elements = []
- self.extend(initialElements)
- self.__allValidated = False
- # Add a list of path elements to this identifier.
- def extend(self, other):
- if other is not None:
- self.__allValidated = False
- if isinstance(other, self.__class__):
- self.__elements.extend(other.__elements)
- elif isiterable(other):
- self.__elements.extend(other)
- else:
- raise CMSException(500, "Invalid 'other' in CMSPageIdent.extend()")
- return self
- # Add a list of path elements to this identifier.
- def __iadd__(self, other):
- return self.extend(other)
- # Create a new page identifier from 'self' and add 'other'.
- def __add__(self, other):
- return self.__class__(self).extend(other)
- # Get the number of path components in this path identifier.
- def __len__(self):
- return len(self.__elements)
- # Validate all page identifier name components.
- # (Do not allow system name components)
- def __validateAll(self):
- if not self.__allValidated:
- for pcomp in self.__elements:
- self.validateName(pcomp)
- # Remember that we validated.
- # (This flag must be reset to false, if components are added.)
- self.__allValidated = True
- # Get one page identifier component by index.
- def get(self, index, default=None, allowSysNames=False):
- try:
- return self.validateName(self.__elements[index],
- allowSysNames)
- except IndexError:
- return default
- # Get the page identifier as URL.
- def getUrl(self, protocol=None, domain=None,
- urlBase=None, pageSuffix=".html"):
- self.__validateAll()
- url = []
- if protocol:
- url.append(protocol + ":/")
- if domain:
- url.append(domain)
- if urlBase:
- url.append(urlBase.strip("/"))
- localPath = [elem for elem in self.__elements if elem]
- url.extend(localPath)
- if not protocol and not domain:
- url.insert(0, "")
- urlStr = "/".join(url)
- if localPath and pageSuffix:
- urlStr += pageSuffix
- return urlStr
- # Get the page identifier as filesystem path.
- def getFilesystemPath(self, rstrip=0):
- self.__validateAll()
- if self.__elements:
- if rstrip > 0:
- pcomps = self.__elements[ : 0 - rstrip]
- if pcomps:
- return fs.mkpath(*pcomps)
- return ""
- return fs.mkpath(*(self.__elements))
- return ""
- # Test if this identifier starts with the same elements
- # as another one.
- def startswith(self, other):
- return other is not None and\
- len(self.__elements) >= len(other.__elements) and\
- self.__elements[ : len(other.__elements)] == other.__elements
- def __hash__(self):
- #@cy cdef Py_ssize_t h
- #@cy cdef list elements
- #@cy cdef str element
- h = 0
- elements = self.__elements
- for element in elements:
- h ^= hash(element)
- return h
- def __eq__(self, other):
- return (isinstance(other, self.__class__) and
- self.__elements == other.__elements)
|