123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- # GNU MediaGoblin -- federated, autonomous media hosting
- # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- from io import open
- import os
- import copy
- import json
- import re
- from pkg_resources import resource_filename
- import dateutil.parser
- from pyld import jsonld
- from jsonschema import validate, FormatChecker, draft4_format_checker
- from jsonschema.compat import str_types
- from mediagoblin.tools.pluginapi import hook_handle
- ########################################################
- ## Set up the MediaGoblin format checker for json-schema
- ########################################################
# Loose URL matcher: "<letters>://<host or dotted quad>[:port][/path]".
# The scheme is letters only; matching is case-insensitive.
URL_REGEX = re.compile(
    r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
    flags=re.IGNORECASE)
def is_uri(instance):
    """
    Format check for json-schema "uri" values.

    Only strings are inspected; any other type is waved through with
    True.  For strings, the result of matching against URL_REGEX is
    returned (a match object, or None when the string does not match).
    """
    if isinstance(instance, str_types):
        return URL_REGEX.match(instance)

    # Not a string: nothing for a format check to look at.
    return True
def is_datetime(instance):
    """
    Format check for date or datetime strings.

    Only strings are inspected; any other type is waved through with
    True.  Strings are handed to dateutil's parser and whatever it
    returns is passed back.  Exceptions raised by the parser are not
    caught here.
    """
    if isinstance(instance, str_types):
        return dateutil.parser.parse(instance)

    # Not a string: nothing for a format check to look at.
    return True
class DefaultChecker(FormatChecker):
    """
    MediaGoblin's default json-schema format checker.

    Its registry starts out as a deep copy of the draft4 format
    checker's one, so entries can be added or replaced on this class
    without modifying the registry it was copied from.
    """
    checkers = copy.deepcopy(draft4_format_checker.checkers)
# Install MediaGoblin's own "uri" and "date-time" checks on the class
# registry.  Each entry is (check function, exception types that
# jsonschema treats as a failed check when the function raises them).
DefaultChecker.checkers.update({
    u"uri": (is_uri, ()),
    u"date-time": (is_datetime, (ValueError, TypeError)),
})

# Shared checker instance, created after the registry has been extended.
DEFAULT_CHECKER = DefaultChecker()
# Minimal fallback schema.  It only constrains the few keys we deem
# important (license must look like a uri, the "created" fields like a
# date-time); no other keys are constrained.
DEFAULT_SCHEMA = {
    "$schema": "http://json-schema.org/schema#",
    "type": "object",
    "properties": {
        "license": {"format": "uri", "type": "string"},
        "dcterms:created": {"format": "date-time", "type": "string"},
        "dc:created": {"format": "date-time", "type": "string"},
    },
}
def load_resource(package, resource_path):
    """
    Load a resource, return its contents as a string.

    Args:
    - package: package or module name.  Eg "mediagoblin.media_types.audio"
    - resource_path: path to get to this resource, a list of
      directories and finally a filename.  Will be joined with
      os.path.sep.

    The file is read as UTF-8 text.
    """
    filename = resource_filename(package, os.path.sep.join(resource_path))

    # Use a context manager so the file handle is closed as soon as the
    # contents are read, instead of lingering until garbage collection.
    with open(filename, encoding="utf-8") as resource_file:
        return resource_file.read()
def load_resource_json(package, resource_path):
    """
    Load a json resource file and return the parsed data.

    Args:
    - package: package or module name.  Eg "mediagoblin.media_types.audio"
    - resource_path: path to get to this resource, a list of
      directories and finally a filename.  Will be joined with
      os.path.sep.
    """
    raw_text = load_resource(package, resource_path)
    return json.loads(raw_text)
- ##################################
- ## Load the MediaGoblin core files
- ##################################
# Context documents shipped with MediaGoblin, keyed by the URL they
# stand in for.  Values are the file contents as returned by
# load_resource (text, not parsed json).
BUILTIN_CONTEXTS = {
    "http://www.w3.org/2013/json-ld-context/rdfa11":
        load_resource(
            "mediagoblin", ["static", "metadata", "rdfa11.jsonld"]),
}

# Documents already resolved by load_context, keyed by URL.
_CONTEXT_CACHE = {}
def load_context(url):
    """
    Document loader that knows about MediaGoblin's own contexts.

    Lookup order for a url:
    1. the in-memory cache of earlier results
    2. the contexts bundled in BUILTIN_CONTEXTS
    3. the "context_url_data" plugin hook
    4. jsonld.load_document as the fallback

    Whatever gets produced by steps 2-4 is cached under the url and
    returned.
    """
    try:
        return _CONTEXT_CACHE[url]
    except KeyError:
        pass

    # Bundled context first, then give plugins a chance to supply one.
    local_data = BUILTIN_CONTEXTS.get(url, None)
    if local_data is None:
        local_data = hook_handle(("context_url_data", url))

    if local_data is None:
        # Nothing local for this url: defer to jsonld's loader.
        loaded = jsonld.load_document(url)
    else:
        # Wrap locally supplied data in a document record.
        loaded = {
            'contextUrl': None,
            'documentUrl': url,
            'document': local_data,
        }

    _CONTEXT_CACHE[url] = loaded
    return loaded
# Context URL used by the compact/expand helpers below when the caller
# does not pass one.
DEFAULT_CONTEXT = "http://www.w3.org/2013/json-ld-context/rdfa11"
def compact_json(metadata, context=DEFAULT_CONTEXT):
    """
    Compact json-ld metadata against the supplied context.

    Note: "free floating" nodes are removed (eg a key just named
    "bazzzzzz" which isn't specified in the context)... something like
    bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.
    """
    compact_options = {
        "documentLoader": load_context,
        # Also supplying the context for expansion allows things like
        # "license" to be preserved.
        "expandContext": context,
        "keepFreeFloatingNodes": False,
    }
    return jsonld.compact(metadata, context, options=compact_options)
def compact_and_validate(metadata, context=DEFAULT_CONTEXT,
                         schema=DEFAULT_SCHEMA):
    """
    Compact json with the supplied context and check it against a schema.

    Returns the compacted json.  Raises
    jsonschema.exceptions.ValidationError if the schema check fails.

    Note: "free floating" nodes are removed (eg a key just named
    "bazzzzzz" which isn't specified in the context)... something like
    bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.

    You may wish to do this validation yourself... this is just for
    convenience.
    """
    result = compact_json(metadata, context)

    # NOTE(review): the schema check runs on the metadata as passed in,
    # not on the compacted result -- confirm this is intended.
    validate(metadata, schema, format_checker=DEFAULT_CHECKER)

    return result
def expand_json(metadata, context=DEFAULT_CONTEXT):
    """
    Expand json-ld metadata, always going through our documentLoader.

    By default the expansion uses DEFAULT_CONTEXT as the expandContext;
    pass context=None to expand without one.

    # @@: Is defaulting to DEFAULT_CONTEXT a good idea?  Maybe the
    #   default should be None.
    """
    expand_options = {"documentLoader": load_context}

    if context is None:
        return jsonld.expand(metadata, options=expand_options)

    expand_options["expandContext"] = context
    return jsonld.expand(metadata, options=expand_options)
def rdfa_to_readable(rdfa_predicate):
    """
    Turn an rdfa predicate into a human readable label.

    For a prefixed predicate the piece right after the first colon is
    capitalized, eg "dc:title" becomes "Title".  A predicate without
    any prefix (eg "license") is capitalized as a whole instead of
    raising IndexError.

    Args:
    - rdfa_predicate: the predicate string, eg "dc:title"
    """
    components = rdfa_predicate.split(u":")

    # With a prefix present the term is the second component; without
    # one, the only component is the term itself.
    term = components[1] if len(components) > 1 else components[0]
    return term.capitalize()
|