# graph.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. (c) 2020 - Copyright ...
  4. Authors:
  5. zPlus <zplus@peers.community>
  6. """
  7. import json
  8. import logging
  9. import pyld
  10. import rdflib
  11. from Crypto.PublicKey import RSA
  12. from . import activitypub
  13. from . import settings
  14. # This constant is used for rdflib persistent storage, as we need an identifier
  15. # when creating a new graph.
  16. GRAPH_NAME = 'forgefed_graph'
  17. # Define some RDF namespaces to use with rdflib
  18. AS = rdflib.Namespace('https://www.w3.org/ns/activitystreams#')
  19. SEC = rdflib.Namespace('https://w3id.org/security#')
  20. FORGE = rdflib.Namespace('https://forgefed.peers.community/ns#')
  21. log = logging.getLogger(__name__)
  22. class Graph(rdflib.Graph):
  23. """
  24. A class representing the Forgefed Graph.
  25. """
  26. def __init__(self, connect=True, create=False):
  27. """
  28. :param connect: Automatically connect graph to storage.
  29. :param create: Automatically try to create persistent storage.
  30. """
  31. if settings.STORAGE == 'berkeleydb':
  32. super().__init__(store='Sleepycat', identifier=GRAPH_NAME)
  33. elif settings.STORAGE == 'sqlalchemy':
  34. super().__init__(
  35. store=rdflib.plugin.get('SQLAlchemy', rdflib.store.Store)(identifier=GRAPH_NAME),
  36. identifier=GRAPH_NAME)
  37. else:
  38. error_message = 'Unknown storage: ' + settings.STORAGE
  39. log.critical(error_message)
  40. raise Exception(error_message)
  41. if create:
  42. self.connect(create=create)
  43. self.disconnect()
  44. if connect:
  45. self.connect()
  46. def connect(self, create=False):
  47. self.open(settings.STORAGE_PATH, create=create)
  48. def disconnect(self):
  49. self.close()
  50. def subgraph(self, triple):
  51. """
  52. rdflib only seems to have a triples() function that returns an iterator.
  53. There are not functions for returning triples as a rdflib.Graph object.
  54. This function allows to retrieve a set of triples and return a new
  55. graph.
  56. Since this function returns a Graph and not an iterator, do *not* use
  57. this when you're fetching too many triples because they're all loaded
  58. into memory.
  59. :param triple: The triple to match. Every triple that matches "triple"
  60. will be added to the subgraph.
  61. Example: (subject, None, None)
  62. """
  63. triples = self.triples(triple)
  64. new_graph = rdflib.Graph()
  65. new_graph += triples
  66. return new_graph
  67. def json_subgraph(self, triple):
  68. return pyld.jsonld.compact(
  69. json.loads(self.subgraph(triple).serialize(format='json-ld')),
  70. activitypub.cached_jsonld_context)
  71. def get_node(self, node_uri):
  72. """
  73. A special case of subgraph() for returning a single node of the graph.
  74. """
  75. return self.subgraph((rdflib.URIRef(node_uri), None, None))
  76. def get_json_node(self, node_uri):
  77. """
  78. A special case of json_subgraph() for returning a single node of the graph.
  79. """
  80. return self.json_subgraph((rdflib.URIRef(node_uri), None, None))
  81. # TODO can this be moved to the Actor class?
  82. def test_or_set_key(self, actor_id, key_id):
  83. """
  84. Test if an Actor already has a GPG key, otherwise automatically
  85. generate a new one and add it to the graph. These keys are used to sign
  86. HTTP requests.
  87. :param actor_id: ID (URL) of the ActivityPub Actor.
  88. :param key_id: ID (URL) of the GPG key object.
  89. """
  90. # Check if we already have a node in our graph for the key
  91. if (rdflib.URIRef(key_id), None, None) in self:
  92. return
  93. # Triples not found! Let's create a new key pair for the Actor
  94. key = RSA.generate(settings.HTTP_SIGNATURE_KEY_BITS)
  95. # And create the graph node for the key
  96. node = {
  97. '@context': activitypub.cached_jsonld_context,
  98. 'id': key_id,
  99. 'type': 'CryptographicKey',
  100. 'owner': actor_id,
  101. #'created': None,
  102. #'expires': None,
  103. #'revoked': None,
  104. 'privateKeyPem': key.export_key('PEM').decode('UTF-8'),
  105. 'publicKeyPem': key.publickey().export_key('PEM').decode('UTF-8')
  106. }
  107. # Finally add the new node to the graph
  108. self.parse(data=json.dumps(node), format='json-ld')
  109. self.commit()
  110. # TODO can this be moved to the Actor class?
  111. def is_valid_key(self, actor_uri, key_uri):
  112. """
  113. Check if the given key belongs to the given actor.
  114. """
  115. return (rdflib.URIRef(key_uri), rdflib.RDF.type, SEC.Key) in self \
  116. and \
  117. (rdflib.URIRef(key_uri), SEC.owner, rdflib.URIRef(actor_uri)) in self
  118. def test_or_set_ordered_collection(self, uri):
  119. """
  120. Test if an OrderedCollection already exists, otherwise automatically
  121. generate a new one and add it to the graph.
  122. :param uri: The URI of the object.
  123. """
  124. # Check if we already have a node in our graph for the key
  125. if (rdflib.URIRef(uri), None, None) in self:
  126. return
  127. collection = {
  128. '@context': activitypub.cached_jsonld_context,
  129. 'type': 'OrderedCollection',
  130. 'id': uri,
  131. 'totalItems': 0,
  132. 'first': {
  133. 'id': uri + '/0',
  134. 'type': 'OrderedCollectionPage',
  135. 'partOf': uri,
  136. # 'orderedItems': []
  137. },
  138. 'last': uri + '/0',
  139. 'current': uri + '/0' }
  140. self.parse(data=json.dumps(collection), format='json-ld')
  141. self.commit()
  142. def get_collection(self, collection_uri):
  143. """
  144. Used to retrieve a collection for displaying when "followers/following"
  145. objects are requested.
  146. """
  147. node = self.subgraph((rdflib.URIRef(collection_uri), None, None))
  148. # Retrieve all the @list nodes (rdf:rest)
  149. for a_object in self[ rdflib.URIRef(collection_uri)
  150. : (AS.items|AS.orderedItems)/(rdflib.RDF.rest*'*') ]:
  151. node += self.subgraph((rdflib.URIRef(a_object), None, None))
  152. return pyld.jsonld.compact(
  153. json.loads(node.serialize(format='json-ld')),
  154. activitypub.cached_jsonld_context)
  155. def remove_collection_page(self, page_uri):
  156. # Remove all the as:items @list nodes
  157. objs = self[ rdflib.URIRef(page_uri)
  158. : AS.items/(rdflib.RDF.rest*'*') ]
  159. for a_object in objs:
  160. self.remove((rdflib.URIRef(a_object), None, None))
  161. # Remove the page node
  162. self.remove((rdflib.URIRef(page_uri), AS.items, None))
  163. self.commit()
  164. def add_collection_item(self, collection_uri, object_uri):
  165. self.test_or_set_ordered_collection(collection_uri)
  166. # Get URI of the last page
  167. page = self.value(
  168. subject=rdflib.URIRef(collection_uri),
  169. predicate=AS.last)
  170. page = self.get_collection(page)
  171. # Append the new object to the list of ordered items
  172. # An OrderedCollection MUST be presented consistently in reverse chronological order.
  173. if 'orderedItems' not in page:
  174. page['orderedItems'] = [ object_uri ]
  175. else:
  176. page['orderedItems'].insert(0, object_uri)
  177. # Before updating the items list, we need to remove the old list. This
  178. # is required because a @list in RDF is the equivalent of a linked
  179. # list, where each item is a blank node with rdf:first and rdf:rest.
  180. # When we parse a JSON-LD document and insert the new ordered list, the
  181. # blank nodes are assigned a new ID (remember: blank nodes ID are not
  182. # fixed like proper IDs of non-blank nodes) and all the old nodes will
  183. # be unlinked albeit they still exist in the database.1
  184. self.remove_collection_page(page['id'])
  185. self.parse(data=json.dumps(page), format='json-ld')
  186. self.commit()
  187. def collection_contains(self, collection_uri, object_uri):
  188. # Graph slicing: https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs
  189. # Graph paths: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#module-rdflib.paths
  190. #
  191. # We only use paged collections. This means that a Collection will have
  192. # a as:first property pointing to the first CollectionPage. as:next are
  193. # optional, if there are more pages. The SPARQL path below checks
  194. # exactly for this. It starts from a Collection node, follows as:first,
  195. # follows any optional as:next, follows the (ordered) list of as:items,
  196. # and finally tries to match any rdf:first that links to the target
  197. # object. Remember: an RDF @list is basically a linked list consisting
  198. # of blank nodes with rdf:first and rdf:rest properties.
  199. return self[rdflib.URIRef(collection_uri)
  200. : AS.first/(AS.next*'*')/AS.items/(rdflib.RDF.first*'+')
  201. : rdflib.URIRef(object_uri) ]
# Create the graph storage during initialization: instantiating with
# create=True builds the on-disk store if missing, and connect=False avoids
# keeping a connection open at import time. The instance is intentionally
# discarded — only the side effect of creating the storage is wanted.
Graph(connect=False, create=True)