4 Commits 0364ca4a2f ... ba8ce8b200

Author SHA1 Message Date
  Stefano Cossu ba8ce8b200 Memcheck clean. 3 years ago
  Stefano Cossu 8555a45d3d Basic Python tests. 3 years ago
  Stefano Cossu f70ace7b27 WIP Remove ID cache. 3 years ago
  Stefano Cossu df8a1d2366 WIP Store lang tags and data types separately; bootstrap DB. 3 years ago
10 changed files with 68 additions and 54 deletions
  1. 6 0
      Makefile
  2. 11 7
      README.md
  3. 9 3
      TODO.md
  4. 2 5
      cpython/py_graph.h
  5. 6 6
      cpython/py_namespace.h
  6. 8 2
      cpython/py_term.h
  7. 0 1
      cpython/py_triple.h
  8. 26 30
      docs/dev/deps.dot
  9. BIN
      docs/dev/deps.pdf
  10. 0 0
      include/buffer.h

+ 6 - 0
Makefile

@@ -58,6 +58,7 @@ test_lexer:
 valgrind:
 	valgrind \
 	--leak-check=full --show-leak-kinds=all --track-origins=yes \
+	--log-file=/tmp/lsup_valgrind.log \
 	./bin/test
 
 
@@ -73,6 +74,11 @@ profile: build_parsers
 		-o bin/profile
 
 
+py_test:
+	pip3 install --user . && \
+	python3 test/cpython_test.py
+
+
 # Build a visual dependency graph.
 # Requires cinclude2dot (https://www.flourish.org/cinclude2dot) and Graphviz.
 depgraph: src/* include/*

+ 11 - 7
README.md

@@ -6,12 +6,14 @@ Embedded RDF (and maybe later, generic graph) store and manipulation library.
 
 ## Purpose
 
-The goal of this library is to provide extremely efficient and compact
-handling of RDF data. At least a C API and Python bindings are planned.
+The goal of this library is to provide efficient and compact handling of RDF
+data. At least a complete C API and Python bindings are planned.
 
 This library can be thought of as SQLite or BerkeleyDB for graphs. It can be
 embedded directly in a program and store persistent data without the need of
-running a server.
+running a server. In addition, `lsup_rdf` can perform in-memory graph
+operations such as validation, de/serialization, boolean operations, lookup,
+etc.
 
 Two graph back ends are available: a memory one based on hash maps and a
 disk-based one based on [LMDB](https://symas.com/lmdb/), an extremely fast and
@@ -29,9 +31,9 @@ remain focused on serving Lakesuperior.
 
 ## Development Status
 
-**Pre-alpha.** The API is not yet defined and may change radically. The code
-may not compile, or throw a fit when run. At the moment this project is only
-intended for curious developers and researchers.
+**Alpha.** The API structure is not yet stable and may change radically. The
+code may fail to compile, or throw a fit when run. Testing is minimal. At the
+moment this project is only intended for curious developers and researchers.
 
 This is also my first stab at writing a C library (coming from Python) and an
 unpaid fun project, so don't be surprised if you find some gross stuff.
@@ -48,9 +50,10 @@ of features as a standalone library:
 - Memory- and disk-backed (persistent) graph storage
 - Contexts (disk-backed only)
 - Handling of blank nodes
+- Namespace prefixes
 - Validation of literal and URI terms
 - Validation of RDF triples
-- Fast graph Lookup using matching patterns
+- Fast graph lookup using matching patterns
 - Graph boolean operations
 - Serialization and de-serialization to/from N-Triples and N-Quads
 - Serialization and de-serialization to/from Turtle and TriG
@@ -62,6 +65,7 @@ of features as a standalone library:
 
 - Binary serialization and hashing of graphs
 - Binary protocol for synchronizing remote replicas
+- Backend for massive distributed storage (possibly Ceph)
 - Lua bindings
 
 ### Likely Out of Scope

+ 9 - 3
TODO.md

@@ -17,15 +17,21 @@
     - *D* term, triple, graph modules
     - *D* Codec integration
     - *D* Graph remove and lookup ops
-    - *W* Namespace module
-    - *P* Query and slicing methods
-    - *P* Tests
+    - *D* Namespace module
+    - *D* Lookup methods
+    - *D* Tests (basic)
+    - *P* Subclass term types
 - *P* Turtle serialization / deserialization
+- *P* Extended tests
+    - *P* C API
+    - *P* Python API
 
 
 ## Non-critical for MVP
 
 - Term and triple validation
+- Enhanced graph operations
+    - Extract unique terms and 2-term tuples
 - NQ codec
 - TriG codec
 

+ 2 - 5
cpython/py_graph.h

@@ -410,11 +410,8 @@ Graph_add (PyObject *self, PyObject *triples)
 
         log_trace ("Inserting triple #%lu", i);
 
-        LSUP_BufferTriple *sspo = LSUP_btriple_from_triple (
-                ((TripleObject *) trp_obj)->ob_struct);
-        LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
-
-        LSUP_btriple_free (sspo);
+        LSUP_rc db_rc = LSUP_graph_add_iter (
+                it, ((TripleObject *) trp_obj)->ob_struct);
 
         if (db_rc == LSUP_OK) rc = LSUP_OK;
         if (UNLIKELY (db_rc < 0)) {

+ 6 - 6
cpython/py_namespace.h

@@ -83,13 +83,13 @@ NSMap_get (PyObject *self, PyObject *pfx_obj)
 
 
 static PyObject *
-NSMap_normalize_uri (PyObject *self, PyObject *fq_uri_obj)
+NSMap_denormalize_uri (PyObject *self, PyObject *fq_uri_obj)
 {
     if (PyUnicode_READY (fq_uri_obj) < 0) return NULL;
     const char *fq_uri = PyUnicode_AsUTF8 (fq_uri_obj);
 
     char *pfx_uri;
-    LSUP_rc rc = LSUP_nsmap_normalize_uri (
+    LSUP_rc rc = LSUP_nsmap_denormalize_uri (
             ((NSMapObject *)self)->ob_struct, fq_uri, &pfx_uri);
     if (rc < 0)  {
         PyErr_SetString (PyExc_ValueError, "Error normalizing URI.");
@@ -104,13 +104,13 @@ NSMap_normalize_uri (PyObject *self, PyObject *fq_uri_obj)
 
 
 static PyObject *
-NSMap_denormalize_uri (PyObject *self, PyObject *pfx_uri_obj)
+NSMap_normalize_uri (PyObject *self, PyObject *pfx_uri_obj)
 {
     if (PyUnicode_READY (pfx_uri_obj) < 0) return NULL;
     const char *pfx_uri = PyUnicode_AsUTF8 (pfx_uri_obj);
 
     char *fq_uri;
-    LSUP_rc rc = LSUP_nsmap_denormalize_uri (
+    LSUP_rc rc = LSUP_nsmap_normalize_uri (
             ((NSMapObject *)self)->ob_struct, pfx_uri, &fq_uri);
     if (rc < 0)  {
         PyErr_SetString (PyExc_ValueError, "Error denormalizing URI.");
@@ -158,11 +158,11 @@ static PyMethodDef NSMap_methods[] = {
     },
     {
         "normalize_uri", (PyCFunction) NSMap_normalize_uri, METH_O,
-        "Normalize a URI (i.e. convert from fully qualified to prefixed)."
+        "Normalize a URI (i.e. convert from prefixed to fully qualified)."
     },
     {
         "denormalize_uri", (PyCFunction) NSMap_denormalize_uri, METH_O,
-        "Denormalize a URI (i.e. convert from prefixed to fully qualified)."
+        "Denormalize a URI (i.e. convert from fully qualified to prefixed)."
     },
     {
         "as_dict", (PyCFunction) NSMap_as_dict, METH_NOARGS,

+ 8 - 2
cpython/py_term.h

@@ -28,7 +28,10 @@ Term_init (TermObject *self, PyObject *args, PyObject *kwargs)
             &term_type, &data, &datatype, &lang))
         return -1;
 
-    self->ob_struct = LSUP_term_new ((LSUP_TermType) term_type, data, datatype, lang);
+    char *metadata = datatype ? datatype : lang;
+
+    self->ob_struct = LSUP_term_new (
+            (LSUP_TermType) term_type, data, metadata);
     if (!self->ob_struct) {
         PyErr_SetString (PyExc_ValueError, "Could not create term.");
         return -1;
@@ -70,7 +73,10 @@ Term_get_datatype (TermObject *self, void *closure)
 {
     if (!self->ob_struct->datatype) Py_RETURN_NONE;
 
-    PyObject *datatype = PyUnicode_FromString (self->ob_struct->datatype);
+    const LSUP_Term *dtype = LSUP_tcache_get (self->ob_struct->datatype);
+    if (!dtype) Py_RETURN_NONE;
+
+    PyObject *datatype = PyUnicode_FromString (dtype->data);
 
     Py_INCREF (datatype);
     return datatype;

+ 0 - 1
cpython/py_triple.h

@@ -6,7 +6,6 @@
 #include <Python.h>
 #include <structmember.h>
 
-#include "triple.h"
 #include "py_term.h"
 
 

+ 26 - 30
docs/dev/deps.dot

@@ -5,45 +5,41 @@ digraph "source tree" {
     fontsize="16";
     fontname="Helvetica";
 	clusterrank="local";
-	"term" -> "tpl"
 	"environment" -> "store_mdb"
-	"store_htable" -> "uthash"
-	"graph" -> "environment"
-	"store_mdb" -> "lmdb"
-	"core" -> "log"
 	"buffer" -> "xxhash"
-	"nt_parser" -> "nt_grammar"
-	"py_graph" -> "codec_nt"
-	"profile" -> "graph"
-	"py_term" -> "term"
+	"graph" -> "store_htable"
+	"profile" -> "lsup_rdf"
+	"graph" -> "environment"
+	"py_graph" -> "graph"
 	"py_lsup_rdf" -> "py_namespace"
 	"namespace" -> "core"
-	"term" -> "buffer"
-	"store_mdb" -> "uthash"
-	"environment" -> "uthash"
+	"graph" -> "term"
+	"store_htable" -> "buffer"
 	"codec_nt" -> "codec_base"
-	"term" -> "namespace"
-	"graph" -> "store_htable"
 	"codec_base" -> "graph"
-	"store_mdb" -> "triple"
-	"py_triple" -> "py_term"
-	"buffer" -> "core"
-	"store_htable" -> "triple"
-	"py_graph" -> "graph"
-	"lsup_rdf" -> "graph"
-	"store_mdb" -> "namespace"
-	"py_triple" -> "triple"
+	"term" -> "buffer"
+	"term" -> "uthash"
+	"py_namespace" -> "namespace"
+	"py_graph" -> "codec_nt"
+	"namespace" -> "uthash"
+	"store_htable" -> "uthash"
 	"py_lsup_rdf" -> "py_graph"
+	"term" -> "tpl"
+	"py_term" -> "term"
+	"lsup_rdf" -> "codec_nt"
 	"nt_parser" -> "graph"
-	"store_mdb" -> "store"
-	"triple" -> "term"
+	"core" -> "log"
 	"graph" -> "store_mdb"
-	"namespace" -> "uthash"
-	"py_namespace" -> "namespace"
+	"py_graph" -> "py_triple"
+	"buffer" -> "core"
+	"store_mdb" -> "lmdb"
+	"store_mdb" -> "buffer"
+	"term" -> "namespace"
+	"codec_nt" -> "nt_parser"
 	"nt_grammar" -> "graph"
+	"store_mdb" -> "bootstrap"
+	"py_triple" -> "py_term"
+	"store_mdb" -> "store"
 	"core" -> "lmdb"
-	"term" -> "uthash"
-	"codec_nt" -> "nt_parser"
-	"py_graph" -> "py_triple"
-	"store_htable" -> "namespace"
+	"nt_parser" -> "nt_grammar"
 }

BIN
docs/dev/deps.pdf


+ 0 - 0
include/buffer.h


Some files were not shown because too many files changed in this diff