#2 Hashmap overhaul: replace UThash with hashmap.c

Merged
scossu merged 5 commits from scossu/hashmap into scossu/master 2 years ago
10 changed files with 65 additions and 61 deletions
  1. 3 0
      .gitmodules
  2. 2 2
      Makefile
  3. 1 1
      TODO.md
  4. 17 12
      cpython/py_graph.h
  5. 32 31
      docs/dev/deps.dot
  6. BIN
      docs/dev/deps.pdf
  7. 1 0
      ext/hashmap
  8. 0 1
      ext/uthash
  9. 9 14
      include/buffer.h
  10. 0 0
      include/core.h

+ 3 - 0
.gitmodules

@@ -28,3 +28,6 @@
 	path = ext/re2c
 	url = https://github.com/skvadrik/re2c.git
     shallow = true
+[submodule "ext/hashmap"]
+	path = ext/hashmap
+	url = git@github.com:scossu/hashmap.c.git

+ 2 - 2
Makefile

@@ -20,7 +20,7 @@ VALGRIND_DUMP = /tmp/lsup_valgrind.log
 CALLGRIND_DUMP = /tmp/lsup_callgrind.out
 
 INCLUDE_BASE = . -Iinclude -I$(MDB_DIR) -I$(XXHASH_DIR) \
-	-Iext/tpl/src -Iext/uthash/src -Iext/log/src
+	-Iext/tpl/src -Iext/hashmap -Iext/log/src
 INCLUDE = -I$(INCLUDE_BASE)
 CFLAGS += -Wall -fPIC -MMD -DLOG_USE_COLOR $(INCLUDE)
 DBG_CFLAGS = -Itest -O0 -g3 -DDEBUG
@@ -41,7 +41,7 @@ EXT_SRC = $(wildcard ext/log/src/*.c) \
 # External headers of libraries compiled in core.
 EXT_H = $(wildcard ext/log/src/*.h) \
 	  	$(wildcard ext/tpl/src/*.h) \
-	  	$(wildcard ext/uthash/src/*.h)
+	  	$(wildcard ext/hashmap/*.h)
 
 LSUP_SRC = $(wildcard src/*.c)
 SRC = $(EXT_SRC) $(LSUP_SRC)

+ 1 - 1
TODO.md

@@ -22,7 +22,7 @@
     - *D* Subclass term types
 - *D* Namespaced IRIs
 - *D* Relative IRIs
-- *P* Atomic multi-graph updates
+- *P* Transaction control
 - *P* Turtle serialization / deserialization
 - *P* Full UTF-8 support
 - *P* Extended tests

+ 17 - 12
cpython/py_graph.h

@@ -272,9 +272,9 @@ Graph_new_from_rdf (PyTypeObject *cls, PyObject *args)
      * > created by fdopen() is closed. The result of applying fdopen() to a
      * > shared memory object is undefined.
      *
-     * This handle must not be closed. Leave open for the Python caller to
-     * handle it.
+     * Hence the `dup()`.
      */
+    fd = dup (fd);
     FILE *fh = fdopen (fd, "r");
 
     GraphObject *res = (GraphObject *) cls->tp_alloc(cls, 0);
@@ -291,6 +291,7 @@ Graph_new_from_rdf (PyTypeObject *cls, PyObject *args)
     size_t ct;
     char *err;
     codec->decode_graph (fh, &res->ob_struct, &ct, &err);
+    fclose (fh);
 
     log_debug ("Decoded %lu triples.", ct);
     if (UNLIKELY (err)) {
@@ -334,6 +335,7 @@ inline static int build_trp_pattern (PyObject *args, LSUP_Term *spo[])
 }
 
 
+/*
 static PyObject *
 Graph_new_set_from_store_lookup (PyTypeObject *cls, PyObject *args)
 {
@@ -364,6 +366,7 @@ Graph_new_set_from_store_lookup (PyTypeObject *cls, PyObject *args)
     Py_INCREF (ret);
     return ret;
 }
+*/
 
 
 static PyObject *
@@ -412,11 +415,11 @@ Graph_add (PyObject *self, PyObject *triples)
 
     PyObject *trp_obj;
     int rc = 0;
-    size_t i;
+    size_t ct = 0;
     LSUP_GraphIterator *it = LSUP_graph_add_init (
             ((GraphObject *)self)->ob_struct);
 
-    for (i = 0; (trp_obj = PyIter_Next (iter)); i++) {
+    while ((trp_obj = PyIter_Next (iter))) {
         if (!PyObject_TypeCheck (trp_obj, &TripleType)) {
             PyErr_SetString (
                     PyExc_ValueError, "Object is not a triple.");
@@ -424,26 +427,26 @@ Graph_add (PyObject *self, PyObject *triples)
             goto finally;
         }
 
-        log_trace ("Inserting triple #%lu", i);
+        log_trace ("Inserting triple #%lu", ct);
 
         LSUP_rc db_rc = LSUP_graph_add_iter (
                 it, ((TripleObject *) trp_obj)->ob_struct);
 
-        if (db_rc == LSUP_OK) rc = LSUP_OK;
-        if (UNLIKELY (db_rc < 0)) {
-            PyErr_SetString (
-                    PyExc_ValueError, "Unknown error while adding triples.");
+        if (db_rc == LSUP_OK) {
+            rc = LSUP_OK;
+            ct++;
+        } else if (UNLIKELY (db_rc < 0)) {
+            PyErr_SetString (PyExc_ValueError, "Error while adding triples.");
             rc = -1;
             goto finally;
         }
+        // If db_rc > 0, it's a no-op and the counter is not increased.
     }
 
 finally:
     LSUP_graph_add_done (it);
 
-    if (rc == LSUP_OK)
-        return PyLong_FromSize_t (LSUP_graph_iter_cur (it));
-
+    if (rc == LSUP_OK) return PyLong_FromSize_t (ct);
     return NULL;
 }
 
@@ -550,11 +553,13 @@ static PyMethodDef Graph_methods[] = {
         METH_CLASS | METH_VARARGS,
         "Create a graph from a RDF file."
     },
+    /*
     {
         "from_lookup", (PyCFunction) Graph_new_set_from_store_lookup,
         METH_CLASS | METH_VARARGS,
         "Create a set of graphs from a store SPO lookup."
     },
+    */
     {
         "store", (PyCFunction) Graph_store, METH_NOARGS,
         "Store a graph into the permanent back end."

+ 32 - 31
docs/dev/deps.dot

@@ -5,45 +5,46 @@ digraph "source tree" {
     fontsize="16";
     fontname="Helvetica";
 	clusterrank="local";
-	"py_triple" -> "py_term"
-	"store_mdb" -> "namespace"
-	"store_htable" -> "uthash"
-	"graph" -> "store_htable"
-	"store_htable" -> "buffer"
-	"nt_parser" -> "nt_grammar"
-	"py_graph" -> "codec_nt"
-	"codec_nt" -> "nt_parser"
 	"codec_nt" -> "codec_base"
-	"codec_base" -> "graph"
-	"store_mdb" -> "lmdb"
+	"namespace" -> "core"
+	"buffer" -> "core"
+	"py_graph" -> "codec_nt"
+	"store_htable" -> "hashmap"
 	"core" -> "lmdb"
-	"graph" -> "store_mdb"
-	"store_mdb" -> "buffer"
 	"graph" -> "environment"
-	"buffer" -> "core"
+	"core" -> "xxhash"
+	"environment" -> "hashmap"
+	"graph" -> "store_htable"
 	"core" -> "log"
-	"nt_grammar" -> "graph"
-	"py_term" -> "term"
-	"py_graph" -> "graph"
+	"codec_nt" -> "nt_parser"
+	"py_lsup_rdf" -> "py_namespace"
 	"graph" -> "term"
-	"term" -> "buffer"
-	"environment" -> "term"
-	"profile" -> "lsup_rdf"
+	"store_mdb" -> "buffer"
 	"py_term" -> "py_namespace"
-	"buffer" -> "xxhash"
-	"py_lsup_rdf" -> "py_graph"
-	"namespace" -> "core"
-	"store_mdb" -> "store"
 	"environment" -> "store_mdb"
-	"namespace" -> "uthash"
-	"store_mdb" -> "bootstrap"
-	"store_mdb" -> "uthash"
-	"lsup_rdf" -> "codec_nt"
+	"store_mdb" -> "store"
+	"environment" -> "term"
+	"term" -> "hashmap"
+	"store_mdb" -> "namespace"
+	"graph" -> "store_mdb"
+	"term" -> "buffer"
+	"py_triple" -> "py_term"
+	"nt_parser" -> "nt_grammar"
+	"store_htable" -> "buffer"
 	"term" -> "namespace"
-	"py_namespace" -> "namespace"
-	"py_graph" -> "py_triple"
+	"lsup_rdf" -> "codec_nt"
+	"profile" -> "lsup_rdf"
+	"store_mdb" -> "lmdb"
 	"term" -> "tpl"
+	"py_graph" -> "py_triple"
+	"store_htable" -> "store"
+	"py_namespace" -> "namespace"
+	"namespace" -> "hashmap"
+	"py_term" -> "term"
+	"store_mdb" -> "bootstrap"
+	"py_graph" -> "graph"
 	"nt_parser" -> "graph"
-	"py_lsup_rdf" -> "py_namespace"
-	"term" -> "uthash"
+	"py_lsup_rdf" -> "py_graph"
+	"codec_base" -> "graph"
+	"nt_grammar" -> "graph"
 }

BIN
docs/dev/deps.pdf


+ 1 - 0
ext/hashmap

@@ -0,0 +1 @@
+Subproject commit 774694ec6dd36f1bb8e5fd3a55a2c47e48295e21

+ 0 - 1
ext/uthash

@@ -1 +0,0 @@
-Subproject commit 86e677629ab5feb477f3f0c85d94a7beeefef3b5

+ 9 - 14
include/buffer.h

@@ -1,21 +1,12 @@
 #ifndef _LSUP_BUFFER_H
 #define _LSUP_BUFFER_H
 
-#include "xxhash.h"
-
 #include "core.h"
 
-#ifndef HASH_SEED
-/** @brief Seed used for all hashing. Compile-time configurable.
- */
-#define HASH_SEED 0
-#endif
-
 // "NULL" key, a value that is never user-provided. Used to mark special
 // values (e.g. deleted records).
 #define NULL_KEY 0
 
-
 /** @brief General-purpose data buffer.
  *
  * The structure is transparently exposed so that the related API only defines
@@ -120,7 +111,10 @@ void LSUP_buffer_free (LSUP_Buffer *buf);
  */
 inline LSUP_Key
 LSUP_buffer_hash (const LSUP_Buffer *buf)
-{ return (buf == NULL) ? NULL_KEY : XXH64 (buf->addr, buf->size, HASH_SEED); }
+{
+    return (buf == NULL) ? NULL_KEY :
+        LSUP_HASH (buf->addr, buf->size, LSUP_HASH_SEED);
+}
 
 
 /** @brief Print a byte string of a given length in a human-readable format.
@@ -184,7 +178,8 @@ LSUP_btriple_new(LSUP_Buffer *s, LSUP_Buffer *p, LSUP_Buffer *o);
  */
 LSUP_rc
 LSUP_btriple_init (
-        LSUP_BufferTriple *sspo, LSUP_Buffer *s, LSUP_Buffer *p, LSUP_Buffer *o);
+        LSUP_BufferTriple *sspo,
+        LSUP_Buffer *s, LSUP_Buffer *p, LSUP_Buffer *o);
 
 
 /** @brief Free the internal pointers of a buffer triple.
@@ -244,11 +239,11 @@ LSUP_btriple_pos (const LSUP_BufferTriple *btrp, LSUP_TriplePos n)
 inline LSUP_Key
 LSUP_btriple_hash (const LSUP_BufferTriple *strp)
 {
-    return XXH64 (
+    return LSUP_HASH (
         strp->s->addr, strp->s->size,
-        XXH64 (
+        LSUP_HASH (
             strp->p->addr, strp->p->size,
-            XXH64 (strp->o->addr, strp->o->size, HASH_SEED)
+            LSUP_HASH (strp->o->addr, strp->o->size, LSUP_HASH_SEED)
         )
     );
 }

+ 0 - 0
include/core.h


Some files were not shown because too many files changed in this diff