38 Commits 91628ee663 ... 2bee9d4835

Author SHA1 Message Date
  Ariadne Devos 2bee9d4835 Merge branch 'ip' into odd-fixes 5 years ago
  Ariadne Devos 6783ce8d69 Fail to contract bounds check and index masking 5 years ago
  Ariadne Devos 085b0ad55b Test failbit as sign bit 5 years ago
  Ariadne Devos d8dd0d8fc1 Add missing wrap-around theorem 5 years ago
  Ariadne Devos dc807d81a0 Correct operand order and types -- again 5 years ago
  Ariadne Devos e9731e2561 Add some remarks from the mutation testing 5 years ago
  Ariadne Devos adb21695c2 Make failbit compilation check more extensive 5 years ago
  Ariadne Devos 5e2be17982 Test IPv4 parser 5 years ago
  Ariadne Devos e0855c4703 Correct s2_parse_ipv4 implementation name 5 years ago
  Ariadne Devos 8a4fdbe149 Merge branch 'odd-fixes' into ip 5 years ago
  Ariadne Devos e312d90107 Number logical statements in <sHT/logic/failbit.h> 5 years ago
  Ariadne Devos 76fe23f79f Parse IPv4 addresses 5 years ago
  Ariadne Devos 436e07ae5a Formally specify sHT_index_nospec 5 years ago
  Ariadne Devos d6731abb96 Correct failure tests 5 years ago
  Ariadne Devos fa34d59142 Merge branch 'odd-fixes' into ip 5 years ago
  Ariadne Devos 12471fdde3 Add missing semicolon 5 years ago
  Ariadne Devos 45d3875f1d Correct operand types 5 years ago
  Ariadne Devos 6f14b8283f Test different immediate modes 5 years ago
  Ariadne Devos 4e782b4814 Check register / immediate operands 5 years ago
  Ariadne Devos de690ff71f Delete unused saturated addition 5 years ago
  Ariadne Devos 9b7e0811af Git-Ignore nano leftovers 5 years ago
  Ariadne Devos 9d35813c1b Correct <sHT/lex/nat.h> and implementation 5 years ago
  Ariadne Devos 991751ef26 Track arithmetic overflow in failbit 5 years ago
  Ariadne Devos 11c6202a19 Better operations for the failbit 5 years ago
  Ariadne Devos 6b156cc1b9 Correct obvious specification thinko 5 years ago
  Ariadne Devos e0bb854fc8 Remember test points while debugging 5 years ago
  Ariadne Devos c9821c73a9 Complete proof of natural lexing with failure bit 5 years ago
  Ariadne Devos 316b54d25e Formally introduce the failbit 5 years ago
  Ariadne Devos 3d02eec5fa Compile lex/nat.c 5 years ago
  Ariadne Devos 3221570ff4 Document indices are natural 5 years ago
  Ariadne Devos d0b8ff33d1 Lex naturals 5 years ago
  Ariadne Devos c23f08079f Math: introduce the slice concept 5 years ago
  Ariadne Devos a8b3623c17 Avoid -Wsign-compare in <tests/cmp.c> 5 years ago
  Ariadne Devos c22cab7495 Put index iteration on a more sound footing 5 years ago
  Ariadne Devos 36ec5f7f81 Define read-write-set and peek-poke 5 years ago
  Ariadne Devos f7b96ce859 Unimplement CPU-dependent speculation barrier 5 years ago
  Ariadne Devos fe9843b888 Implement comparison macros on ARM 5 years ago
  Ariadne Devos 98f95d2749 Recognise sizes be > SSIZE_MAX 5 years ago
10 changed files with 258 additions and 38 deletions
  1. 6 0
      .gitignore
  2. 3 1
      Makefile.am
  3. 8 14
      arch/arm/sHT/spectre.h
  4. 55 0
      arch/arm/sHT/test-arch.h
  5. 10 8
      arch/x86/sHT/test-arch.h
  6. 11 15
      buffer/lex.c
  7. 38 0
      doc/failbit.rst
  8. 4 0
      doc/index.rst
  9. 123 0
      doc/memory-model.rst
  10. 0 0
      doc/numeric-bases.rst

+ 6 - 0
.gitignore

@@ -14,6 +14,12 @@ configure
 Makefile.in
 Makefile
 
+# nano backup and control files
+*~
+.*.swp
+*.save
+*.save.[0-9]
+
 /doctrees
 /html
 

+ 3 - 1
Makefile.am

@@ -15,6 +15,8 @@ shtsources = \
   fd/fd.c \
   fd/inet.c \
   generic/bug.c \
+  lex/ipv4.c \
+  lex/nat.c \
   task/accept.c \
   task/perform.c \
   task/sockrw.c \
@@ -40,8 +42,8 @@ tests = \
 	tests/append \
 	tests/cmp \
 	tests/lex \
+	tests/ipv4 \
 	tests/memeq \
-	tests/inc-saturated \
 	tests/index-nospec
 
 testresult = tests/append

+ 8 - 14
arch/arm/sHT/spectre.h

@@ -22,20 +22,14 @@
 #define _sHT_index_mask(maskp, pos, length) \
 	__asm__("cmp %2, %1\n\tsbc %0, %1, %1" : "=&r" (*(maskp)) : "r" ((length)), "rI" ((pos)) : "cc")
 
-/* ARM recommends "csdb", but their explanation suggests it isn't a
-  full speculation barrier. ARM allows "isb;dsb sy", and that's what
-  GCC does as well.
+/* TODO XXX FIXME: use csdb, isb or some mcr.
+  The specific instruction depends on the CPU model and
+  Thumb/ARM state.
 
-  "isb" flushes the instruction pipeline.
+  I do not have a real ARM system setup for testing yet,
+  so do this later (patches welcome).
 
-  TODO: what's "dsb" for? It is for data, not control flow. I hope no
-  processor ever will do value speculation -- disastruous for crypto.
-  Suppose that ARM does exactly that -- then, the situation is already
-  broken beyond repair, so don't do "dsb". ARM is too fuzzy on details.
-
-  https://patchwork.ozlabs.org/patch/941448 (2018) lets GCC emit
-  "isb;dsb sy".
-
-  volatile "memory" is for paranoia, to avoid reordering. */
+  Some implementation hints are in the git history. */
+/* volatile "memory" is for paranoia, to avoid reordering. */
 #define _sHT_speculation_barrier() \
-		__asm__ volatile("isb" : : : "memory");
+		__asm__ volatile("" : : : "memory")

+ 55 - 0
arch/arm/sHT/test-arch.h

@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+// Copyright © 2018-2019 Ariadne Devos
+
+/* sHT -- (arm) compare values without misinforming the compiler
+
+  Derived from x86 <sHT/test-arch.h>. */
+
+#ifndef _sHT_ARCH_TEST
+#define _sHT_ARCH_TEST
+
+/* Operand order: as in mathematics (%0,%1: %0 op %1)
+  Test instruction: cmp/tst followed by b$CONDITION
+    (cmp/tst sets instruction code, b$CONDITION does the branch.
+    Many instructions in ARM can be made conditional by appending
+    the condition.)
+
+  Sign issues: { unsigned: higher / lower, signed: greater / lower }
+  Immediates: #$NUMBER
+
+  % modifier (GCC): commutative
+    (e.g. equality, intersection, union)
+    (NOT <, >). */
+
+#define _sHT_eq(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;beq %l[" #correct "]" : : "%r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_neq(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;bne %l[" #correct "]" : : "%r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_gt(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;bhi %l[" #correct "]" : : "r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_ge(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;bhs %l[" #correct "]" : : "r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_lt(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;blo %l[" #correct "]" : : "r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_le(a, b, correct) \
+	__asm__ goto ("cmp %0,%1;ble %l[" #correct "]" : : "r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_and_any_p(a, b, correct) \
+	__asm__ goto ("tst %0,%1;bne %l[" #correct "]" : : "%r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_and_none_p(a, b, correct) \
+	__asm__ goto ("tst %0,%1;beq %l[" #correct "]" : : "%r" (a), "rI" (b) : "cc" : correct)
+#define _sHT_zero_p(a, correct) \
+	__asm__ goto ("cmp %0,#0;beq %l[" #correct "]" : : "r" (a) : "cc" : correct)
+#define _sHT_nonzero_p(a, correct) \
+	__asm__ goto ("cmp %0,#0;bne %l[" #correct "]" : : "r" (a), "rI" (a) : "cc" : correct)
+#define _sHT_lt0(a, correct) \
+	__asm__ goto ("tst %0,%1;bmi %l[" #correct "]" : : "r" (a), "rI" (a) : "cc" : correct)
+
+/* TODO: GCC generates different code, more efficient? */
+/* Operational semantics: ret <- 0. If a = b, ret <- 1.
+  Functional semantics: ret <- a = b ? 1 : 0. */
+#define _sHT_eq_bool(a, b, ret) \
+	do {	(ret) = 0; \
+		__asm__("cmp %1,%2;moveq %0,#1" : "+r" (ret) : "r" (a), "rI" (b) : "cc"); \
+	} while (0)
+
+#endif

+ 10 - 8
arch/x86/sHT/test-arch.h

@@ -13,19 +13,21 @@
   avoid this problem.
 
   When reading AMD and probably Intel documentation:
-  'greater' is for signed integers, 'above' for unsigned integers. */
+  'greater' is for signed integers, 'above' for unsigned integers.
+
+  %: commutative */
 
 #define _sHT_ge(a, b, correct) \
-	__asm__ goto ("cmp %1,%0;jae %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("cmp %1,%0;jae %l[" #correct "]" : : "r,m" (a), "erm,er" (b) : "cc" : correct)
 #define _sHT_eq(a, b, correct) \
-	__asm__ goto ("cmp %1,%0;je %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("cmp %1,%0;je %l[" #correct "]" : : "%r,%m" (a), "erm,er" (b) : "cc" : correct)
 #define _sHT_neq(a, b, correct) \
-	__asm__ goto ("cmp %1,%0;jne %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("cmp %1,%0;jne %l[" #correct "]" : : "%r,%m" (a), "erm,er" (b) : "cc" : correct)
 #define _sHT_eq_bool(a, b, c) \
-	__asm__ ("cmp %2,%1;sete %0" : "=r,r" (c) : "r,m" (a), "rmi,ri" (b) : )
+	__asm__ ("cmp %2,%1;sete %0" : "=r,r" (c) : "%r,%m" (a), "erm,er" (b) : )
 
 #define _sHT_gt(a, b, correct) \
-	__asm__ goto ("cmp %1,%0;ja %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("cmp %1,%0;ja %l[" #correct "]" : : "r,m" (a), "erm,er" (b) : "cc" : correct)
 
 /* testl is supposedlu smaller than cmp */
 #define _sHT_zero_p(a, correct) \
@@ -36,6 +38,6 @@
 #define _sHT_lt0(a, correct) \
 	__asm__ goto ("test %0,%0;js %l[" #correct "]" : : "r" (a) : "cc" : correct)
 #define _sHT_and_any_p(a, b, correct) \
-	__asm__ goto ("test %1,%0;jnz %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("test %1,%0;jnz %l[" #correct "]" : : "%rm" (a), "er" (b) : "cc" : correct)
 #define _sHT_and_none_p(a, b, correct) \
-	__asm__ goto ("test %1,%0;je %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+	__asm__ goto ("test %1,%0;je %l[" #correct "]" : : "%rm" (a), "er" (b) : "cc" : correct)

+ 11 - 15
buffer/lex.c

@@ -23,22 +23,18 @@ sHT_lex(struct sHT_lex_buf *to, const unsigned char from[], size_t n, const stru
 	  allocated.
 
 	  @var{offset}: data from previous runs is remembered. */
-	/* Underflow 1: @var{sHT_lex_buf} invariant.
-	  Bounds 0 -> @var{sHT_parser} precondition.
-	  Bounds 1 -> type widths in @var{c} and @var{to} */
+	/* Underflow 1: `sHT_lex_buf` invariant (`offset` < `max_known`) */
 	size_t todo = sHT_min_size(n, c->max_known - offset);
 	/* Induct over byte locations, until a space character, a syntax error
 	  or the method is found to be too long to be known. */
-	/* (1) todo <= @var{n} (@var{sHT_min_size}),
-	  (2) n < SSIZE_MAX,
-	  (1, 2) => todo < SSIZE_MAX.
-	  QED @var{sHT_index_iterate} max bounds.
-
-	  (1) @var{n} != 0 (precondition)
-	  (2) offset < max_known
-	  (2) => (3) 0 < max_known - offset
-	  (1, 3): QED @var{sHT_index_iterate} positivity. */
-	/* Invariant: byte offset to offset + i (exclusive) of
+	/* (1) `n` > 0 (`sHT_lex` precondition)
+	  (1) => (2) 0 < `n`
+	  (3) `offset` < `max_known` (`sHT_lex_buf` invariant)
+	  (3) => (4) 0 < `max_known` - `offset`
+	  (2, 4) => (5) 0 < min(`n`, `max_known` - `offset`)
+	  (5): QED `sHT_index_iterate` positivity (TODO: do ... while loop) */
+
+	/* Invariant: byte `offset` to `offset` + `i` (exclusive) of
 	  @code{buf->bytes} are set. Base case: trivial. */
 	sHT_index_iterate(i, todo) {
 		/* If zero @var{n} were allowed, this would be out of bounds */
@@ -94,8 +90,8 @@ sHT_lex(struct sHT_lex_buf *to, const unsigned char from[], size_t n, const stru
 	  (2) todo <= max_known - offset (@var{sHT_min_size})
 	  (1, 2) => (3) i <= max_known - offset
 	  (3) => (4) offset + i <= max_known
-	  (5) max_known < SSIZE_MAX (@var{sHT_lex_type})
-	  (4, 5) => offset + i < SSIZE_MAX
+	  (5) `max_known` <= `SIZE_MAX` (typing)
+	  (3, 5) => `offset` + `i` <= `SIZE_MAX`
 
 	  QED no overflow */
 	if (sHT_ge(offset + i, c->max_known)) {

+ 38 - 0
doc/failbit.rst

@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: GPL-3.0-or-later
+.. Copyright © 2019 Ariadne Devos
+
+Failbit: encoding failure in an integer
+=======================================
+
+The irrevocable failbit of an integer, if used, keeps tracks
+of failures. It reserves the most-significant bit of a variable
+for itself, the others are left intact. It remembers if there
+had been at least a single failure.
+
+(Header: <sHT/logic/failbit.h>. These machines are always marked
+with `sHT_with_failbit`.)
+
+This little, one-bit state machine[#f1]_ is started with
+`var = sHT_success(T, var)`. It can be moved into the failure
+state with `sHT_fail_if(T, var, condition)`. Once there,
+it is stuck until the machine is reset.
+
+The machine's state can be queried by `sHT_good(T, var)` and
+`sHT_bad(T, var)`, these may give speculatively incorrect
+results. The unused bits can be extracted by `sHT_fail_value`,
+the state machine is oblivious to their meaning.
+
+But these bits must not interfere with the machine's working;
+`failbit_clean(T, value)` := `0 <= value < failbit_limit(T)`
+is a necessary and sufficient condition.
+
+Compatible APIs
+---------------
+
+Many lexers <sHT/lex/...>, arithmetic overflow tracking
+<sHT/logic/arithmetic.h>.
+
+.. rubric:: footnotes
+
+.. [#f1] A formulation involving Monad transformers and Monoid
+would be most welcome.

+ 4 - 0
doc/index.rst

@@ -13,6 +13,10 @@ Contents:
    gpl.rst
    reliability.rst
    speculation.rst
+   memory-model.rst
+   slice.rst
+   failbit.rst
+   numeric-bases.rst
 
 TODO: source code modules
 

+ 123 - 0
doc/memory-model.rst

@@ -0,0 +1,123 @@
+.. SPDX-License-Identifier: GPL-3.0-or-later
+.. Copyright © 2019 Ariadne Devos
+
+Memory model
+============
+This part defines some concepts used in specifications
+about usage of memory. They may be used without further
+explanation.
+
+What is a capability?
+---------------------
+A capability is a type an object can be subject to.
+It can only allow things, but never deny things.
+They can be destroyed when no longer interesting
+(affine typing). Under restricted conditions, they
+can be created (e.g. memory allocation). They can
+typically not be duplicated (otherwise: use-after-free!).
+
+A function may borrow some capabilities from its caller,
+but they typically must be released afterwards -- not
+being duplicated in the process (cf. Rust).
+
+Sequential memory
+-----------------
+The following capabilities are for sequential code,
+concurrently modified memory may require special instructions.
+
+:term:`read` and :term:`write` are automatically granted for
+local variables.
+
+.. glossary::
+
+  set
+    The object has a real value, not just some unrelated bytes
+    lingering around from an old operation. This is intentional,
+    the RAM and processor don't care.
+
+    C11 requires objects be set before reading (with some exceptions
+    for padding bits) to avoid undefined behaviour. (To find bugs,
+    search for 'uninitialised variable' on the web)
+
+  read
+    Neglecting setness, the object can be :term:`peeked`. This isn't
+    really a property of the object itself, but rather is about policy,
+    scope and information flow.
+
+    Heartbleed e.g. was a buffer over-read bug.
+
+  write
+    The object can be :term:`poked`. This addresses the framing problem
+    [#fframing]_: instead of specifying all variables remaining equal, only
+    specify those that might change.
+
+    C e.g. is infamous for its suspectibility to buffer overruns.
+    Rust keeps tracks of array sizes and performs overflow checks,
+    while staying close to the metal.
+
+.. index::
+   single: ∀
+
+Quantified capabilities: what about arrays?
+-------------------------------------------
+What's the capability for a range of set objects in an array?
+Our capabilities are rooted in linear logic, so a first-order
+linear logic seems appropriate.
+
+Let `S` be a finite `Foldable a` and `f` a function from `a` to
+linear types. Then `∀S f` is a linear type, defined as:
+`(foldMap (logical-and . f) S)`. (Dependently-typed Scheme anyone?)
+(This ands together all `(f a)` where `a` belongs to `S`.
+
+(`foldMap` and `Foldable` are taken from Haskell, the latter
+may be a set, a list, a vector or a tree. Finiteness is for
+avoiding non-terminating types, which may give rise to
+non-consistency -- any theorem can be proved with an infinite
+loop [#finfinite]_.)
+
+Therefore, ‘∀{ k | a <= k < e} C . (index a)’ is the capability
+`C` applies to elements `a..e` in the array `a`.
+
+Other combinators
+-----------------
+
+.. index:: ?
+
+t ? C \[ \: D \]
+  If `t`, a boolean logic value holds, the capability `C`,
+  else the capability `D`. `D` defaults to unit (no capability).
+
+.. index:: ^
+
+C ^ D
+  Both the capabilities `C` and `D` -- a logical and, not
+  exclusive-or! 
+
+Effects
+-------
+(These may or may not correspond to those of type and effect systems.)
+
+.. glossary::
+
+  peeked
+  peek
+    Reading some memory requires :term:`set` ^ :term:`read`.
+
+  poked
+  poke
+    Writing to memory requires :term:`write` and produces :term:`set`.
+
+(Example: `x += a[i]` consists of a read from `i`, `a[i]` and `x`
+and a write to `x`.)
+
+.. rubric:: footnotes
+
+.. [#finfinite] ‘Certified Programming with Dependent Types’ (2019) by
+  ‘Adam Chlipala’ at <https://adam.chlipala.net/cpdt/html/>
+  (7 ‘General Recursion’)
+
+.. [#fframing]
+  :term:`write` corresponds to Frama-C's ACSL's `assigns:` clause.
+  The frame problem is discussed in ‘The Frame Problem’ by ‘Murray Shanahan’,
+  in ‘The Stanford Encyclopedia of Philosophy’ (2016) at
+  <https://plato.stanford.edu/archives/spr2016/entries/frame-problem>.

+ 0 - 0
doc/numeric-bases.rst


Some files were not shown because too many files changed in this diff