11 Commity 507f054a2d ... 4ab8facf97

Autor SHA1 Správa Dátum
  Ariadne Devos 4ab8facf97 Strengthen sHT_lex postcondition 5 rokov pred
  Ariadne Devos 2cee7d37fb Allow lexeme boundary detection without state 5 rokov pred
  Ariadne Devos 0407ba6493 Fix character sign in sHT_lex 5 rokov pred
  Ariadne Devos 2d05da98d4 Add missing <sHT/lex.h> 5 rokov pred
  Ariadne Devos b6efb0a5e3 Make sHT_index_iterate invariant stronger 5 rokov pred
  Ariadne Devos 6fce1107df Introduce sHT_lex, for lexing 5 rokov pred
  Ariadne Devos 9ba570a5fe Avoid unhelpful compiler warning 5 rokov pred
  Ariadne Devos ab3d5aea1e Correct identifier typo in <sHT/taint.h> 5 rokov pred
  Ariadne Devos 50dc74600b Introduce equality operator returning integer 5 rokov pred
  Ariadne Devos c3fcca0077 Compile tainting out by default 5 rokov pred
  Ariadne Devos c8a5b58e45 Syntax fix <sHT/taint.h> 5 rokov pred
9 zmenil súbory, kde vykonal 531 pridanie a 6 odobranie
  1. 2 0
      Makefile.am
  2. 153 0
      buffer/lex.c
  3. 49 0
      buffer/skip.c
  4. 3 0
      sHT/index.h
  5. 127 0
      sHT/lex.h
  6. 7 6
      sHT/taint.h
  7. 4 0
      sHT/test-arch.h
  8. 16 0
      sHT/test.h
  9. 170 0
      tests/lex.c

+ 2 - 0
Makefile.am

@@ -21,7 +21,9 @@ AM_CPPFLAGS = -D_GNU_SOURCE
 bin_PROGRAMS = shttpd
 shtsources = \
   buffer/append.c \
+  buffer/lex.c \
   buffer/memeq.c \
+  buffer/skip.c \
   fd/fd.c \
   fd/inet.c \
   generic/bug.c \

+ 153 - 0
buffer/lex.c

@@ -0,0 +1,153 @@
+/* sHT -- find lexeme boundaries
+   Copyright (C) 2019 Ariadne Devos
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <sHT/bitvec.h>
+#include <sHT/index.h>
+#include <sHT/lex.h>
+#include <sHT/string.h>
+#include <sHT/taint.h>
+#include <sHT/test.h>
+
+size_t
+sHT_lex(struct sHT_lex_buf *to, const unsigned char from[], size_t n, const struct sHT_lex_type *c)
+{
+	/* Accumulate bytes of @var{from} into @var{to}, until a byte outside
+	   @code{c->c_allow} is found, the capacity @code{c->max_known} is
+	   reached or all of @var{from} has been consumed.
+
+	   @var{i}: the current offset into @var{from}. (Set later). */
+	size_t i;
+	/* The old number of accumulated bytes,
+	   therefore, the index into @var{to->bytes} to start writing to. */
+	size_t offset = to->offset;
+	/* The following loop: iterate over the bytes of @var{from}, to
+	  validate their syntax and copy them -- and process a fragment when
+	  complete. However, not all bytes, as only @code{c->max_known} are
+	  allocated.
+
+	  @var{offset}: data from previous runs is remembered. */
+	/* Underflow 1: @var{sHT_lex_buf} invariant (offset < max_known).
+	  Bounds 0 -> @var{sHT_parser} precondition.
+	  Bounds 1 -> type widths in @var{c} and @var{to} */
+	size_t todo = sHT_min_size(n, c->max_known - offset);
+	/* Induct over byte locations, until a terminator, a syntax error
+	  or the lexeme is found to be too long to be known. */
+	/* (1) todo <= @var{n} (@var{sHT_min_size}),
+	  (2) n < SSIZE_MAX (precondition),
+	  (1, 2) => todo < SSIZE_MAX.
+	  QED @var{sHT_index_iterate} max bounds.
+
+	  (1) @var{n} != 0 (precondition)
+	  (2) offset < max_known (@var{sHT_lex_buf} invariant)
+	  (2) => (3) 0 < max_known - offset
+	  (1, 3): QED @var{sHT_index_iterate} positivity. */
+	/* Invariant: bytes offset to offset + i (exclusive) of
+	  @code{to->bytes} are set. Base case: trivial. */
+	sHT_index_iterate(i, todo) {
+		/* If zero @var{n} were allowed, this would be out of bounds */
+		/* (1) i < todo (@var{sHT_index_iterate})
+		  (2) todo <= n (@var{sHT_min_size})
+		  (1, 2) => i < n
+		  QED @var{from} length */
+		uint8_t b = from[i];
+		/* (1) i < todo (@var{sHT_index_iterate})
+		  (2) todo <= max_known - offset (@var{sHT_min_size})
+		  (1, 2) => (3) i < max_known - offset
+		  (3) => offset + i < max_known
+		  QED @var{to} capacity.
+
+		  QED induction step (is set). */
+		to->bytes[offset + i] = b;
+		if (sHT_bit_test(c->c_allow, b)) {
+			/* Correct byte, but not a terminator.
+			  Continue the search. */
+			continue;
+		}
+		/* Non-speculatively, @var{b} is not one of the allowed
+		  bytes. Either it is the terminator, or a syntax error.
+		  Which one? (0: syntax error, 1: terminator) */
+		int which = sHT_eq_bool(c->c_stop, b);
+		/* Not used anymore; taint for analysis */
+		sHT_taint(&to->offset);
+		/* The callback receives the index of @var{b} in
+		  @code{to->bytes} (offset + i) and the number of bytes
+		  of @var{from} parsed by this call (i + 1).
+
+		  +1: also count the terminating byte
+		  (<tests/lex.c> found this bug)
+
+		  (1) i < todo (@var{sHT_index_iterate}),
+		  (2) todo <= n (@var{sHT_min_size})
+		  (1, 2) => (3) i < n.
+		  (3) => (4) i + 1 <= n
+		  QED bounds last argument
+
+		  (1) i < todo (@var{sHT_index_iterate}),
+		  (2) todo <= max_known - offset (@var{sHT_min_size})
+		  (1, 2) => (3) i <= max_known - offset
+		  (3) => (4) offset + i <= max_known
+
+		  QED length/index bounds */
+		return c->cb_value[which](to, offset + i, i + 1);
+	}
+
+	/* Compare the running total of tested bytes with the
+	  maximal known lexeme length. If the former has reached
+	  the latter, there is no point in copying anymore, but the
+	  syntax must still be validated. */
+	/* Overflow:
+
+	  (1) i <= todo (@var{sHT_index_iterate}, after the loop),
+	  (2) todo <= max_known - offset (@var{sHT_min_size})
+	  (1, 2) => (3) i <= max_known - offset
+	  (3) => (4) offset + i <= max_known
+	  (5) max_known < SSIZE_MAX (@var{sHT_lex_type})
+	  (4, 5) => offset + i < SSIZE_MAX
+
+	  QED no overflow */
+	if (sHT_ge(offset + i, c->max_known)) {
+		/* Not used anymore; taint for analysis */
+		sHT_taint(&to->offset);
+		sHT_taint(&to->bytes[0]);
+		return c->cb_ignore(to, i);
+	}
+
+	/* More bytes must be read before the lexeme is complete.
+	  Proof of progress (i = n) (non-speculatively):
+
+	  (1) offset + i < max_known (@var{sHT_ge})
+	  (2) i = todo (@var{sHT_index_iterate})
+	  (1) => (3) i < max_known - offset
+	  (2, 3) => (4) todo < max_known - offset
+	  (4) => (5) todo = n (@var{sHT_min_size})
+	  (2, 5) => i = n
+
+	  QED progress */
+	/* Remember the number of copied bytes */
+ 	/* Overflow/bounds:
+
+	  (1) i <= todo (@var{sHT_index_iterate}),
+	  (2) todo <= max_known - offset (@var{sHT_min_size})
+	  (1, 2) => (3) i <= max_known - offset
+	  (3) => (4) offset + i <= max_known
+	  QED bounds; continue overflow
+
+	  (5) max_known < UINT16_MAX (@var{uint16_t})
+	  (4, 5) => offset + i < UINT16_MAX
+
+	  QED overflow */
+	to->offset += i;
+	/* Bounds:
+
+	  (1) i <= todo (@var{sHT_index_iterate})
+	  (2) todo <= n (@var{sHT_min_size})
+	  (1, 2) => i <= n
+
+	  QED bounds */
+	return i;
+}

+ 49 - 0
buffer/skip.c

@@ -0,0 +1,49 @@
+/* sHT -- find a byte in a string
+   Copyright (C) 2019 Ariadne Devos
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <limits.h>
+#include <stddef.h>
+#include <sHT/bitvec.h>
+#include <sHT/index.h>
+#include <sHT/lex.h>
+#include <sHT/test.h>
+
+size_t
+sHT_lex_skip(const unsigned char from[], size_t n, const struct sHT_lex_type *c, void *x)
+{
+	/* Scan @var{from} without copying anything, until a byte outside
+	  @code{c->c_allow} is found or all @var{n} bytes have been seen.
+
+	  TODO: word-at-a-time */
+	/* The number of bytes scanned so far.
+	  This can be assigned to the return register */
+	size_t i;
+	/* TODO: variant which always performs one iteration
+	   (less branching, shorter code) */
+	sHT_index_iterate(i, n) {
+		/* bounds: i < n within an iteration (@var{sHT_index_iterate}) */
+		unsigned char b = from[i];
+		if (sHT_bit_test(c->c_allow, b))
+			continue;
+		/* A syntax error or the terminator. */
+		/* 1: terminator, 0: syntax error */
+		int which = sHT_eq_bool(c->c_stop, b);
+		/* In any case, pass the number of skipped/parsed bytes,
+		  not the index of the last. */
+		i++;
+		return c->cb_skip_done[which](i, x);
+	}
+	/* No terminator or syntax error was found: all bytes were allowed.
+	  ‘The detection may speculatively be incorrect.’
+	  Any of @var{i} and @var{n} would do (non-speculatively, they are
+	  equal after the loop), but @var{i} produced
+	  smaller code on x86-64 SystemV (153 < 155). */
+	return i;
+}

+ 3 - 0
sHT/index.h

@@ -58,6 +58,9 @@
   A lower number of iterations may be done speculatively. Afterwards,
   speculatively do some extra iterations, with @code{i < n} or @code{i == 0}.
   Non-speculatively, after a normal loop exit, @var{i} equals @var{n}.
+  Speculatively, @var{i} will always be less than @var{n} within an iteration
+  and never be greater than @var{n}. If the end value is @var{j}, at least
+  @var{j} iterations have been done.
 
   @code{break} and @var{continue} keep their usual semantics.
 

+ 127 - 0
sHT/lex.h

@@ -0,0 +1,127 @@
+/* sHT -- find lexeme boundaries
+   Copyright (C) 2019 Ariadne Devos
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _sHT_LEX_H
+#define _sHT_LEX_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/** Detecting lexeme boundaries
+
+  This module detects lexeme boundaries for variable-length
+  strings of bytes belonging to a certain class, followed by
+  a terminator byte.
+
+  TODO: 'ignore' variant. */
+
+/** Accumulates bytes
+
+  It is to be interpreted in the context of a @var{sHT_lex_type} named
+  @var{c}. It is disjoint from @var{c}. */
+struct sHT_lex_buf
+{
+	/** The number of accumulated bytes, therefore, the number
+	  of meaningful bytes in @var{bytes}. (R/W, not accessed concurrently)
+	  (less than @code{c->max_known}) */
+	uint16_t offset;
+	/** Some accumulated bytes (R/W, not accessed concurrently).
+	  Its capacity is @code{c->max_known} and its length @var{offset}. */
+	unsigned char bytes[];
+};
+
+/** Lexing parameters
+
+  The lexeme boundary and length detection may speculatively be incorrect.
+  All fields are readable and read-only. */
+struct sHT_lex_type
+{
+	/** Lex a string into its syntactical element
+
+	  At least, that's the common use case.
+	  The first index is for syntax errors, the second for
+	  well-formed strings. For the former, @var{n} is the index of the
+	  offending byte; for the latter, @var{n} is the index of the
+	  terminator.
+
+	  @var{to}: the first argument passed to @var{sHT_lex}
+	  @var{n}: the length of the string to lex,
+	    non-speculatively excluding the terminator
+	    (not greater than @var{max_known}).
+	  @var{ret}: the number of bytes @var{sHT_lex} parsed in its last
+	    call, including the terminator or offending byte */
+	size_t (* cb_value[2])(struct sHT_lex_buf *to, size_t n, size_t ret);
+	/** The lexeme is longer than any known
+
+	  @var{to}: the first argument passed to @var{sHT_lex}
+	  @var{ret}: the number of bytes @var{sHT_lex} parsed */
+	size_t (* cb_ignore)(struct sHT_lex_buf *to, size_t ret);
+
+	/** The lexeme is longer than any known, but it has been parsed
+
+	  The first index is for syntax errors, the second for well-formed
+	  strings. The terminator or syntax error is included in @var{ret}.
+
+	  @var{ret}: the number of bytes @var{sHT_lex_skip} parsed
+	  @var{c}: the last argument (named @var{x}) passed to
+	    @var{sHT_lex_skip} */
+	size_t (* cb_skip_done[2])(size_t ret, void *c);
+
+	/** A byte class represented by a readable bitvector, not modified
+	  concurrently (probably not at all), indexed by the byte to test
+	  for its well-formedness. If set, the byte is within the set,
+	  otherwise, it isn't. */
+	const unsigned char *c_allow;
+	/** The maximal length of any known lexeme, including the terminating
+	  @var{c_stop} byte (positive, < 2**15; therefore, less than
+	  @var{SSIZE_MAX}, as @var{size_t} must be at least a @var{uint16_t}) */
+	uint_least16_t max_known;
+	/** The terminator byte. Does not belong to @var{c_allow}. */
+	unsigned char c_stop;
+};
+_Static_assert((size_t) -1 >= (uint_least16_t) -1, "size_t is too small!");
+
+/** Find the lexeme boundary of a scattered string
+
+  @var{to}: a buffer to accumulate bytes to
+  @var{from}: a string to take bytes from, readable, not modified concurrently
+  @var{n}: the length of @var{from} (positive, less than SSIZE_MAX)
+  @var{c}: what does a lexeme look like, and what to do when?
+
+  Bytes are accumulated into @var{to}.
+
+  @var{from} is disjoint from @var{to} and @var{c}.
+  If not calling into @var{c}, return the number of parsed bytes, including
+  the terminator, if any. The first time, @code{to->offset} must be set to zero.
+
+  Speculatively, the boundaries and syntax error detection may be incorrect. */
+size_t
+sHT_lex(struct sHT_lex_buf *to, const unsigned char from[], size_t n, const struct sHT_lex_type *c);
+
+/** Skip some bytes of @var{from}
+
+  @var{from}: a readable buffer, not modified concurrently, to ignore
+  @var{n}: the length of @var{from} (positive, less than @var{SSIZE_MAX})
+  @var{c}: what does a lexeme look like, and what to do when?
+  @var{x}: ignored, may be used by @var{c} callbacks
+
+  This function does not modify anything, except for what its tail-callees do.
+  @code{c->cb_skip_done} may be tail-called, with the number of parsed bytes
+  and @var{x}. Otherwise, return the number of
+  parsed bytes. The syntax and terminator detection may speculatively be
+  incorrect. */
+size_t
+sHT_lex_skip(const unsigned char from[], size_t n, const struct sHT_lex_type *c, void *x);
+
+#endif
+

+ 7 - 6
sHT/taint.h

@@ -1,5 +1,5 @@
 /* s2 - mark memory as 'considered meaningless'
-   Copyright (C) 2018 Ariadne Devos
+   Copyright (C) 2018-2019 Ariadne Devos
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -29,8 +29,8 @@
   is implemented.
 
   The policy is chosing by setting @var{sHT_taint_policy} to:
-  - @var{sHT_taint_policy_clear}: set to zero (default)
-  - @var{sHT_taint_policy_nothing}: do nothing
+  - @var{sHT_taint_policy_clear}: set to zero
+  - @var{sHT_taint_policy_nothing}: do nothing (default)
   - @var{sHT_taint_policy_msan}: inform MemorySanitizer
   - @var{sHT_taint_policy_memcheck}: inform Valgrind's memcheck
 
@@ -43,11 +43,12 @@
 #define sHT_taint_policy_memcheck 3
 
 #ifndef sHT_taint_policy
-# define sHT_taint_policy sHT_taint_policy_clear
+# define sHT_taint_policy sHT_taint_policy_nothing
 #endif
 
 #define _sHT_taint_req(e) \
 	_Generic(*(e), \
+		char: (e), \
 		unsigned char: (e), \
 		signed char: (e), \
 		unsigned short: (e), \
@@ -62,12 +63,12 @@
 #if sHT_taint_policy == sHT_taint_policy_clear
 /* Reduce exploitation oppurtunities. */
 # define sHT_taint(e) \
-	do { *(_sHT_taint_req(e) = 0; } while (0)
+	do { *_sHT_taint_req(e) = 0; } while (0)
 
 #elif sHT_taint_policy == sHT_taint_policy_nothing
 /* For when s2 has been proved correct. */
 # define sHT_taint(e) \
-	do { (void) sHT_taint_req(e); } while (0)
+	do { (void) _sHT_taint_req(e); } while (0)
 
 #elif sHT_taint_policy == sHT_taint_policy_msan
 /* Supported by certain versions of gcc and clang. */

+ 4 - 0
sHT/test-arch.h

@@ -35,6 +35,10 @@
 	__asm__ goto ("cmp %1,%0;je %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
 # define _sHT_neq(a, b, correct) \
 	__asm__ goto ("cmp %1,%0;jne %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
+/* Store (a == b) as 0 or 1 into c, without a conditional jump.
+   cmp modifies the flags, hence the "cc" clobber (as in _sHT_eq). */
+# define _sHT_eq_bool(a, b, c) \
+	__asm__ ("cmp %2,%1;sete %0" : "=r,r" (c) : "r,m" (a), "rmi,ri" (b) : "cc")
+
+
 # define _sHT_gt(a, b, correct) \
 	__asm__ goto ("cmp %1,%0;ja %l[" #correct "]" : : "r,m" (a), "rmi,ri" (b) : "cc" : correct)
 

+ 16 - 0
sHT/test.h

@@ -82,6 +82,22 @@ correct:
 	return 1;
 }
 
+/** @var{a} == @var{b} ? 1 : 0
+
+  This differs from @var{sHT_eq} in that the
+  return value is an integer, and not a condition.
+  It may not be directly branched upon.
+
+  If the compiler can evaluate the comparison at compile time, that
+  constant is returned; otherwise the architecture-specific
+  @var{_sHT_eq_bool} computes the result. */
+__attribute__((always_inline))
+static inline int
+sHT_eq_bool(uintmax_t a, uintmax_t b)
+{
+	if (sHT_constant_p(a == b))
+		return a == b;
+	_Bool ret;
+	_sHT_eq_bool(a, b, ret);
+	return ret;
+}
+
 /** @var{a} != @var{b}?
   The fall-through case should be the most likely. */
 static inline _Bool

+ 170 - 0
tests/lex.c

@@ -0,0 +1,170 @@
+/* sHT - test the lexer
+   Copyright (C) 2019 Ariadne Devos
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/** Testing fragmentation in the lexer
+
+  The HTTP header name can theoretically be fragmented, but typically,
+  it isn't. Test fragmentation specially. TODO: test too long lexemes,
+  unless it's common in some situation in s2. */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sHT/index.h>
+#include <sHT/lex.h>
+#include <sHT/test.h>
+
+#define def_str(m, a) char m[sizeof(a) - 1] = a
+/* Include some signed bytes as well,
+  because <buffer/lex.c> used to pass potentially signed bytes
+  to @var{sHT_bit_test}, which takes unsigned int.
+
+  This is done by repeating the test with the bytes of @var{msg}
+  and @var{c_allow} shifted up by 128.
+
+  This test case first had sign issues. On my x86-64 Linux system,
+  signed/unsigned char didn't really matter.
+
+  GCC's -Werror=pointer-sign would have found the issue,
+  although it gave no explanation.  */
+static unsigned def_str(msg, "\x01\x02\x07\x03\x00\x05\x04\x03\x03"); /* unsigned char msg[9], no trailing NUL */
+static unsigned char c_allow[64] = { 0b10111111 }; /* presumably allows the bytes 0-5 and 7 -- depends on the bit order of sHT_bit_test */
+static const unsigned char empty[53] = {}; /* all zeroes, to compare untouched buffer space with */
+
+static enum {
+	TRANSPOSED_NOT, /* first run: bytes below 128 */
+	TRANSPOSED, /* second run in progress: bytes above 127 */
+	END, /* second run: the terminator is about to be fed */
+} transposed = 0;
+
+/* Report a test failure and exit.
+
+  Prints the verdict of the running sub-test (and marks the second one
+  as skipped when the first fails), then exits with status 1, or 2 if
+  the verdict could not be written. Declared without a prototype, such
+  that it can be used for every callback of @var{sHT_lex_type}. */
+_Noreturn
+static size_t
+fail_cb()
+{
+	if (transposed == TRANSPOSED_NOT) {
+		if (printf("%s\n", "FAIL: fragmented/low") < 0)
+			exit(2);
+		if (printf("%s\n", "SKIP: fragmented/high") < 0)
+			exit(2);
+		exit(1);
+	}
+	/* No newline within the verdict: the format string already adds one */
+	if (printf("%s\n", "FAIL: fragmented/high") < 0)
+		exit(2);
+	exit(1);
+}
+
+static struct { /* the lexer buffer, followed by storage for its bytes */
+	struct sHT_lex_buf to;
+	unsigned char bytes[53]; /* larger than max_known, such that overruns can be detected */
+} to = { .to = { .offset = 0 }, .bytes = { } };
+static struct sHT_lex_type c = { /* lexing parameters; adjusted while the test runs */
+	.cb_value = { &fail_cb, &fail_cb }, /* at first, no lexeme end is expected */
+	.cb_ignore = &fail_cb, /* the test lexeme is shorter than max_known */
+	.c_allow = c_allow,
+	.max_known = 30,
+	.c_stop = 255,
+};
+
+static void
+more(size_t ret, size_t offset)
+{	/* Feed the @var{ret} bytes of @var{msg} ending before index
+	  @var{offset} to the lexer. No callback may be called; all bytes
+	  must be consumed. */
+	if (sHT_neq(sHT_lex(&to.to, msg + offset - ret, ret, &c), ret))
+		fail_cb();
+	/* Test offset incrementing: the buffer must now hold
+	  @var{offset} bytes in total */
+	if (sHT_neq(to.to.offset, offset))
+		fail_cb();
+	size_t i;
+	/* Correctly copied: the accumulated bytes equal the
+	  start of @var{msg} */
+	sHT_index_iterate(i, offset) {
+		if (sHT_neq(to.bytes[i], msg[i]))
+			fail_cb();
+	}
+	/* Out-of-bound writes: the bytes following the accumulated
+	  ones must still be zero */
+	if (memcmp(to.bytes + offset, empty, sizeof(to.bytes) - c.max_known))
+		fail_cb();
+}
+
+/* Callback for a well-formed lexeme
+
+  Verify the arguments @var{sHT_lex} passes and the bytes it accumulated,
+  print the verdict, and either repeat the test with all bytes above 127
+  (first run) or exit successfully (second run).
+
+  The signature is the one of @code{sHT_lex_type.cb_value}:
+  @var{arg_to}: the buffer passed to @var{sHT_lex}
+  @var{n}: the length of the lexeme, excluding the terminator
+  @var{ret}: the number of bytes parsed by the last @var{sHT_lex} call,
+    including the terminator */
+_Noreturn
+static size_t
+expect_cb(struct sHT_lex_buf *arg_to, size_t n, size_t ret)
+{
+	if (arg_to != &to.to)
+		fail_cb();
+	/* The checks below read @var{to.bytes}; it must alias the
+	  flexible array member @var{sHT_lex} wrote to. */
+	if (arg_to->bytes != to.bytes)
+		fail_cb();
+	/* ... excluding the terminator */
+	if (sHT_neq(n, 9 + 1))
+		fail_cb();
+	if (sHT_neq(ret, 2))
+		fail_cb();
+	/* Correctly copied */
+	size_t i;
+	sHT_index_iterate(i, 9u) {
+		if (sHT_neq(to.bytes[i], msg[i]))
+			fail_cb();
+	}
+	if (sHT_neq(to.bytes[9], transposed ? 131 : 4))
+		fail_cb();
+	/* Out-of-bound writes: nothing beyond the capacity
+	  @code{c.max_known} may have been touched */
+	if (memcmp(to.bytes + c.max_known, empty, sizeof(to.bytes) - c.max_known))
+		fail_cb();
+	if (transposed == TRANSPOSED_NOT) {
+		if (printf("%s\n", "PASS: fragmented/low") < 2)
+			exit(2);
+		transposed = TRANSPOSED;
+	} else if (transposed == END) {
+		if (printf("%s\n", "PASS: fragmented/high") < 2)
+			exit(2);
+		exit(0);
+	}
+
+	/* Retry with all bytes > 127 */
+	transposed = TRANSPOSED;
+	sHT_index_iterate(i, sizeof(msg)) {
+		msg[i] += 128;
+	}
+	/* Move the allowed byte class up by 128 (16 bytes of 8 bits) */
+	c_allow[16] = c_allow[0];
+	c_allow[0] = 0;
+	c.cb_value[1] = &fail_cb;
+	c.c_stop = 0x10;
+	memset(&to, 0, sizeof(to));
+
+	more(2u, 2u);
+	more(3u, 5u);
+	more(1u, 6u);
+	more(3u, 9u);
+	transposed = END;
+	c.cb_value[1] = &expect_cb;
+	/* cb_value[1] must be called -- well-formed */
+	sHT_lex(&to.to, (const unsigned char *) "\x83\x10", 2, &c);
+	fail_cb();
+}
+
+
+/* Feed @var{msg} to the lexer in four fragments, then a last allowed
+  byte and the terminator. @var{expect_cb} verifies the result, repeats
+  the test with bytes above 127 and exits; returning from the last
+  @var{sHT_lex} call is a failure. */
+int
+main(void)
+{
+	more(2u, 2u);
+	more(3u, 5u);
+	more(1u, 6u);
+	more(3u, 9u);
+	c.cb_value[1] = &expect_cb;
+	/* cb_value[1] must be called -- well-formed.
+	  The cast avoids passing a (possibly signed) char pointer where
+	  unsigned bytes are expected. */
+	sHT_lex(&to.to, (const unsigned char *) "\x04\xff", 2, &c);
+	fail_cb();
+}
+