matching.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /*
  2. * Standalone tool to run the matching algorithm.
  3. */
  4. #include <assert.h>
  5. #include <ctype.h>
  6. #include <string.h>
  7. #include <time.h>
  8. #include "puzzles.h"
  9. #include "matching.h"
  10. #include "tree234.h"
  11. static int nl, nr, count;
  12. static int **adjlists, *adjsizes;
  13. static int *adjdata, *outl, *outr, *witness;
  14. static void *scratch;
  15. static random_state *rs;
  16. static void allocate(int nl_, int nr_, int maxedges)
  17. {
  18. nl = nl_;
  19. nr = nr_;
  20. adjdata = snewn(maxedges, int);
  21. adjlists = snewn(nl, int *);
  22. adjsizes = snewn(nl, int);
  23. outl = snewn(nl, int);
  24. outr = snewn(nr, int);
  25. witness = snewn(nl+nr, int);
  26. scratch = smalloc(matching_scratch_size(nl, nr));
  27. }
  28. static void deallocate(void)
  29. {
  30. sfree(adjlists);
  31. sfree(adjsizes);
  32. sfree(adjdata);
  33. sfree(outl);
  34. sfree(outr);
  35. sfree(witness);
  36. sfree(scratch);
  37. }
  38. static void find_and_check_matching(void)
  39. {
  40. int i, j, k;
  41. count = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes,
  42. rs, outl, outr);
  43. matching_witness(scratch, nl, nr, witness);
  44. for (i = j = 0; i < nl; i++) {
  45. if (outl[i] != -1) {
  46. assert(0 <= outl[i] && outl[i] < nr);
  47. assert(outr[outl[i]] == i);
  48. j++;
  49. for (k = 0; k < adjsizes[i]; k++)
  50. if (adjlists[i][k] == outl[i])
  51. break;
  52. assert(k < adjsizes[i]);
  53. }
  54. }
  55. assert(j == count);
  56. for (i = j = 0; i < nr; i++) {
  57. if (outr[i] != -1) {
  58. assert(0 <= outr[i] && outr[i] < nl);
  59. assert(outl[outr[i]] == i);
  60. j++;
  61. }
  62. }
  63. assert(j == count);
  64. for (i = 0; i < nl; i++) {
  65. if (outl[i] == -1)
  66. assert(witness[i] == 0);
  67. }
  68. for (i = 0; i < nr; i++) {
  69. if (outr[i] == -1)
  70. assert(witness[nl+i] == 1);
  71. }
  72. for (i = 0; i < nl; i++) {
  73. for (j = 0; j < adjsizes[i]; j++) {
  74. k = adjlists[i][j];
  75. if (outl[i] == k)
  76. assert(!(witness[i] == 1 && witness[nl+k] == 0));
  77. else
  78. assert(!(witness[i] == 0 && witness[nl+k] == 1));
  79. }
  80. }
  81. }
  82. struct nodename {
  83. const char *name;
  84. int index;
  85. };
  86. static int compare_nodes(void *av, void *bv)
  87. {
  88. const struct nodename *a = (const struct nodename *)av;
  89. const struct nodename *b = (const struct nodename *)bv;
  90. return strcmp(a->name, b->name);
  91. }
  92. static int node_index(tree234 *n2i, tree234 *i2n, const char *name)
  93. {
  94. struct nodename *nn, *nn_prev;
  95. char *namedup = dupstr(name);
  96. nn = snew(struct nodename);
  97. nn->name = namedup;
  98. nn->index = count234(n2i);
  99. nn_prev = add234(n2i, nn);
  100. if (nn_prev != nn) {
  101. sfree(nn);
  102. sfree(namedup);
  103. } else {
  104. addpos234(i2n, nn, nn->index);
  105. }
  106. return nn_prev->index;
  107. }
  108. struct edge {
  109. int L, R;
  110. };
  111. static int compare_edges(void *av, void *bv)
  112. {
  113. const struct edge *a = (const struct edge *)av;
  114. const struct edge *b = (const struct edge *)bv;
  115. if (a->L < b->L) return -1;
  116. if (a->L > b->L) return +1;
  117. if (a->R < b->R) return -1;
  118. if (a->R > b->R) return +1;
  119. return 0;
  120. }
  121. static void matching_from_user_input(FILE *fp, const char *filename)
  122. {
  123. tree234 *Ln2i, *Li2n, *Rn2i, *Ri2n, *edges;
  124. char *line = NULL;
  125. struct edge *e;
  126. int i, lineno = 0;
  127. int *adjptr;
  128. Ln2i = newtree234(compare_nodes);
  129. Rn2i = newtree234(compare_nodes);
  130. Li2n = newtree234(NULL);
  131. Ri2n = newtree234(NULL);
  132. edges = newtree234(compare_edges);
  133. while (sfree(line), lineno++, (line = fgetline(fp)) != NULL) {
  134. char *p, *Lname, *Rname;
  135. p = line;
  136. while (*p && isspace((unsigned char)*p)) p++;
  137. if (!*p)
  138. continue;
  139. Lname = p;
  140. while (*p && !isspace((unsigned char)*p)) p++;
  141. if (*p)
  142. *p++ = '\0';
  143. while (*p && isspace((unsigned char)*p)) p++;
  144. if (!*p) {
  145. fprintf(stderr, "%s:%d: expected 2 words, found 1\n",
  146. filename, lineno);
  147. exit(1);
  148. }
  149. Rname = p;
  150. while (*p && !isspace((unsigned char)*p)) p++;
  151. if (*p)
  152. *p++ = '\0';
  153. while (*p && isspace((unsigned char)*p)) p++;
  154. if (*p) {
  155. fprintf(stderr, "%s:%d: expected 2 words, found more\n",
  156. filename, lineno);
  157. exit(1);
  158. }
  159. e = snew(struct edge);
  160. e->L = node_index(Ln2i, Li2n, Lname);
  161. e->R = node_index(Rn2i, Ri2n, Rname);
  162. if (add234(edges, e) != e) {
  163. fprintf(stderr, "%s:%d: duplicate edge\n",
  164. filename, lineno);
  165. exit(1);
  166. }
  167. }
  168. allocate(count234(Ln2i), count234(Rn2i), count234(edges));
  169. adjptr = adjdata;
  170. for (i = 0; i < nl; i++)
  171. adjlists[i] = NULL;
  172. for (i = 0; (e = index234(edges, i)) != NULL; i++) {
  173. if (!adjlists[e->L])
  174. adjlists[e->L] = adjptr;
  175. *adjptr++ = e->R;
  176. adjsizes[e->L] = adjptr - adjlists[e->L];
  177. }
  178. find_and_check_matching();
  179. for (i = 0; i < nl; i++) {
  180. if (outl[i] != -1) {
  181. struct nodename *Lnn = index234(Li2n, i);
  182. struct nodename *Rnn = index234(Ri2n, outl[i]);
  183. printf("%s %s\n", Lnn->name, Rnn->name);
  184. }
  185. }
  186. deallocate();
  187. }
  188. static void test_subsets(void)
  189. {
  190. int b = 8;
  191. int n = 1 << b;
  192. int i, j, nruns, expected_size;
  193. int *adjptr;
  194. int *edgecounts;
  195. struct stats {
  196. int min, max;
  197. double n, sx, sxx;
  198. } *stats;
  199. static const char seed[] = "fixed random seed for repeatability";
  200. /*
  201. * Generate a graph in which every subset of [b] = {1,...,b}
  202. * (represented as a b-bit integer 0 <= i < n) has an edge going
  203. * to every subset obtained by removing exactly one element.
  204. *
  205. * This graph is the disjoint union of the corresponding graph for
  206. * each layer (collection of same-sized subset) of the power set
  207. * of [b]. Each of those graphs has a matching of size equal to
  208. * the smaller of its vertex sets. So we expect the overall size
  209. * of the output matching to be less than n by the size of the
  210. * largest layer, that is, to be n - binomial(n, floor(n/2)).
  211. *
  212. * We run the generation repeatedly, randomising it every time,
  213. * and we expect to see every possible edge appear sooner or
  214. * later.
  215. */
  216. rs = random_new(seed, strlen(seed));
  217. allocate(n, n, n*b);
  218. adjptr = adjdata;
  219. expected_size = 0;
  220. for (i = 0; i < n; i++) {
  221. adjlists[i] = adjptr;
  222. for (j = 0; j < b; j++) {
  223. if (i & (1 << j))
  224. *adjptr++ = i & ~(1 << j);
  225. }
  226. adjsizes[i] = adjptr - adjlists[i];
  227. if (adjsizes[i] != b/2)
  228. expected_size++;
  229. }
  230. edgecounts = snewn(n*b, int);
  231. for (i = 0; i < n*b; i++)
  232. edgecounts[i] = 0;
  233. stats = snewn(b, struct stats);
  234. nruns = 0;
  235. while (nruns < 10000) {
  236. nruns++;
  237. find_and_check_matching();
  238. assert(count == expected_size);
  239. for (i = 0; i < n; i++)
  240. for (j = 0; j < b; j++)
  241. if ((i ^ outl[i]) == (1 << j))
  242. edgecounts[b*i+j]++;
  243. if (nruns % 1000 == 0) {
  244. for (i = 0; i < b; i++) {
  245. struct stats *st = &stats[i];
  246. st->min = st->max = -1;
  247. st->n = st->sx = st->sxx = 0;
  248. }
  249. for (i = 0; i < n; i++) {
  250. int pop = 0;
  251. for (j = 0; j < b; j++)
  252. if (i & (1 << j))
  253. pop++;
  254. pop--;
  255. for (j = 0; j < b; j++) {
  256. if (i & (1 << j)) {
  257. struct stats *st = &stats[pop];
  258. int x = edgecounts[b*i+j];
  259. if (st->max == -1 || st->max < x)
  260. st->max = x;
  261. if (st->min == -1 || st->min > x)
  262. st->min = x;
  263. st->n++;
  264. st->sx += x;
  265. st->sxx += (double)x*x;
  266. } else {
  267. assert(edgecounts[b*i+j] == 0);
  268. }
  269. }
  270. }
  271. }
  272. }
  273. printf("after %d runs:\n", nruns);
  274. for (j = 0; j < b; j++) {
  275. struct stats *st = &stats[j];
  276. printf("edges between layers %d,%d:"
  277. " min=%d max=%d mean=%f variance=%f\n",
  278. j, j+1, st->min, st->max, st->sx/st->n,
  279. (st->sxx - st->sx*st->sx/st->n) / st->n);
  280. }
  281. sfree(edgecounts);
  282. deallocate();
  283. }
  284. int main(int argc, char **argv)
  285. {
  286. static const char stdin_identifier[] = "<standard input>";
  287. const char *infile = NULL;
  288. bool doing_opts = true;
  289. enum { USER_INPUT, AUTOTEST } mode = USER_INPUT;
  290. while (--argc > 0) {
  291. const char *arg = *++argv;
  292. if (doing_opts && arg[0] == '-' && arg[1]) {
  293. if (!strcmp(arg, "--")) {
  294. doing_opts = false;
  295. } else if (!strcmp(arg, "--random")) {
  296. char buf[64];
  297. int len = sprintf(buf, "%lu", (unsigned long)time(NULL));
  298. rs = random_new(buf, len);
  299. } else if (!strcmp(arg, "--autotest")) {
  300. mode = AUTOTEST;
  301. } else {
  302. fprintf(stderr, "matching: unrecognised option '%s'\n", arg);
  303. return 1;
  304. }
  305. } else {
  306. if (!infile) {
  307. infile = (!strcmp(arg, "-") ? stdin_identifier : arg);
  308. } else {
  309. fprintf(stderr, "matching: too many arguments\n");
  310. return 1;
  311. }
  312. }
  313. }
  314. if (mode == USER_INPUT) {
  315. FILE *fp;
  316. if (!infile)
  317. infile = stdin_identifier;
  318. if (infile != stdin_identifier) {
  319. fp = fopen(infile, "r");
  320. if (!fp) {
  321. fprintf(stderr, "matching: could not open input file '%s'\n",
  322. infile);
  323. return 1;
  324. }
  325. } else {
  326. fp = stdin;
  327. }
  328. matching_from_user_input(fp, infile);
  329. if (infile != stdin_identifier)
  330. fclose(fp);
  331. }
  332. if (mode == AUTOTEST) {
  333. if (infile) {
  334. fprintf(stderr, "matching: expected no filename argument "
  335. "with --autotest\n");
  336. return 1;
  337. }
  338. test_subsets();
  339. }
  340. return 0;
  341. }