debdb2pupdb.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /*
  2. * this is an efficient C replacement for the extremely slow debdb2pupdb.bac -
  3. * should be about 75 times faster according to my non-scientific benchmarks.
  4. * this implementation uses heavily buffered I/O, hashing, very little memory
  5. * management, compiled REs and other tricks to achieve this amazing
  6. * performance.
  7. *
  8. * to build:
  9. * musl-gcc -static -O3 -fomit-frame-pointer -ffunction-sections -fdata-sections -fmerge-all-constants -Wl,--sort-common -Wl,-gc-sections -o debdb2pupdb debdb2pupdb.c
  10. * strip --strip-all -R .note -R .comment debdb2pupdb
  11. *
  12. * cheers,
  13. * iguleder, September 2015
  14. */
  15. #include <string.h>
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <sys/types.h>
  19. #include <regex.h>
  20. #include <ctype.h>
  21. #include <sys/stat.h>
  22. #include <fcntl.h>
  23. #include <unistd.h>
  /* size, in bytes, of the line buffer and of the stdio input buffer */
  #define BUF_SIZE (1024 * 1024)
  /* number of entries allocated for a list of hashed package names */
  #define MAX_PKGS (60000)
  /* size, in bytes, of the buffer that holds the value of one field */
  #define VAL_SIZE (16 * 1024)
  /* size, in bytes, of the buffer that receives the revision part of a
   * version */
  #define REV_SIZE (32)
  /* branch prediction hints; they only affect code layout, never the result,
   * so compilers without __builtin_expect() simply get the plain condition */
  #if defined(__GNUC__) || defined(__clang__)
  #define LIKELY(x) __builtin_expect(!!(x), 1)
  #define UNLIKELY(x) __builtin_expect(!!(x), 0)
  #else
  #define LIKELY(x) (!!(x))
  #define UNLIKELY(x) (!!(x))
  #endif
  #ifndef REV_SIZE
  #define REV_SIZE (32) /* fallback; normally defined at the top of the file */
  #endif
  /*
   * trims a package version, in place.
   *
   * ver    - the version string; it is modified
   * preg   - compiled expression that matches the revision suffix
   * outver - receives a pointer, inside ver, to the trimmed version; it is
   *          always set, even when nothing could be trimmed
   * rev    - if not NULL, a buffer of at least REV_SIZE bytes that receives the
   *          revision suffix without its leading separator, or an empty string
   *          if there is none; a longer suffix is truncated
   *
   * if the part before ':' is not made of digits only, the version is returned
   * untouched.
   */
  static void trim_ver(char *ver, regex_t *preg, char **outver, char *rev)
  {
      regmatch_t pmatch;
      char *pos, *sep;
      size_t revlen;

      /* give the caller defined results on every path */
      *outver = ver;
      if (NULL != rev) rev[0] = '\0';

      /* strip "1:" from "1:2.0.10-1ubuntu3" */
      sep = strchr(ver, ':');
      if (NULL != sep) {
          for (pos = ver; sep > pos; ++pos) {
              /* isdigit() is only defined for unsigned char values and EOF */
              if (0 == isdigit((unsigned char) pos[0])) return;
          }
          *outver = sep + 1;
      }

      /* kick "~2011week36" in "2.2~2011week36" */
      sep = strchr(*outver, '~');
      if (NULL != sep) sep[0] = '\0';

      /* kick "-1ubuntu3" from "1:2.0.10-1ubuntu3"; pmatch is only valid when
       * regexec() returns 0, so any failure counts as "no revision" */
      if (0 != regexec(preg, *outver, 1, &pmatch, 0)) return;

      sep = *outver + pmatch.rm_so;
      if (NULL != rev) {
          /* the match starts with the separator, which is not copied */
          revlen = (size_t) (pmatch.rm_eo - pmatch.rm_so) - 1;
          if (REV_SIZE <= revlen) revlen = REV_SIZE - 1;
          memcpy(rev, sep + 1, revlen);
          rev[revlen] = '\0';
      }
      sep[0] = '\0';
  }
  /* indices of the fields collected for each package; FIELD_MAX is the number
   * of fields and must stay last */
  enum fields {
      FIELD_DESC = 0,
      FIELD_PATH,
      FIELD_NAME,
      FIELD_SIZE,
      FIELD_ARCH,
      FIELD_VER,
      FIELD_DEPS,
      FIELD_WWW,
      FIELD_SECT,
      FIELD_MAX
  };
  /* the name of each field, as it appears before the '|' separator in the
   * input; the table is sized by FIELD_MAX and initialized by index, so it
   * cannot silently get out of step with the enum above */
  static const char *const fields_str[FIELD_MAX] = {
      [FIELD_DESC] = "Description",
      [FIELD_PATH] = "Filename",
      [FIELD_NAME] = "Package",
      [FIELD_SIZE] = "InstalledSize",
      [FIELD_ARCH] = "Architecture",
      [FIELD_VER] = "Version",
      [FIELD_DEPS] = "Depends",
      [FIELD_WWW] = "Homepage",
      [FIELD_SECT] = "Section"
  };
  /* lookup table used by crc32(), indexed by the low byte of the running
   * checksum combined with the next input byte */
  static const unsigned long crc32_table[256] = {
      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
      0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
      0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
      0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
      0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
      0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
      0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
      0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
      0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
      0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
      0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
      0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
      0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
      0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
      0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
      0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
      0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
      0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
      0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
      0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
      0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
      0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
      0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
      0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
      0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
      0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
      0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
      0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
      0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
      0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
      0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
      0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
      0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
  };
  /* based on CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29, public domain -
   * http://csbruce.com/software/crc32.c
   *
   * in  - the checksum of the data hashed so far, or 0 to start a new one
   * buf - the bytes to hash; it is not accessed when len is 0, so it may be
   *       NULL in that case
   * len - the number of bytes in buf
   *
   * returns the updated checksum. the value is always limited to 32 bits, even
   * where unsigned long is wider, so checksums compare equal regardless of any
   * stray high bits in the input value. */
  static unsigned long crc32(const unsigned long in,
                             const unsigned char *buf,
                             const size_t len)
  {
      unsigned long crc;
      size_t i;

      crc = (in ^ 0xFFFFFFFFUL) & 0xFFFFFFFFUL;
      for (i = 0; len > i; ++i)
          crc = (crc >> 8) ^ crc32_table[(crc ^ buf[i]) & 0xFF];
      return (crc ^ 0xFFFFFFFFUL) & 0xFFFFFFFFUL;
  }
  135. static unsigned long *list_pkgs(const char *path,
  136. const unsigned long initcrc,
  137. char *buf,
  138. int *out)
  139. {
  140. unsigned long *pkgs;
  141. ssize_t len;
  142. int fd;
  143. char *curr, *next;
  144. pkgs = malloc(sizeof(unsigned long) * MAX_PKGS);
  145. *out = 0;
  146. fd = open(path, O_RDONLY);
  147. len = read(fd, buf, BUF_SIZE);
  148. close(fd);
  149. buf[len] = '\0';
  150. for (curr = buf; NULL != curr; ++*out) {
  151. next = strchr(curr + 1, ' ');
  152. if (NULL != next) next[0] = '\0';
  153. ++curr;
  154. pkgs[*out] = crc32(initcrc, (unsigned char *) curr, strlen(curr));
  155. curr = next;
  156. }
  157. return pkgs;
  158. }
  159. int main(int argc, char *argv[])
  160. {
  161. regex_t preg;
  162. char rev[REV_SIZE];
  163. char *fields[FIELD_MAX];
  164. unsigned long initcrc, depcrc;
  165. char *buf, *iobuf, *sep, *fname, *pos, *ver, *name, *deprel, *depver, *line, *dep;
  166. FILE *db;
  167. unsigned long *pkgs, *baddeps;
  168. FILE *baddepf, *wwwf;
  169. int len, i, ndep, npkgs, nbaddeps;
  170. /* Trisquel has some packages with the "ubuntu" suffix, so we have to search
  171. * for both */
  172. regcomp(&preg, "(\\-|\\+)[0-9\\.]*(ubuntu|trisquel|debian|raspbian|build)[0-9\\.]*$", REG_EXTENDED);
  173. buf = malloc(BUF_SIZE);
  174. iobuf = malloc(BUF_SIZE);
  175. wwwf = fopen("/tmp/woof-homepages.acc", "w");
  176. db = fopen("/tmp/woof-debdb.in", "r");
  177. /* use a huge stdio buffer, to reduce the overhead of read() - we want to
  178. * spend time on output, not input */
  179. setbuffer(db, iobuf, BUF_SIZE);
  180. initcrc = crc32(0, NULL, 0);
  181. /* when running via woof-CE, read the list of all packages in the
  182. * repository, hash their names for quick comparison, drop missing
  183. * dependency packages and list them */
  184. if (UNLIKELY(0 == access("/tmp/0setupcompletelistpkgs", F_OK))) {
  185. baddeps = NULL;
  186. pkgs = list_pkgs("/tmp/0setupcompletelistpkgs", initcrc, buf, &npkgs);
  187. baddepf = fopen("/tmp/0setupnewinvaliddeps", "w");
  188. nbaddeps = 0;
  189. }
  190. else {
  191. /* when running via PPM, just drop the missing dependencies */
  192. baddeps = list_pkgs("/usr/local/petget/invaliddepslist", initcrc, buf, &nbaddeps);
  193. pkgs = NULL;
  194. baddepf = NULL;
  195. npkgs = 0;
  196. }
  197. /* allocate buffers for all fields; we set the first byte to \0 before each
  198. * package so we can determine when a field is missing, without having to
  199. * perform malloc() and free() every time */
  200. for (i = 0; sizeof(fields) / sizeof(fields[0]) > i; ++i) {
  201. fields[i] = malloc(VAL_SIZE);
  202. fields[i][0] = '\0';
  203. }
  204. do {
  205. next:
  206. line = fgets(buf, BUF_SIZE, db);
  207. if (NULL == line) {
  208. /* the last package entry does not end with a marker */
  209. if (feof(db) != 0) goto print;
  210. break;
  211. }
  212. len = strlen(buf);
  213. if (len == 0) continue;
  214. sep = strchr(buf, '|');
  215. sep[0] = '\0';
  216. /* extract all field values */
  217. for (i = 0; sizeof(fields) / sizeof(fields[0]) > i; ++i) {
  218. if (0 == strcmp(buf, fields_str[i])) {
  219. line[len - 1] = '\0';
  220. len -= 1 + (sep - buf);
  221. memcpy(fields[i], sep + 1, len);
  222. fields[i][len] = '\0';
  223. /* if a valid field was found, continue to the next line
  224. * immediately */
  225. goto next;
  226. }
  227. }
  228. /* if the end of a package hasn't been reached yet, continue */
  229. if (0 != strcmp(buf, "STARTMARKER")) continue;
  230. print:
  231. /* make sure all mandatory fields are present */
  232. if (('\0' == fields[FIELD_DESC][0]) ||
  233. ('\0' == fields[FIELD_PATH][0]) ||
  234. ('\0' == fields[FIELD_NAME][0]) ||
  235. ('\0' == fields[FIELD_ARCH][0]) ||
  236. ('\0' == fields[FIELD_VER][0]) ||
  237. ('\0' == fields[FIELD_SECT][0]))
  238. continue;
  239. /* skip debugging symbol packages */
  240. if (UNLIKELY(NULL != strstr(fields[FIELD_NAME], "-dbg"))) goto cleanup;
  241. /* split the path into directory and file name */
  242. pos = strrchr(fields[FIELD_PATH], '/');
  243. pos[0] = '\0';
  244. ++pos;
  245. fname = pos;
  246. /* trim the version */
  247. trim_ver(fields[FIELD_VER], &preg, &ver, rev);
  248. /* remove special charcaters from the description */
  249. for (i = strlen(fields[FIELD_DESC]) - 1; 0 <= i; --i) {
  250. if (UNLIKELY(NULL != strchr("'(),", fields[FIELD_DESC][i])))
  251. for (pos = &fields[FIELD_DESC][i]; '\0' != pos[0]; ++pos) pos[0] = pos[1];
  252. }
  253. /* use the package name as specified in the sub-directory path, for find_cat */
  254. if (UNLIKELY(0 == strlen(fields[FIELD_PATH])))
  255. name = fields[FIELD_NAME];
  256. else {
  257. name = strrchr(fields[FIELD_PATH], '/');
  258. ++name;
  259. }
  260. fputs(fields[FIELD_NAME], stdout);
  261. putc('_', stdout);
  262. fputs(ver, stdout);
  263. putc('|', stdout);
  264. fputs(fields[FIELD_NAME], stdout);
  265. putc('|', stdout);
  266. fputs(ver, stdout);
  267. putc('|', stdout);
  268. if (UNLIKELY('\0' != rev)) {
  269. fputs(rev, stdout);
  270. rev[0] = '\0';
  271. }
  272. putc('|', stdout);
  273. fputs(fields[FIELD_SECT], stdout);
  274. putc('|', stdout);
  275. /* some packages have no installed size, only package size -
  276. * libc6-ppc64el-cross in belenos */
  277. if (UNLIKELY(NULL == fields[FIELD_SIZE]))
  278. fwrite("0K|", 1, 3, stdout);
  279. else {
  280. fputs(fields[FIELD_SIZE], stdout);
  281. fwrite("K|", 1, 2, stdout);
  282. }
  283. fputs(fields[FIELD_PATH], stdout);
  284. putc('|', stdout);
  285. fputs(fname, stdout);
  286. putc('|', stdout);
  287. if (UNLIKELY(NULL != fields[FIELD_DEPS])) {
  288. ndep = 0;
  289. dep = fields[FIELD_DEPS];
  290. do {
  291. pos = strstr(dep, ", ");
  292. if (NULL != pos) pos[0] = '\0';
  293. sep = strstr(dep, " (");
  294. /* special case, in case there's an OR relationship between
  295. * dependencies 0setup replaces the | character with a space */
  296. if (UNLIKELY(NULL == sep))
  297. sep = strchr(dep, ' ');
  298. else
  299. strchr(sep + 2, ')')[0] = '\0';
  300. if (NULL != sep) sep[0] = '\0';
  301. depcrc = crc32(initcrc, (unsigned char *) dep, strlen(dep));
  302. /* if the package is an invalid dependency, skip it */
  303. for (i = 0; nbaddeps > i; ++i)
  304. if (depcrc == baddeps[i]) goto nextdep;
  305. /* check whether the package exists */
  306. if (0 < npkgs) {
  307. for (i = 0; npkgs > i; ++i)
  308. if (depcrc == pkgs[i]) goto parse_deps;
  309. /* if not - add it to the list of bad dependencies */
  310. if (NULL != baddepf) {
  311. fputs(dep, baddepf);
  312. fputc('\n', baddepf);
  313. }
  314. goto nextdep;
  315. }
  316. parse_deps:
  317. deprel = NULL;
  318. if (LIKELY(NULL != sep)) {
  319. sep += 2;
  320. if ('>' == sep[0]) {
  321. if ('=' == sep[1]) {
  322. trim_ver(sep + 3, &preg, &depver, NULL);
  323. deprel = "&ge";
  324. } else if ('>' == sep[1]) {
  325. trim_ver(sep + 3, &preg, &depver, NULL);
  326. deprel = "&gt";
  327. } else deprel = NULL;
  328. } else if ('=' == sep[0]) {
  329. trim_ver(sep + 2, &preg, &depver, NULL);
  330. deprel = "&eq";
  331. } else if (('<' == sep[0]) && ('=' == sep[1])) {
  332. trim_ver(sep + 3, &preg, &depver, NULL);
  333. deprel = "&le";
  334. }
  335. sep[0] = '\0';
  336. }
  337. ++ndep;
  338. if (1 < ndep)
  339. fwrite(",+", 1, 2, stdout);
  340. else
  341. putc('+', stdout);
  342. fputs(dep, stdout);
  343. if (NULL != deprel) {
  344. fputs(deprel, stdout);
  345. fputs(depver, stdout);
  346. }
  347. nextdep:
  348. if (pos == NULL) break;
  349. dep = pos + 2;
  350. } while (1);
  351. }
  352. putc('|', stdout);
  353. fputs(fields[FIELD_DESC], stdout);
  354. putc('|', stdout);
  355. fputs(argv[1], stdout);
  356. putc('|', stdout);
  357. fputs(argv[2], stdout);
  358. fwrite("|\n", 1, 2, stdout);
  359. /* write the homepage to the homepage list */
  360. if (UNLIKELY(fields[FIELD_WWW][0] != '\0')) {
  361. fputs(fields[FIELD_NAME], wwwf);
  362. putc(' ', wwwf);
  363. fputs(fields[FIELD_WWW], wwwf);
  364. putc('\n', wwwf);
  365. }
  366. cleanup:
  367. for (i = 0; sizeof(fields) / sizeof(fields[0]) > i; ++i)
  368. if (LIKELY('\0' != fields[i][0])) fields[i][0] = '\0';
  369. } while (NULL != line);
  370. for (i = sizeof(fields) / sizeof(fields[0]) - 1 ; 0 <= i; --i)
  371. free(fields[i]);
  372. if (NULL != baddeps)
  373. free(baddeps);
  374. else {
  375. fclose(baddepf);
  376. free(pkgs);
  377. }
  378. free(buf);
  379. free(iobuf);
  380. fclose(wwwf);
  381. fclose(db);
  382. regfree(&preg);
  383. return EXIT_SUCCESS;
  384. }