/* pdf2jp2.c */
  1. /*
  2. * Copyright (c) 2014, Mathieu Malaterre <mathieu.malaterre@voxxl.com>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  15. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  18. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  /*
   * Extract all JP2 files contained within a PDF file.
   *
   * Technically you could simply use mutool, e.g.:
   *
   * $ mutool show -be -o obj58.jp2 Bug691816.pdf 58
   *
   * to extract a given JP2 file from within a PDF.
   * However, the PDF itself is sometimes corrupted, so this tool is a crude
   * PDF parser that only extracts the streams marked with JPXDecode.
   * It only works on Linux, since it needs the memmem function.
   */
  38. /*
  39. * Add support for other signatures:
  40. *
  41. * obj<</Subtype/Image/Length 110494/Filter/JPXDecode/BitsPerComponent 8/ColorSpace/DeviceRGB/Width 712/Height 1052>>stream
  42. */
  #define _GNU_SOURCE /* must precede every include: exposes memmem() in <string.h> */
  #include <assert.h>
  #include <stddef.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  /* Fixed sizes used by the extractor. */
  enum {
    NUMJP2 = 32,    /* maximum number of JPXDecode markers remembered */
    BUFLEN = 4096,  /* size of the scan window and of the copy buffer */
    LINELEN = 512,  /* size of the buffer holding one dictionary line */
    FNLEN = 512,    /* size of the buffer holding an output file name */
    LOOKBEHIND = 40 /* bytes before the marker searched for "/Length" */
  };

  /*
   * Scan `f` to end-of-file and record the file offset of every occurrence
   * of `needle`.
   *
   * `f` must be positioned at offset 0, because offsets are counted from the
   * first byte read. The file is read in BUFLEN-sized windows; the last
   * strlen(needle) - 1 bytes of each window are carried over to the next one
   * so that a marker straddling two reads is still found, and found only once.
   *
   * Returns the number of offsets stored in `offsets` (at most `max`), or -1
   * on a read error. Markers beyond `max` are reported on stderr and dropped.
   */
  static int find_marker_offsets(FILE *f, const char *needle, long *offsets, int max)
  {
    char window[BUFLEN];
    const size_t nlen = strlen(needle);
    size_t carry = 0; /* bytes kept from the previous window */
    long base = 0;    /* file offset of window[0] */
    int count = 0;

    if (nlen == 0 || nlen > sizeof window)
      return 0;

    for (;;) {
      const size_t want = sizeof window - carry;
      const size_t nread = fread(window + carry, 1, want, f);
      const size_t total = carry + nread;
      const char *from = window;
      const char *hit;
      size_t keep;

      /* Collect every match in this window, not just the first one. */
      while ((hit = memmem(from, total - (size_t)(from - window), needle, nlen)) != NULL) {
        if (count == max) {
          fprintf(stderr, "warning: more than %d '%s' markers; ignoring the rest\n",
                  max, needle);
          return count;
        }
        offsets[count++] = base + (long)(hit - window);
        from = hit + 1;
      }

      /* A short read means end-of-file or an error: nothing left to scan. */
      if (nread < want)
        break;

      /* The regions may overlap, hence memmove rather than memcpy. */
      keep = nlen - 1;
      memmove(window, window + total - keep, keep);
      base += (long)(total - keep);
      carry = keep;
    }
    return ferror(f) ? -1 : count;
  }

  /*
   * Read the stream length from the dictionary line around the marker found
   * at `offset`.
   *
   * Two layouts are tried: "/Length" directly after "JPXDecode]", and
   * "/Length" somewhere in the LOOKBEHIND bytes preceding the marker. In both
   * cases one line is consumed with fgets(), so on success `f` is left just
   * past that line, which is where the caller starts copying the stream.
   *
   * Returns 1 and stores a non-negative value in `*length` on success,
   * 0 when no usable length could be parsed.
   */
  static int read_stream_length(FILE *f, long offset, long *length)
  {
    char line[LINELEN];
    const char *field;
    long back;

    if (fseek(f, offset, SEEK_SET) != 0 || fgets(line, sizeof line, f) == NULL)
      return 0;
    if (sscanf(line, "JPXDecode]/Length %ld/Width %*d/BitsPerComponent %*d/Height %*d",
               length) == 1)
      return *length >= 0;

    /* Try again harder: look for "/Length" shortly before the marker.
     * Clamp the seek target so a marker near the file start cannot make it
     * negative. */
    back = offset > LOOKBEHIND ? offset - LOOKBEHIND : 0;
    if (fseek(f, back, SEEK_SET) != 0 || fgets(line, sizeof line, f) == NULL)
      return 0;
    field = strstr(line, "/Length");
    if (field == NULL || sscanf(field, "/Length %ld/", length) != 1)
      return 0;
    return *length >= 0;
  }

  /*
   * Copy `length` bytes from the current position of `in` into a newly
   * created file named `out_name`.
   *
   * Returns 0 on success, -1 if the output cannot be created or written, or
   * if `in` ends before `length` bytes were available (the bytes read so far
   * are still written). Failures are reported on stderr.
   */
  static int copy_stream(FILE *in, long length, const char *out_name)
  {
    char chunk[BUFLEN];
    long left = length;
    int status = 0;
    FILE *out = fopen(out_name, "wb");

    if (out == NULL) {
      perror(out_name);
      return -1;
    }
    while (left > 0) {
      const size_t want = left < (long)sizeof chunk ? (size_t)left : sizeof chunk;
      const size_t got = fread(chunk, 1, want, in);

      if (got > 0 && fwrite(chunk, 1, got, out) != got) {
        perror(out_name);
        status = -1;
        break;
      }
      if (got < want) {
        fprintf(stderr, "%s: input ended %ld bytes before the end of the stream\n",
                out_name, left - (long)got);
        status = -1;
        break;
      }
      left -= (long)got;
    }
    /* fclose flushes, so a failure here means lost data as well. */
    if (fclose(out) != 0) {
      perror(out_name);
      status = -1;
    }
    return status;
  }

  /*
   * Usage: pdf2jp2 file.pdf
   *
   * Locates every "JPXDecode" marker in the PDF named by argv[1] and writes
   * the stream that follows marker number i to "<file.pdf>.<i>.jp2". Markers
   * whose length cannot be parsed are skipped silently.
   *
   * Returns EXIT_SUCCESS when every attempted extraction succeeded, and
   * EXIT_FAILURE on a missing argument, an unreadable input, or a failed
   * extraction.
   */
  int main(int argc, char *argv[])
  {
    long offsets[NUMJP2];
    const char *filename;
    FILE *f;
    int count;
    int i;
    int status = EXIT_SUCCESS;

    if (argc < 2) {
      fprintf(stderr, "usage: %s file.pdf\n", argc > 0 ? argv[0] : "pdf2jp2");
      return EXIT_FAILURE;
    }
    filename = argv[1];

    f = fopen(filename, "rb");
    if (f == NULL) {
      perror(filename);
      return EXIT_FAILURE;
    }

    count = find_marker_offsets(f, "JPXDecode", offsets, NUMJP2);
    if (count < 0) {
      fprintf(stderr, "%s: read error\n", filename);
      fclose(f);
      return EXIT_FAILURE;
    }

    for (i = 0; i < count; ++i) {
      char jp2fn[FNLEN];
      long length;
      int n;

      if (!read_stream_length(f, offsets[i], &length))
        continue;

      n = snprintf(jp2fn, sizeof jp2fn, "%s.%d.jp2", filename, i);
      if (n < 0 || (size_t)n >= sizeof jp2fn) {
        fprintf(stderr, "%s: output file name too long\n", filename);
        status = EXIT_FAILURE;
        continue;
      }
      if (copy_stream(f, length, jp2fn) != 0)
        status = EXIT_FAILURE;
      /* TODO need to check we reached endstream */
    }

    fclose(f);
    return status;
  }