decrypt.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. * Copyright (C) 2020 Joshua Branson <jbranso@dismail.de>
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 3 of the License, or (at
  7. * your option) any later version.
  8. *
  9. * It is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. * This program will attempt to decrypt a caesar cipher. It uses a
  18. * very simple algorithm: It will try to find the most common letter
  19. * and assume that letter must be 'e'. Note decrypt only works for
  20. * English.
  21. * https://inventwithpython.com/hacking/chapter7.html
  22. * http://practicalcryptography.com/ciphers/caesar-cipher/
  23. * https://www.xarg.org/2010/05/cracking-a-caesar-cipher/
  24. * there's a 20 line algorithm here that works. */
  25. /* https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language */
  26. /*
  27. a 8.167% b 1.492% c 2.782% d 4.253% e 12.702% f 2.228% g 2.015%
  28. h 6.094% i 6.966% j 0.153% k 3.872% l 4.025% m 2.406% n 6.749%
  29. o 7.507% p 1.929% q 0.095% r 5.987% s 6.327% t 9.256% u 2.758%
  30. v 0.978% w 5.370% x 0.150% y 3.978% z 0.074%
  31. There are also relative frequencies of the first letter in a word
  32. There are also the most common double letters: LL EE SS OO TT FF RR NN PP CC
  33. GNU decrypt is distributed in the hope that it will be useful,
  34. but WITHOUT ANY WARRANTY; without even the implied warranty of
  35. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  36. GNU General Public License for more details.
  37. This is still fairly easy to fool though:
  38. echo "This will fool decrypt." |./caesar | ./decrypt
  39. */
  40. #include <ctype.h> //tolower
  41. #include "encrypt.h"
  /* Number of input bytes sampled for frequency analysis.  The same
     value sizes the fileName buffer. */
  #define BUFFER_SIZE 128
  /* Number of entries needed to index a table by any 7-bit ASCII code.
     Kept independent of BUFFER_SIZE so that resizing the sample buffer
     cannot shrink the frequency table. */
  #define ASCII_LENGTH 128
  /* Debugging aid: print how often each letter 'a'..'z' was counted in
     letter_count.  Expands to a single statement, so invoke it as
     "PRINT_MAX_OCCURANCES;". */
  #define PRINT_MAX_OCCURANCES \
    do \
      { \
        extern int letter_count []; \
        for (int i = 'a'; i <= 'z'; i++) \
          printf ("%c: %d\n", i, letter_count[i]); \
      } \
    while (0)
  /* Distance (0..25) from 'e' forward to the lowercase letter c,
     wrapping around the alphabet when c comes before 'e'.  The argument
     is evaluated more than once, so it must not have side effects. */
  #define ORIGINAL_SHIFT(c) \
    (((c) < 'e') ? (26 - ('e' - (c))) : abs ('e' - (c)))
  49. char fileName [BUFFER_SIZE];
  50. char original_buffer [BUFFER_SIZE * sizeof (char)];
  51. /*
  52. * This will store the frequencies of letters. Letter_count ['a']
  53. * will store the number of times the letter [aA] is found in the
  54. * buffer. letter_count['b'] is the number of times [bB] appeared in
  55. * the buffer and so on.
  56. */
  57. int letter_count [ASCII_LENGTH];
  /* Command-line options understood by this program, in the table
     format argp expects; the all-zero entry terminates the table. */
  static const struct argp_option options [] =
    {
      {
        .name = "file",
        .key = 'f',
        .arg = "FILE",
        .flags = 0,
        .doc = "Output the decrypted FILE."
      },
      { 0 }
    };
  63. //define an argp parse function
  64. error_t argp_parser (int opt, char *arg, struct argp_state *state)
  65. {
  66. extern char fileName [];
  67. switch (opt)
  68. {
  69. // if this parser function is called on an option that it doesn't recognize, then don't do anything.
  70. default:
  71. return ARGP_ERR_UNKNOWN;
  72. case 'f':
  73. {
  74. memcpy (fileName, arg, strlen (arg));
  75. break;
  76. }
  77. }
  78. return 0;
  79. }
  80. /* a string containing the basic usage of this program. */
  81. struct argp argp =
  82. {
  83. options, argp_parser, 0,
  84. "A simple program to decrypt a caesar cipher."
  85. };
  86. /*
  87. This function will store the first 128 bytes of input into a
  88. temporary buffer, so that we can later examine what is the most
  89. used character in that buffer. (It has to do store some input in a
  90. temp buffer, because one cannot fseek on stdin). If the stream is
  91. longer than 128, then this function will return 1. Otherwise 0.
  92. If it returns 1, then you will need to keep putcharing the rest of
  93. the file stream. If it returns 0, then the stream was shorter than
  94. 128, so you can just use original_buffer to decrypt it.
  95. This will also store the number of times letters from buffer appear
  96. in the variable: letter_count.
  97. */
  98. int store_input_in_buffer_and_letter_count (FILE * stream)
  99. {
  100. extern char original_buffer [];
  101. extern int letter_count [];
  102. char c;
  103. int i;
  104. /* change the || to a && and you get an infinite loop. Why? */
  105. for (i = 0; (i < BUFFER_SIZE) && ((c = getc(stream)) != EOF); i++)
  106. {
  107. original_buffer[i] = c;
  108. if (isalpha (c))
  109. letter_count[tolower (c)] += 1;
  110. }
  111. ungetc (original_buffer[i], stream);
  112. original_buffer[i] = '\0';
  113. return (i < (BUFFER_SIZE + 1)) ? 1 : 0;
  114. }
  /*
   * Return the "corrective" shift of the caesar cipher.
   *
   * The most frequent letter in letter_count is assumed to stand for
   * 'e'.  Ties go to the letter that comes first in the alphabet.  When
   * no letter was counted at all, the text is treated as unshifted,
   * which gives the same result as when 'e' itself is the most frequent
   * letter.
   */
  int discover_shift (void)
  {
    extern int letter_count [];
    int max = 0;
    int most_used_char = 'e';

    for (int i = 'a'; i <= 'z'; i++)
      {
        if (letter_count[i] > max)
          {
            max = letter_count[i];
            most_used_char = i;
          }
      }
    return abs (ORIGINAL_SHIFT (most_used_char) - 26);
  }
  128. void decrypt (FILE * stream)
  129. {
  130. extern char original_buffer [];
  131. int keep_decrypting = store_input_in_buffer_and_letter_count (stream);
  132. int shift = discover_shift ();
  133. /* Glibc lets you create streams from regions of memory. That is
  134. useful, because I have such a region of memory:
  135. original_buffer. */
  136. FILE * stream_memory_buffer;
  137. stream_memory_buffer = fmemopen (original_buffer, BUFFER_SIZE, "r");
  138. encrypt (shift, stream_memory_buffer);
  139. if (keep_decrypting)
  140. encrypt (shift, stream);
  141. }
  142. int main (int argc, char **argv)
  143. {
  144. argp_parse (&argp, argc, argv, 0, 0, 0);
  145. FILE * stream = maybe_open_file ();
  146. decrypt (stream);
  147. fclose (stream);
  148. return 0;
  149. }