123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- /*
- * Copyright @ 2020 Joshua Branson <jbranso@dismail.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or (at
- * your option) any later version.
- *
- * It is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This program will attempt to decrypt a caesar cipher. It uses a
- * very simple algorithm: It will try to find the most common letter
- * and assume that letter must be 'e'. Note decrypt only works for
- * English.
- * https://inventwithpython.com/hacking/chapter7.html
- * http://practicalcryptography.com/ciphers/caesar-cipher/
- * https://www.xarg.org/2010/05/cracking-a-caesar-cipher/
- * there's a 20 line algorithm here that works. */
- /* https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language */
- /*
- a 8.167% b 1.492% c 2.782% d 4.253% e 12.702% f 2.228% g 2.015%
- h 6.094% i 6.966% j 0.153% k 3.872% l 4.025% m 2.406% n 6.749%
- o 7.507% p 1.929% q 0.095% r 5.987% s 6.327% t 9.256% u 2.758%
- v 0.978% w 5.370% x 0.150% y 3.978% z 0.074%
- There are also relative frequencies of the first letter in a word
- There are also the most common double letters: LL EE SS OO TT FF RR NN PP CC
-
- GNU decrypt is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- This is still fairly easy to fool though:
- echo "This will fool decrypt." |./caesar | ./decrypt
- */
#include <ctype.h> //tolower
#include <errno.h> //EINVAL
#include <stdio.h> //FILE, getc, ungetc, fmemopen, perror
#include <stdlib.h> //abs, exit
#include <string.h> //strlen, memcpy, memchr, strerror
#include "encrypt.h"
/* Capacity, in bytes, of the file-name buffer, and the number of input
   characters that original_buffer can hold. */
#define BUFFER_SIZE 128

/* Number of entries needed to index a table by any 7-bit ASCII code. */
#define ASCII_LENGTH BUFFER_SIZE

/* Debugging aid: print how many times each letter 'a'..'z' was counted.
   Expands to a single statement, so use it as `PRINT_MAX_OCCURANCES;'. */
#define PRINT_MAX_OCCURANCES                            \
  do                                                    \
    {                                                   \
      extern int letter_count [];                       \
      for (int i = 'a'; i <= 'z'; i++)                  \
        printf ("%c: %d\n", i, letter_count[i]);        \
    }                                                   \
  while (0)

/* Forward distance from 'e' to the lower-case letter C, in the range
   0..25: the shift that turns a plain-text 'e' into C.  C is evaluated
   more than once, so pass an expression without side effects. */
#define ORIGINAL_SHIFT(c) \
  (((c) < 'e') ? (26 - ('e' - (c))) : ((c) - 'e'))

/* File name given with --file.  It is an empty string until the option
   has been parsed. */
char fileName [BUFFER_SIZE];

/* The leading part of the input.  The extra byte leaves room for the
   '\0' terminator after BUFFER_SIZE characters. */
char original_buffer [BUFFER_SIZE + 1];

/*
 * This stores the frequencies of letters.  letter_count['a'] is the
 * number of times the letter [aA] was found in the buffer,
 * letter_count['b'] is the number of times [bB] appeared, and so on.
 */
int letter_count [ASCII_LENGTH];

/* Command-line options understood by this program. */
static const struct argp_option options [] =
  {
    {"file" , 'f', "FILE", 0, "Output the decrypted FILE." },
    { 0 }
  };

/*
 * Argp callback, invoked once per parsed option.
 *
 * OPT is the option key, ARG its argument (the FILE of --file) and
 * STATE the argp parsing state.
 *
 * Returns 0 when the option was handled and ARGP_ERR_UNKNOWN for keys
 * this parser does not recognise.  A file name that does not fit in
 * fileName is reported through argp_error; if argp_error returns
 * (parsing flags can stop it from exiting) EINVAL is returned and
 * fileName is left unchanged.
 */
error_t argp_parser (int opt, char *arg, struct argp_state *state)
{
  switch (opt)
    {
    case 'f':
      {
        size_t length = strlen (arg);

        /* One byte of fileName is reserved for the terminator. */
        if (length >= sizeof fileName)
          {
            argp_error (state, "file name is longer than %d characters",
                        BUFFER_SIZE - 1);
            return EINVAL;
          }
        /* Copy the terminator as well, so that a shorter name fully
           replaces a longer one stored by an earlier --file. */
        memcpy (fileName, arg, length + 1);
        break;
      }
    default:
      /* Not one of our options. */
      return ARGP_ERR_UNKNOWN;
    }
  return 0;
}
- /* a string containing the basic usage of this program. */
- struct argp argp =
- {
- options, argp_parser, 0,
- "A simple program to decrypt a caesar cipher."
- };
- /*
- This function will store the first 128 bytes of input into a
- temporary buffer, so that we can later examine what is the most
- used character in that buffer. (It has to do store some input in a
- temp buffer, because one cannot fseek on stdin). If the stream is
- longer than 128, then this function will return 1. Otherwise 0.
- If it returns 1, then you will need to keep putcharing the rest of
- the file stream. If it returns 0, then the stream was shorter than
- 128, so you can just use original_buffer to decrypt it.
- This will also store the number of times letters from buffer appear
- in the variable: letter_count.
- */
- int store_input_in_buffer_and_letter_count (FILE * stream)
- {
- extern char original_buffer [];
- extern int letter_count [];
- char c;
- int i;
- /* change the || to a && and you get an infinite loop. Why? */
- for (i = 0; (i < BUFFER_SIZE) && ((c = getc(stream)) != EOF); i++)
- {
- original_buffer[i] = c;
- if (isalpha (c))
- letter_count[tolower (c)] += 1;
- }
- ungetc (original_buffer[i], stream);
- original_buffer[i] = '\0';
- return (i < (BUFFER_SIZE + 1)) ? 1 : 0;
- }
/*
 * Return the "corrective" shift of the caesar cipher.
 *
 * The most frequent letter recorded in letter_count is assumed to be an
 * encrypted 'e'; on a tie the alphabetically first letter wins.  The
 * result lies in 1..26 and is the shift that maps that letter back to
 * 'e'.  When 'e' is already the most frequent letter, or no letter was
 * counted at all, the result is 26, i.e. a full turn of the alphabet.
 */
int discover_shift (void)
{
  extern int letter_count [];
  int highest_count = 0;
  /* With nothing counted there is no evidence of a shift, so start
     from the assumption that the text is already plain. */
  int most_used_char = 'e';

  for (int letter = 'a'; letter <= 'z'; letter++)
    {
      if (letter_count[letter] > highest_count)
        {
          highest_count = letter_count[letter];
          most_used_char = letter;
        }
    }
  return abs (ORIGINAL_SHIFT (most_used_char) - 26);
}
- void decrypt (FILE * stream)
- {
- extern char original_buffer [];
- int keep_decrypting = store_input_in_buffer_and_letter_count (stream);
- int shift = discover_shift ();
- /* Glibc lets you create streams from regions of memory. That is
- useful, because I have such a region of memory:
- original_buffer. */
- FILE * stream_memory_buffer;
- stream_memory_buffer = fmemopen (original_buffer, BUFFER_SIZE, "r");
- encrypt (shift, stream_memory_buffer);
- if (keep_decrypting)
- encrypt (shift, stream);
- }
- int main (int argc, char **argv)
- {
- argp_parse (&argp, argc, argv, 0, 0, 0);
- FILE * stream = maybe_open_file ();
- decrypt (stream);
- fclose (stream);
- return 0;
- }
|