#include #include #include #include #include #include #include #include #include #include "str.h" #define MIN(a, b) ((a) > (b) ? (b) : (a)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) static const char charset[26] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; static double charfreq_english[sizeof charset] = { ['A' - 'A'] = 0.082, ['B' - 'A'] = 0.015, ['C' - 'A'] = 0.028, ['D' - 'A'] = 0.043, ['E' - 'A'] = 0.127, ['F' - 'A'] = 0.022, ['G' - 'A'] = 0.020, ['H' - 'A'] = 0.061, ['I' - 'A'] = 0.070, ['J' - 'A'] = 0.0015, ['K' - 'A'] = 0.0077, ['L' - 'A'] = 0.040, ['M' - 'A'] = 0.024, ['N' - 'A'] = 0.067, ['O' - 'A'] = 0.075, ['P' - 'A'] = 0.019, ['Q' - 'A'] = 0.0095, ['R' - 'A'] = 0.060, ['S' - 'A'] = 0.063, ['T' - 'A'] = 0.091, ['U' - 'A'] = 0.028, ['V' - 'A'] = 0.0098, ['W' - 'A'] = 0.024, ['X' - 'A'] = 0.0015, ['Y' - 'A'] = 0.020, ['Z' - 'A'] = 0.00074, }; static int do_nothing(int ch) { return ch; } static int charset_contains(int ch) { return ch >= 'A' && ch <= 'Z'; } static size_t charset_index(char ch) { if (isalpha(ch)) { return toupper(ch) - 'A'; } fprintf(stderr, "%s: invalid char %d\n", __func__, ch); abort(); } /* calculate index of coincidence of `text` * * map will transform the characters before calculating the ioc. For example, * ioc(data, ..., tolower) will transform samples with tolower before checking * if they are equal * */ static double ioc(struct str text, int stride, int offset, int (*map)(int)) { assert(offset < stride); if (stride > text.len) { return NAN; } if (text.len < 1) { return NAN; } int samples = 2048; int matches = 0; if (map == NULL) { map == do_nothing; } for (int i = 0; i < samples; i++) { size_t rand_a = (rand() % (text.len / stride)) * stride + offset; size_t rand_b; do { rand_b = (rand() % (text.len / stride)) * stride + offset; } while (rand_a == rand_b); char a = map(text.data[rand_a]); char b = map(text.data[rand_b]); if (a == b) { matches++; } } return (double)matches / (double)(samples); } static void frequency_count(double output[static sizeof charset], const struct str text, size_t offset, size_t stride) { for (size_t i = 0; i < sizeof charset; i++) { output[i] = 0; } assert(offset < stride); for (size_t i = offset; i < text.len; i += stride) { if (!charset_contains(text.data[i])) { continue; } output[charset_index(text.data[i])] += 1.0; } } static double frequency_correlation(const double a[static sizeof charset], const double b[static sizeof charset], size_t shift) { double sum = 0; for (size_t i = 0; i < sizeof charset; i++) { sum += a[i] * b[(i + shift) % sizeof charset]; } return sum; } static void frequency_print(const double freq[static sizeof charset]) { for (int i = 0; i < sizeof charset; i++) { fprintf(stderr, "[%c] = %.0lf, ", charset[i], freq[i]); } } static void vigenere_encode(struct str text, char* output, const char* key, size_t key_len, const char* charset, size_t charset_len) { for (size_t i = 0; i < text.len; i++) { const char ch = text.data[i]; if (charset_contains(ch)) { output[i] = charset[(charset_index(ch) + key[i % key_len]) % charset_len]; } } } static void vigenere_decode(struct str text, char* output, const char* key, size_t key_len, const char* charset, size_t charset_len) { for (size_t i = 0; i < text.len; i++) { const char ch = text.data[i]; if (charset_contains(ch)) { output[i] = charset[(charset_index(ch) - key[i % key_len] + charset_len) % charset_len]; } } } int main(int argc, char** argv) { srand(0); FILE* f = argc < 2 ? stdin : fopen(argv[1], "r"); if (f == NULL) { fprintf(stderr, "couldn't open file %s%m", argv[1]); exit(EXIT_FAILURE); } struct str text = read_all_filter(f, charset_contains, toupper); if (fclose(f) != 0) { perror("fclose"); /* not fatal, continue */ } if (text.data == NULL) { exit(EXIT_FAILURE); } /* Find key length (stride) * ========================*/ int key_len = 1; { /* values better than threshold immidiately break the loop */ constexpr double threshold = 1.6; double best_score = -1.0; for (int stride = 1; stride < text.len / 2; stride++) { double result = 0.0; for (int j = 0; j < stride; j++) { result += ioc(text, stride, j, toupper); } result /= stride; result *= 26.0; /* normalization */ if (result > best_score) { best_score = result; key_len = stride; if (result > threshold) { break; } } } fprintf(stderr, "best stride: %i (IOC %.2lf)\n", key_len, best_score); } /* Crack caesar ciphers column wise * ================================ */ char key[key_len] = {}; /* VLAs are bad but whatever */ { double frequencies[sizeof charset] = { 0 }; for (size_t col = 0; col < key_len; col++) { frequency_count(frequencies, text, col, key_len); double best = 0; for (size_t i = 0; i < sizeof charset; i++) { double n = frequency_correlation(frequencies, charfreq_english, i); if (n > best) { key[col] = (sizeof charset - i) % sizeof charset; best = n; } } } } /* print key to stdout (other info goes to stderr) */ printf("key: "); for (size_t i = 0; i < key_len; i++) { printf("%c", charset[key[i]]); } vigenere_decode(text, text.data, key, key_len, charset, sizeof charset); /* print preview to stderr to avoid clutter when piping */ fprintf(stderr, "preview:\n"); str_println(str_slice(text, 0, 79), stderr); str_free(&text); return EXIT_SUCCESS; }