diff options
author | Tom Ryder <tom@sanctum.geek.nz> | 2019-12-23 19:53:48 +1300 |
---|---|---|
committer | Tom Ryder <tom@sanctum.geek.nz> | 2019-12-23 19:53:48 +1300 |
commit | ca2737dd21714cef01ba61fedd1a7a8c22fc8a13 (patch) | |
tree | 32d10e4ead82e0eff8a68e2f82a49507203f105c | |
parent | Merge branch 'hotfix/v1.2.1' (diff) | |
parent | Bump VERSION (diff) | |
download | wtf8-ca2737dd21714cef01ba61fedd1a7a8c22fc8a13.tar.gz wtf8-ca2737dd21714cef01ba61fedd1a7a8c22fc8a13.zip |
Merge branch 'release/v1.3.0' (v1.3.0)
* release/v1.3.0:
Add help message for improper usage
Refactor for legibility
Make some dense code a little less opaque
-rw-r--r-- | VERSION | 2 | ||||
-rw-r--r-- | wtf8.c | 70 | ||||
-rw-r--r-- | wtf8.h | 9 |
3 files changed, 54 insertions, 27 deletions
@@ -1 +1 @@ -1.2.1 +1.3.0 @@ -1,26 +1,31 @@ #include "wtf8.h" /* - * Check if first two bits of the character are "10", meaning it's a UTF-8 + * Check if first two bits of the character are "10", meaning it'str a UTF-8 * continuation character */ -int is_utf8_cont(unsigned char c) { - return (c & 0xC0) == 0x80; +int is_utf8_cont(unsigned char chr) { + return (chr & 0xC0) == 0x80; } /* * Print each octet of a string of characters as lowercase hex followed by a * trailing space, ending with a newline */ -void print_octets(char *s) { - unsigned char c; +void print_octets(FILE *stream, char *str) { + unsigned char chr; /* * Iterate through the string, printing each octet, ending with a newline */ - while ((c = *s++)) - printf("%c%02x", (is_utf8_cont(c) ? '-' : ' '), c); - putchar('\n'); + while ((chr = *str++)) { + char sep; + sep = is_utf8_cont(chr) + ? BYTE_SEP + : CHAR_SEP; + fprintf(stream, "%c%02x", sep, chr); + } + fputc('\n', stream); return; } @@ -30,37 +35,52 @@ void print_octets(char *s) { * print_octets(), with each character in line with the end of the octet that * terminates it, ending with a newline */ -void print_characters(char *s) { - - /* - * We need a short counter to find how long each character is - */ - unsigned char c; +void print_characters(FILE *stream, char *str) { /* * Iterate through the string */ - while (*s) { + while (*str) { + + /* + * We need a short counter to find how long each character is + */ + unsigned char chr; /* * Print blanks and increment a counter until we find how long this * character is */ - for (c = 1; is_utf8_cont(s[c]) && c <= UCHAR_MAX; c++) - printf(" "); + for (chr = 1; is_utf8_cont(str[chr]); chr++) { + + /* + * Print blanks + */ + fprintf(stream, " "); + + /* + * If we've hit UCHAR_MAX, this is probably a perverse + * string of bytes for fuzzing or exploitation; bail + * out + */ + if (chr == UCHAR_MAX) { + fprintf(stderr, "Perverse byte count, bailing\n"); + exit(1); + } + } /* * Print two spaces, and then the 
full character */ - printf(" "); - while (c--) - putchar(*s++); + fprintf(stream, " "); + while (chr--) + fputc(*str++, stream); } /* * End with a newline */ - putchar('\n'); + fputc('\n', stream); return; } @@ -72,15 +92,17 @@ int main(int argc, char **argv) { /* * Check we have one and only one argument */ - if (argc != 2) + if (argc != 2) { + fprintf(stderr, "%s: Need one argument\n", PROGRAM_NAME); exit(EXIT_FAILURE); + } /* * Print the sole argument first as hex octets, then as characters, spaced * accordingly */ - print_octets(argv[1]); - print_characters(argv[1]); + print_octets(stdout, argv[1]); + print_characters(stdout, argv[1]); /* * Done! @@ -2,6 +2,11 @@ #include <stdio.h> #include <stdlib.h> +#define PROGRAM_NAME "wtf8" + +#define BYTE_SEP '-' +#define CHAR_SEP ' ' + int is_utf8_cont(unsigned char); -void print_octets(char *); -void print_characters(char *); +void print_octets(FILE *, char *); +void print_characters(FILE *, char *); |