aboutsummaryrefslogtreecommitdiff
path: root/wtf8.c
diff options
context:
space:
mode:
authorTom Ryder <tom@sanctum.geek.nz>2019-12-23 19:53:48 +1300
committerTom Ryder <tom@sanctum.geek.nz>2019-12-23 19:53:48 +1300
commitca2737dd21714cef01ba61fedd1a7a8c22fc8a13 (patch)
tree32d10e4ead82e0eff8a68e2f82a49507203f105c /wtf8.c
parentMerge branch 'hotfix/v1.2.1' (diff)
parentBump VERSION (diff)
downloadwtf8-ca2737dd21714cef01ba61fedd1a7a8c22fc8a13.tar.gz
wtf8-ca2737dd21714cef01ba61fedd1a7a8c22fc8a13.zip
Merge branch 'release/v1.3.0'v1.3.0
* release/v1.3.0:
  Add help message for improper usage
  Refactor for legibility
  Make some dense code a little less opaque
Diffstat (limited to 'wtf8.c')
-rw-r--r--wtf8.c70
1 files changed, 46 insertions, 24 deletions
diff --git a/wtf8.c b/wtf8.c
index 714a7cc..1b99d63 100644
--- a/wtf8.c
+++ b/wtf8.c
@@ -1,26 +1,31 @@
#include "wtf8.h"
/*
- * Check if first two bits of the character are "10", meaning it's a UTF-8
+ * Check if first two bits of the character are "10", meaning it's a UTF-8
* continuation character
*/
-int is_utf8_cont(unsigned char c) {
- return (c & 0xC0) == 0x80;
+int is_utf8_cont(unsigned char chr) {
+ return (chr & 0xC0) == 0x80;
}
/*
* Print each octet of a string of characters as lowercase hex followed by a
* trailing space, ending with a newline
*/
-void print_octets(char *s) {
- unsigned char c;
+void print_octets(FILE *stream, char *str) {
+ unsigned char chr;
/*
* Iterate through the string, printing each octet, ending with a newline
*/
- while ((c = *s++))
- printf("%c%02x", (is_utf8_cont(c) ? '-' : ' '), c);
- putchar('\n');
+ while ((chr = *str++)) {
+ char sep;
+ sep = is_utf8_cont(chr)
+ ? BYTE_SEP
+ : CHAR_SEP;
+ fprintf(stream, "%c%02x", sep, chr);
+ }
+ fputc('\n', stream);
return;
}
@@ -30,37 +35,52 @@ void print_octets(char *s) {
* print_octets(), with each character in line with the end of the octet that
* terminates it, ending with a newline
*/
-void print_characters(char *s) {
-
- /*
- * We need a short counter to find how long each character is
- */
- unsigned char c;
+void print_characters(FILE *stream, char *str) {
/*
* Iterate through the string
*/
- while (*s) {
+ while (*str) {
+
+ /*
+ * We need a short counter to find how long each character is
+ */
+ unsigned char chr;
/*
* Print blanks and increment a counter until we find how long this
* character is
*/
- for (c = 1; is_utf8_cont(s[c]) && c <= UCHAR_MAX; c++)
- printf(" ");
+ for (chr = 1; is_utf8_cont(str[chr]); chr++) {
+
+ /*
+ * Print blanks
+ */
+ fprintf(stream, " ");
+
+ /*
+ * If we've hit UCHAR_MAX, this is probably a perverse
+ * string of bytes for fuzzing or exploitation; bail
+ * out
+ */
+ if (chr == UCHAR_MAX) {
+ fprintf(stderr, "Perverse byte count, bailing\n");
+ exit(1);
+ }
+ }
/*
* Print two spaces, and then the full character
*/
- printf(" ");
- while (c--)
- putchar(*s++);
+ fprintf(stream, " ");
+ while (chr--)
+ fputc(*str++, stream);
}
/*
* End with a newline
*/
- putchar('\n');
+ fputc('\n', stream);
return;
}
@@ -72,15 +92,17 @@ int main(int argc, char **argv) {
/*
* Check we have one and only one argument
*/
- if (argc != 2)
+ if (argc != 2) {
+ fprintf(stderr, "%s: Need one argument\n", PROGRAM_NAME);
exit(EXIT_FAILURE);
+ }
/*
* Print the sole argument first as hex octets, then as characters, spaced
* accordingly
*/
- print_octets(argv[1]);
- print_characters(argv[1]);
+ print_octets(stdout, argv[1]);
+ print_characters(stdout, argv[1]);
/*
* Done!