#include "wtf8.h"

/*
 * Return non-zero if the top two bits of the octet are "10", meaning it's a
 * UTF-8 continuation octet, and zero otherwise.
 */
int is_utf8_cont(unsigned char chr)
{
    return (chr & 0xC0) == 0x80;
}

/*
 * Print each octet of a string as two lowercase hex digits, each preceded by
 * a one-character separator (BYTE_SEP before continuation octets, CHAR_SEP
 * before every other octet), ending with a newline.
 *
 * Every octet therefore occupies exactly three output columns; that width is
 * what print_characters() pads against.
 */
void print_octets(FILE *stream, char *str)
{
    for (; *str != '\0'; str++) {
        /* Go through unsigned char so bytes >= 0x80 don't sign-extend */
        unsigned char octet = (unsigned char)*str;
        char sep = is_utf8_cont(octet) ? BYTE_SEP : CHAR_SEP;

        fprintf(stream, "%c%02x", sep, octet);
    }
    fputc('\n', stream);
}

/*
 * Print each of the UTF-8 characters to align with the output of
 * print_octets(), with each character in line with the end of the octet that
 * terminates it, ending with a newline.
 *
 * Exits the program with a failure status if a single character appears to
 * have UCHAR_MAX or more continuation octets.
 */
void print_characters(FILE *stream, char *str)
{
    while (*str) {
        /*
         * Number of octets in the current character: the leading octet plus
         * however many continuation octets follow it
         */
        unsigned char len;

        for (len = 1; is_utf8_cont((unsigned char)str[len]); len++) {
            /*
             * Each octet is three columns wide in print_octets() (separator
             * plus two hex digits), so pad a full cell for every octet of
             * this character except its last
             */
            fputs("   ", stream);

            /*
             * If we've hit UCHAR_MAX, this is probably a perverse string of
             * bytes for fuzzing or exploitation; bail out before the counter
             * can wrap
             */
            if (len == UCHAR_MAX) {
                fprintf(stderr, "Perverse byte count, bailing\n");
                exit(EXIT_FAILURE);
            }
        }

        /*
         * Two blanks and then the full character, so that it lands under the
         * last hex digit of its final octet
         */
        fputs("  ", stream);
        while (len--) {
            fputc(*str++, stream);
        }
    }
    fputc('\n', stream);
}

/*
 * Entry point: expects exactly one argument, and prints it first as hex
 * octets and then as characters spaced to line up underneath.
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        fprintf(stderr, "%s: Need one argument\n", PROGRAM_NAME);
        return EXIT_FAILURE;
    }

    print_octets(stdout, argv[1]);
    print_characters(stdout, argv[1]);

    return EXIT_SUCCESS;
}