/**
 * Copyright (C) 2016--2019, 2021 Tom Ryder
 *
 * This file is part of wtf8.
 *
 * wtf8 is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * wtf8 is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * wtf8. If not, see https://www.gnu.org/licenses/.
 */

#include "wtf8.h"

/*
 * Check if the first two bits of the octet are "10", meaning it's a UTF-8
 * continuation octet.
 *
 * chr: the octet to classify
 *
 * Returns 1 if the octet is a continuation octet, 0 otherwise.
 */
int is_utf8_cont(unsigned char chr)
{
    return (chr & 0xC0) == 0x80;
}

/*
 * Print each octet of a string as two lowercase hex digits preceded by a
 * one-character separator, ending with a newline.
 *
 * The separator is BYTE_SEP before a continuation octet and CHAR_SEP before
 * any other octet (both are defined outside this file, presumably in
 * wtf8.h), so every octet occupies exactly three output columns.
 *
 * stream: the stream to write to
 * str:    the NUL-terminated string to dump; it is not modified
 */
void print_octets(FILE *stream, char *str)
{
    unsigned char chr;

    /*
     * Read through unsigned char so that octets >= 0x80 print as two hex
     * digits even where plain char is signed
     */
    while ((chr = (unsigned char)*str++) != '\0') {
        char sep = is_utf8_cont(chr) ? BYTE_SEP : CHAR_SEP;

        fprintf(stream, "%c%02x", sep, chr);
    }
    fputc('\n', stream);
}

/*
 * Print each of the UTF-8 characters to align with the output of
 * print_octets(), with each character in line with the end of the octet that
 * terminates it, ending with a newline.
 *
 * print_octets() uses three columns per octet, so each continuation octet of
 * a character is padded with three blanks, and the character itself is
 * written after two blanks to fill the columns of its final octet.
 *
 * If a single character appears to run to UCHAR_MAX octets, a message is
 * written to stderr and the program exits with EXIT_FAILURE; output written
 * to the stream before that point is left as it is.
 *
 * stream: the stream to write to
 * str:    the NUL-terminated string to print; it is not modified
 */
void print_characters(FILE *stream, char *str)
{
    while (*str) {
        /*
         * Length in octets of the current character: the leading octet
         * plus however many continuation octets follow it
         */
        unsigned char len;

        /*
         * str[0] is not NUL here, so str[1] is at worst the terminator,
         * which is not a continuation octet and ends the scan
         */
        for (len = 1; is_utf8_cont((unsigned char)str[len]); len++) {
            /*
             * One octet's worth of blanks (separator plus two hex
             * digits) for each continuation octet
             */
            fputs("   ", stream);

            /*
             * If we've hit UCHAR_MAX, this is probably a perverse
             * string of bytes for fuzzing or exploitation; bail out
             * before the counter can wrap around
             */
            if (len == UCHAR_MAX) {
                fprintf(stderr, "Perverse byte count, bailing\n");
                exit(EXIT_FAILURE);
            }
        }

        /*
         * Two blanks, then every octet of the character, so that it lands
         * under the last hex digit of its terminating octet
         */
        fputs("  ", stream);
        while (len--) {
            fputc(*str++, stream);
        }
    }

    fputc('\n', stream);
}

/*
 * Main function: dump the sole command-line argument to stdout, first as hex
 * octets and then as the characters those octets encode, spaced to match.
 *
 * Exits with EXIT_FAILURE after a message on stderr unless exactly one
 * argument is given; otherwise exits with EXIT_SUCCESS.
 */
int main(int argc, char **argv)
{
    /*
     * Check we have one and only one argument
     */
    if (argc != 2) {
        fprintf(stderr, "%s: Need one argument\n", PROGRAM_NAME);
        exit(EXIT_FAILURE);
    }

    print_octets(stdout, argv[1]);
    print_characters(stdout, argv[1]);

    exit(EXIT_SUCCESS);
}