/* Copyright 2009 Daniel Martin */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ /* See the License for the specific language governing permissions and */ /* limitations under the License. */ #include #include "utf8checker.h" void unichar_handler(uint32_t unichar, int bytes_so_far, int char_index, uint8_t *buf, int buf_len) { int i; printf("%6d Character U+%06X; bytes %d-%d [", char_index, unichar, bytes_so_far, bytes_so_far + buf_len - 1); for (i=0; i < buf_len; i++) { if (i > 0) {printf(" ");} printf("%02x", buf[i]); } printf("]"); if ((unichar < 0x7F) && (unichar >= 0x20)) { printf(" (%c)", (uint8_t) unichar); } printf("\n"); } void err_handler(int errcode, int bytes_so_far, int char_index, uint8_t *buf, int buf_len) { int i; printf("ERR: "); switch(errcode) { case MISSING_CONTINUATION: printf("MISSING_CONTINUATION"); break; case UNEXPECTED_CONTINUATION: printf("UNEXPECTED_CONTINUATION"); break; case OVERLONG_FORM: printf("OVERLONG_FORM"); break; case OUT_OF_RANGE: printf("OUT_OF_RANGE"); break; case BAD_SCALAR_VALUE: printf("BAD_SCALAR_VALUE"); break; case INVALID: printf("INVALID"); break; } printf(" at bytes %d-%d [", bytes_so_far, bytes_so_far + buf_len - 1); for (i=0; i < buf_len; i++) { if (i > 0) {printf(" ");} printf("%02x", buf[i]); } printf("]\n"); } int main(int argc, char *argv[]) { FILE *inf; utf8_decoder_state_struct statestorage; utf8_decoder_state state = init_utf8_decoder_state(&statestorage); if (argc < 2) { fprintf(stderr, "Usage: %s infile\n", argv[0]); return 2; } inf = fopen(argv[1], "r"); if (NULL == inf) { perror("Couldn't open input"); return 1; } while(1) { int c = fgetc(inf); if (c < 0) { break; } process_byte(c, state, unichar_handler, err_handler); } end_processing(state, err_handler); return 0; }