/* Copyright 2009 Daniel Martin */ /* */ /* Licensed under the Apache License, Version 2.0 (the "License"); */ /* you may not use this file except in compliance with the License. */ /* You may obtain a copy of the License at */ /* */ /* http://www.apache.org/licenses/LICENSE-2.0 */ /* */ /* Unless required by applicable law or agreed to in writing, software */ /* distributed under the License is distributed on an "AS IS" BASIS, */ /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ /* See the License for the specific language governing permissions and */ /* limitations under the License. */ #include #include "utf8checker.h" void unichar_handler(uint32_t unichar, int bytes_so_far, int char_index, uint8_t *buf, int buf_len) { int i; printf("%2d Character U+%04X; bytes %d-%d [", char_index, unichar, bytes_so_far, bytes_so_far + buf_len - 1); for (i=0; i < buf_len; i++) { if (i > 0) {printf(" ");} printf("%02x", buf[i]); } printf("]\n"); } void err_handler(int errcode, int bytes_so_far, int char_index, uint8_t *buf, int buf_len) { int i; switch(errcode) { case MISSING_CONTINUATION: printf("MISSING_CONTINUATION"); break; case UNEXPECTED_CONTINUATION: printf("UNEXPECTED_CONTINUATION"); break; case OVERLONG_FORM: printf("OVERLONG_FORM"); break; case OUT_OF_RANGE: printf("OUT_OF_RANGE"); break; case BAD_SCALAR_VALUE: printf("BAD_SCALAR_VALUE"); break; case INVALID: printf("INVALID"); break; } printf(" at bytes %d-%d [", bytes_so_far, bytes_so_far + buf_len - 1); for (i=0; i < buf_len; i++) { if (i > 0) {printf(" ");} printf("%02x", buf[i]); } printf("]\n"); } void checkbytes(uint8_t *buf, int buf_len) { int i; utf8_decoder_state_struct statestorage; utf8_decoder_state state = init_utf8_decoder_state(&statestorage); for (i=0; i < buf_len; i++) { process_byte(buf[i], state, unichar_handler, err_handler); } end_processing(state, err_handler); } int main(int argc, char *argv[]) { uint8_t byte1[] = { /* A tiny bit of mathematics, all correct UTF-8 */ 0xe2, 0x84, 0x95, 0x20, 0xe2, 0x8a, 0x86, 0x20, 0xe2, 0x84, 0x95, 0xe2, 0x82, 0x80, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0xa4, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x9a, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x9d, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x82 }; uint8_t byte1_corrupted[] = { /* Two bits of badness inserted */ 0xe2, 0x84, 0x95, 0x20, 0xe2, 0x8a, 0x86, 0x20, 0xe2, 0x84, 0x95, 0xe2, 0x82, 0x80, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0xa4, 0x20, 0xe2, 0x8a, 0x05, 0x82, 0x20, 0xe2, 0x84, 0x9a, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x9d, 0x20, 0xe2, 0x8a, 0x82, 0xC0, 0xA0, 0xe2, 0x84, 0x82 }; uint8_t byte1_truncated[] = { /* missing the last byte */ 0xe2, 0x84, 0x95, 0x20, 0xe2, 0x8a, 0x86, 0x20, 0xe2, 0x84, 0x95, 0xe2, 0x82, 0x80, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0xa4, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x9a, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84, 0x9d, 0x20, 0xe2, 0x8a, 0x82, 0x20, 0xe2, 0x84 }; printf("byte1:\n"); checkbytes(byte1, sizeof(byte1)); printf("byte1_corrupted:\n"); checkbytes(byte1_corrupted, sizeof(byte1_corrupted)); printf("byte1_truncated:\n"); checkbytes(byte1_truncated, sizeof(byte1_truncated)); return 0; }