/* Copyright 2009 Daniel Martin */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); */
/* you may not use this file except in compliance with the License. */
/* You may obtain a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */

/* Public interface of a byte-at-a-time UTF-8 checker/decoder: bytes are fed */
/* in one by one and results are reported through caller-supplied callbacks. */

/* Needed for the fixed-width types uint8_t and uint32_t used below. */
#include <stdint.h>

#ifndef _UTF8CHECKER_H_
#define _UTF8CHECKER_H_ 1

/* Ways a UTF stream can screw up. */
/* These are the values passed as the first argument of an error_handler. */

/* A multibyte sequence without as many continuation bytes as expected. */
/* e.g. [ef 81] 48 */
#define MISSING_CONTINUATION 1

/* A continuation byte when not expected. */
#define UNEXPECTED_CONTINUATION 2

/* A full multibyte sequence encoding something that should have been */
/* encoded shorter. */
#define OVERLONG_FORM 3

/* A full multibyte sequence encoding something larger than 10FFFF. */
#define OUT_OF_RANGE 4

/* A full multibyte sequence encoding something in the range U+D800..U+DFFF. */
#define BAD_SCALAR_VALUE 5

/* Bytes 0xFE or 0xFF. */
#define INVALID 6

/* Callback invoked when the decoder detects a problem in the byte stream. */
/* called with: errorCode, bytes_so_far, unichars_so_far, invalid_byte_buf, */
/*              invalid_byte_buf_len */
/* errorCode is one of the error constants defined above. */
/* NOTE: treat passed byte buffer as a constant, and as something that */
/* vanishes after this call returns! */
typedef void (*error_handler)(int errorCode,
                              int bytes_so_far,
                              int unichars_so_far,
                              uint8_t *invalid_byte_buf,
                              int invalid_byte_buf_len);

/* Callback invoked for each decoded character. */
/* called with: unicode character, bytes_so_far, unichars_so_far, byte_buf, */
/*              byte_buf_len */
/* NOTE: treat passed byte buffer as a constant, and as something that */
/* vanishes after this call returns! */
typedef void (*unicode_handler)(uint32_t unichar,
                                int bytes_so_far,
                                int unichars_so_far,
                                uint8_t *byte_buf,
                                int byte_buf_len);

/* Decoder state carried between calls to process_byte(). */
/* NOTE(review): the field descriptions below are inferred from the field */
/* names and from the callback arguments; confirm against the implementation. */
typedef struct {
    uint8_t working[6];         /* bytes of the sequence currently in progress */
    int working_len;            /* presumably the number of bytes used in working[] */
    int unichars_so_far;        /* running character count, as reported to the handlers */
    int bytes_so_far;           /* running byte count, as reported to the handlers */
    int expecting_continuation; /* presumably continuation bytes still expected (or a flag) */
} utf8_decoder_state_struct;

/* Handle type used by the API below; note that it is a pointer to the struct. */
typedef utf8_decoder_state_struct *utf8_decoder_state;

/* Prepares a decoder state for use and returns a decoder state handle. */
/* NOTE(review): behaviour for a NULL argument, and whether the returned */
/* handle is always the one passed in, cannot be determined from this header. */
utf8_decoder_state init_utf8_decoder_state(utf8_decoder_state state);

/* Feeds one incoming byte to the decoder. */
/* unih and errh are the callbacks available to the decoder for reporting */
/* decoded characters and errors respectively. */
void process_byte(uint8_t incoming,
                  utf8_decoder_state state,
                  unicode_handler unih,
                  error_handler errh);

/* Tells the decoder that the input has ended. */
/* NOTE(review): presumably reports a sequence left unfinished at end of */
/* input through errh; confirm against the implementation. */
void end_processing(utf8_decoder_state state, error_handler errh);

#endif