1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859 |
- #ifndef _UTF8_H_
- #define _UTF8_H_
- #include <stdlib.h>
- #include <stdio.h>
- #include <stdint.h>
- #include <string.h>
- #include <stdbool.h>
- #include <unistd.h>
- // A unicode string is an array of `struct uchar` objects, terminated by a `struct uchar` with `.bytes == 0`.
- // A `\0` byte isn't considered a valid unicode char.
- struct uchar {
- uint8_t bytes; // number of bytes in this character's UTF8 sequence (1 to 4); 0 marks the end of a unicode string
- union { // anonymous union: both members are views of the same 4 bytes of storage
- char chars[4]; // presumably the raw UTF8 bytes, of which only the first `bytes` are meaningful -- TODO confirm
- uint32_t ichars; // the same storage read as one 32-bit integer; its numeric value depends on host byte order
- };
- };
- // [ valid UTF8 byte sequence at `source` => true | otherwise false ]
- bool uchar_valid (char* source);
- // [ valid UTF8 byte sequence at `source` =>
- //   number of bytes occupied by that sequence, between 1 and 4 | otherwise 0 ]
- size_t uchar_bytes (char* source);
- // [ sequence of valid UTF8 byte sequences at `source` =>
- //   number of valid consecutive UTF8 byte sequences, greater than or equal to 1 | otherwise 0 ]
- size_t ustring_length (char* source);
- // [ sequence of valid UTF8 byte sequences at `source` =>
- //   the number of bytes occupied by the valid consecutive UTF8 byte sequences,
- //   greater than or equal to 1 | otherwise 0 ]
- size_t ustring_bytes (char* source);
- // [ sequence of `struct uchar` objects holding UTF8 byte sequences =>
- //   number of bytes required to convert it to a conventional `\0`-terminated `char` array ]
- size_t cstring_bytes (struct uchar* source);
- // [ valid UTF8 byte sequence at `source` =>
- //   a correctly initialized `struct uchar` object
- //   and side effect: source position is incremented |
- //   otherwise a `struct uchar` object with `.bytes == 0` ]
- struct uchar next_uchar (char* source); // NOTE(review): `source` is passed by value, so the caller's pointer cannot be advanced through this signature -- confirm the intended side effect against the implementation
- // [ a `char` array containing potentially valid UTF8 text =>
- //   a `struct uchar` array holding all consecutive valid UTF8 byte sequences is written at `*destination` ]
- // The caller must compute the required `struct uchar` array length beforehand,
- // with `ustring_length(source)`.
- void c_to_ustring (char* source, struct uchar* destination);
- // [ a `struct uchar` array containing potentially valid UTF8 text =>
- //   a `\0`-terminated `char` array is written at `*destination` ]
- // The caller must compute the required `char` array length beforehand, summing all
- // `struct uchar` `.bytes` members plus 1 (accounting for an extra `\0` byte at the end).
- void u_to_cstring (struct uchar* source, char* destination);
- // [ a `struct uchar` object =>
- //   side effect: outputs its UTF8 byte sequence to the file descriptor; returns the number of bytes written ]
- size_t uchar_puts (int fileno, struct uchar* uc);
- // [ sequence of `struct uchar` objects =>
- //   side effect: outputs all their UTF8 byte sequences to the file descriptor; returns the number of bytes written ]
- size_t ustring_puts (int fileno, struct uchar* ustring);
- #endif
|