188 lines
4.2 KiB
C++
188 lines
4.2 KiB
C++
// utf8.h
|
|
//
|
|
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
|
//
|
|
// This program is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU General Public License
|
|
// as published by the Free Software Foundation; either version 2
|
|
// of the License, or (at your option) any later version.
|
|
|
|
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// UTF-8 byte sequences (written as octal escapes) for a handful of non-ASCII
// characters. Because each macro expands to a plain string literal, it can be
// placed next to other string literals and will be concatenated with them.
#define UTF8_DEGREE_SIGN         "\302\260"      // U+00B0 DEGREE SIGN
#define UTF8_MULTIPLICATION_SIGN "\303\227"      // U+00D7 MULTIPLICATION SIGN
#define UTF8_SUPERSCRIPT_0       "\342\201\260"  // U+2070 SUPERSCRIPT ZERO
#define UTF8_SUPERSCRIPT_1       "\302\271"      // U+00B9 SUPERSCRIPT ONE
#define UTF8_SUPERSCRIPT_2       "\302\262"      // U+00B2 SUPERSCRIPT TWO
#define UTF8_SUPERSCRIPT_3       "\302\263"      // U+00B3 SUPERSCRIPT THREE
#define UTF8_SUPERSCRIPT_4       "\342\201\264"  // U+2074 SUPERSCRIPT FOUR
#define UTF8_SUPERSCRIPT_5       "\342\201\265"  // U+2075 SUPERSCRIPT FIVE
#define UTF8_SUPERSCRIPT_6       "\342\201\266"  // U+2076 SUPERSCRIPT SIX
#define UTF8_SUPERSCRIPT_7       "\342\201\267"  // U+2077 SUPERSCRIPT SEVEN
#define UTF8_SUPERSCRIPT_8       "\342\201\270"  // U+2078 SUPERSCRIPT EIGHT
#define UTF8_SUPERSCRIPT_9       "\342\201\271"  // U+2079 SUPERSCRIPT NINE
#define UTF8_REPLACEMENT_CHAR    "\357\277\275"  // U+FFFD REPLACEMENT CHARACTER
|
|
|
|
|
|
// Core UTF-8 routines. Only the declarations live in this header; the notes
// below describe the signatures and hedge anything that depends on the
// implementation.

// Decode a character from `str` at offset `pos` into `ch`.
// NOTE(review): the bool result presumably reports whether a well-formed
// sequence was found at `pos` -- confirm against the implementation.
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);

// Same as above for a raw character buffer; `length` is passed alongside the
// buffer (presumably its size in bytes -- confirm against the implementation).
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);

// Encode `ch` into `dest`.
// NOTE(review): whether the bytes are appended to `dest` or replace its
// contents is not visible from this header -- confirm before relying on it.
void UTF8Encode(std::uint32_t ch, std::string& dest);

// Three-way comparison of two strings; the int result is consumed by
// UTF8StringOrderingPredicate, which treats a negative result as "s0 before s1".
int UTF8StringCompare(const std::string& s0, const std::string& s1);

// Variant taking a limit `n` and an optional case-insensitivity flag
// (defaults to case-sensitive).
// NOTE(review): `n` presumably caps the number of characters compared --
// confirm the unit (characters vs. bytes) against the implementation.
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false);
|
|
|
|
class UTF8StringOrderingPredicate
|
|
{
|
|
public:
|
|
bool operator()(const std::string& s0, const std::string& s1) const
|
|
{
|
|
return UTF8StringCompare(s0, s1) == -1;
|
|
}
|
|
};
|
|
|
|
|
|
// Length of `s` as an int.
// NOTE(review): presumably counts decoded characters rather than bytes --
// the implementation is not visible here, confirm before relying on it.
int UTF8Length(const std::string& s);
|
|
|
|
// Number of bytes used to encode `ch` in UTF-8.
//
// Values below 0x80 take one byte, values below 0x800 take two. When wchar_t
// is at most 16 bits wide everything else takes three bytes. With a wider
// wchar_t the legacy long forms are also reported: four bytes below 0x200000,
// five below 0x4000000 and six otherwise.
//
// The value is compared as-is, so on platforms where wchar_t is signed a
// negative `ch` satisfies the first comparison and yields 1.
inline int UTF8EncodedSize(wchar_t ch)
{
    if (ch < 0x80)
        return 1;
    if (ch < 0x800)
        return 2;

#if WCHAR_MAX > 0xFFFFu
    // Only reachable with a wide wchar_t: values beyond the 16-bit range.
    if (ch >= 0x10000)
    {
        if (ch < 0x200000)
            return 4;
        return (ch < 0x4000000) ? 5 : 6;
    }
#endif

    return 3;
}
|
|
|
|
// Number of bytes needed to encode the code point `ch` in UTF-8, restricted
// to the valid Unicode range.
//
//   ch <  0x80      -> 1
//   ch <  0x800     -> 2
//   ch <  0x10000   -> 3
//   ch <  0x110000  -> 4
//   otherwise       -> 3, the size of U+FFFD REPLACEMENT CHARACTER, which is
//                      what an out-of-range value is assumed to become.
//
// The argument is always a 32-bit value, so the result must not depend on the
// width of wchar_t: the previous WCHAR_MAX guards made code points in
// U+10000..U+10FFFF report 3 bytes on platforms with a 16-bit wchar_t.
constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
{
    if (ch < 0x80)
        return 1;
    if (ch < 0x800)
        return 2;
    if (ch < 0x10000)
        return 3;
    if (ch < 0x110000)
        return 4;
    // out-of-range: assume U+FFFD REPLACEMENT CHARACTER
    return 3;
}
|
|
|
|
// Length of a UTF-8 sequence as announced by its first byte `ch`.
//
// Values below 0x80 are single-byte. Otherwise the leading-bit pattern
// selects the length: 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4,
// 111110xx -> 5, 1111110x -> 6. Anything matching none of these patterns
// (for example 10xxxxxx, 0xFE or 0xFF) yields 1.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    struct LeadPattern
    {
        unsigned int mask;  // bits of the byte that are inspected
        unsigned int bits;  // value those bits must have
        int size;           // sequence length for a match
    };

    constexpr LeadPattern leadPatterns[] = {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };

    for (const LeadPattern& pattern : leadPatterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    // No lead-byte pattern matched.
    return 1;
}
|
|
|
|
// Returns a new string derived from the argument.
// NOTE(review): judging by the name, Greek letter abbreviations in the input
// are replaced (presumably by the corresponding UTF-8 letters) -- confirm
// against the implementation, which is not part of this header.
std::string ReplaceGreekLetterAbbr(const std::string&);

// Buffer-based overload, currently compiled out.
#if 0
unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
#endif
|
|
|
|
// Greek alphabet lookup data.
//
// Construction and destruction are private; client code reaches the object
// through getInstance() or uses the static canonicalAbbreviation() helper.
class Greek
{
public:
    // The 24 letters of the Greek alphabet, numbered from 1 in alphabetical
    // order.
    enum Letter
    {
        Alpha   =  1,
        Beta    =  2,
        Gamma   =  3,
        Delta   =  4,
        Epsilon =  5,
        Zeta    =  6,
        Eta     =  7,
        Theta   =  8,
        Iota    =  9,
        Kappa   = 10,
        Lambda  = 11,
        Mu      = 12,
        Nu      = 13,
        Xi      = 14,
        Omicron = 15,
        Pi      = 16,
        Rho     = 17,
        Sigma   = 18,
        Tau     = 19,
        Upsilon = 20,
        Phi     = 21,
        Chi     = 22,
        Psi     = 23,
        Omega   = 24,
    };

    // Maps a string to a canonical abbreviation, returned by const reference.
    // NOTE(review): what is returned for unrecognized input is not visible
    // from this header -- confirm against the implementation.
    static const std::string& canonicalAbbreviation(const std::string&);

    // Accessor for the instance tracked by m_instance.
    // NOTE(review): presumably creates the instance on first use -- confirm.
    static Greek* getInstance();

    // NOTE(review): `names` and `abbrevs` are presumably arrays holding
    // `nLetters` entries each, owned by this object -- confirm against the
    // constructor/destructor definitions.
    int nLetters;
    std::string* names;
    std::string* abbrevs;

private:
    Greek();
    ~Greek();

    static Greek* m_instance;
};
|
|
|
|
// Returns a list of strings computed from the argument.
// NOTE(review): judging by the name these are completion candidates for a
// partially typed Greek letter name or abbreviation -- confirm against the
// implementation, which is not part of this header.
std::vector<std::string> getGreekCompletion(const std::string&);
|
|
|
|
// Result of feeding one byte to UTF8Validator::check().
enum class UTF8Status
{
    Ok,                   // no error reported for the byte
    InvalidFirstByte,     // the byte is not acceptable as the start of a sequence
    InvalidTrailingByte,  // the byte is not acceptable as a continuation byte
};
|
|
|
|
class UTF8Validator
|
|
{
|
|
public:
|
|
UTF8Validator() = default;
|
|
~UTF8Validator() = default;
|
|
|
|
UTF8Status check(char c);
|
|
UTF8Status check(unsigned char c);
|
|
|
|
private:
|
|
enum class State
|
|
{
|
|
Initial,
|
|
Continuation1,
|
|
Continuation2,
|
|
Continuation3,
|
|
E0Continuation,
|
|
EDContinuation,
|
|
F0Continuation,
|
|
F4Continuation,
|
|
};
|
|
|
|
State state{ State::Initial };
|
|
};
|