Split greek-letters handling from utf8 and refactor a bit
parent
eaae852f8f
commit
c251dac856
|
@ -8,6 +8,7 @@
|
|||
// of the License, or (at your option) any later version.
|
||||
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include <celutil/logger.h>
|
||||
#include <celutil/tokenizer.h>
|
||||
#include "stardb.h"
|
||||
|
|
|
@ -179,7 +179,7 @@ void Console::print(char* s)
|
|||
while (i < length && validChar)
|
||||
{
|
||||
wchar_t ch = 0;
|
||||
validChar = UTF8Decode(s, i, length, ch);
|
||||
validChar = UTF8Decode(string_view(s, length), i, ch);
|
||||
i += UTF8EncodedSize(ch);
|
||||
print(ch);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include <celutil/logger.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include "name.h"
|
||||
|
||||
uint32_t NameDatabase::getNameCount() const
|
||||
|
@ -97,41 +98,25 @@ NameDatabase::NumberIndex::const_iterator NameDatabase::getFinalNameIter() const
|
|||
return numberIndex.end();
|
||||
}
|
||||
|
||||
std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n, bool greek) const
|
||||
std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n) const
|
||||
{
|
||||
if (greek)
|
||||
{
|
||||
auto compList = getGreekCompletion(name);
|
||||
compList.push_back(name);
|
||||
return getCompletion(compList, i18n);
|
||||
}
|
||||
std::string name2 = ReplaceGreekLetter(name);
|
||||
|
||||
std::vector<std::string> completion;
|
||||
int name_length = UTF8Length(name);
|
||||
const int name_length = UTF8Length(name2);
|
||||
|
||||
for (NameIndex::const_iterator iter = nameIndex.begin(); iter != nameIndex.end(); ++iter)
|
||||
for (const auto &[n, _] : nameIndex)
|
||||
{
|
||||
if (!UTF8StringCompare(iter->first, name, name_length, true))
|
||||
completion.push_back(iter->first);
|
||||
if (!UTF8StringCompare(n, name2, name_length, true))
|
||||
completion.push_back(n);
|
||||
}
|
||||
if (i18n)
|
||||
{
|
||||
for (NameIndex::const_iterator iter = localizedNameIndex.begin(); iter != localizedNameIndex.end(); ++iter)
|
||||
for (const auto &[n, _] : localizedNameIndex)
|
||||
{
|
||||
if (!UTF8StringCompare(iter->first, name, name_length, true))
|
||||
completion.push_back(iter->first);
|
||||
if (!UTF8StringCompare(n, name2, name_length, true))
|
||||
completion.push_back(n);
|
||||
}
|
||||
}
|
||||
return completion;
|
||||
}
|
||||
|
||||
std::vector<std::string> NameDatabase::getCompletion(const std::vector<std::string> &list, bool i18n) const
|
||||
{
|
||||
std::vector<std::string> completion;
|
||||
for (const auto &n : list)
|
||||
{
|
||||
for (const auto &nn : getCompletion(n, i18n, false))
|
||||
completion.emplace_back(nn);
|
||||
}
|
||||
return completion;
|
||||
}
|
||||
|
|
|
@ -45,8 +45,7 @@ class NameDatabase
|
|||
NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const;
|
||||
NumberIndex::const_iterator getFinalNameIter() const;
|
||||
|
||||
std::vector<std::string> getCompletion(const std::string& name, bool i18n, bool greek = true) const;
|
||||
std::vector<std::string> getCompletion(const std::vector<std::string> &list, bool i18n) const;
|
||||
std::vector<std::string> getCompletion(const std::string& name, bool i18n) const;
|
||||
|
||||
protected:
|
||||
NameIndex nameIndex;
|
||||
|
|
|
@ -159,7 +159,7 @@ void Overlay::print_impl(const std::string& s)
|
|||
while (i < length && validChar)
|
||||
{
|
||||
wchar_t ch = 0;
|
||||
validChar = UTF8Decode(s.c_str(), i, length, ch);
|
||||
validChar = UTF8Decode(s, i, ch);
|
||||
i += UTF8EncodedSize(ch);
|
||||
print(ch);
|
||||
}
|
||||
|
|
|
@ -10,8 +10,10 @@
|
|||
//
|
||||
//
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <celengine/constellation.h>
|
||||
#include <celengine/starname.h>
|
||||
#include <celutil/greek.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -61,26 +63,26 @@ uint32_t StarNameDatabase::findCatalogNumberByName(const string& name, bool i18n
|
|||
// We have a valid constellation as the last part
|
||||
// of the name. Next, we see if the first part of
|
||||
// the name is a greek letter.
|
||||
const string& letter = Greek::canonicalAbbreviation(string(prefix, 0, len));
|
||||
std::string_view letter = GetCanonicalGreekAbbreviation(std::string_view(prefix).substr(0, len));
|
||||
if (!letter.empty())
|
||||
{
|
||||
// Matched . . . this is a Bayer designation
|
||||
if (digit == ' ')
|
||||
{
|
||||
priName = letter + ' ' + con->getAbbreviation();
|
||||
priName = fmt::format("{} {}", letter, con->getAbbreviation());
|
||||
// If 'let con' doesn't match, try using
|
||||
// 'let1 con' instead.
|
||||
altName = letter + '1' + ' ' + con->getAbbreviation();
|
||||
altName = fmt::format("{}1 {}", letter, con->getAbbreviation());
|
||||
}
|
||||
else
|
||||
{
|
||||
priName = letter + digit + ' ' + con->getAbbreviation();
|
||||
priName = fmt::format("{}{} {}", letter, digit, con->getAbbreviation());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Something other than a Bayer designation
|
||||
priName = prefix + ' ' + con->getAbbreviation();
|
||||
priName = fmt::format("{} {}", prefix, con->getAbbreviation());
|
||||
}
|
||||
|
||||
if (isOrbitingStar)
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#include <celmath/mathlib.h>
|
||||
#include <celmath/intersect.h>
|
||||
#include <celmath/ray.h>
|
||||
#include <celutil/utf8.h>
|
||||
#include <celutil/greek.h>
|
||||
#include <cassert>
|
||||
|
||||
static const double ANGULAR_RES = 3.5e-6;
|
||||
|
|
|
@ -970,7 +970,7 @@ void CelestiaCore::charEntered(const char *c_p, int modifiers)
|
|||
if (textEnterMode & KbAutoComplete)
|
||||
{
|
||||
wchar_t wc = 0; // Null wide character
|
||||
UTF8Decode(c_p, 0, strlen(c_p), wc);
|
||||
UTF8Decode(c_p, 0, wc);
|
||||
#ifdef __APPLE__
|
||||
if ( wc && (!iscntrl(wc)) )
|
||||
#else
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <celengine/starbrowser.h>
|
||||
#include <celengine/stardb.h>
|
||||
#include <celengine/univcoord.h>
|
||||
#include <celutil/utf8.h>
|
||||
#include <celutil/greek.h>
|
||||
|
||||
#include "dialog-star.h"
|
||||
#include "actions.h"
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include <celengine/simulation.h>
|
||||
#include <celestia/celestiacore.h>
|
||||
#include <celestia/helper.h>
|
||||
#include <celutil/utf8.h>
|
||||
#include <celutil/greek.h>
|
||||
|
||||
#include "menu-context.h"
|
||||
#include "actions.h"
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include <celutil/tzutil.h>
|
||||
#include "qtappwin.h"
|
||||
#include "qtglwidget.h"
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <celestia/celestiacore.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include "qtcelestialbrowser.h"
|
||||
#include "qtcolorswatchwidget.h"
|
||||
#include "qtinfopanel.h"
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <celestia/celestiacore.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include "qtdeepskybrowser.h"
|
||||
#include "qtcolorswatchwidget.h"
|
||||
#include "qtinfopanel.h"
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include <celengine/astro.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/logger.h>
|
||||
#include <celutil/utf8.h>
|
||||
#include <celutil/greek.h>
|
||||
#include <celengine/universe.h>
|
||||
#include <QTextBrowser>
|
||||
#include <QIODevice>
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <celengine/axisarrow.h>
|
||||
#include <celengine/planetgrid.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include <fmt/printf.h>
|
||||
#include "qtselectionpopup.h"
|
||||
#include "qtappwin.h"
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include <celestia/celestiacore.h>
|
||||
#include <celutil/gettext.h>
|
||||
#include <celutil/greek.h>
|
||||
#include "qtsolarsystembrowser.h"
|
||||
#include "qtinfopanel.h"
|
||||
#include "qtcolorswatchwidget.h"
|
||||
|
|
|
@ -13,6 +13,8 @@ set(CELUTIL_SOURCES
|
|||
formatnum.h
|
||||
fsutils.cpp
|
||||
fsutils.h
|
||||
greek.cpp
|
||||
greek.h
|
||||
logger.cpp
|
||||
logger.h
|
||||
reshandle.h
|
||||
|
|
|
@ -0,0 +1,270 @@
|
|||
// greek.cpp
|
||||
//
|
||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||
// 2018-present, Celestia Development Team
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
|
||||
#include "greek.h"
|
||||
|
||||
#include "stringutils.h"
|
||||
#include "utf8.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cctype>
|
||||
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr int nLetters = 24;
|
||||
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_0 = "\342\201\260"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_1 = "\302\271"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_2 = "\302\262"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_3 = "\302\263"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_4 = "\342\201\264"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_5 = "\342\201\265"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_6 = "\342\201\266"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_7 = "\342\201\267"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_8 = "\342\201\270"sv;
|
||||
constexpr std::string_view UTF8_SUPERSCRIPT_9 = "\342\201\271"sv;
|
||||
|
||||
// clang-format off
|
||||
const std::array<std::string_view, nLetters> greekAlphabet =
|
||||
{
|
||||
"Alpha"sv,
|
||||
"Beta"sv,
|
||||
"Gamma"sv,
|
||||
"Delta"sv,
|
||||
"Epsilon"sv,
|
||||
"Zeta"sv,
|
||||
"Eta"sv,
|
||||
"Theta"sv,
|
||||
"Iota"sv,
|
||||
"Kappa"sv,
|
||||
"Lambda"sv,
|
||||
"Mu"sv,
|
||||
"Nu"sv,
|
||||
"Xi"sv,
|
||||
"Omicron"sv,
|
||||
"Pi"sv,
|
||||
"Rho"sv,
|
||||
"Sigma"sv,
|
||||
"Tau"sv,
|
||||
"Upsilon"sv,
|
||||
"Phi"sv,
|
||||
"Chi"sv,
|
||||
"Psi"sv,
|
||||
"Omega"sv
|
||||
};
|
||||
|
||||
const std::array<std::string_view, nLetters> greekAlphabetUTF8 = {
|
||||
"\316\261"sv, // ALF
|
||||
"\316\262"sv, // BET
|
||||
"\316\263"sv, // GAM
|
||||
"\316\264"sv, // DEL
|
||||
"\316\265"sv, // EPS
|
||||
"\316\266"sv, // ZET
|
||||
"\316\267"sv, // ETA
|
||||
"\316\270"sv, // TET
|
||||
"\316\271"sv, // IOT
|
||||
"\316\272"sv, // KAP
|
||||
"\316\273"sv, // LAM
|
||||
"\316\274"sv, // MU
|
||||
"\316\275"sv, // NU
|
||||
"\316\276"sv, // XI
|
||||
"\316\277"sv, // OMI
|
||||
"\317\200"sv, // PI
|
||||
"\317\201"sv, // RHO
|
||||
"\317\203"sv, // SIG
|
||||
"\317\204"sv, // TAU
|
||||
"\317\205"sv, // UPS
|
||||
"\317\206"sv, // PHI
|
||||
"\317\207"sv, // CHI
|
||||
"\317\210"sv, // PSI
|
||||
"\317\211"sv, // OME
|
||||
};
|
||||
|
||||
const std::array<std::string_view, nLetters> canonicalAbbrevs =
|
||||
{
|
||||
"ALF"sv,
|
||||
"BET"sv,
|
||||
"GAM"sv,
|
||||
"DEL"sv,
|
||||
"EPS"sv,
|
||||
"ZET"sv,
|
||||
"ETA"sv,
|
||||
"TET"sv,
|
||||
"IOT"sv,
|
||||
"KAP"sv,
|
||||
"LAM"sv,
|
||||
"MU"sv,
|
||||
"NU"sv,
|
||||
"XI"sv,
|
||||
"OMI"sv,
|
||||
"PI"sv,
|
||||
"RHO"sv,
|
||||
"SIG"sv,
|
||||
"TAU"sv,
|
||||
"UPS"sv,
|
||||
"PHI"sv,
|
||||
"CHI"sv,
|
||||
"PSI"sv,
|
||||
"OME"sv,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
/**
 * Returns the length, in bytes, of the first word of str with any
 * trailing decimal digits removed.
 *
 * The first word ends at the first space character, or at the end of
 * the string when it contains no space. Digits immediately preceding
 * that position are then excluded, so "ALF1 Cen" yields 3. A string
 * that is empty, or whose first word consists only of digits, yields 0.
 */
std::string_view::size_type
getFirstWordLength(std::string_view str)
{
    auto sp = str.find(' ');
    if (sp == std::string_view::npos)
        sp = str.length();

    // Skip trailing digits. The cast is required: str may hold UTF-8
    // bytes, which are negative where char is signed, and passing a
    // negative value to std::isdigit is undefined behaviour.
    while (sp > 0 && std::isdigit(static_cast<unsigned char>(str[sp - 1])) != 0)
        sp--;

    return sp;
}
|
||||
|
||||
std::string_view
|
||||
toSuperscript(char c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '0':
|
||||
return UTF8_SUPERSCRIPT_0;
|
||||
case '1':
|
||||
return UTF8_SUPERSCRIPT_1;
|
||||
case '2':
|
||||
return UTF8_SUPERSCRIPT_2;
|
||||
case '3':
|
||||
return UTF8_SUPERSCRIPT_3;
|
||||
case '4':
|
||||
return UTF8_SUPERSCRIPT_4;
|
||||
case '5':
|
||||
return UTF8_SUPERSCRIPT_5;
|
||||
case '6':
|
||||
return UTF8_SUPERSCRIPT_6;
|
||||
case '7':
|
||||
return UTF8_SUPERSCRIPT_7;
|
||||
case '8':
|
||||
return UTF8_SUPERSCRIPT_8;
|
||||
case '9':
|
||||
return UTF8_SUPERSCRIPT_9;
|
||||
default:
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Replaces the Greek letter abbreviation at the beginning
|
||||
* of a string by the UTF-8 representation of that letter.
|
||||
* Also, replaces digits following Greek letters with UTF-8
|
||||
* superscripts.
|
||||
*/
|
||||
std::string
|
||||
ReplaceGreekLetterAbbr(std::string_view str)
|
||||
{
|
||||
if (str.empty())
|
||||
return {};
|
||||
|
||||
if (auto len = getFirstWordLength(str); len > 0 && str[0] >= 'A' && str[0] <= 'Z')
|
||||
{
|
||||
// Linear search through all letter abbreviations
|
||||
for (int i = 0; i < nLetters; i++)
|
||||
{
|
||||
auto prefix = canonicalAbbrevs[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
{
|
||||
prefix = greekAlphabet[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string ret(greekAlphabetUTF8[i]);
|
||||
for (; str.length() > len && std::isdigit(str[len]); len++)
|
||||
ret.append(toSuperscript(str[len]));
|
||||
ret.append(str.substr(len));
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns canonical greek abbreviation for a letter passed.
|
||||
* The letter can be: latin name of a greek letter, canonical
|
||||
* representation of it or a greek letter itself in UTF-8.
|
||||
*/
|
||||
std::string_view
|
||||
GetCanonicalGreekAbbreviation(std::string_view letter)
|
||||
{
|
||||
for (int i = 0; i < nLetters; i++)
|
||||
{
|
||||
if (compareIgnoringCase(letter, greekAlphabet[i]) == 0
|
||||
|| compareIgnoringCase(letter, canonicalAbbrevs[i]) == 0)
|
||||
{
|
||||
return canonicalAbbrevs[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (letter.length() == 2)
|
||||
{
|
||||
for (int i = 0; i < nLetters; i++)
|
||||
{
|
||||
if (letter == greekAlphabetUTF8[i]) return canonicalAbbrevs[i];
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces the Greek letter or abbreviation at the beginning
|
||||
* of a string by the UTF-8 representation of that letter.
|
||||
* Also, replaces digits following Greek letters with UTF-8
|
||||
* superscripts.
|
||||
*/
|
||||
std::string
|
||||
ReplaceGreekLetter(std::string_view str)
|
||||
{
|
||||
if (str.empty()) return {};
|
||||
|
||||
if (auto len = getFirstWordLength(str); len > 0)
|
||||
{
|
||||
// Linear search through all letter abbreviations
|
||||
for (int i = 0; i < nLetters; i++)
|
||||
{
|
||||
if (len != 2 || str != greekAlphabetUTF8[i])
|
||||
{
|
||||
auto prefix = canonicalAbbrevs[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
{
|
||||
prefix = greekAlphabet[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
std::string ret(greekAlphabetUTF8[i]);
|
||||
for (; str.length() > len && std::isdigit(str[len]); len++)
|
||||
ret.append(toSuperscript(str[len]));
|
||||
ret.append(str.substr(len));
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return std::string(str);
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
// greek.h
|
||||
//
|
||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||
// 2018-present, Celestia Development Team
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
std::string ReplaceGreekLetterAbbr(std::string_view str);
|
||||
std::string ReplaceGreekLetter(std::string_view str);
|
||||
std::string_view GetCanonicalGreekAbbreviation(std::string_view letter);
|
|
@ -1,20 +1,21 @@
|
|||
// utf8.cpp
|
||||
//
|
||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||
// 2018-present, Celestia Development Team
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
// as published by the Free Software Foundation; either version 2
|
||||
// of the License, or (at your option) any later version.
|
||||
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <wchar.h>
|
||||
#include <climits>
|
||||
#include <fmt/printf.h>
|
||||
#include "stringutils.h"
|
||||
#include "utf8.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// clang-format off
|
||||
|
||||
uint16_t WGL4_Normalization_00[256] = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
|
@ -300,11 +301,45 @@ uint16_t* WGL4NormalizationTables[256] = {
|
|||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
};
|
||||
|
||||
// clang-format on
|
||||
|
||||
// Returns the number of bytes in a UTF-8 sequence, judged from the low
// eight bits of its first byte. Values below 0x80 and bytes matching no
// lead-byte pattern report a size of 1.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    // Lead-byte bit patterns for sequences of 2 to 6 bytes.
    struct LeadPattern
    {
        unsigned int mask;
        unsigned int bits;
        int          size;
    };
    constexpr LeadPattern patterns[] =
    {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };

    for (const auto& pattern : patterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    return 1;
}
|
||||
|
||||
inline wchar_t UTF8Normalize(wchar_t ch)
|
||||
{
|
||||
auto page = (unsigned int) ch >> 8;
|
||||
if (page >= 256)
|
||||
return ch;
|
||||
|
||||
uint16_t* normTable = WGL4NormalizationTables[page];
|
||||
if (normTable == nullptr)
|
||||
return ch;
|
||||
|
||||
return (wchar_t) normTable[(unsigned int) ch & 0xff];
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//! Decode the UTF-8 characters in string str beginning at position pos.
|
||||
//! The decoded character is returned in ch; the return value of the function
|
||||
//! is true if a valid UTF-8 sequence was successfully decoded.
|
||||
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
|
||||
bool UTF8Decode(std::string_view str, int pos, wchar_t& ch)
|
||||
{
|
||||
auto c0 = (unsigned int) str[pos];
|
||||
int charlen = UTF8EncodedSizeFromFirstByte(c0);
|
||||
|
@ -362,69 +397,6 @@ bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//! Decode the UTF-8 characters in string str beginning at position pos.
|
||||
//! The decoded character is returned in ch; the return value of the function
|
||||
//! is true if a valid UTF-8 sequence was successfully decoded.
|
||||
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch)
|
||||
{
|
||||
auto c0 = (unsigned int) str[pos];
|
||||
int charlen = UTF8EncodedSizeFromFirstByte(c0);
|
||||
|
||||
// Bad UTF-8 character that extends past end of string
|
||||
if (pos + charlen > length)
|
||||
return false;
|
||||
|
||||
// TODO: Should check that the bytes of characters after the first are all
|
||||
// of the form 01xxxxxx
|
||||
// TODO: Need to reject overlong encoding sequences
|
||||
|
||||
switch (charlen)
|
||||
{
|
||||
case 1:
|
||||
ch = c0;
|
||||
return true;
|
||||
|
||||
case 2:
|
||||
ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
|
||||
return true;
|
||||
|
||||
case 3:
|
||||
ch = ((c0 & 0x0f) << 12) |
|
||||
(((unsigned int) str[pos + 1] & 0x3f) << 6) |
|
||||
((unsigned int) str[pos + 2] & 0x3f);
|
||||
return true;
|
||||
|
||||
case 4:
|
||||
ch = ((c0 & 0x07) << 18) |
|
||||
(((unsigned int) str[pos + 1] & 0x3f) << 12) |
|
||||
(((unsigned int) str[pos + 2] & 0x3f) << 6) |
|
||||
((unsigned int) str[pos + 3] & 0x3f);
|
||||
return true;
|
||||
|
||||
case 5:
|
||||
ch = ((c0 & 0x03) << 24) |
|
||||
(((unsigned int) str[pos + 1] & 0x3f) << 18) |
|
||||
(((unsigned int) str[pos + 2] & 0x3f) << 12) |
|
||||
(((unsigned int) str[pos + 3] & 0x3f) << 6) |
|
||||
((unsigned int) str[pos + 4] & 0x3f);
|
||||
return true;
|
||||
|
||||
case 6:
|
||||
ch = ((c0 & 0x01) << 30) |
|
||||
(((unsigned int) str[pos + 1] & 0x3f) << 24) |
|
||||
(((unsigned int) str[pos + 2] & 0x3f) << 18) |
|
||||
(((unsigned int) str[pos + 3] & 0x3f) << 12) |
|
||||
(((unsigned int) str[pos + 4] & 0x3f) << 6) |
|
||||
((unsigned int) str[pos + 5] & 0x3f);
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//! Appends the UTF-8 encoded version of the code point ch to the
|
||||
//! destination string
|
||||
void UTF8Encode(std::uint32_t ch, std::string& dest)
|
||||
|
@ -469,9 +441,8 @@ void UTF8Encode(std::uint32_t ch, std::string& dest)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//! Return the number of characters encoded by a UTF-8 string
|
||||
int UTF8Length(const std::string& s)
|
||||
int UTF8Length(std::string_view s)
|
||||
{
|
||||
int len = s.length();
|
||||
int count = 0;
|
||||
|
@ -485,25 +456,10 @@ int UTF8Length(const std::string& s)
|
|||
return count;
|
||||
}
|
||||
|
||||
|
||||
inline wchar_t UTF8Normalize(wchar_t ch)
|
||||
{
|
||||
auto page = (unsigned int) ch >> 8;
|
||||
if (page >= 256)
|
||||
return ch;
|
||||
|
||||
uint16_t* normTable = WGL4NormalizationTables[page];
|
||||
if (normTable == nullptr)
|
||||
return ch;
|
||||
|
||||
return (wchar_t) normTable[(unsigned int) ch & 0xff];
|
||||
}
|
||||
|
||||
|
||||
//! Perform a normalized comparison of two UTF-8 strings. The normalization
|
||||
//! only works for characters in the WGL-4 subset, and no multicharacter
|
||||
//! translations are performed.
|
||||
int UTF8StringCompare(const std::string& s0, const std::string& s1)
|
||||
int UTF8StringCompare(std::string_view s0, std::string_view s1)
|
||||
{
|
||||
int len0 = s0.length();
|
||||
int len1 = s1.length();
|
||||
|
@ -542,7 +498,7 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase)
|
||||
int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase)
|
||||
{
|
||||
int len0 = s0.length();
|
||||
int len1 = s1.length();
|
||||
|
@ -588,432 +544,6 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bo
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
//! Currently incomplete, but could be a helpful class for dealing with
|
||||
//! UTF-8 streams
|
||||
class UTF8StringIterator
|
||||
{
|
||||
public:
|
||||
UTF8StringIterator(const std::string& _str) : str(_str) {};
|
||||
UTF8StringIterator(const UTF8StringIterator& iter) = default;
|
||||
|
||||
UTF8StringIterator& operator++();
|
||||
UTF8StringIterator& operator++(int);
|
||||
|
||||
private:
|
||||
const std::string& str;
|
||||
int position{ 0 };
|
||||
};
|
||||
|
||||
|
||||
UTF8StringIterator& UTF8StringIterator::operator++()
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
UTF8StringIterator& UTF8StringIterator::operator++(int)
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static const char *greekAlphabet[] =
|
||||
{
|
||||
"Alpha",
|
||||
"Beta",
|
||||
"Gamma",
|
||||
"Delta",
|
||||
"Epsilon",
|
||||
"Zeta",
|
||||
"Eta",
|
||||
"Theta",
|
||||
"Iota",
|
||||
"Kappa",
|
||||
"Lambda",
|
||||
"Mu",
|
||||
"Nu",
|
||||
"Xi",
|
||||
"Omicron",
|
||||
"Pi",
|
||||
"Rho",
|
||||
"Sigma",
|
||||
"Tau",
|
||||
"Upsilon",
|
||||
"Phi",
|
||||
"Chi",
|
||||
"Psi",
|
||||
"Omega"
|
||||
};
|
||||
|
||||
static const char* greekAlphabetUTF8[] =
|
||||
{
|
||||
"\316\261",
|
||||
"\316\262",
|
||||
"\316\263",
|
||||
"\316\264",
|
||||
"\316\265",
|
||||
"\316\266",
|
||||
"\316\267",
|
||||
"\316\270",
|
||||
"\316\271",
|
||||
"\316\272",
|
||||
"\316\273",
|
||||
"\316\274",
|
||||
"\316\275",
|
||||
"\316\276",
|
||||
"\316\277",
|
||||
"\317\200",
|
||||
"\317\201",
|
||||
"\317\203",
|
||||
"\317\204",
|
||||
"\317\205",
|
||||
"\317\206",
|
||||
"\317\207",
|
||||
"\317\210",
|
||||
"\317\211",
|
||||
};
|
||||
|
||||
static const char* canonicalAbbrevs[] =
|
||||
{
|
||||
"ALF", "BET", "GAM", "DEL", "EPS", "ZET", "ETA", "TET",
|
||||
"IOT", "KAP", "LAM", "MU" , "NU" , "XI" , "OMI", "PI" ,
|
||||
"RHO", "SIG", "TAU", "UPS", "PHI", "CHI", "PSI", "OME",
|
||||
};
|
||||
|
||||
static std::string noAbbrev;
|
||||
|
||||
// Greek alphabet crud . . . should probably moved to it's own module.
|
||||
|
||||
static size_t greekChunkLength(const std::string&);
|
||||
|
||||
Greek* Greek::m_instance = nullptr;
|
||||
|
||||
Greek* Greek::getInstance()
|
||||
{
|
||||
if (m_instance == nullptr)
|
||||
m_instance = new Greek();
|
||||
return m_instance;
|
||||
}
|
||||
|
||||
Greek::Greek()
|
||||
{
|
||||
nLetters = sizeof(greekAlphabet) / sizeof(greekAlphabet[0]);
|
||||
names = new std::string[nLetters];
|
||||
abbrevs = new std::string[nLetters];
|
||||
|
||||
for (int i = 0; i < nLetters; i++)
|
||||
{
|
||||
names[i] = std::string(greekAlphabet[i]);
|
||||
abbrevs[i] = std::string(canonicalAbbrevs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
Greek::~Greek()
|
||||
{
|
||||
delete[] names;
|
||||
delete[] abbrevs;
|
||||
}
|
||||
|
||||
const std::string& Greek::canonicalAbbreviation(const std::string& letter)
|
||||
{
|
||||
Greek *instance = Greek::getInstance();
|
||||
int i;
|
||||
for (i = 0; i < instance->nLetters; i++)
|
||||
{
|
||||
if (compareIgnoringCase(letter, instance->names[i]) == 0)
|
||||
return instance->abbrevs[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < instance->nLetters; i++)
|
||||
{
|
||||
if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0)
|
||||
return instance->abbrevs[i];
|
||||
}
|
||||
|
||||
if (letter.length() == 2)
|
||||
{
|
||||
for (i = 0; i < instance->nLetters; i++)
|
||||
{
|
||||
if (letter[0] == greekAlphabetUTF8[i][0] &&
|
||||
letter[1] == greekAlphabetUTF8[i][1])
|
||||
{
|
||||
return instance->abbrevs[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return noAbbrev;
|
||||
}
|
||||
|
||||
static const char* toSuperscript(char c)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
case '0':
|
||||
return UTF8_SUPERSCRIPT_0;
|
||||
case '1':
|
||||
return UTF8_SUPERSCRIPT_1;
|
||||
case '2':
|
||||
return UTF8_SUPERSCRIPT_2;
|
||||
case '3':
|
||||
return UTF8_SUPERSCRIPT_3;
|
||||
case '4':
|
||||
return UTF8_SUPERSCRIPT_4;
|
||||
case '5':
|
||||
return UTF8_SUPERSCRIPT_5;
|
||||
case '6':
|
||||
return UTF8_SUPERSCRIPT_6;
|
||||
case '7':
|
||||
return UTF8_SUPERSCRIPT_7;
|
||||
case '8':
|
||||
return UTF8_SUPERSCRIPT_8;
|
||||
case '9':
|
||||
return UTF8_SUPERSCRIPT_9;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
//! Replaces the Greek letter abbreviation at the beginning
|
||||
//! of a string by the UTF-8 representation of that letter.
|
||||
//! Also, replace digits following Greek letters with UTF-8
|
||||
//! superscripts.
|
||||
std::string ReplaceGreekLetterAbbr(const std::string& str)
|
||||
{
|
||||
Greek *instance = Greek::getInstance();
|
||||
size_t len = greekChunkLength(str);
|
||||
|
||||
if (str[0] >= 'A' && str[0] <= 'Z')
|
||||
{
|
||||
// Linear search through all letter abbreviations
|
||||
for (int i = 0; i < instance->nLetters; i++)
|
||||
{
|
||||
std::string prefix = instance->abbrevs[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
{
|
||||
prefix = instance->names[i];
|
||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string ret = greekAlphabetUTF8[i];
|
||||
auto len = prefix.length();
|
||||
for (; str.length() > len && isdigit(str[len]); len++)
|
||||
ret += toSuperscript(str[len]);
|
||||
ret += str.substr(len);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
//! Replaces the Greek letter abbreviation at the beginning
|
||||
//! of a string by the UTF-8 representation of that letter.
|
||||
//! Also, replace digits following Greek letters with UTF-8
|
||||
//! superscripts. Operates on char* instead of strings--less
|
||||
//! convenient, but more efficient. Return the number of
|
||||
//! characters copied to the destination string, not
|
||||
//! including the zero terminator.
|
||||
#if 0
|
||||
unsigned int
|
||||
ReplaceGreekLetterAbbr(char *dst, unsigned int dstSize, const char* src, unsigned int srcLength)
|
||||
{
|
||||
Greek *instance = Greek::getInstance();
|
||||
if (src[0] >= 'A' && src[0] <= 'Z' &&
|
||||
src[1] >= 'A' && src[1] <= 'Z')
|
||||
{
|
||||
// Linear search through all letter abbreviations
|
||||
for (unsigned int i = 0; i < (unsigned int) instance->nLetters; i++)
|
||||
{
|
||||
const char* abbr = canonicalAbbrevs[i];
|
||||
unsigned int j = 0;
|
||||
while (abbr[j] == src[j] && abbr[j] != '\0' && src[j] != '\0')
|
||||
j++;
|
||||
|
||||
// It's a match if we reached the end of the abbreviation string
|
||||
if (abbr[j] == '\0')
|
||||
{
|
||||
unsigned int abbrevLength = j;
|
||||
unsigned int srcIndex = j;
|
||||
const char *superscript = toSuperscript(src[abbrevLength]);
|
||||
|
||||
const char* utfGreek = greekAlphabetUTF8[i];
|
||||
unsigned int utfGreekLength = strlen(utfGreek);
|
||||
|
||||
unsigned int requiredLength = srcLength;
|
||||
if (utfGreekLength > abbrevLength)
|
||||
requiredLength += utfGreekLength - abbrevLength;
|
||||
if (superscript != nullptr)
|
||||
{
|
||||
requiredLength += strlen(superscript) - 1;
|
||||
srcIndex++;
|
||||
}
|
||||
|
||||
// If there's not enough room, give up translating and just copy as much as possible
|
||||
if (requiredLength + 1 > dstSize)
|
||||
break;
|
||||
|
||||
unsigned int dstIndex = 0;
|
||||
j = 0;
|
||||
while (utfGreek[j] != 0)
|
||||
{
|
||||
dst[dstIndex++] = utfGreek[j];
|
||||
j++;
|
||||
}
|
||||
|
||||
if (superscript != nullptr)
|
||||
{
|
||||
j = 0;
|
||||
while (superscript[j] != 0)
|
||||
{
|
||||
dst[dstIndex++] = superscript[j];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
while (src[srcIndex] != 0)
|
||||
{
|
||||
dst[dstIndex++] = src[srcIndex++];
|
||||
}
|
||||
dst[dstIndex] = '\0';
|
||||
|
||||
return dstIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
strncpy(dst, src, dstSize);
|
||||
if (dstSize > srcLength)
|
||||
return srcLength;
|
||||
|
||||
|
||||
if (dstSize > 0)
|
||||
{
|
||||
dst[dstSize - 1] = '\0';
|
||||
return dstSize - 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int findGreekNameIndexBySubstr(const std::string &, int = 0, unsigned int = UINT_MAX);
|
||||
#if 0
|
||||
static std::string firstGreekAbbrCompletion(const std::string &);
|
||||
#endif
|
||||
|
||||
bool inline isSubstringIgnoringCase(const std::string &s0, const std::string &s1, size_t n)
|
||||
{
|
||||
return UTF8StringCompare(s0, s1, n, true) == 0;
|
||||
}
|
||||
|
||||
static int findGreekNameIndexBySubstr(const std::string &s, int start, unsigned int n)
|
||||
{
|
||||
Greek *instance = Greek::getInstance();
|
||||
|
||||
if (s.empty())
|
||||
return -1;
|
||||
|
||||
for (int i = start; i < instance->nLetters; i++)
|
||||
{
|
||||
if (isSubstringIgnoringCase(instance->names[i], s, n))
|
||||
return i;
|
||||
}
|
||||
|
||||
for (int i = start; i < instance->nLetters; i++)
|
||||
{
|
||||
if (isSubstringIgnoringCase(instance->abbrevs[i], s, n))
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Computes where the leading "Greek letter" chunk of str ends.
//
// The chunk initially runs up to the first space, or to the end of the string
// when there is no space. If the chunk ends in decimal digits, those digits
// are excluded and the index of the first trailing digit is returned (this
// may be 0). Otherwise the position of the space is returned, or
// std::string::npos when the string contains no space.
static size_t greekChunkLength(const std::string& str)
{
    const size_t spacePos = str.find_first_of(' ');
    const bool hasSpace = spacePos != std::string::npos;
    size_t end = hasSpace ? spacePos : str.length();

    // True when the character immediately before `pos` is a decimal digit.
    // The cast keeps isdigit well-defined for bytes >= 0x80.
    const auto digitBefore = [&str](size_t pos)
    {
        return pos != 0 && isdigit(static_cast<unsigned char>(str[pos - 1]));
    };

    if (!digitBefore(end))
        return hasSpace ? end : std::string::npos;

    // Peel off the trailing run of digits.
    while (digitBefore(end))
        --end;

    return end;
}
|
||||
|
||||
#if 0
// Disabled helper: produces a single completion for a (possibly partial)
// Greek letter prefix.
//
// When greekChunkLength() finds no chunk boundary, the whole string is matched
// against the Greek letter tables and the matching abbreviation is returned
// (or s itself when nothing matches). Otherwise the leading chunk is replaced
// by its canonical abbreviation and the remainder of s is appended; s is
// returned unchanged when the chunk has no canonical abbreviation.
static std::string firstGreekAbbrCompletion(const std::string &s)
{
    const size_t sp = greekChunkLength(s);
    if (sp == std::string::npos)
    {
        const int i = findGreekNameIndexBySubstr(s);
        return (i >= 0) ? Greek::getInstance()->abbrevs[i] : s;
    }

    const std::string prefix = s.substr(0, sp);
    const std::string abbr = Greek::canonicalAbbreviation(prefix);

    // Append the remainder to the canonical abbreviation. The previous code
    // appended it to `prefix`, which merely rebuilt `s` and discarded the
    // abbreviation that had just been looked up.
    return abbr.empty() ? s : abbr + s.substr(sp);
}
#endif
|
||||
|
||||
std::vector<std::string> getGreekCompletion(const std::string &s)
|
||||
{
|
||||
std::vector<std::string> ret;
|
||||
if (s.empty())
|
||||
return ret;
|
||||
|
||||
size_t sp = greekChunkLength(s);
|
||||
if (sp == std::string::npos)
|
||||
{
|
||||
sp = UTF8Length(s);
|
||||
for(int i = 0; i >= 0;)
|
||||
{
|
||||
std::string rets;
|
||||
i = findGreekNameIndexBySubstr(s, i, sp);
|
||||
if (i >= 0)
|
||||
{
|
||||
rets = Greek::getInstance()->abbrevs[i];
|
||||
rets += " ";
|
||||
ret.emplace_back(ReplaceGreekLetterAbbr(rets));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string prefix = s.substr(0, sp);
|
||||
std::string rets = Greek::canonicalAbbreviation(prefix);
|
||||
if (!rets.empty())
|
||||
{
|
||||
rets += s.substr(sp);
|
||||
ret.emplace_back(ReplaceGreekLetterAbbr(rets));
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Convenience overload for plain char: reinterprets the value as an unsigned
// byte and forwards it to check(unsigned char).
UTF8Status
UTF8Validator::check(char c)
{
    const auto byte = static_cast<unsigned char>(c);
    return check(byte);
}
|
||||
|
||||
UTF8Status
|
||||
UTF8Validator::check(unsigned char c)
|
||||
{
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// utf8.h
|
||||
//
|
||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||
// 2018-present, Celestia Development Team
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of the GNU General Public License
|
||||
|
@ -11,42 +12,30 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <string_view>
|
||||
|
||||
// UTF-8 byte sequences (written as octal escapes) for frequently used symbols:
// degree sign U+00B0, multiplication sign U+00D7, superscript digits 0-9
// (U+2070, U+00B9, U+00B2, U+00B3, U+2074..U+2079) and the replacement
// character U+FFFD.
#define UTF8_DEGREE_SIGN "\302\260"
|
||||
#define UTF8_MULTIPLICATION_SIGN "\303\227"
|
||||
#define UTF8_SUPERSCRIPT_0 "\342\201\260"
|
||||
#define UTF8_SUPERSCRIPT_1 "\302\271"
|
||||
#define UTF8_SUPERSCRIPT_2 "\302\262"
|
||||
#define UTF8_SUPERSCRIPT_3 "\302\263"
|
||||
#define UTF8_SUPERSCRIPT_4 "\342\201\264"
|
||||
#define UTF8_SUPERSCRIPT_5 "\342\201\265"
|
||||
#define UTF8_SUPERSCRIPT_6 "\342\201\266"
|
||||
#define UTF8_SUPERSCRIPT_7 "\342\201\267"
|
||||
#define UTF8_SUPERSCRIPT_8 "\342\201\270"
|
||||
#define UTF8_SUPERSCRIPT_9 "\342\201\271"
|
||||
#define UTF8_REPLACEMENT_CHAR "\357\277\275"
|
||||
|
||||
|
||||
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
|
||||
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
|
||||
void UTF8Encode(std::uint32_t ch, std::string& dest);
|
||||
int UTF8StringCompare(const std::string& s0, const std::string& s1);
|
||||
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false);
|
||||
bool UTF8Decode(std::string_view str, int pos, wchar_t &ch);
|
||||
void UTF8Encode(std::uint32_t ch, std::string &dest);
|
||||
int UTF8StringCompare(std::string_view s0, std::string_view s1);
|
||||
int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase = false);
|
||||
|
||||
class UTF8StringOrderingPredicate
|
||||
{
|
||||
public:
|
||||
bool operator()(const std::string& s0, const std::string& s1) const
|
||||
bool operator()(std::string_view s0, std::string_view s1) const
|
||||
{
|
||||
return UTF8StringCompare(s0, s1) == -1;
|
||||
}
|
||||
};
|
||||
|
||||
int UTF8Length(std::string_view s);
|
||||
|
||||
int UTF8Length(const std::string& s);
|
||||
|
||||
inline int UTF8EncodedSize(wchar_t ch)
|
||||
constexpr int
|
||||
UTF8EncodedSize(wchar_t ch)
|
||||
{
|
||||
if (ch < 0x80)
|
||||
return 1;
|
||||
|
@ -66,7 +55,8 @@ inline int UTF8EncodedSize(wchar_t ch)
|
|||
#endif
|
||||
}
|
||||
|
||||
constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
|
||||
constexpr int
|
||||
UTF8EncodedSizeChecked(std::uint32_t ch)
|
||||
{
|
||||
if (ch < 0x80)
|
||||
return 1;
|
||||
|
@ -84,76 +74,6 @@ constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Returns the length in bytes of a UTF-8 sequence, judged from its first byte.
//
// Values below 0x80 are single-byte. Lead-byte patterns 110xxxxx through
// 1111110x map to lengths 2 through 6. Anything else (for example a
// continuation byte) is reported as length 1.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    struct LeadPattern
    {
        unsigned int mask;  // bits of the lead byte that are examined
        unsigned int bits;  // value those bits must have
        int          size;  // resulting sequence length
    };

    constexpr LeadPattern patterns[] = {
        { 0xe0u, 0xc0u, 2 },
        { 0xf0u, 0xe0u, 3 },
        { 0xf8u, 0xf0u, 4 },
        { 0xfcu, 0xf8u, 5 },
        { 0xfeu, 0xfcu, 6 },
    };

    for (const LeadPattern &pattern : patterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    return 1;
}
|
||||
|
||||
std::string ReplaceGreekLetterAbbr(const std::string&);
|
||||
#if 0
|
||||
unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
|
||||
#endif
|
||||
|
||||
// Table of Greek letter names and abbreviations, reached through
// getInstance(). Construction and destruction are private, so instances
// cannot be created or destroyed by outside code.
class Greek
{
public:
    // Letters numbered from 1 (Alpha) to 24 (Omega).
    enum Letter
    {
        Alpha   = 1,
        Beta    = 2,
        Gamma   = 3,
        Delta   = 4,
        Epsilon = 5,
        Zeta    = 6,
        Eta     = 7,
        Theta   = 8,
        Iota    = 9,
        Kappa   = 10,
        Lambda  = 11,
        Mu      = 12,
        Nu      = 13,
        Xi      = 14,
        Omicron = 15,
        Pi      = 16,
        Rho     = 17,
        Sigma   = 18,
        Tau     = 19,
        Upsilon = 20,
        Phi     = 21,
        Chi     = 22,
        Psi     = 23,
        Omega   = 24,
    };

    // Returns the canonical abbreviation for the given letter text.
    // NOTE(review): callers test the result with empty(), so an empty string
    // presumably signals "no match" - confirm in the implementation.
    static const std::string& canonicalAbbreviation(const std::string&);

    // Accessor for the shared instance.
    static Greek* getInstance();

    int nLetters;          // number of entries in names / abbrevs
    std::string* names;    // letter names, indexed 0 .. nLetters - 1
    std::string* abbrevs;  // letter abbreviations, same indexing as names

private:
    Greek();
    ~Greek();

    static Greek* m_instance;
};
|
||||
|
||||
std::vector<std::string> getGreekCompletion(const std::string &);
|
||||
|
||||
enum class UTF8Status
|
||||
{
|
||||
Ok,
|
||||
|
@ -164,9 +84,6 @@ enum class UTF8Status
|
|||
class UTF8Validator
|
||||
{
|
||||
public:
|
||||
UTF8Validator() = default;
|
||||
~UTF8Validator() = default;
|
||||
|
||||
UTF8Status check(char c);
|
||||
UTF8Status check(unsigned char c);
|
||||
|
||||
|
@ -185,3 +102,9 @@ private:
|
|||
|
||||
State state{ State::Initial };
|
||||
};
|
||||
|
||||
// Inline overload for plain char: converts the value to an unsigned byte and
// delegates to check(unsigned char).
inline UTF8Status
UTF8Validator::check(char c)
{
    const unsigned char byte = static_cast<unsigned char>(c);
    return check(byte);
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
test_case(charconv_compat)
|
||||
test_case(greek)
|
||||
test_case(hash)
|
||||
test_case(logger)
|
||||
test_case(stellarclass)
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#include <celutil/greek.h>
|
||||
|
||||
#include <catch.hpp>
|
||||
|
||||
TEST_CASE("Greek", "[Greek]")
{
    // An input string paired with the result the function under test must
    // return for it.
    struct Sample
    {
        const char *input;
        const char *expected;
    };

    SECTION("ReplaceGreekLetterAbbr")
    {
        const Sample samples[] = {
            // Upper-case abbreviation, optionally followed by digits and text
            { "XI",       "\316\276" },
            { "XI12",     "\316\276\302\271\302\262" },
            { "XI Foo",   "\316\276 Foo" },
            { "XI12 Bar", "\316\276\302\271\302\262 Bar" },

            // Lower-case abbreviations must come back unchanged
            { "xi",       "xi" },
            { "xi12",     "xi12" },
            { "xi Foo",   "xi Foo" },
            { "xi12 Bar", "xi12 Bar" },

            // A spelled-out letter name must come back unchanged
            { "alpha",    "alpha" },
        };

        for (const Sample &sample : samples)
            REQUIRE(ReplaceGreekLetterAbbr(sample.input) == sample.expected);
    }

    SECTION("ReplaceGreekLetter")
    {
        const Sample samples[] = {
            // Upper-case abbreviation, optionally followed by digits and text
            { "XI",       "\316\276" },
            { "XI12",     "\316\276\302\271\302\262" },
            { "XI Foo",   "\316\276 Foo" },
            { "XI12 Bar", "\316\276\302\271\302\262 Bar" },

            // Lower-case abbreviations are replaced as well
            { "xi",       "\316\276" },
            { "xi12",     "\316\276\302\271\302\262" },
            { "xi Foo",   "\316\276 Foo" },
            { "xi12 Bar", "\316\276\302\271\302\262 Bar" },

            // A spelled-out letter name is replaced too
            { "alpha",    "\316\261" },
        };

        for (const Sample &sample : samples)
            REQUIRE(ReplaceGreekLetter(sample.input) == sample.expected);
    }

    SECTION("GetCanonicalGreekAbbreviation")
    {
        const Sample samples[] = {
            { "xi",    "XI" },
            { "alpha", "ALF" },
        };

        for (const Sample &sample : samples)
            REQUIRE(GetCanonicalGreekAbbreviation(sample.input) == sample.expected);
    }
}
|
Loading…
Reference in New Issue