Split greek-letters handling from utf8 and refactor a bit
parent
eaae852f8f
commit
c251dac856
|
@ -8,6 +8,7 @@
|
||||||
// of the License, or (at your option) any later version.
|
// of the License, or (at your option) any later version.
|
||||||
|
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include <celutil/logger.h>
|
#include <celutil/logger.h>
|
||||||
#include <celutil/tokenizer.h>
|
#include <celutil/tokenizer.h>
|
||||||
#include "stardb.h"
|
#include "stardb.h"
|
||||||
|
|
|
@ -179,7 +179,7 @@ void Console::print(char* s)
|
||||||
while (i < length && validChar)
|
while (i < length && validChar)
|
||||||
{
|
{
|
||||||
wchar_t ch = 0;
|
wchar_t ch = 0;
|
||||||
validChar = UTF8Decode(s, i, length, ch);
|
validChar = UTF8Decode(string_view(s, length), i, ch);
|
||||||
i += UTF8EncodedSize(ch);
|
i += UTF8EncodedSize(ch);
|
||||||
print(ch);
|
print(ch);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include <celutil/logger.h>
|
#include <celutil/logger.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include "name.h"
|
#include "name.h"
|
||||||
|
|
||||||
uint32_t NameDatabase::getNameCount() const
|
uint32_t NameDatabase::getNameCount() const
|
||||||
|
@ -97,41 +98,25 @@ NameDatabase::NumberIndex::const_iterator NameDatabase::getFinalNameIter() const
|
||||||
return numberIndex.end();
|
return numberIndex.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n, bool greek) const
|
std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n) const
|
||||||
{
|
{
|
||||||
if (greek)
|
std::string name2 = ReplaceGreekLetter(name);
|
||||||
{
|
|
||||||
auto compList = getGreekCompletion(name);
|
|
||||||
compList.push_back(name);
|
|
||||||
return getCompletion(compList, i18n);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> completion;
|
std::vector<std::string> completion;
|
||||||
int name_length = UTF8Length(name);
|
const int name_length = UTF8Length(name2);
|
||||||
|
|
||||||
for (NameIndex::const_iterator iter = nameIndex.begin(); iter != nameIndex.end(); ++iter)
|
for (const auto &[n, _] : nameIndex)
|
||||||
{
|
{
|
||||||
if (!UTF8StringCompare(iter->first, name, name_length, true))
|
if (!UTF8StringCompare(n, name2, name_length, true))
|
||||||
completion.push_back(iter->first);
|
completion.push_back(n);
|
||||||
}
|
}
|
||||||
if (i18n)
|
if (i18n)
|
||||||
{
|
{
|
||||||
for (NameIndex::const_iterator iter = localizedNameIndex.begin(); iter != localizedNameIndex.end(); ++iter)
|
for (const auto &[n, _] : localizedNameIndex)
|
||||||
{
|
{
|
||||||
if (!UTF8StringCompare(iter->first, name, name_length, true))
|
if (!UTF8StringCompare(n, name2, name_length, true))
|
||||||
completion.push_back(iter->first);
|
completion.push_back(n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return completion;
|
return completion;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> NameDatabase::getCompletion(const std::vector<std::string> &list, bool i18n) const
|
|
||||||
{
|
|
||||||
std::vector<std::string> completion;
|
|
||||||
for (const auto &n : list)
|
|
||||||
{
|
|
||||||
for (const auto &nn : getCompletion(n, i18n, false))
|
|
||||||
completion.emplace_back(nn);
|
|
||||||
}
|
|
||||||
return completion;
|
|
||||||
}
|
|
||||||
|
|
|
@ -45,8 +45,7 @@ class NameDatabase
|
||||||
NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const;
|
NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const;
|
||||||
NumberIndex::const_iterator getFinalNameIter() const;
|
NumberIndex::const_iterator getFinalNameIter() const;
|
||||||
|
|
||||||
std::vector<std::string> getCompletion(const std::string& name, bool i18n, bool greek = true) const;
|
std::vector<std::string> getCompletion(const std::string& name, bool i18n) const;
|
||||||
std::vector<std::string> getCompletion(const std::vector<std::string> &list, bool i18n) const;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
NameIndex nameIndex;
|
NameIndex nameIndex;
|
||||||
|
|
|
@ -159,7 +159,7 @@ void Overlay::print_impl(const std::string& s)
|
||||||
while (i < length && validChar)
|
while (i < length && validChar)
|
||||||
{
|
{
|
||||||
wchar_t ch = 0;
|
wchar_t ch = 0;
|
||||||
validChar = UTF8Decode(s.c_str(), i, length, ch);
|
validChar = UTF8Decode(s, i, ch);
|
||||||
i += UTF8EncodedSize(ch);
|
i += UTF8EncodedSize(ch);
|
||||||
print(ch);
|
print(ch);
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,8 +10,10 @@
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
#include <celengine/constellation.h>
|
#include <celengine/constellation.h>
|
||||||
#include <celengine/starname.h>
|
#include <celengine/starname.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -61,26 +63,26 @@ uint32_t StarNameDatabase::findCatalogNumberByName(const string& name, bool i18n
|
||||||
// We have a valid constellation as the last part
|
// We have a valid constellation as the last part
|
||||||
// of the name. Next, we see if the first part of
|
// of the name. Next, we see if the first part of
|
||||||
// the name is a greek letter.
|
// the name is a greek letter.
|
||||||
const string& letter = Greek::canonicalAbbreviation(string(prefix, 0, len));
|
std::string_view letter = GetCanonicalGreekAbbreviation(std::string_view(prefix).substr(0, len));
|
||||||
if (!letter.empty())
|
if (!letter.empty())
|
||||||
{
|
{
|
||||||
// Matched . . . this is a Bayer designation
|
// Matched . . . this is a Bayer designation
|
||||||
if (digit == ' ')
|
if (digit == ' ')
|
||||||
{
|
{
|
||||||
priName = letter + ' ' + con->getAbbreviation();
|
priName = fmt::format("{} {}", letter, con->getAbbreviation());
|
||||||
// If 'let con' doesn't match, try using
|
// If 'let con' doesn't match, try using
|
||||||
// 'let1 con' instead.
|
// 'let1 con' instead.
|
||||||
altName = letter + '1' + ' ' + con->getAbbreviation();
|
altName = fmt::format("{}1 {}", letter, con->getAbbreviation());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
priName = letter + digit + ' ' + con->getAbbreviation();
|
priName = fmt::format("{}{} {}", letter, digit, con->getAbbreviation());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Something other than a Bayer designation
|
// Something other than a Bayer designation
|
||||||
priName = prefix + ' ' + con->getAbbreviation();
|
priName = fmt::format("{} {}", prefix, con->getAbbreviation());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isOrbitingStar)
|
if (isOrbitingStar)
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
#include <celmath/mathlib.h>
|
#include <celmath/mathlib.h>
|
||||||
#include <celmath/intersect.h>
|
#include <celmath/intersect.h>
|
||||||
#include <celmath/ray.h>
|
#include <celmath/ray.h>
|
||||||
#include <celutil/utf8.h>
|
#include <celutil/greek.h>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
static const double ANGULAR_RES = 3.5e-6;
|
static const double ANGULAR_RES = 3.5e-6;
|
||||||
|
|
|
@ -970,7 +970,7 @@ void CelestiaCore::charEntered(const char *c_p, int modifiers)
|
||||||
if (textEnterMode & KbAutoComplete)
|
if (textEnterMode & KbAutoComplete)
|
||||||
{
|
{
|
||||||
wchar_t wc = 0; // Null wide character
|
wchar_t wc = 0; // Null wide character
|
||||||
UTF8Decode(c_p, 0, strlen(c_p), wc);
|
UTF8Decode(c_p, 0, wc);
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
if ( wc && (!iscntrl(wc)) )
|
if ( wc && (!iscntrl(wc)) )
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#include <celengine/starbrowser.h>
|
#include <celengine/starbrowser.h>
|
||||||
#include <celengine/stardb.h>
|
#include <celengine/stardb.h>
|
||||||
#include <celengine/univcoord.h>
|
#include <celengine/univcoord.h>
|
||||||
#include <celutil/utf8.h>
|
#include <celutil/greek.h>
|
||||||
|
|
||||||
#include "dialog-star.h"
|
#include "dialog-star.h"
|
||||||
#include "actions.h"
|
#include "actions.h"
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
#include <celengine/simulation.h>
|
#include <celengine/simulation.h>
|
||||||
#include <celestia/celestiacore.h>
|
#include <celestia/celestiacore.h>
|
||||||
#include <celestia/helper.h>
|
#include <celestia/helper.h>
|
||||||
#include <celutil/utf8.h>
|
#include <celutil/greek.h>
|
||||||
|
|
||||||
#include "menu-context.h"
|
#include "menu-context.h"
|
||||||
#include "actions.h"
|
#include "actions.h"
|
||||||
|
|
|
@ -41,6 +41,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include <celutil/tzutil.h>
|
#include <celutil/tzutil.h>
|
||||||
#include "qtappwin.h"
|
#include "qtappwin.h"
|
||||||
#include "qtglwidget.h"
|
#include "qtglwidget.h"
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <celestia/celestiacore.h>
|
#include <celestia/celestiacore.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include "qtcelestialbrowser.h"
|
#include "qtcelestialbrowser.h"
|
||||||
#include "qtcolorswatchwidget.h"
|
#include "qtcolorswatchwidget.h"
|
||||||
#include "qtinfopanel.h"
|
#include "qtinfopanel.h"
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <celestia/celestiacore.h>
|
#include <celestia/celestiacore.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include "qtdeepskybrowser.h"
|
#include "qtdeepskybrowser.h"
|
||||||
#include "qtcolorswatchwidget.h"
|
#include "qtcolorswatchwidget.h"
|
||||||
#include "qtinfopanel.h"
|
#include "qtinfopanel.h"
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
#include <celengine/astro.h>
|
#include <celengine/astro.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
#include <celutil/logger.h>
|
#include <celutil/logger.h>
|
||||||
#include <celutil/utf8.h>
|
#include <celutil/greek.h>
|
||||||
#include <celengine/universe.h>
|
#include <celengine/universe.h>
|
||||||
#include <QTextBrowser>
|
#include <QTextBrowser>
|
||||||
#include <QIODevice>
|
#include <QIODevice>
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include <celengine/axisarrow.h>
|
#include <celengine/axisarrow.h>
|
||||||
#include <celengine/planetgrid.h>
|
#include <celengine/planetgrid.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include <fmt/printf.h>
|
#include <fmt/printf.h>
|
||||||
#include "qtselectionpopup.h"
|
#include "qtselectionpopup.h"
|
||||||
#include "qtappwin.h"
|
#include "qtappwin.h"
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <celestia/celestiacore.h>
|
#include <celestia/celestiacore.h>
|
||||||
#include <celutil/gettext.h>
|
#include <celutil/gettext.h>
|
||||||
|
#include <celutil/greek.h>
|
||||||
#include "qtsolarsystembrowser.h"
|
#include "qtsolarsystembrowser.h"
|
||||||
#include "qtinfopanel.h"
|
#include "qtinfopanel.h"
|
||||||
#include "qtcolorswatchwidget.h"
|
#include "qtcolorswatchwidget.h"
|
||||||
|
|
|
@ -13,6 +13,8 @@ set(CELUTIL_SOURCES
|
||||||
formatnum.h
|
formatnum.h
|
||||||
fsutils.cpp
|
fsutils.cpp
|
||||||
fsutils.h
|
fsutils.h
|
||||||
|
greek.cpp
|
||||||
|
greek.h
|
||||||
logger.cpp
|
logger.cpp
|
||||||
logger.h
|
logger.h
|
||||||
reshandle.h
|
reshandle.h
|
||||||
|
|
|
@ -0,0 +1,270 @@
|
||||||
|
// greek.cpp
|
||||||
|
//
|
||||||
|
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||||
|
// 2018-present, Celestia Development Team
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
#include "greek.h"
|
||||||
|
|
||||||
|
#include "stringutils.h"
|
||||||
|
#include "utf8.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <cctype>
|
||||||
|
|
||||||
|
using namespace std::string_view_literals;
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
constexpr int nLetters = 24;
|
||||||
|
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_0 = "\342\201\260"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_1 = "\302\271"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_2 = "\302\262"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_3 = "\302\263"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_4 = "\342\201\264"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_5 = "\342\201\265"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_6 = "\342\201\266"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_7 = "\342\201\267"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_8 = "\342\201\270"sv;
|
||||||
|
constexpr std::string_view UTF8_SUPERSCRIPT_9 = "\342\201\271"sv;
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
const std::array<std::string_view, nLetters> greekAlphabet =
|
||||||
|
{
|
||||||
|
"Alpha"sv,
|
||||||
|
"Beta"sv,
|
||||||
|
"Gamma"sv,
|
||||||
|
"Delta"sv,
|
||||||
|
"Epsilon"sv,
|
||||||
|
"Zeta"sv,
|
||||||
|
"Eta"sv,
|
||||||
|
"Theta"sv,
|
||||||
|
"Iota"sv,
|
||||||
|
"Kappa"sv,
|
||||||
|
"Lambda"sv,
|
||||||
|
"Mu"sv,
|
||||||
|
"Nu"sv,
|
||||||
|
"Xi"sv,
|
||||||
|
"Omicron"sv,
|
||||||
|
"Pi"sv,
|
||||||
|
"Rho"sv,
|
||||||
|
"Sigma"sv,
|
||||||
|
"Tau"sv,
|
||||||
|
"Upsilon"sv,
|
||||||
|
"Phi"sv,
|
||||||
|
"Chi"sv,
|
||||||
|
"Psi"sv,
|
||||||
|
"Omega"sv
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::array<std::string_view, nLetters> greekAlphabetUTF8 = {
|
||||||
|
"\316\261"sv, // ALF
|
||||||
|
"\316\262"sv, // BET
|
||||||
|
"\316\263"sv, // GAM
|
||||||
|
"\316\264"sv, // DEL
|
||||||
|
"\316\265"sv, // EPS
|
||||||
|
"\316\266"sv, // ZET
|
||||||
|
"\316\267"sv, // ETA
|
||||||
|
"\316\270"sv, // TET
|
||||||
|
"\316\271"sv, // IOT
|
||||||
|
"\316\272"sv, // KAP
|
||||||
|
"\316\273"sv, // LAM
|
||||||
|
"\316\274"sv, // MU
|
||||||
|
"\316\275"sv, // NU
|
||||||
|
"\316\276"sv, // XI
|
||||||
|
"\316\277"sv, // OMI
|
||||||
|
"\317\200"sv, // PI
|
||||||
|
"\317\201"sv, // RHO
|
||||||
|
"\317\203"sv, // SIG
|
||||||
|
"\317\204"sv, // TAU
|
||||||
|
"\317\205"sv, // UPS
|
||||||
|
"\317\206"sv, // PHI
|
||||||
|
"\317\207"sv, // CHI
|
||||||
|
"\317\210"sv, // PSI
|
||||||
|
"\317\211"sv, // OME
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::array<std::string_view, nLetters> canonicalAbbrevs =
|
||||||
|
{
|
||||||
|
"ALF"sv,
|
||||||
|
"BET"sv,
|
||||||
|
"GAM"sv,
|
||||||
|
"DEL"sv,
|
||||||
|
"EPS"sv,
|
||||||
|
"ZET"sv,
|
||||||
|
"ETA"sv,
|
||||||
|
"TET"sv,
|
||||||
|
"IOT"sv,
|
||||||
|
"KAP"sv,
|
||||||
|
"LAM"sv,
|
||||||
|
"MU"sv,
|
||||||
|
"NU"sv,
|
||||||
|
"XI"sv,
|
||||||
|
"OMI"sv,
|
||||||
|
"PI"sv,
|
||||||
|
"RHO"sv,
|
||||||
|
"SIG"sv,
|
||||||
|
"TAU"sv,
|
||||||
|
"UPS"sv,
|
||||||
|
"PHI"sv,
|
||||||
|
"CHI"sv,
|
||||||
|
"PSI"sv,
|
||||||
|
"OME"sv,
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
/**
 * Returns the length in bytes of the first word of str with any
 * trailing ASCII digits removed.
 *
 * The first word ends at the first space, or at the end of the string
 * when there is no space. Digits directly before that point are not
 * counted, so the result is 0 when the word is empty or all digits.
 */
std::string_view::size_type
getFirstWordLength(std::string_view str)
{
    auto sp = str.find(' ');
    if (sp == std::string_view::npos)
        sp = str.length();

    // Skip trailing digits. The byte is converted to unsigned char first:
    // str may hold UTF-8 data, and passing a negative char value to
    // std::isdigit is undefined behaviour.
    while (sp > 0 && std::isdigit(static_cast<unsigned char>(str[sp - 1])) != 0)
        sp--;

    return sp;
}
|
||||||
|
|
||||||
|
std::string_view
|
||||||
|
toSuperscript(char c)
|
||||||
|
{
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case '0':
|
||||||
|
return UTF8_SUPERSCRIPT_0;
|
||||||
|
case '1':
|
||||||
|
return UTF8_SUPERSCRIPT_1;
|
||||||
|
case '2':
|
||||||
|
return UTF8_SUPERSCRIPT_2;
|
||||||
|
case '3':
|
||||||
|
return UTF8_SUPERSCRIPT_3;
|
||||||
|
case '4':
|
||||||
|
return UTF8_SUPERSCRIPT_4;
|
||||||
|
case '5':
|
||||||
|
return UTF8_SUPERSCRIPT_5;
|
||||||
|
case '6':
|
||||||
|
return UTF8_SUPERSCRIPT_6;
|
||||||
|
case '7':
|
||||||
|
return UTF8_SUPERSCRIPT_7;
|
||||||
|
case '8':
|
||||||
|
return UTF8_SUPERSCRIPT_8;
|
||||||
|
case '9':
|
||||||
|
return UTF8_SUPERSCRIPT_9;
|
||||||
|
default:
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces the Greek letter abbreviation at the beginning
|
||||||
|
* of a string by the UTF-8 representation of that letter.
|
||||||
|
* Also, replaces digits following Greek letters with UTF-8
|
||||||
|
* superscripts.
|
||||||
|
*/
|
||||||
|
std::string
|
||||||
|
ReplaceGreekLetterAbbr(std::string_view str)
|
||||||
|
{
|
||||||
|
if (str.empty())
|
||||||
|
return {};
|
||||||
|
|
||||||
|
if (auto len = getFirstWordLength(str); len > 0 && str[0] >= 'A' && str[0] <= 'Z')
|
||||||
|
{
|
||||||
|
// Linear search through all letter abbreviations
|
||||||
|
for (int i = 0; i < nLetters; i++)
|
||||||
|
{
|
||||||
|
auto prefix = canonicalAbbrevs[i];
|
||||||
|
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||||
|
{
|
||||||
|
prefix = greekAlphabet[i];
|
||||||
|
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ret(greekAlphabetUTF8[i]);
|
||||||
|
for (; str.length() > len && std::isdigit(str[len]); len++)
|
||||||
|
ret.append(toSuperscript(str[len]));
|
||||||
|
ret.append(str.substr(len));
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::string(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns canonical greek abbreviation for a letter passed.
|
||||||
|
* The letter can be: latin name of a greek letter, canonical
|
||||||
|
* representation of it or a greek letter itself in UTF-8.
|
||||||
|
*/
|
||||||
|
std::string_view
|
||||||
|
GetCanonicalGreekAbbreviation(std::string_view letter)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < nLetters; i++)
|
||||||
|
{
|
||||||
|
if (compareIgnoringCase(letter, greekAlphabet[i]) == 0
|
||||||
|
|| compareIgnoringCase(letter, canonicalAbbrevs[i]) == 0)
|
||||||
|
{
|
||||||
|
return canonicalAbbrevs[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (letter.length() == 2)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < nLetters; i++)
|
||||||
|
{
|
||||||
|
if (letter == greekAlphabetUTF8[i]) return canonicalAbbrevs[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces the Greek letter or abbreviation at the beginning
|
||||||
|
* of a string by the UTF-8 representation of that letter.
|
||||||
|
* Also, replaces digits following Greek letters with UTF-8
|
||||||
|
* superscripts.
|
||||||
|
*/
|
||||||
|
std::string
|
||||||
|
ReplaceGreekLetter(std::string_view str)
|
||||||
|
{
|
||||||
|
if (str.empty()) return {};
|
||||||
|
|
||||||
|
if (auto len = getFirstWordLength(str); len > 0)
|
||||||
|
{
|
||||||
|
// Linear search through all letter abbreviations
|
||||||
|
for (int i = 0; i < nLetters; i++)
|
||||||
|
{
|
||||||
|
if (len != 2 || str != greekAlphabetUTF8[i])
|
||||||
|
{
|
||||||
|
auto prefix = canonicalAbbrevs[i];
|
||||||
|
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||||
|
{
|
||||||
|
prefix = greekAlphabet[i];
|
||||||
|
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ret(greekAlphabetUTF8[i]);
|
||||||
|
for (; str.length() > len && std::isdigit(str[len]); len++)
|
||||||
|
ret.append(toSuperscript(str[len]));
|
||||||
|
ret.append(str.substr(len));
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::string(str);
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
// greek.h
|
||||||
|
//
|
||||||
|
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||||
|
// 2018-present, Celestia Development Team
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License
|
||||||
|
// as published by the Free Software Foundation; either version 2
|
||||||
|
// of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <string_view>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
std::string ReplaceGreekLetterAbbr(std::string_view str);
|
||||||
|
std::string ReplaceGreekLetter(std::string_view str);
|
||||||
|
std::string_view GetCanonicalGreekAbbreviation(std::string_view letter);
|
|
@ -1,20 +1,21 @@
|
||||||
// utf8.cpp
|
// utf8.cpp
|
||||||
//
|
//
|
||||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||||
|
// 2018-present, Celestia Development Team
|
||||||
//
|
//
|
||||||
// This program is free software; you can redistribute it and/or
|
// This program is free software; you can redistribute it and/or
|
||||||
// modify it under the terms of the GNU General Public License
|
// modify it under the terms of the GNU General Public License
|
||||||
// as published by the Free Software Foundation; either version 2
|
// as published by the Free Software Foundation; either version 2
|
||||||
// of the License, or (at your option) any later version.
|
// of the License, or (at your option) any later version.
|
||||||
|
|
||||||
#include <cctype>
|
|
||||||
#include <cstring>
|
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#include <climits>
|
|
||||||
#include <fmt/printf.h>
|
|
||||||
#include "stringutils.h"
|
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
|
||||||
uint16_t WGL4_Normalization_00[256] = {
|
uint16_t WGL4_Normalization_00[256] = {
|
||||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||||
|
@ -300,11 +301,45 @@ uint16_t* WGL4NormalizationTables[256] = {
|
||||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
//! Return the length in bytes of a UTF-8 sequence, judged from its first
//! byte. Values below 0x80 and bytes that match no lead-byte pattern both
//! give 1. Only the low eight bits are inspected for the lead-byte
//! patterns.
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    // Lead-byte bit patterns for sequences of two to six bytes.
    struct LeadPattern
    {
        unsigned int mask;
        unsigned int bits;
        int size;
    };
    static constexpr LeadPattern patterns[] =
    {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };

    for (const auto& pattern : patterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    return 1;
}
|
||||||
|
|
||||||
|
inline wchar_t UTF8Normalize(wchar_t ch)
|
||||||
|
{
|
||||||
|
auto page = (unsigned int) ch >> 8;
|
||||||
|
if (page >= 256)
|
||||||
|
return ch;
|
||||||
|
|
||||||
|
uint16_t* normTable = WGL4NormalizationTables[page];
|
||||||
|
if (normTable == nullptr)
|
||||||
|
return ch;
|
||||||
|
|
||||||
|
return (wchar_t) normTable[(unsigned int) ch & 0xff];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
//! Decode the UTF-8 characters in string str beginning at position pos.
|
//! Decode the UTF-8 characters in string str beginning at position pos.
|
||||||
//! The decoded character is returned in ch; the return value of the function
|
//! The decoded character is returned in ch; the return value of the function
|
||||||
//! is true if a valid UTF-8 sequence was successfully decoded.
|
//! is true if a valid UTF-8 sequence was successfully decoded.
|
||||||
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
|
bool UTF8Decode(std::string_view str, int pos, wchar_t& ch)
|
||||||
{
|
{
|
||||||
auto c0 = (unsigned int) str[pos];
|
auto c0 = (unsigned int) str[pos];
|
||||||
int charlen = UTF8EncodedSizeFromFirstByte(c0);
|
int charlen = UTF8EncodedSizeFromFirstByte(c0);
|
||||||
|
@ -362,69 +397,6 @@ bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//! Decode the UTF-8 characters in string str beginning at position pos.
|
|
||||||
//! The decoded character is returned in ch; the return value of the function
|
|
||||||
//! is true if a valid UTF-8 sequence was successfully decoded.
|
|
||||||
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch)
|
|
||||||
{
|
|
||||||
auto c0 = (unsigned int) str[pos];
|
|
||||||
int charlen = UTF8EncodedSizeFromFirstByte(c0);
|
|
||||||
|
|
||||||
// Bad UTF-8 character that extends past end of string
|
|
||||||
if (pos + charlen > length)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// TODO: Should check that the bytes of characters after the first are all
|
|
||||||
// of the form 01xxxxxx
|
|
||||||
// TODO: Need to reject overlong encoding sequences
|
|
||||||
|
|
||||||
switch (charlen)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
ch = c0;
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case 3:
|
|
||||||
ch = ((c0 & 0x0f) << 12) |
|
|
||||||
(((unsigned int) str[pos + 1] & 0x3f) << 6) |
|
|
||||||
((unsigned int) str[pos + 2] & 0x3f);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case 4:
|
|
||||||
ch = ((c0 & 0x07) << 18) |
|
|
||||||
(((unsigned int) str[pos + 1] & 0x3f) << 12) |
|
|
||||||
(((unsigned int) str[pos + 2] & 0x3f) << 6) |
|
|
||||||
((unsigned int) str[pos + 3] & 0x3f);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case 5:
|
|
||||||
ch = ((c0 & 0x03) << 24) |
|
|
||||||
(((unsigned int) str[pos + 1] & 0x3f) << 18) |
|
|
||||||
(((unsigned int) str[pos + 2] & 0x3f) << 12) |
|
|
||||||
(((unsigned int) str[pos + 3] & 0x3f) << 6) |
|
|
||||||
((unsigned int) str[pos + 4] & 0x3f);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
case 6:
|
|
||||||
ch = ((c0 & 0x01) << 30) |
|
|
||||||
(((unsigned int) str[pos + 1] & 0x3f) << 24) |
|
|
||||||
(((unsigned int) str[pos + 2] & 0x3f) << 18) |
|
|
||||||
(((unsigned int) str[pos + 3] & 0x3f) << 12) |
|
|
||||||
(((unsigned int) str[pos + 4] & 0x3f) << 6) |
|
|
||||||
((unsigned int) str[pos + 5] & 0x3f);
|
|
||||||
return true;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//! Appends the UTF-8 encoded version of the code point ch to the
|
//! Appends the UTF-8 encoded version of the code point ch to the
|
||||||
//! destination string
|
//! destination string
|
||||||
void UTF8Encode(std::uint32_t ch, std::string& dest)
|
void UTF8Encode(std::uint32_t ch, std::string& dest)
|
||||||
|
@ -469,9 +441,8 @@ void UTF8Encode(std::uint32_t ch, std::string& dest)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//! Return the number of characters encoded by a UTF-8 string
|
//! Return the number of characters encoded by a UTF-8 string
|
||||||
int UTF8Length(const std::string& s)
|
int UTF8Length(std::string_view s)
|
||||||
{
|
{
|
||||||
int len = s.length();
|
int len = s.length();
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -485,25 +456,10 @@ int UTF8Length(const std::string& s)
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline wchar_t UTF8Normalize(wchar_t ch)
|
|
||||||
{
|
|
||||||
auto page = (unsigned int) ch >> 8;
|
|
||||||
if (page >= 256)
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
uint16_t* normTable = WGL4NormalizationTables[page];
|
|
||||||
if (normTable == nullptr)
|
|
||||||
return ch;
|
|
||||||
|
|
||||||
return (wchar_t) normTable[(unsigned int) ch & 0xff];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//! Perform a normalized comparison of two UTF-8 strings. The normalization
|
//! Perform a normalized comparison of two UTF-8 strings. The normalization
|
||||||
//! only works for characters in the WGL-4 subset, and no multicharacter
|
//! only works for characters in the WGL-4 subset, and no multicharacter
|
||||||
//! translations are performed.
|
//! translations are performed.
|
||||||
int UTF8StringCompare(const std::string& s0, const std::string& s1)
|
int UTF8StringCompare(std::string_view s0, std::string_view s1)
|
||||||
{
|
{
|
||||||
int len0 = s0.length();
|
int len0 = s0.length();
|
||||||
int len1 = s1.length();
|
int len1 = s1.length();
|
||||||
|
@ -542,7 +498,7 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase)
|
int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase)
|
||||||
{
|
{
|
||||||
int len0 = s0.length();
|
int len0 = s0.length();
|
||||||
int len1 = s1.length();
|
int len1 = s1.length();
|
||||||
|
@ -588,432 +544,6 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bo
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
//! Currently incomplete, but could be a helpful class for dealing with
|
|
||||||
//! UTF-8 streams
|
|
||||||
class UTF8StringIterator
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
UTF8StringIterator(const std::string& _str) : str(_str) {};
|
|
||||||
UTF8StringIterator(const UTF8StringIterator& iter) = default;
|
|
||||||
|
|
||||||
UTF8StringIterator& operator++();
|
|
||||||
UTF8StringIterator& operator++(int);
|
|
||||||
|
|
||||||
private:
|
|
||||||
const std::string& str;
|
|
||||||
int position{ 0 };
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
UTF8StringIterator& UTF8StringIterator::operator++()
|
|
||||||
{
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
UTF8StringIterator& UTF8StringIterator::operator++(int)
|
|
||||||
{
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
static const char *greekAlphabet[] =
|
|
||||||
{
|
|
||||||
"Alpha",
|
|
||||||
"Beta",
|
|
||||||
"Gamma",
|
|
||||||
"Delta",
|
|
||||||
"Epsilon",
|
|
||||||
"Zeta",
|
|
||||||
"Eta",
|
|
||||||
"Theta",
|
|
||||||
"Iota",
|
|
||||||
"Kappa",
|
|
||||||
"Lambda",
|
|
||||||
"Mu",
|
|
||||||
"Nu",
|
|
||||||
"Xi",
|
|
||||||
"Omicron",
|
|
||||||
"Pi",
|
|
||||||
"Rho",
|
|
||||||
"Sigma",
|
|
||||||
"Tau",
|
|
||||||
"Upsilon",
|
|
||||||
"Phi",
|
|
||||||
"Chi",
|
|
||||||
"Psi",
|
|
||||||
"Omega"
|
|
||||||
};
|
|
||||||
|
|
||||||
static const char* greekAlphabetUTF8[] =
|
|
||||||
{
|
|
||||||
"\316\261",
|
|
||||||
"\316\262",
|
|
||||||
"\316\263",
|
|
||||||
"\316\264",
|
|
||||||
"\316\265",
|
|
||||||
"\316\266",
|
|
||||||
"\316\267",
|
|
||||||
"\316\270",
|
|
||||||
"\316\271",
|
|
||||||
"\316\272",
|
|
||||||
"\316\273",
|
|
||||||
"\316\274",
|
|
||||||
"\316\275",
|
|
||||||
"\316\276",
|
|
||||||
"\316\277",
|
|
||||||
"\317\200",
|
|
||||||
"\317\201",
|
|
||||||
"\317\203",
|
|
||||||
"\317\204",
|
|
||||||
"\317\205",
|
|
||||||
"\317\206",
|
|
||||||
"\317\207",
|
|
||||||
"\317\210",
|
|
||||||
"\317\211",
|
|
||||||
};
|
|
||||||
|
|
||||||
// Upper-case abbreviations of the Greek letters, index-aligned with
// greekAlphabet.
static const char* canonicalAbbrevs[] =
{
    "ALF", "BET", "GAM", "DEL", "EPS", "ZET",
    "ETA", "TET", "IOT", "KAP", "LAM", "MU",
    "NU",  "XI",  "OMI", "PI",  "RHO", "SIG",
    "TAU", "UPS", "PHI", "CHI", "PSI", "OME",
};

// Empty string handed back by Greek::canonicalAbbreviation when its argument
// matches no letter.
static std::string noAbbrev;
|
|
||||||
|
|
||||||
// Greek alphabet crud . . . should probably be moved to its own module.
|
|
||||||
|
|
||||||
static size_t greekChunkLength(const std::string&);
|
|
||||||
|
|
||||||
// Pointer to the lazily created singleton; null until getInstance() is first
// called.
Greek* Greek::m_instance = nullptr;

//! Returns the shared Greek object, allocating it on the first call.
//! Nothing in this function ever deletes the object.
Greek* Greek::getInstance()
{
    if (m_instance != nullptr)
        return m_instance;

    m_instance = new Greek();
    return m_instance;
}
|
|
||||||
|
|
||||||
//! Builds the std::string copies of the letter tables: names[] from
//! greekAlphabet and abbrevs[] from canonicalAbbrevs. The letter count is
//! taken from the size of greekAlphabet.
Greek::Greek() :
    nLetters(sizeof(greekAlphabet) / sizeof(greekAlphabet[0])),
    names(new std::string[nLetters]),
    abbrevs(new std::string[nLetters])
{
    for (int idx = 0; idx != nLetters; ++idx)
    {
        names[idx] = greekAlphabet[idx];
        abbrevs[idx] = canonicalAbbrevs[idx];
    }
}
|
|
||||||
|
|
||||||
//! Frees the two string arrays allocated by the constructor.
Greek::~Greek()
{
    // Released in reverse order of allocation.
    delete[] abbrevs;
    delete[] names;
}
|
|
||||||
|
|
||||||
const std::string& Greek::canonicalAbbreviation(const std::string& letter)
|
|
||||||
{
|
|
||||||
Greek *instance = Greek::getInstance();
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
if (compareIgnoringCase(letter, instance->names[i]) == 0)
|
|
||||||
return instance->abbrevs[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0)
|
|
||||||
return instance->abbrevs[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (letter.length() == 2)
|
|
||||||
{
|
|
||||||
for (i = 0; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
if (letter[0] == greekAlphabetUTF8[i][0] &&
|
|
||||||
letter[1] == greekAlphabetUTF8[i][1])
|
|
||||||
{
|
|
||||||
return instance->abbrevs[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return noAbbrev;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char* toSuperscript(char c)
|
|
||||||
{
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case '0':
|
|
||||||
return UTF8_SUPERSCRIPT_0;
|
|
||||||
case '1':
|
|
||||||
return UTF8_SUPERSCRIPT_1;
|
|
||||||
case '2':
|
|
||||||
return UTF8_SUPERSCRIPT_2;
|
|
||||||
case '3':
|
|
||||||
return UTF8_SUPERSCRIPT_3;
|
|
||||||
case '4':
|
|
||||||
return UTF8_SUPERSCRIPT_4;
|
|
||||||
case '5':
|
|
||||||
return UTF8_SUPERSCRIPT_5;
|
|
||||||
case '6':
|
|
||||||
return UTF8_SUPERSCRIPT_6;
|
|
||||||
case '7':
|
|
||||||
return UTF8_SUPERSCRIPT_7;
|
|
||||||
case '8':
|
|
||||||
return UTF8_SUPERSCRIPT_8;
|
|
||||||
case '9':
|
|
||||||
return UTF8_SUPERSCRIPT_9;
|
|
||||||
default:
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Replaces the Greek letter abbreviation at the beginning
|
|
||||||
//! of a string by the UTF-8 representation of that letter.
|
|
||||||
//! Also, replace digits following Greek letters with UTF-8
|
|
||||||
//! superscripts.
|
|
||||||
std::string ReplaceGreekLetterAbbr(const std::string& str)
|
|
||||||
{
|
|
||||||
Greek *instance = Greek::getInstance();
|
|
||||||
size_t len = greekChunkLength(str);
|
|
||||||
|
|
||||||
if (str[0] >= 'A' && str[0] <= 'Z')
|
|
||||||
{
|
|
||||||
// Linear search through all letter abbreviations
|
|
||||||
for (int i = 0; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
std::string prefix = instance->abbrevs[i];
|
|
||||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
|
||||||
{
|
|
||||||
prefix = instance->names[i];
|
|
||||||
if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ret = greekAlphabetUTF8[i];
|
|
||||||
auto len = prefix.length();
|
|
||||||
for (; str.length() > len && isdigit(str[len]); len++)
|
|
||||||
ret += toSuperscript(str[len]);
|
|
||||||
ret += str.substr(len);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
//! C-string variant of ReplaceGreekLetterAbbr (currently compiled out).
//! Replaces a canonical abbreviation at the start of src by the UTF-8
//! encoding of the letter, and a single digit directly after it by its
//! UTF-8 superscript. If the converted text would not fit into dstSize
//! bytes (terminator included), or src does not start with an
//! abbreviation, src is copied and truncated as needed instead.
//! Returns the number of characters written to dst, not counting the
//! zero terminator.
#if 0
unsigned int
ReplaceGreekLetterAbbr(char *dst, unsigned int dstSize, const char* src, unsigned int srcLength)
{
    Greek *greek = Greek::getInstance();
    const bool twoCapitals = src[0] >= 'A' && src[0] <= 'Z' &&
                             src[1] >= 'A' && src[1] <= 'Z';

    if (twoCapitals)
    {
        const unsigned int letterCount = static_cast<unsigned int>(greek->nLetters);

        // Linear search through all letter abbreviations
        for (unsigned int letter = 0; letter < letterCount; letter++)
        {
            const char* abbr = canonicalAbbrevs[letter];
            const unsigned int abbrLen = strlen(abbr);

            // A match requires src to begin with the complete abbreviation.
            if (strncmp(abbr, src, abbrLen) != 0)
                continue;

            const char* superscript = toSuperscript(src[abbrLen]);
            const char* utfGreek = greekAlphabetUTF8[letter];
            const unsigned int utfGreekLen = strlen(utfGreek);

            unsigned int needed = srcLength;
            if (utfGreekLen > abbrLen)
                needed += utfGreekLen - abbrLen;
            if (superscript != nullptr)
                needed += strlen(superscript) - 1;

            // If there's not enough room, give up translating and just copy as much as possible
            if (needed + 1 > dstSize)
                break;

            char* out = dst;
            for (const char* p = utfGreek; *p != '\0'; ++p)
                *out++ = *p;

            // The digit that was turned into a superscript is skipped in src.
            const char* rest = src + abbrLen;
            if (superscript != nullptr)
            {
                for (const char* p = superscript; *p != '\0'; ++p)
                    *out++ = *p;
                ++rest;
            }

            for (; *rest != '\0'; ++rest)
                *out++ = *rest;
            *out = '\0';

            return static_cast<unsigned int>(out - dst);
        }
    }

    strncpy(dst, src, dstSize);
    if (dstSize > srcLength)
        return srcLength;

    if (dstSize == 0)
        return 0;

    // strncpy() leaves dst unterminated when src fills the whole buffer.
    dst[dstSize - 1] = '\0';
    return dstSize - 1;
}
#endif
|
|
||||||
|
|
||||||
static int findGreekNameIndexBySubstr(const std::string &, int = 0, unsigned int = UINT_MAX);
|
|
||||||
#if 0
|
|
||||||
static std::string firstGreekAbbrCompletion(const std::string &);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
bool inline isSubstringIgnoringCase(const std::string &s0, const std::string &s1, size_t n)
|
|
||||||
{
|
|
||||||
return UTF8StringCompare(s0, s1, n, true) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int findGreekNameIndexBySubstr(const std::string &s, int start, unsigned int n)
|
|
||||||
{
|
|
||||||
Greek *instance = Greek::getInstance();
|
|
||||||
|
|
||||||
if (s.empty())
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
for (int i = start; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
if (isSubstringIgnoringCase(instance->names[i], s, n))
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = start; i < instance->nLetters; i++)
|
|
||||||
{
|
|
||||||
if (isSubstringIgnoringCase(instance->abbrevs[i], s, n))
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Computes the length of the leading chunk of str that may name a Greek
//! letter.
//!
//! The chunk ends at the first space, or at the end of the string when
//! there is no space; digits directly before that point are excluded.
//!
//! @return the chunk length, or std::string::npos when str contains no
//!         space and does not end in a digit
static size_t greekChunkLength(const std::string& str)
{
    const size_t space = str.find(' ');
    const bool noSpace = (space == std::string::npos);
    size_t end = noSpace ? str.length() : space;

    const auto digitBefore = [&str](size_t pos)
    {
        return pos != 0 && isdigit(static_cast<unsigned char>(str[pos - 1])) != 0;
    };

    if (!digitBefore(end))
        return noSpace ? std::string::npos : end;

    // Step back over the run of digits that precedes the chunk end.
    do
    {
        --end;
    } while (digitBefore(end));

    return end;
}
|
|
||||||
|
|
||||||
#if 0
//! Returns s with its leading Greek letter chunk replaced by the canonical
//! abbreviation (currently compiled out).
//!
//! When greekChunkLength() reports no complete chunk, s is treated as a
//! partial letter name and the abbreviation of the first matching letter is
//! returned. Otherwise the chunk is converted with
//! Greek::canonicalAbbreviation() and the remainder of s is appended.
//! In both cases s itself is returned when no letter matches.
static std::string firstGreekAbbrCompletion(const std::string &s)
{
    size_t sp = greekChunkLength(s);
    if (sp == std::string::npos)
    {
        int i = findGreekNameIndexBySubstr(s);
        return (i >= 0) ? Greek::getInstance()->abbrevs[i] : s;
    }

    // Append the tail to the abbreviation, not to the original prefix;
    // using the prefix would just rebuild s.
    const std::string& abbrev = Greek::canonicalAbbreviation(s.substr(0, sp));
    return abbrev.empty() ? s : abbrev + s.substr(sp);
}
#endif
|
|
||||||
|
|
||||||
std::vector<std::string> getGreekCompletion(const std::string &s)
|
|
||||||
{
|
|
||||||
std::vector<std::string> ret;
|
|
||||||
if (s.empty())
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
size_t sp = greekChunkLength(s);
|
|
||||||
if (sp == std::string::npos)
|
|
||||||
{
|
|
||||||
sp = UTF8Length(s);
|
|
||||||
for(int i = 0; i >= 0;)
|
|
||||||
{
|
|
||||||
std::string rets;
|
|
||||||
i = findGreekNameIndexBySubstr(s, i, sp);
|
|
||||||
if (i >= 0)
|
|
||||||
{
|
|
||||||
rets = Greek::getInstance()->abbrevs[i];
|
|
||||||
rets += " ";
|
|
||||||
ret.emplace_back(ReplaceGreekLetterAbbr(rets));
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::string prefix = s.substr(0, sp);
|
|
||||||
std::string rets = Greek::canonicalAbbreviation(prefix);
|
|
||||||
if (!rets.empty())
|
|
||||||
{
|
|
||||||
rets += s.substr(sp);
|
|
||||||
ret.emplace_back(ReplaceGreekLetterAbbr(rets));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
//! Convenience overload: converts c to unsigned char and forwards to
//! check(unsigned char).
UTF8Status
UTF8Validator::check(char c)
{
    const auto byte = static_cast<unsigned char>(c);
    return check(byte);
}
|
|
||||||
|
|
||||||
UTF8Status
|
UTF8Status
|
||||||
UTF8Validator::check(unsigned char c)
|
UTF8Validator::check(unsigned char c)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// utf8.h
|
// utf8.h
|
||||||
//
|
//
|
||||||
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
|
||||||
|
// 2018-present, Celestia Development Team
|
||||||
//
|
//
|
||||||
// This program is free software; you can redistribute it and/or
|
// This program is free software; you can redistribute it and/or
|
||||||
// modify it under the terms of the GNU General Public License
|
// modify it under the terms of the GNU General Public License
|
||||||
|
@ -11,42 +12,30 @@
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <string_view>
|
||||||
|
|
||||||
#define UTF8_DEGREE_SIGN "\302\260"
|
#define UTF8_DEGREE_SIGN "\302\260"
|
||||||
#define UTF8_MULTIPLICATION_SIGN "\303\227"
|
#define UTF8_MULTIPLICATION_SIGN "\303\227"
|
||||||
#define UTF8_SUPERSCRIPT_0 "\342\201\260"
|
|
||||||
#define UTF8_SUPERSCRIPT_1 "\302\271"
|
|
||||||
#define UTF8_SUPERSCRIPT_2 "\302\262"
|
|
||||||
#define UTF8_SUPERSCRIPT_3 "\302\263"
|
|
||||||
#define UTF8_SUPERSCRIPT_4 "\342\201\264"
|
|
||||||
#define UTF8_SUPERSCRIPT_5 "\342\201\265"
|
|
||||||
#define UTF8_SUPERSCRIPT_6 "\342\201\266"
|
|
||||||
#define UTF8_SUPERSCRIPT_7 "\342\201\267"
|
|
||||||
#define UTF8_SUPERSCRIPT_8 "\342\201\270"
|
|
||||||
#define UTF8_SUPERSCRIPT_9 "\342\201\271"
|
|
||||||
#define UTF8_REPLACEMENT_CHAR "\357\277\275"
|
#define UTF8_REPLACEMENT_CHAR "\357\277\275"
|
||||||
|
|
||||||
|
bool UTF8Decode(std::string_view str, int pos, wchar_t &ch);
|
||||||
bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
|
void UTF8Encode(std::uint32_t ch, std::string &dest);
|
||||||
bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
|
int UTF8StringCompare(std::string_view s0, std::string_view s1);
|
||||||
void UTF8Encode(std::uint32_t ch, std::string& dest);
|
int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase = false);
|
||||||
int UTF8StringCompare(const std::string& s0, const std::string& s1);
|
|
||||||
int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false);
|
|
||||||
|
|
||||||
class UTF8StringOrderingPredicate
|
class UTF8StringOrderingPredicate
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
bool operator()(const std::string& s0, const std::string& s1) const
|
bool operator()(std::string_view s0, std::string_view s1) const
|
||||||
{
|
{
|
||||||
return UTF8StringCompare(s0, s1) == -1;
|
return UTF8StringCompare(s0, s1) == -1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
int UTF8Length(std::string_view s);
|
||||||
|
|
||||||
int UTF8Length(const std::string& s);
|
constexpr int
|
||||||
|
UTF8EncodedSize(wchar_t ch)
|
||||||
inline int UTF8EncodedSize(wchar_t ch)
|
|
||||||
{
|
{
|
||||||
if (ch < 0x80)
|
if (ch < 0x80)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -66,7 +55,8 @@ inline int UTF8EncodedSize(wchar_t ch)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
|
constexpr int
|
||||||
|
UTF8EncodedSizeChecked(std::uint32_t ch)
|
||||||
{
|
{
|
||||||
if (ch < 0x80)
|
if (ch < 0x80)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -84,76 +74,6 @@ constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Returns the length in bytes of a UTF-8 sequence, judged from its first
//! byte.
//!
//! @param ch  value of the first byte
//! @return 1 for values below 0x80, 2 to 6 for the lead-byte bit patterns
//!         110xxxxx through 1111110x, and 1 for anything else
inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
{
    if (ch < 0x80)
        return 1;

    struct LeadPattern
    {
        unsigned int mask;
        unsigned int bits;
        int size;
    };
    static const LeadPattern patterns[] =
    {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };

    for (const LeadPattern& pattern : patterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    // No lead-byte pattern matched.
    return 1;
}
|
|
||||||
|
|
||||||
std::string ReplaceGreekLetterAbbr(const std::string&);
|
|
||||||
#if 0
|
|
||||||
unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//! Singleton holding the Greek letter names and their abbreviations as
//! std::string arrays. The constructor and destructor are private; the
//! object is obtained through getInstance().
class Greek
{
public:
    //! Letters numbered 1 (Alpha) through 24 (Omega).
    enum Letter
    {
        Alpha = 1,
        Beta,
        Gamma,
        Delta,
        Epsilon,
        Zeta,
        Eta,
        Theta,
        Iota,
        Kappa,
        Lambda,
        Mu,
        Nu,
        Xi,
        Omicron,
        Pi,
        Rho,
        Sigma,
        Tau,
        Upsilon,
        Phi,
        Chi,
        Psi,
        Omega,
    };

    static const std::string& canonicalAbbreviation(const std::string&);
    static Greek* getInstance();

    int nLetters;          // number of entries in names and abbrevs
    std::string* names;    // letter names
    std::string* abbrevs;  // letter abbreviations

private:
    Greek();
    ~Greek();

    static Greek* m_instance;
};
|
|
||||||
|
|
||||||
std::vector<std::string> getGreekCompletion(const std::string &);
|
|
||||||
|
|
||||||
enum class UTF8Status
|
enum class UTF8Status
|
||||||
{
|
{
|
||||||
Ok,
|
Ok,
|
||||||
|
@ -164,9 +84,6 @@ enum class UTF8Status
|
||||||
class UTF8Validator
|
class UTF8Validator
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
UTF8Validator() = default;
|
|
||||||
~UTF8Validator() = default;
|
|
||||||
|
|
||||||
UTF8Status check(char c);
|
UTF8Status check(char c);
|
||||||
UTF8Status check(unsigned char c);
|
UTF8Status check(unsigned char c);
|
||||||
|
|
||||||
|
@ -185,3 +102,9 @@ private:
|
||||||
|
|
||||||
State state{ State::Initial };
|
State state{ State::Initial };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline UTF8Status
|
||||||
|
UTF8Validator::check(char c)
|
||||||
|
{
|
||||||
|
return check(static_cast<unsigned char>(c));
|
||||||
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
test_case(charconv_compat)
|
test_case(charconv_compat)
|
||||||
|
test_case(greek)
|
||||||
test_case(hash)
|
test_case(hash)
|
||||||
test_case(logger)
|
test_case(logger)
|
||||||
test_case(stellarclass)
|
test_case(stellarclass)
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
#include <celutil/greek.h>
|
||||||
|
|
||||||
|
#include <catch.hpp>
|
||||||
|
|
||||||
|
// Checks the Greek-letter helpers: ReplaceGreekLetterAbbr (upper-case input
// only), ReplaceGreekLetter (case-insensitive) and
// GetCanonicalGreekAbbreviation.
TEST_CASE("Greek", "[Greek]")
{
    // Expected UTF-8 byte sequences shared by the sections below.
    const char* const xi         = "\316\276";                     // letter xi
    const char* const xiSup12    = "\316\276\302\271\302\262";     // xi + superscript 1, 2
    const char* const xiFoo      = "\316\276 Foo";
    const char* const xiSup12Bar = "\316\276\302\271\302\262 Bar";
    const char* const alpha      = "\316\261";                     // letter alpha

    SECTION("ReplaceGreekLetterAbbr")
    {
        // Upper-case abbreviations are converted ...
        REQUIRE(ReplaceGreekLetterAbbr("XI") == xi);
        REQUIRE(ReplaceGreekLetterAbbr("XI12") == xiSup12);
        REQUIRE(ReplaceGreekLetterAbbr("XI Foo") == xiFoo);
        REQUIRE(ReplaceGreekLetterAbbr("XI12 Bar") == xiSup12Bar);

        // ... while lower-case input is returned unchanged.
        REQUIRE(ReplaceGreekLetterAbbr("xi") == "xi");
        REQUIRE(ReplaceGreekLetterAbbr("xi12") == "xi12");
        REQUIRE(ReplaceGreekLetterAbbr("xi Foo") == "xi Foo");
        REQUIRE(ReplaceGreekLetterAbbr("xi12 Bar") == "xi12 Bar");

        REQUIRE(ReplaceGreekLetterAbbr("alpha") == "alpha");
    }

    SECTION("ReplaceGreekLetter")
    {
        REQUIRE(ReplaceGreekLetter("XI") == xi);
        REQUIRE(ReplaceGreekLetter("XI12") == xiSup12);
        REQUIRE(ReplaceGreekLetter("XI Foo") == xiFoo);
        REQUIRE(ReplaceGreekLetter("XI12 Bar") == xiSup12Bar);

        // Lower-case input gives the same result as upper-case input.
        REQUIRE(ReplaceGreekLetter("xi") == xi);
        REQUIRE(ReplaceGreekLetter("xi12") == xiSup12);
        REQUIRE(ReplaceGreekLetter("xi Foo") == xiFoo);
        REQUIRE(ReplaceGreekLetter("xi12 Bar") == xiSup12Bar);

        REQUIRE(ReplaceGreekLetter("alpha") == alpha);
    }

    SECTION("GetCanonicalGreekAbbreviation")
    {
        REQUIRE(GetCanonicalGreekAbbreviation("xi") == "XI");
        REQUIRE(GetCanonicalGreekAbbreviation("alpha") == "ALF");
    }
}
|
Loading…
Reference in New Issue