diff --git a/src/celengine/asterism.cpp b/src/celengine/asterism.cpp index bf8b4e48c..ea8750dfd 100644 --- a/src/celengine/asterism.cpp +++ b/src/celengine/asterism.cpp @@ -8,6 +8,7 @@ // of the License, or (at your option) any later version. #include +#include #include #include #include "stardb.h" diff --git a/src/celengine/console.cpp b/src/celengine/console.cpp index 627e97ff4..ccd40ca9e 100644 --- a/src/celengine/console.cpp +++ b/src/celengine/console.cpp @@ -179,7 +179,7 @@ void Console::print(char* s) while (i < length && validChar) { wchar_t ch = 0; - validChar = UTF8Decode(s, i, length, ch); + validChar = UTF8Decode(string_view(s, length), i, ch); i += UTF8EncodedSize(ch); print(ch); } diff --git a/src/celengine/name.cpp b/src/celengine/name.cpp index 7c21f5b58..cc3735542 100644 --- a/src/celengine/name.cpp +++ b/src/celengine/name.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "name.h" uint32_t NameDatabase::getNameCount() const @@ -97,41 +98,25 @@ NameDatabase::NumberIndex::const_iterator NameDatabase::getFinalNameIter() const return numberIndex.end(); } -std::vector NameDatabase::getCompletion(const std::string& name, bool i18n, bool greek) const +std::vector NameDatabase::getCompletion(const std::string& name, bool i18n) const { - if (greek) - { - auto compList = getGreekCompletion(name); - compList.push_back(name); - return getCompletion(compList, i18n); - } + std::string name2 = ReplaceGreekLetter(name); std::vector completion; - int name_length = UTF8Length(name); + const int name_length = UTF8Length(name2); - for (NameIndex::const_iterator iter = nameIndex.begin(); iter != nameIndex.end(); ++iter) + for (const auto &[n, _] : nameIndex) { - if (!UTF8StringCompare(iter->first, name, name_length, true)) - completion.push_back(iter->first); + if (!UTF8StringCompare(n, name2, name_length, true)) + completion.push_back(n); } if (i18n) { - for (NameIndex::const_iterator iter = localizedNameIndex.begin(); iter != localizedNameIndex.end(); ++iter) + for (const auto &[n, _] : localizedNameIndex) { - if (!UTF8StringCompare(iter->first, name, name_length, true)) - completion.push_back(iter->first); + if (!UTF8StringCompare(n, name2, name_length, true)) + completion.push_back(n); } } return completion; } - -std::vector NameDatabase::getCompletion(const std::vector &list, bool i18n) const -{ - std::vector completion; - for (const auto &n : list) - { - for (const auto &nn : getCompletion(n, i18n, false)) - completion.emplace_back(nn); - } - return completion; -} diff --git a/src/celengine/name.h b/src/celengine/name.h index f9fe81106..45cf10fb0 100644 --- a/src/celengine/name.h +++ b/src/celengine/name.h @@ -45,8 +45,7 @@ class NameDatabase NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const; NumberIndex::const_iterator getFinalNameIter() const; - std::vector getCompletion(const std::string& name, bool i18n, bool greek = true) const; - std::vector getCompletion(const std::vector &list, bool i18n) const; + std::vector getCompletion(const std::string& name, bool i18n) const; protected: NameIndex nameIndex; diff --git a/src/celengine/overlay.cpp b/src/celengine/overlay.cpp index 91b8057c6..375c529e8 100644 --- a/src/celengine/overlay.cpp +++ b/src/celengine/overlay.cpp @@ -159,7 +159,7 @@ void Overlay::print_impl(const std::string& s) while (i < length && validChar) { wchar_t ch = 0; - validChar = UTF8Decode(s.c_str(), i, length, ch); + validChar = UTF8Decode(s, i, ch); i += UTF8EncodedSize(ch); print(ch); } diff --git a/src/celengine/starname.cpp b/src/celengine/starname.cpp index 94ecbc86e..4c25af998 100644 --- a/src/celengine/starname.cpp +++ b/src/celengine/starname.cpp @@ -10,8 +10,10 @@ // // +#include #include #include +#include using namespace std; @@ -61,26 +63,26 @@ uint32_t StarNameDatabase::findCatalogNumberByName(const string& name, bool i18n // We have a valid constellation as the last part // of the name. Next, we see if the first part of // the name is a greek letter. - const string& letter = Greek::canonicalAbbreviation(string(prefix, 0, len)); + std::string_view letter = GetCanonicalGreekAbbreviation(std::string_view(prefix).substr(0, len)); if (!letter.empty()) { // Matched . . . this is a Bayer designation if (digit == ' ') { - priName = letter + ' ' + con->getAbbreviation(); + priName = fmt::format("{} {}", letter, con->getAbbreviation()); // If 'let con' doesn't match, try using // 'let1 con' instead. - altName = letter + '1' + ' ' + con->getAbbreviation(); + altName = fmt::format("{}1 {}", letter, con->getAbbreviation()); } else { - priName = letter + digit + ' ' + con->getAbbreviation(); + priName = fmt::format("{}{} {}", letter, digit, con->getAbbreviation()); } } else { // Something other than a Bayer designation - priName = prefix + ' ' + con->getAbbreviation(); + priName = fmt::format("{} {}", prefix, con->getAbbreviation()); } if (isOrbitingStar) diff --git a/src/celengine/universe.cpp b/src/celengine/universe.cpp index 1c4ffc500..a9c326c37 100644 --- a/src/celengine/universe.cpp +++ b/src/celengine/universe.cpp @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include static const double ANGULAR_RES = 3.5e-6; diff --git a/src/celestia/celestiacore.cpp b/src/celestia/celestiacore.cpp index 41c714561..8f5e7812a 100644 --- a/src/celestia/celestiacore.cpp +++ b/src/celestia/celestiacore.cpp @@ -970,7 +970,7 @@ void CelestiaCore::charEntered(const char *c_p, int modifiers) if (textEnterMode & KbAutoComplete) { wchar_t wc = 0; // Null wide character - UTF8Decode(c_p, 0, strlen(c_p), wc); + UTF8Decode(c_p, 0, wc); #ifdef __APPLE__ if ( wc && (!iscntrl(wc)) ) #else diff --git a/src/celestia/gtk/dialog-star.cpp b/src/celestia/gtk/dialog-star.cpp index 58b8181e2..1a5503dc8 100644 --- a/src/celestia/gtk/dialog-star.cpp +++ b/src/celestia/gtk/dialog-star.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "dialog-star.h" #include "actions.h" diff --git a/src/celestia/gtk/menu-context.cpp b/src/celestia/gtk/menu-context.cpp index 6aa2dfc2e..d29a32043 100644 --- a/src/celestia/gtk/menu-context.cpp +++ b/src/celestia/gtk/menu-context.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "menu-context.h" #include "actions.h" diff --git a/src/celestia/qt/qtappwin.cpp b/src/celestia/qt/qtappwin.cpp index 16e056f66..df99c12c7 100644 --- a/src/celestia/qt/qtappwin.cpp +++ b/src/celestia/qt/qtappwin.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "qtappwin.h" #include "qtglwidget.h" diff --git a/src/celestia/qt/qtcelestialbrowser.cpp b/src/celestia/qt/qtcelestialbrowser.cpp index b6792c880..cda6760d3 100644 --- a/src/celestia/qt/qtcelestialbrowser.cpp +++ b/src/celestia/qt/qtcelestialbrowser.cpp @@ -12,6 +12,7 @@ #include #include +#include #include "qtcelestialbrowser.h" #include "qtcolorswatchwidget.h" #include "qtinfopanel.h" diff --git a/src/celestia/qt/qtdeepskybrowser.cpp b/src/celestia/qt/qtdeepskybrowser.cpp index 178d14f00..c28a28daf 100644 --- a/src/celestia/qt/qtdeepskybrowser.cpp +++ b/src/celestia/qt/qtdeepskybrowser.cpp @@ -12,6 +12,7 @@ #include #include +#include #include "qtdeepskybrowser.h" #include "qtcolorswatchwidget.h" #include "qtinfopanel.h" diff --git a/src/celestia/qt/qtinfopanel.cpp b/src/celestia/qt/qtinfopanel.cpp index b4a661397..5cd9c93e4 100644 --- a/src/celestia/qt/qtinfopanel.cpp +++ b/src/celestia/qt/qtinfopanel.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/celestia/qt/qtselectionpopup.cpp b/src/celestia/qt/qtselectionpopup.cpp index 6e623f750..101a5862a 100644 --- a/src/celestia/qt/qtselectionpopup.cpp +++ b/src/celestia/qt/qtselectionpopup.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "qtselectionpopup.h" #include "qtappwin.h" diff --git a/src/celestia/qt/qtsolarsystembrowser.cpp b/src/celestia/qt/qtsolarsystembrowser.cpp index 0fdb25038..dd433bc45 100644 --- a/src/celestia/qt/qtsolarsystembrowser.cpp +++ b/src/celestia/qt/qtsolarsystembrowser.cpp @@ -12,6 +12,7 @@ #include #include +#include #include "qtsolarsystembrowser.h" #include "qtinfopanel.h" #include "qtcolorswatchwidget.h" diff --git a/src/celutil/CMakeLists.txt b/src/celutil/CMakeLists.txt index 7d2a66c93..be26e0135 100644 --- a/src/celutil/CMakeLists.txt +++ b/src/celutil/CMakeLists.txt @@ -13,6 +13,8 @@ set(CELUTIL_SOURCES formatnum.h fsutils.cpp fsutils.h + greek.cpp + greek.h logger.cpp logger.h reshandle.h diff --git a/src/celutil/greek.cpp b/src/celutil/greek.cpp new file mode 100644 index 000000000..047e95343 --- /dev/null +++ b/src/celutil/greek.cpp @@ -0,0 +1,270 @@ +// utf8.cpp +// +// Copyright (C) 2004, Chris Laurel +// 2018-present, Celestia Development Team +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. + +#include "greek.h" + +#include "stringutils.h" +#include "utf8.h" + +#include +#include +#include + +using namespace std::string_view_literals; + +namespace +{ +constexpr int nLetters = 24; + +constexpr std::string_view UTF8_SUPERSCRIPT_0 = "\342\201\260"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_1 = "\302\271"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_2 = "\302\262"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_3 = "\302\263"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_4 = "\342\201\264"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_5 = "\342\201\265"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_6 = "\342\201\266"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_7 = "\342\201\267"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_8 = "\342\201\270"sv; +constexpr std::string_view UTF8_SUPERSCRIPT_9 = "\342\201\271"sv; + +// clang-format off +const std::array greekAlphabet = +{ + "Alpha"sv, + "Beta"sv, + "Gamma"sv, + "Delta"sv, + "Epsilon"sv, + "Zeta"sv, + "Eta"sv, + "Theta"sv, + "Iota"sv, + "Kappa"sv, + "Lambda"sv, + "Mu"sv, + "Nu"sv, + "Xi"sv, + "Omicron"sv, + "Pi"sv, + "Rho"sv, + "Sigma"sv, + "Tau"sv, + "Upsilon"sv, + "Phi"sv, + "Chi"sv, + "Psi"sv, + "Omega"sv +}; + +const std::array greekAlphabetUTF8 = { + "\316\261"sv, // ALF + "\316\262"sv, // BET + "\316\263"sv, // GAM + "\316\264"sv, // DEL + "\316\265"sv, // EPS + "\316\266"sv, // ZET + "\316\267"sv, // ETA + "\316\270"sv, // TET + "\316\271"sv, // IOT + "\316\272"sv, // KAP + "\316\273"sv, // LAM + "\316\274"sv, // MU + "\316\275"sv, // NU + "\316\276"sv, // XI + "\316\277"sv, // OMI + "\317\200"sv, // PI + "\317\201"sv, // RHO + "\317\203"sv, // SIG + "\317\204"sv, // TAU + "\317\205"sv, // UPS + "\317\206"sv, // PHI + "\317\207"sv, // CHI + "\317\210"sv, // PSI + "\317\211"sv, // OME +}; + +const std::array canonicalAbbrevs = +{ + "ALF"sv, + "BET"sv, + "GAM"sv, + "DEL"sv, + "EPS"sv, + "ZET"sv, + "ETA"sv, + "TET"sv, + "IOT"sv, + "KAP"sv, + "LAM"sv, + "MU"sv, + "NU"sv, + "XI"sv, + "OMI"sv, + "PI"sv, + "RHO"sv, + "SIG"sv, + "TAU"sv, + "UPS"sv, + "PHI"sv, + "CHI"sv, + "PSI"sv, + "OME"sv, +}; +// clang-format on + +std::string_view::size_type +getFirstWordLength(std::string_view str) +{ + auto sp = str.find(' '); + if (sp == std::string_view::npos) + sp = str.length(); + + // skip digits + while (sp > 0 && std::isdigit(str[sp - 1]) != 0) + sp--; + + return sp; +} + +std::string_view +toSuperscript(char c) +{ + switch (c) + { + case '0': + return UTF8_SUPERSCRIPT_0; + case '1': + return UTF8_SUPERSCRIPT_1; + case '2': + return UTF8_SUPERSCRIPT_2; + case '3': + return UTF8_SUPERSCRIPT_3; + case '4': + return UTF8_SUPERSCRIPT_4; + case '5': + return UTF8_SUPERSCRIPT_5; + case '6': + return UTF8_SUPERSCRIPT_6; + case '7': + return UTF8_SUPERSCRIPT_7; + case '8': + return UTF8_SUPERSCRIPT_8; + case '9': + return UTF8_SUPERSCRIPT_9; + default: + return {}; + } +} + +} // namespace + +/** + * Replaces the Greek letter abbreviation at the beginning + * of a string by the UTF-8 representation of that letter. + * Also, replaces digits following Greek letters with UTF-8 + * superscripts. + */ +std::string +ReplaceGreekLetterAbbr(std::string_view str) +{ + if (str.empty()) + return {}; + + if (auto len = getFirstWordLength(str); len > 0 && str[0] >= 'A' && str[0] <= 'Z') + { + // Linear search through all letter abbreviations + for (int i = 0; i < nLetters; i++) + { + auto prefix = canonicalAbbrevs[i]; + if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) + { + prefix = greekAlphabet[i]; + if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) + continue; + } + + std::string ret(greekAlphabetUTF8[i]); + for (; str.length() > len && std::isdigit(str[len]); len++) + ret.append(toSuperscript(str[len])); + ret.append(str.substr(len)); + + return ret; + } + } + + return std::string(str); +} + +/** + * Returns canonical greek abbreviation for a letter passed. + * The letter can be: latin name of a greek letter, canonical + * representation of it or a greek letter itself in UTF-8. + */ +std::string_view +GetCanonicalGreekAbbreviation(std::string_view letter) +{ + for (int i = 0; i < nLetters; i++) + { + if (compareIgnoringCase(letter, greekAlphabet[i]) == 0 + || compareIgnoringCase(letter, canonicalAbbrevs[i]) == 0) + { + return canonicalAbbrevs[i]; + } + } + + if (letter.length() == 2) + { + for (int i = 0; i < nLetters; i++) + { + if (letter == greekAlphabetUTF8[i]) return canonicalAbbrevs[i]; + } + } + + return {}; +} + +/** + * Replaces the Greek letter or abbreviation at the beginning + * of a string by the UTF-8 representation of that letter. + * Also, replaces digits following Greek letters with UTF-8 + * superscripts. + */ +std::string +ReplaceGreekLetter(std::string_view str) +{ + if (str.empty()) return {}; + + if (auto len = getFirstWordLength(str); len > 0) + { + // Linear search through all letter abbreviations + for (int i = 0; i < nLetters; i++) + { + if (len != 2 || str != greekAlphabetUTF8[i]) + { + auto prefix = canonicalAbbrevs[i]; + if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) + { + prefix = greekAlphabet[i]; + if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) + continue; + } + } + + std::string ret(greekAlphabetUTF8[i]); + for (; str.length() > len && std::isdigit(str[len]); len++) + ret.append(toSuperscript(str[len])); + ret.append(str.substr(len)); + + return ret; + } + } + + return std::string(str); +} diff --git a/src/celutil/greek.h b/src/celutil/greek.h new file mode 100644 index 000000000..1d27da85e --- /dev/null +++ b/src/celutil/greek.h @@ -0,0 +1,19 @@ +// greek.h +// +// Copyright (C) 2004, Chris Laurel +// 2018-present, Celestia Development Team +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. + +#pragma once + +#include +#include +#include + +std::string ReplaceGreekLetterAbbr(std::string_view str); +std::string ReplaceGreekLetter(std::string_view str); +std::string_view GetCanonicalGreekAbbreviation(std::string_view letter); diff --git a/src/celutil/utf8.cpp b/src/celutil/utf8.cpp index 3f5a7b66a..1a52b854d 100644 --- a/src/celutil/utf8.cpp +++ b/src/celutil/utf8.cpp @@ -1,20 +1,21 @@ // utf8.cpp // // Copyright (C) 2004, Chris Laurel +// 2018-present, Celestia Development Team // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. -#include -#include #include -#include -#include -#include "stringutils.h" #include "utf8.h" +namespace +{ + +// clang-format off + uint16_t WGL4_Normalization_00[256] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, @@ -300,11 +301,45 @@ uint16_t* WGL4NormalizationTables[256] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, }; +// clang-format on + +inline int UTF8EncodedSizeFromFirstByte(unsigned int ch) +{ + if (ch < 0x80) + return 1; + if ((ch & 0xe0) == 0xc0) + return 2; + if ((ch & 0xf0) == 0xe0) + return 3; + if ((ch & 0xf8) == 0xf0) + return 4; + if ((ch & 0xfc) == 0xf8) + return 5; + if ((ch & 0xfe) == 0xfc) + return 6; + else + return 1; +} + +inline wchar_t UTF8Normalize(wchar_t ch) +{ + auto page = (unsigned int) ch >> 8; + if (page >= 256) + return ch; + + uint16_t* normTable = WGL4NormalizationTables[page]; + if (normTable == nullptr) + return ch; + + return (wchar_t) normTable[(unsigned int) ch & 0xff]; +} + +} // namespace //! Decode the UTF-8 characters in string str beginning at position pos. //! The decoded character is returned in ch; the return value of the function //! is true if a valid UTF-8 sequence was successfully decoded. -bool UTF8Decode(const std::string& str, int pos, wchar_t& ch) +bool UTF8Decode(std::string_view str, int pos, wchar_t& ch) { auto c0 = (unsigned int) str[pos]; int charlen = UTF8EncodedSizeFromFirstByte(c0); @@ -362,69 +397,6 @@ bool UTF8Decode(const std::string& str, int pos, wchar_t& ch) } } - -//! Decode the UTF-8 characters in string str beginning at position pos. -//! The decoded character is returned in ch; the return value of the function -//! is true if a valid UTF-8 sequence was successfully decoded. -bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch) -{ - auto c0 = (unsigned int) str[pos]; - int charlen = UTF8EncodedSizeFromFirstByte(c0); - - // Bad UTF-8 character that extends past end of string - if (pos + charlen > length) - return false; - - // TODO: Should check that the bytes of characters after the first are all - // of the form 01xxxxxx - // TODO: Need to reject overlong encoding sequences - - switch (charlen) - { - case 1: - ch = c0; - return true; - - case 2: - ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f); - return true; - - case 3: - ch = ((c0 & 0x0f) << 12) | - (((unsigned int) str[pos + 1] & 0x3f) << 6) | - ((unsigned int) str[pos + 2] & 0x3f); - return true; - - case 4: - ch = ((c0 & 0x07) << 18) | - (((unsigned int) str[pos + 1] & 0x3f) << 12) | - (((unsigned int) str[pos + 2] & 0x3f) << 6) | - ((unsigned int) str[pos + 3] & 0x3f); - return true; - - case 5: - ch = ((c0 & 0x03) << 24) | - (((unsigned int) str[pos + 1] & 0x3f) << 18) | - (((unsigned int) str[pos + 2] & 0x3f) << 12) | - (((unsigned int) str[pos + 3] & 0x3f) << 6) | - ((unsigned int) str[pos + 4] & 0x3f); - return true; - - case 6: - ch = ((c0 & 0x01) << 30) | - (((unsigned int) str[pos + 1] & 0x3f) << 24) | - (((unsigned int) str[pos + 2] & 0x3f) << 18) | - (((unsigned int) str[pos + 3] & 0x3f) << 12) | - (((unsigned int) str[pos + 4] & 0x3f) << 6) | - ((unsigned int) str[pos + 5] & 0x3f); - return true; - - default: - return false; - } -} - - //! Appends the UTF-8 encoded version of the code point ch to the //! destination string void UTF8Encode(std::uint32_t ch, std::string& dest) @@ -469,9 +441,8 @@ void UTF8Encode(std::uint32_t ch, std::string& dest) } } - //! Return the number of characters encoded by a UTF-8 string -int UTF8Length(const std::string& s) +int UTF8Length(std::string_view s) { int len = s.length(); int count = 0; @@ -485,25 +456,10 @@ int UTF8Length(const std::string& s) return count; } - -inline wchar_t UTF8Normalize(wchar_t ch) -{ - auto page = (unsigned int) ch >> 8; - if (page >= 256) - return ch; - - uint16_t* normTable = WGL4NormalizationTables[page]; - if (normTable == nullptr) - return ch; - - return (wchar_t) normTable[(unsigned int) ch & 0xff]; -} - - //! Perform a normalized comparison of two UTF-8 strings. The normalization //! only works for characters in the WGL-4 subset, and no multicharacter //! translations are performed. -int UTF8StringCompare(const std::string& s0, const std::string& s1) +int UTF8StringCompare(std::string_view s0, std::string_view s1) { int len0 = s0.length(); int len1 = s1.length(); @@ -542,7 +498,7 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1) return 0; } -int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase) +int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase) { int len0 = s0.length(); int len1 = s1.length(); @@ -588,432 +544,6 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bo return 0; } - -#if 0 -//! Currently incomplete, but could be a helpful class for dealing with -//! UTF-8 streams -class UTF8StringIterator -{ -public: - UTF8StringIterator(const std::string& _str) : str(_str) {}; - UTF8StringIterator(const UTF8StringIterator& iter) = default; - - UTF8StringIterator& operator++(); - UTF8StringIterator& operator++(int); - -private: - const std::string& str; - int position{ 0 }; -}; - - -UTF8StringIterator& UTF8StringIterator::operator++() -{ - return *this; -} - - -UTF8StringIterator& UTF8StringIterator::operator++(int) -{ - return *this; -} -#endif - - -static const char *greekAlphabet[] = -{ - "Alpha", - "Beta", - "Gamma", - "Delta", - "Epsilon", - "Zeta", - "Eta", - "Theta", - "Iota", - "Kappa", - "Lambda", - "Mu", - "Nu", - "Xi", - "Omicron", - "Pi", - "Rho", - "Sigma", - "Tau", - "Upsilon", - "Phi", - "Chi", - "Psi", - "Omega" -}; - -static const char* greekAlphabetUTF8[] = -{ - "\316\261", - "\316\262", - "\316\263", - "\316\264", - "\316\265", - "\316\266", - "\316\267", - "\316\270", - "\316\271", - "\316\272", - "\316\273", - "\316\274", - "\316\275", - "\316\276", - "\316\277", - "\317\200", - "\317\201", - "\317\203", - "\317\204", - "\317\205", - "\317\206", - "\317\207", - "\317\210", - "\317\211", -}; - -static const char* canonicalAbbrevs[] = -{ - "ALF", "BET", "GAM", "DEL", "EPS", "ZET", "ETA", "TET", - "IOT", "KAP", "LAM", "MU" , "NU" , "XI" , "OMI", "PI" , - "RHO", "SIG", "TAU", "UPS", "PHI", "CHI", "PSI", "OME", -}; - -static std::string noAbbrev; - -// Greek alphabet crud . . . should probably moved to it's own module. - -static size_t greekChunkLength(const std::string&); - -Greek* Greek::m_instance = nullptr; - -Greek* Greek::getInstance() -{ - if (m_instance == nullptr) - m_instance = new Greek(); - return m_instance; -} - -Greek::Greek() -{ - nLetters = sizeof(greekAlphabet) / sizeof(greekAlphabet[0]); - names = new std::string[nLetters]; - abbrevs = new std::string[nLetters]; - - for (int i = 0; i < nLetters; i++) - { - names[i] = std::string(greekAlphabet[i]); - abbrevs[i] = std::string(canonicalAbbrevs[i]); - } -} - -Greek::~Greek() -{ - delete[] names; - delete[] abbrevs; -} - -const std::string& Greek::canonicalAbbreviation(const std::string& letter) -{ - Greek *instance = Greek::getInstance(); - int i; - for (i = 0; i < instance->nLetters; i++) - { - if (compareIgnoringCase(letter, instance->names[i]) == 0) - return instance->abbrevs[i]; - } - - for (i = 0; i < instance->nLetters; i++) - { - if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0) - return instance->abbrevs[i]; - } - - if (letter.length() == 2) - { - for (i = 0; i < instance->nLetters; i++) - { - if (letter[0] == greekAlphabetUTF8[i][0] && - letter[1] == greekAlphabetUTF8[i][1]) - { - return instance->abbrevs[i]; - } - } - } - - return noAbbrev; -} - -static const char* toSuperscript(char c) -{ - switch (c) - { - case '0': - return UTF8_SUPERSCRIPT_0; - case '1': - return UTF8_SUPERSCRIPT_1; - case '2': - return UTF8_SUPERSCRIPT_2; - case '3': - return UTF8_SUPERSCRIPT_3; - case '4': - return UTF8_SUPERSCRIPT_4; - case '5': - return UTF8_SUPERSCRIPT_5; - case '6': - return UTF8_SUPERSCRIPT_6; - case '7': - return UTF8_SUPERSCRIPT_7; - case '8': - return UTF8_SUPERSCRIPT_8; - case '9': - return UTF8_SUPERSCRIPT_9; - default: - return nullptr; - } -} - -//! Replaces the Greek letter abbreviation at the beginning -//! of a string by the UTF-8 representation of that letter. -//! Also, replace digits following Greek letters with UTF-8 -//! superscripts. -std::string ReplaceGreekLetterAbbr(const std::string& str) -{ - Greek *instance = Greek::getInstance(); - size_t len = greekChunkLength(str); - - if (str[0] >= 'A' && str[0] <= 'Z') - { - // Linear search through all letter abbreviations - for (int i = 0; i < instance->nLetters; i++) - { - std::string prefix = instance->abbrevs[i]; - if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) - { - prefix = instance->names[i]; - if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0) - continue; - } - - std::string ret = greekAlphabetUTF8[i]; - auto len = prefix.length(); - for (; str.length() > len && isdigit(str[len]); len++) - ret += toSuperscript(str[len]); - ret += str.substr(len); - return ret; - } - } - - return str; -} - -//! Replaces the Greek letter abbreviation at the beginning -//! of a string by the UTF-8 representation of that letter. -//! Also, replace digits following Greek letters with UTF-8 -//! superscripts. Operates on char* instead of strings--less -//! convenient, but more efficient. Return the number of -//! characters copied to the destination string, not -//! including the zero terminator. -#if 0 -unsigned int -ReplaceGreekLetterAbbr(char *dst, unsigned int dstSize, const char* src, unsigned int srcLength) -{ - Greek *instance = Greek::getInstance(); - if (src[0] >= 'A' && src[0] <= 'Z' && - src[1] >= 'A' && src[1] <= 'Z') - { - // Linear search through all letter abbreviations - for (unsigned int i = 0; i < (unsigned int) instance->nLetters; i++) - { - const char* abbr = canonicalAbbrevs[i]; - unsigned int j = 0; - while (abbr[j] == src[j] && abbr[j] != '\0' && src[j] != '\0') - j++; - - // It's a match if we reached the end of the abbreviation string - if (abbr[j] == '\0') - { - unsigned int abbrevLength = j; - unsigned int srcIndex = j; - const char *superscript = toSuperscript(src[abbrevLength]); - - const char* utfGreek = greekAlphabetUTF8[i]; - unsigned int utfGreekLength = strlen(utfGreek); - - unsigned int requiredLength = srcLength; - if (utfGreekLength > abbrevLength) - requiredLength += utfGreekLength - abbrevLength; - if (superscript != nullptr) - { - requiredLength += strlen(superscript) - 1; - srcIndex++; - } - - // If there's not enough room, give up translating and just copy as much as possible - if (requiredLength + 1 > dstSize) - break; - - unsigned int dstIndex = 0; - j = 0; - while (utfGreek[j] != 0) - { - dst[dstIndex++] = utfGreek[j]; - j++; - } - - if (superscript != nullptr) - { - j = 0; - while (superscript[j] != 0) - { - dst[dstIndex++] = superscript[j]; - j++; - } - } - - while (src[srcIndex] != 0) - { - dst[dstIndex++] = src[srcIndex++]; - } - dst[dstIndex] = '\0'; - - return dstIndex; - } - } - } - - strncpy(dst, src, dstSize); - if (dstSize > srcLength) - return srcLength; - - - if (dstSize > 0) - { - dst[dstSize - 1] = '\0'; - return dstSize - 1; - } - - return 0; -} -#endif - -static int findGreekNameIndexBySubstr(const std::string &, int = 0, unsigned int = UINT_MAX); -#if 0 -static std::string firstGreekAbbrCompletion(const std::string &); -#endif - -bool inline isSubstringIgnoringCase(const std::string &s0, const std::string &s1, size_t n) -{ - return UTF8StringCompare(s0, s1, n, true) == 0; -} - -static int findGreekNameIndexBySubstr(const std::string &s, int start, unsigned int n) -{ - Greek *instance = Greek::getInstance(); - - if (s.empty()) - return -1; - - for (int i = start; i < instance->nLetters; i++) - { - if (isSubstringIgnoringCase(instance->names[i], s, n)) - return i; - } - - for (int i = start; i < instance->nLetters; i++) - { - if (isSubstringIgnoringCase(instance->abbrevs[i], s, n)) - return i; - } - - return -1; -} - -static size_t greekChunkLength(const std::string& str) -{ - bool npos = false; - size_t sp = str.find_first_of(' '); - if (sp == std::string::npos) - { - sp = str.length(); - npos = true; - } - - if (sp != 0 && isdigit(static_cast(str[sp - 1]))) - while(sp != 0 && isdigit(static_cast(str[sp - 1]))) sp--; - else if (npos) - sp = std::string::npos; - return sp; -} - -#if 0 -static std::string firstGreekAbbrCompletion(const std::string &s) -{ - std::string ret; - size_t sp = greekChunkLength(s); - if (sp == std::string::npos) - { - int i = findGreekNameIndexBySubstr(s); - return (i >= 0) ? Greek::getInstance()->abbrevs[i] : s; - } - else - { - std::string prefix = s.substr(0, sp); - ret = Greek::canonicalAbbreviation(prefix); - return ret.empty() ? s : prefix + s.substr(sp); - } - - return ret; -} -#endif - -std::vector getGreekCompletion(const std::string &s) -{ - std::vector ret; - if (s.empty()) - return ret; - - size_t sp = greekChunkLength(s); - if (sp == std::string::npos) - { - sp = UTF8Length(s); - for(int i = 0; i >= 0;) - { - std::string rets; - i = findGreekNameIndexBySubstr(s, i, sp); - if (i >= 0) - { - rets = Greek::getInstance()->abbrevs[i]; - rets += " "; - ret.emplace_back(ReplaceGreekLetterAbbr(rets)); - i++; - } - } - } - else - { - std::string prefix = s.substr(0, sp); - std::string rets = Greek::canonicalAbbreviation(prefix); - if (!rets.empty()) - { - rets += s.substr(sp); - ret.emplace_back(ReplaceGreekLetterAbbr(rets)); - } - } - - return ret; -} - -UTF8Status -UTF8Validator::check(char c) -{ - return check(static_cast(c)); -} - UTF8Status UTF8Validator::check(unsigned char c) { diff --git a/src/celutil/utf8.h b/src/celutil/utf8.h index b5a70d563..60dafe0f3 100644 --- a/src/celutil/utf8.h +++ b/src/celutil/utf8.h @@ -1,6 +1,7 @@ // utf8.h // // Copyright (C) 2004, Chris Laurel +// 2018-present, Celestia Development Team // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License @@ -11,42 +12,30 @@ #include #include -#include +#include #define UTF8_DEGREE_SIGN "\302\260" #define UTF8_MULTIPLICATION_SIGN "\303\227" -#define UTF8_SUPERSCRIPT_0 "\342\201\260" -#define UTF8_SUPERSCRIPT_1 "\302\271" -#define UTF8_SUPERSCRIPT_2 "\302\262" -#define UTF8_SUPERSCRIPT_3 "\302\263" -#define UTF8_SUPERSCRIPT_4 "\342\201\264" -#define UTF8_SUPERSCRIPT_5 "\342\201\265" -#define UTF8_SUPERSCRIPT_6 "\342\201\266" -#define UTF8_SUPERSCRIPT_7 "\342\201\267" -#define UTF8_SUPERSCRIPT_8 "\342\201\270" -#define UTF8_SUPERSCRIPT_9 "\342\201\271" #define UTF8_REPLACEMENT_CHAR "\357\277\275" - -bool UTF8Decode(const std::string& str, int pos, wchar_t& ch); -bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch); -void UTF8Encode(std::uint32_t ch, std::string& dest); -int UTF8StringCompare(const std::string& s0, const std::string& s1); -int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false); +bool UTF8Decode(std::string_view str, int pos, wchar_t &ch); +void UTF8Encode(std::uint32_t ch, std::string &dest); +int UTF8StringCompare(std::string_view s0, std::string_view s1); +int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase = false); class UTF8StringOrderingPredicate { public: - bool operator()(const std::string& s0, const std::string& s1) const + bool operator()(std::string_view s0, std::string_view s1) const { return UTF8StringCompare(s0, s1) == -1; } }; +int UTF8Length(std::string_view s); -int UTF8Length(const std::string& s); - -inline int UTF8EncodedSize(wchar_t ch) +constexpr int +UTF8EncodedSize(wchar_t ch) { if (ch < 0x80) return 1; @@ -66,7 +55,8 @@ inline int UTF8EncodedSize(wchar_t ch) #endif } -constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch) +constexpr int +UTF8EncodedSizeChecked(std::uint32_t ch) { if (ch < 0x80) return 1; @@ -84,76 +74,6 @@ constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch) #endif } -inline int UTF8EncodedSizeFromFirstByte(unsigned int ch) -{ - if (ch < 0x80) - return 1; - if ((ch & 0xe0) == 0xc0) - return 2; - if ((ch & 0xf0) == 0xe0) - return 3; - if ((ch & 0xf8) == 0xf0) - return 4; - if ((ch & 0xfc) == 0xf8) - return 5; - if ((ch & 0xfe) == 0xfc) - return 6; - else - return 1; -} - -std::string ReplaceGreekLetterAbbr(const std::string&); -#if 0 -unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength); -#endif - -class Greek -{ - private: - Greek(); - ~Greek(); - - public: - enum Letter - { - Alpha = 1, - Beta = 2, - Gamma = 3, - Delta = 4, - Epsilon = 5, - Zeta = 6, - Eta = 7, - Theta = 8, - Iota = 9, - Kappa = 10, - Lambda = 11, - Mu = 12, - Nu = 13, - Xi = 14, - Omicron = 15, - Pi = 16, - Rho = 17, - Sigma = 18, - Tau = 19, - Upsilon = 20, - Phi = 21, - Chi = 22, - Psi = 23, - Omega = 24, - }; - - static const std::string& canonicalAbbreviation(const std::string&); - private: - static Greek* m_instance; - public: - static Greek* getInstance(); - int nLetters; - std::string* names; - std::string* abbrevs; -}; - -std::vector getGreekCompletion(const std::string &); - enum class UTF8Status { Ok, @@ -164,9 +84,6 @@ enum class UTF8Status class UTF8Validator { public: - UTF8Validator() = default; - ~UTF8Validator() = default; - UTF8Status check(char c); UTF8Status check(unsigned char c); @@ -185,3 +102,9 @@ private: State state{ State::Initial }; }; + +inline UTF8Status +UTF8Validator::check(char c) +{ + return check(static_cast(c)); +} diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index c2493f2c2..f73674d14 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,4 +1,5 @@ test_case(charconv_compat) +test_case(greek) test_case(hash) test_case(logger) test_case(stellarclass) diff --git a/test/unit/greek_test.cpp b/test/unit/greek_test.cpp new file mode 100644 index 000000000..3b2dc5c07 --- /dev/null +++ b/test/unit/greek_test.cpp @@ -0,0 +1,42 @@ +#include + +#include + +TEST_CASE("Greek", "[Greek]") +{ + SECTION("ReplaceGreekLetterAbbr") + { + REQUIRE(ReplaceGreekLetterAbbr("XI") == "\316\276"); + REQUIRE(ReplaceGreekLetterAbbr("XI12") == "\316\276\302\271\302\262"); + REQUIRE(ReplaceGreekLetterAbbr("XI Foo") == "\316\276 Foo"); + REQUIRE(ReplaceGreekLetterAbbr("XI12 Bar") == "\316\276\302\271\302\262 Bar"); + + REQUIRE(ReplaceGreekLetterAbbr("xi") == "xi"); + REQUIRE(ReplaceGreekLetterAbbr("xi12") == "xi12"); + REQUIRE(ReplaceGreekLetterAbbr("xi Foo") == "xi Foo"); + REQUIRE(ReplaceGreekLetterAbbr("xi12 Bar") == "xi12 Bar"); + + REQUIRE(ReplaceGreekLetterAbbr("alpha") == "alpha"); + } + + SECTION("ReplaceGreekLetter") + { + REQUIRE(ReplaceGreekLetter("XI") == "\316\276"); + REQUIRE(ReplaceGreekLetter("XI12") == "\316\276\302\271\302\262"); + REQUIRE(ReplaceGreekLetter("XI Foo") == "\316\276 Foo"); + REQUIRE(ReplaceGreekLetter("XI12 Bar") == "\316\276\302\271\302\262 Bar"); + + REQUIRE(ReplaceGreekLetter("xi") == "\316\276"); + REQUIRE(ReplaceGreekLetter("xi12") == "\316\276\302\271\302\262"); + REQUIRE(ReplaceGreekLetter("xi Foo") == "\316\276 Foo"); + REQUIRE(ReplaceGreekLetter("xi12 Bar") == "\316\276\302\271\302\262 Bar"); + + REQUIRE(ReplaceGreekLetter("alpha") == "\316\261"); + } + + SECTION("GetCanonicalGreekAbbreviation") + { + REQUIRE(GetCanonicalGreekAbbreviation("xi") == "XI"); + REQUIRE(GetCanonicalGreekAbbreviation("alpha") == "ALF"); + } +}