Split greek-letters handling from utf8 and refactor a bit

2021-12-24 18:23:08 +02:00 · 2021-12-24 18:23:08 +02:00 · c251dac856
parent eaae852f8f
commit c251dac856
23 changed files with 427 additions and 648 deletions
--- a/src/celengine/asterism.cpp
+++ b/src/celengine/asterism.cpp
@ -8,6 +8,7 @@
 // of the License, or (at your option) any later version.
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include <celutil/logger.h>
 #include <celutil/tokenizer.h>
 #include "stardb.h"
--- a/src/celengine/console.cpp
+++ b/src/celengine/console.cpp
@ -179,7 +179,7 @@ void Console::print(char* s)
    while (i < length && validChar)
    {
        wchar_t ch = 0;
-        validChar = UTF8Decode(s, i, length, ch);
+        validChar = UTF8Decode(string_view(s, length), i, ch);
        i += UTF8EncodedSize(ch);
        print(ch);
    }
--- a/src/celengine/name.cpp
+++ b/src/celengine/name.cpp
@ -1,5 +1,6 @@
 #include <celutil/logger.h>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include "name.h"
 uint32_t NameDatabase::getNameCount() const
@ -97,41 +98,25 @@ NameDatabase::NumberIndex::const_iterator NameDatabase::getFinalNameIter() const
    return numberIndex.end();
 }
-std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n, bool greek) const
+std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n) const
 {
-    if (greek)
+    std::string name2 = ReplaceGreekLetter(name);
    {
        auto compList = getGreekCompletion(name);
        compList.push_back(name);
        return getCompletion(compList, i18n);
    }
    std::vector<std::string> completion;
-    int name_length = UTF8Length(name);
+    const int name_length = UTF8Length(name2);
-    for (NameIndex::const_iterator iter = nameIndex.begin(); iter != nameIndex.end(); ++iter)
+    for (const auto &[n, _] : nameIndex)
    {
-        if (!UTF8StringCompare(iter->first, name, name_length, true))
+        if (!UTF8StringCompare(n, name2, name_length, true))
-            completion.push_back(iter->first);
+            completion.push_back(n);
    }
    if (i18n)
    {
-        for (NameIndex::const_iterator iter = localizedNameIndex.begin(); iter != localizedNameIndex.end(); ++iter)
+        for (const auto &[n, _] : localizedNameIndex)
        {
-            if (!UTF8StringCompare(iter->first, name, name_length, true))
+            if (!UTF8StringCompare(n, name2, name_length, true))
-                completion.push_back(iter->first);
+                completion.push_back(n);
        }
    }
    return completion;
 }
 std::vector<std::string> NameDatabase::getCompletion(const std::vector<std::string> &list, bool i18n) const
 {
    std::vector<std::string> completion;
    for (const auto &n : list)
    {
        for (const auto &nn : getCompletion(n, i18n, false))
            completion.emplace_back(nn);
    }
    return completion;
 }
--- a/src/celengine/name.h
+++ b/src/celengine/name.h
@ -45,8 +45,7 @@ class NameDatabase
    NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const;
    NumberIndex::const_iterator getFinalNameIter() const;
-    std::vector<std::string> getCompletion(const std::string& name, bool i18n, bool greek = true) const;
+    std::vector<std::string> getCompletion(const std::string& name, bool i18n) const;
    std::vector<std::string> getCompletion(const std::vector<std::string> &list, bool i18n) const;
 protected:
    NameIndex   nameIndex;
--- a/src/celengine/overlay.cpp
+++ b/src/celengine/overlay.cpp
@ -159,7 +159,7 @@ void Overlay::print_impl(const std::string& s)
    while (i < length && validChar)
    {
        wchar_t ch = 0;
-        validChar = UTF8Decode(s.c_str(), i, length, ch);
+        validChar = UTF8Decode(s, i, ch);
        i += UTF8EncodedSize(ch);
        print(ch);
    }
--- a/src/celengine/starname.cpp
+++ b/src/celengine/starname.cpp
@ -10,8 +10,10 @@
 //
 //
 #include <fmt/format.h>
 #include <celengine/constellation.h>
 #include <celengine/starname.h>
 #include <celutil/greek.h>
 using namespace std;
@ -61,26 +63,26 @@ uint32_t StarNameDatabase::findCatalogNumberByName(const string& name, bool i18n
            // We have a valid constellation as the last part
            // of the name.  Next, we see if the first part of
            // the name is a greek letter.
-            const string& letter = Greek::canonicalAbbreviation(string(prefix, 0, len));
+            std::string_view letter = GetCanonicalGreekAbbreviation(std::string_view(prefix).substr(0, len));
            if (!letter.empty())
            {
                // Matched . . . this is a Bayer designation
                if (digit == ' ')
                {
-                    priName  = letter + ' ' + con->getAbbreviation();
+                    priName  = fmt::format("{} {}", letter, con->getAbbreviation());
                    // If 'let con' doesn't match, try using
                    // 'let1 con' instead.
-                    altName  = letter + '1' + ' ' + con->getAbbreviation();
+                    altName  = fmt::format("{}1 {}", letter, con->getAbbreviation());
                }
                else
                {
-                    priName = letter + digit + ' ' + con->getAbbreviation();
+                    priName = fmt::format("{}{} {}", letter, digit, con->getAbbreviation());
                }
            }
            else
            {
                // Something other than a Bayer designation
-                priName = prefix + ' ' + con->getAbbreviation();
+                priName = fmt::format("{} {}", prefix, con->getAbbreviation());
            }
            if (isOrbitingStar)
--- a/src/celengine/universe.cpp
+++ b/src/celengine/universe.cpp
@ -22,7 +22,7 @@
 #include <celmath/mathlib.h>
 #include <celmath/intersect.h>
 #include <celmath/ray.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include <cassert>
 static const double ANGULAR_RES = 3.5e-6;
--- a/src/celestia/celestiacore.cpp
+++ b/src/celestia/celestiacore.cpp
@ -970,7 +970,7 @@ void CelestiaCore::charEntered(const char *c_p, int modifiers)
    if (textEnterMode & KbAutoComplete)
    {
        wchar_t wc = 0; // Null wide character
-        UTF8Decode(c_p, 0, strlen(c_p), wc);
+        UTF8Decode(c_p, 0, wc);
 #ifdef __APPLE__
        if ( wc && (!iscntrl(wc)) )
 #else
--- a/src/celestia/gtk/dialog-star.cpp
+++ b/src/celestia/gtk/dialog-star.cpp
@ -19,7 +19,7 @@
 #include <celengine/starbrowser.h>
 #include <celengine/stardb.h>
 #include <celengine/univcoord.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include "dialog-star.h"
 #include "actions.h"
--- a/src/celestia/gtk/menu-context.cpp
+++ b/src/celestia/gtk/menu-context.cpp
@ -16,7 +16,7 @@
 #include <celengine/simulation.h>
 #include <celestia/celestiacore.h>
 #include <celestia/helper.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include "menu-context.h"
 #include "actions.h"
--- a/src/celestia/qt/qtappwin.cpp
+++ b/src/celestia/qt/qtappwin.cpp
@ -41,6 +41,7 @@
 #include <vector>
 #include <string>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include <celutil/tzutil.h>
 #include "qtappwin.h"
 #include "qtglwidget.h"
--- a/src/celestia/qt/qtcelestialbrowser.cpp
+++ b/src/celestia/qt/qtcelestialbrowser.cpp
@ -12,6 +12,7 @@
 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include "qtcelestialbrowser.h"
 #include "qtcolorswatchwidget.h"
 #include "qtinfopanel.h"
--- a/src/celestia/qt/qtdeepskybrowser.cpp
+++ b/src/celestia/qt/qtdeepskybrowser.cpp
@ -12,6 +12,7 @@
 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include "qtdeepskybrowser.h"
 #include "qtcolorswatchwidget.h"
 #include "qtinfopanel.h"
--- a/src/celestia/qt/qtinfopanel.cpp
+++ b/src/celestia/qt/qtinfopanel.cpp
@ -15,7 +15,7 @@
 #include <celengine/astro.h>
 #include <celutil/gettext.h>
 #include <celutil/logger.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include <celengine/universe.h>
 #include <QTextBrowser>
 #include <QIODevice>
--- a/src/celestia/qt/qtselectionpopup.cpp
+++ b/src/celestia/qt/qtselectionpopup.cpp
@ -17,6 +17,7 @@
 #include <celengine/axisarrow.h>
 #include <celengine/planetgrid.h>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include <fmt/printf.h>
 #include "qtselectionpopup.h"
 #include "qtappwin.h"
--- a/src/celestia/qt/qtsolarsystembrowser.cpp
+++ b/src/celestia/qt/qtsolarsystembrowser.cpp
@ -12,6 +12,7 @@
 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
 #include <celutil/greek.h>
 #include "qtsolarsystembrowser.h"
 #include "qtinfopanel.h"
 #include "qtcolorswatchwidget.h"
--- a/src/celutil/CMakeLists.txt
+++ b/src/celutil/CMakeLists.txt
@ -13,6 +13,8 @@ set(CELUTIL_SOURCES
  formatnum.h
  fsutils.cpp
  fsutils.h
  greek.cpp
  greek.h
  logger.cpp
  logger.h
  reshandle.h
--- a/src/celutil/greek.cpp
+++ b/src/celutil/greek.cpp
@ -0,0 +1,270 @@
 // greek.cpp
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
 //               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
 #include "greek.h"
 #include "stringutils.h"
 #include "utf8.h"
 #include <algorithm>
 #include <array>
 #include <cctype>
 using namespace std::string_view_literals;
 namespace
 {
 // Number of entries in each of the Greek letter tables below.
 constexpr int nLetters = 24;
 // UTF-8 byte sequences (written as octal escapes) for the superscript
 // digits 0-9. Superscripts 1, 2 and 3 are two-byte sequences; all the
 // others are three bytes long.
 constexpr std::string_view UTF8_SUPERSCRIPT_0 = "\342\201\260"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_1 = "\302\271"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_2 = "\302\262"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_3 = "\302\263"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_4 = "\342\201\264"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_5 = "\342\201\265"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_6 = "\342\201\266"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_7 = "\342\201\267"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_8 = "\342\201\270"sv;
 constexpr std::string_view UTF8_SUPERSCRIPT_9 = "\342\201\271"sv;
 // clang-format off
 // Latin-alphabet names of the Greek letters. Entry i names the same
 // letter as entry i of greekAlphabetUTF8 and canonicalAbbrevs; the
 // lookup functions in this file index all three tables in parallel.
 const std::array<std::string_view, nLetters> greekAlphabet =
 {
    "Alpha"sv,
    "Beta"sv,
    "Gamma"sv,
    "Delta"sv,
    "Epsilon"sv,
    "Zeta"sv,
    "Eta"sv,
    "Theta"sv,
    "Iota"sv,
    "Kappa"sv,
    "Lambda"sv,
    "Mu"sv,
    "Nu"sv,
    "Xi"sv,
    "Omicron"sv,
    "Pi"sv,
    "Rho"sv,
    "Sigma"sv,
    "Tau"sv,
    "Upsilon"sv,
    "Phi"sv,
    "Chi"sv,
    "Psi"sv,
    "Omega"sv
 };
 // UTF-8 encodings (octal escapes) of the lowercase Greek letters, in the
 // same order as greekAlphabet and canonicalAbbrevs. Every entry is exactly
 // two bytes long, which the lookup code relies on when it checks for a
 // length of 2. Note the jump from \317\201 to \317\203: the code point in
 // between is skipped so that Sigma has a single entry.
 const std::array<std::string_view, nLetters> greekAlphabetUTF8 = {
    "\316\261"sv, // ALF
    "\316\262"sv, // BET
    "\316\263"sv, // GAM
    "\316\264"sv, // DEL
    "\316\265"sv, // EPS
    "\316\266"sv, // ZET
    "\316\267"sv, // ETA
    "\316\270"sv, // TET
    "\316\271"sv, // IOT
    "\316\272"sv, // KAP
    "\316\273"sv, // LAM
    "\316\274"sv, // MU
    "\316\275"sv, // NU
    "\316\276"sv, // XI
    "\316\277"sv, // OMI
    "\317\200"sv, // PI
    "\317\201"sv, // RHO
    "\317\203"sv, // SIG
    "\317\204"sv, // TAU
    "\317\205"sv, // UPS
    "\317\206"sv, // PHI
    "\317\207"sv, // CHI
    "\317\210"sv, // PSI
    "\317\211"sv, // OME
 };
 // Canonical upper-case abbreviations of the Greek letters (two or three
 // characters each), in the same order as greekAlphabet and
 // greekAlphabetUTF8. GetCanonicalGreekAbbreviation() returns views into
 // this table.
 const std::array<std::string_view, nLetters> canonicalAbbrevs =
 {
    "ALF"sv,
    "BET"sv,
    "GAM"sv,
    "DEL"sv,
    "EPS"sv,
    "ZET"sv,
    "ETA"sv,
    "TET"sv,
    "IOT"sv,
    "KAP"sv,
    "LAM"sv,
    "MU"sv,
    "NU"sv,
    "XI"sv,
    "OMI"sv,
    "PI"sv,
    "RHO"sv,
    "SIG"sv,
    "TAU"sv,
    "UPS"sv,
    "PHI"sv,
    "CHI"sv,
    "PSI"sv,
    "OME"sv,
 };
 // clang-format on
 /**
 * Returns the length in bytes of the first word of str with any trailing
 * ASCII digits removed. The first word ends at the first space, or at the
 * end of the string when it contains no space. The result is 0 when the
 * string is empty, starts with a space, or its first word is all digits.
 */
 std::string_view::size_type
 getFirstWordLength(std::string_view str)
 {
    auto sp = str.find(' ');
    if (sp == std::string_view::npos)
        sp = str.length();
    // Skip digits. The cast is required: str may hold UTF-8 bytes >= 0x80,
    // which are negative when char is signed, and passing a negative value
    // (other than EOF) to std::isdigit is undefined behaviour.
    while (sp > 0 && std::isdigit(static_cast<unsigned char>(str[sp - 1])) != 0)
        sp--;
    return sp;
 }
 /**
 * Maps an ASCII digit to the UTF-8 encoding of the matching superscript
 * digit. Any character other than '0'..'9' yields an empty view.
 */
 std::string_view
 toSuperscript(char c)
 {
    if (c < '0' || c > '9')
        return {};

    // Indexed by digit value; '0'..'9' are guaranteed to be contiguous.
    const std::string_view superscripts[] =
    {
        UTF8_SUPERSCRIPT_0,
        UTF8_SUPERSCRIPT_1,
        UTF8_SUPERSCRIPT_2,
        UTF8_SUPERSCRIPT_3,
        UTF8_SUPERSCRIPT_4,
        UTF8_SUPERSCRIPT_5,
        UTF8_SUPERSCRIPT_6,
        UTF8_SUPERSCRIPT_7,
        UTF8_SUPERSCRIPT_8,
        UTF8_SUPERSCRIPT_9,
    };
    return superscripts[c - '0'];
 }
 } // namespace
 /**
 * Replaces the Greek letter abbreviation at the beginning
 * of a string by the UTF-8 representation of that letter.
 * Also, replaces digits following Greek letters with UTF-8
 * superscripts.
 *
 * The first word of str (up to the first space, trailing digits
 * excluded) must start with an upper-case ASCII letter and match, ignoring
 * case, either a canonical abbreviation or a full letter name. If nothing
 * matches, an unmodified copy of str is returned; an empty input yields an
 * empty string.
 */
 std::string
 ReplaceGreekLetterAbbr(std::string_view str)
 {
    if (str.empty())
        return {};
    if (auto len = getFirstWordLength(str); len > 0 && str[0] >= 'A' && str[0] <= 'Z')
    {
        // Linear search through all letter abbreviations
        for (int i = 0; i < nLetters; i++)
        {
            // Try the canonical abbreviation first, then the full name.
            auto prefix = canonicalAbbrevs[i];
            if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
            {
                prefix = greekAlphabet[i];
                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
                    continue;
            }
            std::string ret(greekAlphabetUTF8[i]);
            // Turn the digits that directly follow the letter into superscripts.
            // The unsigned char cast avoids undefined behaviour in std::isdigit
            // for bytes >= 0x80 on platforms where char is signed.
            for (; str.length() > len && std::isdigit(static_cast<unsigned char>(str[len])) != 0; len++)
                ret.append(toSuperscript(str[len]));
            // Copy the remainder of the input unchanged.
            ret.append(str.substr(len));
            return ret;
        }
    }
    return std::string(str);
 }
 /**
 * Looks up the canonical abbreviation of a Greek letter.
 * The argument may be the Latin name of the letter or its canonical
 * abbreviation (both compared ignoring case), or the two-byte UTF-8
 * encoding of the letter itself (compared exactly). An empty view is
 * returned when the argument matches none of them.
 */
 std::string_view
 GetCanonicalGreekAbbreviation(std::string_view letter)
 {
    // Textual forms: full name first, then the abbreviation.
    for (int idx = 0; idx < nLetters; ++idx)
    {
        const bool isFullName = compareIgnoringCase(letter, greekAlphabet[idx]) == 0;
        if (isFullName || compareIgnoringCase(letter, canonicalAbbrevs[idx]) == 0)
            return canonicalAbbrevs[idx];
    }

    // Only a two-byte argument can be one of the UTF-8 encoded letters.
    if (letter.length() != 2)
        return {};

    for (int idx = 0; idx < nLetters; ++idx)
    {
        if (greekAlphabetUTF8[idx] == letter)
            return canonicalAbbrevs[idx];
    }
    return {};
 }
 /**
 * Replaces the Greek letter or abbreviation at the beginning
 * of a string by the UTF-8 representation of that letter.
 * Also, replaces digits following Greek letters with UTF-8
 * superscripts.
 *
 * The first word of str (up to the first space, trailing digits
 * excluded) may be the UTF-8 encoded letter itself, a canonical
 * abbreviation, or a full letter name (the latter two compared ignoring
 * case). If nothing matches, an unmodified copy of str is returned; an
 * empty input yields an empty string.
 */
 std::string
 ReplaceGreekLetter(std::string_view str)
 {
    if (str.empty()) return {};
    if (auto len = getFirstWordLength(str); len > 0)
    {
        // Only the first word is compared with the UTF-8 letters. Comparing
        // the whole string would match only when the input is a bare letter,
        // so e.g. a letter followed by digits would never be superscripted.
        const std::string_view word = str.substr(0, len);
        // Linear search through all letter abbreviations
        for (int i = 0; i < nLetters; i++)
        {
            if (word != greekAlphabetUTF8[i])
            {
                // Not the letter itself: try the abbreviation, then the name.
                auto prefix = canonicalAbbrevs[i];
                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
                {
                    prefix = greekAlphabet[i];
                    if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
                        continue;
                }
            }
            std::string ret(greekAlphabetUTF8[i]);
            // Turn the digits that directly follow the letter into superscripts.
            // The unsigned char cast avoids undefined behaviour in std::isdigit
            // for bytes >= 0x80 on platforms where char is signed.
            for (; str.length() > len && std::isdigit(static_cast<unsigned char>(str[len])) != 0; len++)
                ret.append(toSuperscript(str[len]));
            // Copy the remainder of the input unchanged.
            ret.append(str.substr(len));
            return ret;
        }
    }
    return std::string(str);
 }
--- a/src/celutil/greek.h
+++ b/src/celutil/greek.h
@ -0,0 +1,19 @@
 // greek.h
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
 //               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
 #pragma once
 #include <string>
 #include <string_view>
 #include <vector>
 std::string      ReplaceGreekLetterAbbr(std::string_view str);
 std::string      ReplaceGreekLetter(std::string_view str);
 std::string_view GetCanonicalGreekAbbreviation(std::string_view letter);
--- a/src/celutil/utf8.cpp
+++ b/src/celutil/utf8.cpp
@ -1,20 +1,21 @@
 // utf8.cpp
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
 //               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
 #include <cctype>
 #include <cstring>
 #include <wchar.h>
 #include <climits>
 #include <fmt/printf.h>
 #include "stringutils.h"
 #include "utf8.h"
 namespace
 {
 // clang-format off
 uint16_t WGL4_Normalization_00[256] = {
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
@ -300,11 +301,45 @@ uint16_t* WGL4NormalizationTables[256] = {
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
 };
 // clang-format on
 //! Return the length in bytes of a UTF-8 sequence, determined from its
 //! first byte. Only the low eight bits of ch are examined once ch is known
 //! to be >= 0x80. Bytes that are not a valid lead byte (continuation bytes,
 //! 0xfe, 0xff) are reported as length 1.
 inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
 {
    // Single-byte (ASCII) character
    if (ch < 0x80)
        return 1;

    // Lead byte bit patterns for multi-byte sequences
    struct LeadPattern
    {
        unsigned int mask;
        unsigned int bits;
        int          size;
    };
    static constexpr LeadPattern patterns[] =
    {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };

    for (const auto& pattern : patterns)
    {
        if ((ch & pattern.mask) == pattern.bits)
            return pattern.size;
    }

    return 1;
 }
 //! Map a character through the WGL4 normalization tables. The tables are
 //! selected by the character's page (its code divided by 256); characters
 //! on page 256 or above, or on a page without a table, are returned
 //! unchanged.
 inline wchar_t UTF8Normalize(wchar_t ch)
 {
    const auto code = static_cast<unsigned int>(ch);
    const unsigned int page = code >> 8;
    if (page < 256)
    {
        if (uint16_t* table = WGL4NormalizationTables[page]; table != nullptr)
            return static_cast<wchar_t>(table[code & 0xff]);
    }
    return ch;
 }
 } // namespace
 //! Decode the UTF-8 characters in string str beginning at position pos.
 //! The decoded character is returned in ch; the return value of the function
 //! is true if a valid UTF-8 sequence was successfully decoded.
-bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
+bool UTF8Decode(std::string_view str, int pos, wchar_t& ch)
 {
    auto c0 = (unsigned int) str[pos];
    int charlen = UTF8EncodedSizeFromFirstByte(c0);
@ -362,69 +397,6 @@ bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
    }
 }
 //! Decode the UTF-8 characters in string str beginning at position pos.
 //! The decoded character is returned in ch; the return value of the function
 //! is true if a valid UTF-8 sequence was successfully decoded.
 bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch)
 {
    auto c0 = (unsigned int) str[pos];
    int charlen = UTF8EncodedSizeFromFirstByte(c0);
    // Bad UTF-8 character that extends past end of string
    if (pos + charlen > length)
        return false;
    // TODO: Should check that the bytes of characters after the first are all
    // of the form 01xxxxxx
    // TODO: Need to reject overlong encoding sequences
    switch (charlen)
    {
    case 1:
        ch = c0;
        return true;
    case 2:
        ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
        return true;
    case 3:
        ch = ((c0 & 0x0f) << 12) |
            (((unsigned int) str[pos + 1] & 0x3f) << 6) |
            ((unsigned int)  str[pos + 2] & 0x3f);
        return true;
    case 4:
        ch = ((c0 & 0x07) << 18) |
            (((unsigned int) str[pos + 1] & 0x3f) << 12) |
            (((unsigned int) str[pos + 2] & 0x3f) << 6)  |
            ((unsigned int)  str[pos + 3] & 0x3f);
        return true;
    case 5:
        ch = ((c0 & 0x03) << 24) |
            (((unsigned int) str[pos + 1] & 0x3f) << 18) |
            (((unsigned int) str[pos + 2] & 0x3f) << 12) |
            (((unsigned int) str[pos + 3] & 0x3f) << 6)  |
            ((unsigned int)  str[pos + 4] & 0x3f);
        return true;
    case 6:
        ch = ((c0 & 0x01) << 30) |
            (((unsigned int) str[pos + 1] & 0x3f) << 24) |
            (((unsigned int) str[pos + 2] & 0x3f) << 18) |
            (((unsigned int) str[pos + 3] & 0x3f) << 12) |
            (((unsigned int) str[pos + 4] & 0x3f) << 6)  |
            ((unsigned int)  str[pos + 5] & 0x3f);
        return true;
    default:
        return false;
    }
 }
 //! Appends the UTF-8 encoded version of the code point ch to the
 //! destination string
 void UTF8Encode(std::uint32_t ch, std::string& dest)
@ -469,9 +441,8 @@ void UTF8Encode(std::uint32_t ch, std::string& dest)
    }
 }
 //! Return the number of characters encoded by a UTF-8 string
-int UTF8Length(const std::string& s)
+int UTF8Length(std::string_view s)
 {
    int len = s.length();
    int count = 0;
@ -485,25 +456,10 @@ int UTF8Length(const std::string& s)
    return count;
 }
 inline wchar_t UTF8Normalize(wchar_t ch)
 {
    auto page = (unsigned int) ch >> 8;
    if (page >= 256)
        return ch;
    uint16_t* normTable = WGL4NormalizationTables[page];
    if (normTable == nullptr)
        return ch;
    return (wchar_t) normTable[(unsigned int) ch & 0xff];
 }
 //! Perform a normalized comparison of two UTF-8 strings.  The normalization
 //! only works for characters in the WGL-4 subset, and no multicharacter
 //! translations are performed.
-int UTF8StringCompare(const std::string& s0, const std::string& s1)
+int UTF8StringCompare(std::string_view s0, std::string_view s1)
 {
    int len0 = s0.length();
    int len1 = s1.length();
@ -542,7 +498,7 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1)
        return 0;
 }
-int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase)
+int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase)
 {
    int len0 = s0.length();
    int len1 = s1.length();
@ -588,432 +544,6 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bo
        return 0;
 }
 #if 0
 //! Currently incomplete, but could be a helpful class for dealing with
 //! UTF-8 streams
 class UTF8StringIterator
 {
 public:
    UTF8StringIterator(const std::string& _str) : str(_str) {};
    UTF8StringIterator(const UTF8StringIterator& iter) = default;
    UTF8StringIterator& operator++();
    UTF8StringIterator& operator++(int);
 private:
    const std::string& str;
    int position{ 0 };
 };
 UTF8StringIterator& UTF8StringIterator::operator++()
 {
    return *this;
 }
 UTF8StringIterator& UTF8StringIterator::operator++(int)
 {
    return *this;
 }
 #endif
 static const char *greekAlphabet[] =
 {
    "Alpha",
    "Beta",
    "Gamma",
    "Delta",
    "Epsilon",
    "Zeta",
    "Eta",
    "Theta",
    "Iota",
    "Kappa",
    "Lambda",
    "Mu",
    "Nu",
    "Xi",
    "Omicron",
    "Pi",
    "Rho",
    "Sigma",
    "Tau",
    "Upsilon",
    "Phi",
    "Chi",
    "Psi",
    "Omega"
 };
 static const char* greekAlphabetUTF8[] =
 {
    "\316\261",
    "\316\262",
    "\316\263",
    "\316\264",
    "\316\265",
    "\316\266",
    "\316\267",
    "\316\270",
    "\316\271",
    "\316\272",
    "\316\273",
    "\316\274",
    "\316\275",
    "\316\276",
    "\316\277",
    "\317\200",
    "\317\201",
    "\317\203",
    "\317\204",
    "\317\205",
    "\317\206",
    "\317\207",
    "\317\210",
    "\317\211",
 };
 static const char* canonicalAbbrevs[] =
 {
    "ALF", "BET", "GAM", "DEL", "EPS", "ZET", "ETA", "TET",
    "IOT", "KAP", "LAM", "MU" , "NU" , "XI" , "OMI", "PI" ,
    "RHO", "SIG", "TAU", "UPS", "PHI", "CHI", "PSI", "OME",
 };
 static std::string noAbbrev;
 // Greek alphabet crud . . . should probably moved to it's own module.
 static size_t greekChunkLength(const std::string&);
 Greek* Greek::m_instance = nullptr;
 Greek* Greek::getInstance()
 {
    if (m_instance == nullptr)
        m_instance = new Greek();
    return m_instance;
 }
 Greek::Greek()
 {
    nLetters = sizeof(greekAlphabet) / sizeof(greekAlphabet[0]);
    names = new std::string[nLetters];
    abbrevs = new std::string[nLetters];
    for (int i = 0; i < nLetters; i++)
    {
        names[i] = std::string(greekAlphabet[i]);
        abbrevs[i] = std::string(canonicalAbbrevs[i]);
    }
 }
 Greek::~Greek()
 {
    delete[] names;
    delete[] abbrevs;
 }
 const std::string& Greek::canonicalAbbreviation(const std::string& letter)
 {
    Greek *instance = Greek::getInstance();
    int i;
    for (i = 0; i < instance->nLetters; i++)
    {
        if (compareIgnoringCase(letter, instance->names[i]) == 0)
            return instance->abbrevs[i];
    }
    for (i = 0; i < instance->nLetters; i++)
    {
        if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0)
            return instance->abbrevs[i];
    }
    if (letter.length() == 2)
    {
        for (i = 0; i < instance->nLetters; i++)
        {
            if (letter[0] == greekAlphabetUTF8[i][0] &&
                letter[1] == greekAlphabetUTF8[i][1])
            {
                return instance->abbrevs[i];
            }
        }
    }
    return noAbbrev;
 }
 static const char* toSuperscript(char c)
 {
    switch (c)
    {
    case '0':
        return UTF8_SUPERSCRIPT_0;
    case '1':
        return UTF8_SUPERSCRIPT_1;
    case '2':
        return UTF8_SUPERSCRIPT_2;
    case '3':
        return UTF8_SUPERSCRIPT_3;
    case '4':
        return UTF8_SUPERSCRIPT_4;
    case '5':
        return UTF8_SUPERSCRIPT_5;
    case '6':
        return UTF8_SUPERSCRIPT_6;
    case '7':
        return UTF8_SUPERSCRIPT_7;
    case '8':
        return UTF8_SUPERSCRIPT_8;
    case '9':
        return UTF8_SUPERSCRIPT_9;
    default:
        return nullptr;
    }
 }
 //! Replaces the Greek letter abbreviation at the beginning
 //! of a string by the UTF-8 representation of that letter.
 //! Also, replace digits following Greek letters with UTF-8
 //! superscripts.
 std::string ReplaceGreekLetterAbbr(const std::string& str)
 {
    Greek *instance = Greek::getInstance();
    size_t len = greekChunkLength(str);
    if (str[0] >= 'A' && str[0] <= 'Z')
    {
        // Linear search through all letter abbreviations
        for (int i = 0; i < instance->nLetters; i++)
        {
            std::string prefix = instance->abbrevs[i];
            if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
            {
                prefix = instance->names[i];
                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
                    continue;
            }
            std::string ret = greekAlphabetUTF8[i];
            auto len = prefix.length();
            for (; str.length() > len && isdigit(str[len]); len++)
                ret += toSuperscript(str[len]);
            ret += str.substr(len);
            return ret;
        }
    }
    return str;
 }
 //! Replaces the Greek letter abbreviation at the beginning
 //! of a string by the UTF-8 representation of that letter.
 //! Also, replace digits following Greek letters with UTF-8
 //! superscripts. Operates on char* instead of strings--less
 //! convenient, but more efficient. Return the number of
 //! characters copied to the destination string, not
 //! including the zero terminator.
 #if 0
 unsigned int
 ReplaceGreekLetterAbbr(char *dst, unsigned int dstSize, const char* src, unsigned int srcLength)
 {
    Greek *instance = Greek::getInstance();
    if (src[0] >= 'A' && src[0] <= 'Z' &&
        src[1] >= 'A' && src[1] <= 'Z')
    {
        // Linear search through all letter abbreviations
        for (unsigned int i = 0; i < (unsigned int) instance->nLetters; i++)
        {
            const char* abbr = canonicalAbbrevs[i];
            unsigned int j = 0;
            while (abbr[j] == src[j] && abbr[j] != '\0' && src[j] != '\0')
                j++;
            // It's a match if we reached the end of the abbreviation string
            if (abbr[j] == '\0')
            {
                unsigned int abbrevLength = j;
                unsigned int srcIndex = j;
                const char *superscript = toSuperscript(src[abbrevLength]);
                const char* utfGreek = greekAlphabetUTF8[i];
                unsigned int utfGreekLength = strlen(utfGreek);
                unsigned int requiredLength = srcLength;
                if (utfGreekLength > abbrevLength)
                    requiredLength += utfGreekLength - abbrevLength;
                if (superscript != nullptr)
                {
                    requiredLength += strlen(superscript) - 1;
                    srcIndex++;
                }
                // If there's not enough room, give up translating and just copy as much as possible
                if (requiredLength + 1 > dstSize)
                    break;
                unsigned int dstIndex = 0;
                j = 0;
                while (utfGreek[j] != 0)
                {
                    dst[dstIndex++] = utfGreek[j];
                    j++;
                }
                if (superscript != nullptr)
                {
                    j = 0;
                    while (superscript[j] != 0)
                    {
                        dst[dstIndex++] = superscript[j];
                        j++;
                    }
                }
                while (src[srcIndex] != 0)
                {
                    dst[dstIndex++] = src[srcIndex++];
                }
                dst[dstIndex] = '\0';
                return dstIndex;
            }
        }
    }
    strncpy(dst, src, dstSize);
    if (dstSize > srcLength)
        return srcLength;
    if (dstSize > 0)
    {
        dst[dstSize - 1] = '\0';
        return dstSize - 1;
    }
    return 0;
 }
 #endif
 static int findGreekNameIndexBySubstr(const std::string &, int = 0, unsigned int = UINT_MAX);
 #if 0
 static std::string firstGreekAbbrCompletion(const std::string &);
 #endif
 bool inline isSubstringIgnoringCase(const std::string &s0, const std::string &s1, size_t n)
 {
    return UTF8StringCompare(s0, s1, n, true) == 0;
 }
 static int findGreekNameIndexBySubstr(const std::string &s, int start, unsigned int n)
 {
    Greek *instance = Greek::getInstance();
    if (s.empty())
        return -1;
    for (int i = start; i < instance->nLetters; i++)
    {
        if (isSubstringIgnoringCase(instance->names[i], s, n))
            return i;
    }
    for (int i = start; i < instance->nLetters; i++)
    {
        if (isSubstringIgnoringCase(instance->abbrevs[i], s, n))
            return i;
    }
    return -1;
 }
 static size_t greekChunkLength(const std::string& str)
 {
    bool npos = false;
    size_t sp = str.find_first_of(' ');
    if (sp == std::string::npos)
    {
        sp = str.length();
        npos = true;
    }
    if (sp != 0 && isdigit(static_cast<unsigned char>(str[sp - 1])))
        while(sp != 0 && isdigit(static_cast<unsigned char>(str[sp - 1]))) sp--;
    else if (npos)
        sp = std::string::npos;
    return sp;
 }
 #if 0
 static std::string firstGreekAbbrCompletion(const std::string &s)
 {
    std::string ret;
    size_t sp = greekChunkLength(s);
    if (sp == std::string::npos)
    {
        int i = findGreekNameIndexBySubstr(s);
        return (i >= 0) ? Greek::getInstance()->abbrevs[i] : s;
    }
    else
    {
        std::string prefix = s.substr(0, sp);
        ret = Greek::canonicalAbbreviation(prefix);
        return ret.empty() ? s : prefix + s.substr(sp);
    }
    return ret;
 }
 #endif
 std::vector<std::string> getGreekCompletion(const std::string &s)
 {
    std::vector<std::string> ret;
    if (s.empty())
        return ret;
    size_t sp = greekChunkLength(s);
    if (sp == std::string::npos)
    {
        sp = UTF8Length(s);
        for(int i = 0; i >= 0;)
        {
            std::string rets;
            i = findGreekNameIndexBySubstr(s, i, sp);
            if (i >= 0)
            {
                rets = Greek::getInstance()->abbrevs[i];
                rets += " ";
                ret.emplace_back(ReplaceGreekLetterAbbr(rets));
                i++;
            }
        }
    }
    else
    {
        std::string prefix = s.substr(0, sp);
        std::string rets = Greek::canonicalAbbreviation(prefix);
        if (!rets.empty())
        {
            rets += s.substr(sp);
            ret.emplace_back(ReplaceGreekLetterAbbr(rets));
        }
    }
    return ret;
 }
 UTF8Status
 UTF8Validator::check(char c)
 {
    return check(static_cast<unsigned char>(c));
 }
 UTF8Status
 UTF8Validator::check(unsigned char c)
 {
--- a/src/celutil/utf8.h
+++ b/src/celutil/utf8.h
@ -1,6 +1,7 @@
 // utf8.h
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
 //               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
@ -11,42 +12,30 @@
 #include <cstdint>
 #include <string>
-#include <vector>
+#include <string_view>
 // UTF-8 byte sequences, written as octal escapes, for non-ASCII characters:
 // degree sign (U+00B0), multiplication sign (U+00D7), superscript digits
 // 0-9 (U+2070, U+00B9, U+00B2, U+00B3, U+2074..U+2079) and the
 // replacement character (U+FFFD).
 #define UTF8_DEGREE_SIGN         "\302\260"
 #define UTF8_MULTIPLICATION_SIGN "\303\227"
 #define UTF8_SUPERSCRIPT_0       "\342\201\260"
 #define UTF8_SUPERSCRIPT_1       "\302\271"
 #define UTF8_SUPERSCRIPT_2       "\302\262"
 #define UTF8_SUPERSCRIPT_3       "\302\263"
 #define UTF8_SUPERSCRIPT_4       "\342\201\264"
 #define UTF8_SUPERSCRIPT_5       "\342\201\265"
 #define UTF8_SUPERSCRIPT_6       "\342\201\266"
 #define UTF8_SUPERSCRIPT_7       "\342\201\267"
 #define UTF8_SUPERSCRIPT_8       "\342\201\270"
 #define UTF8_SUPERSCRIPT_9       "\342\201\271"
 #define UTF8_REPLACEMENT_CHAR    "\357\277\275"
-
+bool UTF8Decode(std::string_view str, int pos, wchar_t &ch);
-bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
+void UTF8Encode(std::uint32_t ch, std::string &dest);
-bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
+int  UTF8StringCompare(std::string_view s0, std::string_view s1);
-void UTF8Encode(std::uint32_t ch, std::string& dest);
+int  UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase = false);
 int UTF8StringCompare(const std::string& s0, const std::string& s1);
 int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false);
 // Binary predicate that reports whether s0 orders before s1 according to
 // the two-argument UTF8StringCompare().
 // NOTE(review): the test is `== -1`, so this relies on UTF8StringCompare
 // returning exactly -1 for "less" - confirm against its implementation.
 class UTF8StringOrderingPredicate
 {
 public:
-    bool operator()(const std::string& s0, const std::string& s1) const
+    bool operator()(std::string_view s0, std::string_view s1) const
    {
        return UTF8StringCompare(s0, s1) == -1;
    }
 };
 int UTF8Length(std::string_view s);
-int UTF8Length(const std::string& s);
+constexpr int
-
+UTF8EncodedSize(wchar_t ch)
 inline int UTF8EncodedSize(wchar_t ch)
 {
    if (ch < 0x80)
        return 1;
@ -66,7 +55,8 @@ inline int UTF8EncodedSize(wchar_t ch)
 #endif
 }
-constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
+constexpr int
 UTF8EncodedSizeChecked(std::uint32_t ch)
 {
    if (ch < 0x80)
        return 1;
@ -84,76 +74,6 @@ constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
 #endif
 }
 // Returns the length in bytes of a UTF-8 sequence, judged from its first
 // byte alone. Values below 0x80 are single-byte; lead-byte bit patterns
 // for 2 to 6 byte sequences are recognised; anything else (for example a
 // continuation byte, 0xFE or 0xFF) is reported as length 1.
 inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
 {
    // Lead-byte classes: (ch & mask) == bits identifies a sequence of
    // `size` bytes.
    struct LeadByteClass
    {
        unsigned int mask;
        unsigned int bits;
        int          size;
    };
    static constexpr LeadByteClass classes[] = {
        { 0xe0, 0xc0, 2 },
        { 0xf0, 0xe0, 3 },
        { 0xf8, 0xf0, 4 },
        { 0xfc, 0xf8, 5 },
        { 0xfe, 0xfc, 6 },
    };
    if (ch < 0x80)
        return 1;
    for (const LeadByteClass &cls : classes)
    {
        if ((ch & cls.mask) == cls.bits)
            return cls.size;
    }
    return 1;
 }
 // Returns a copy of the given string with a Greek-letter abbreviation
 // replaced by the corresponding UTF-8 letter.
 // NOTE(review): the exact matching rules (case sensitivity, handling of a
 // numeric suffix) are defined by the implementation - confirm there.
 std::string ReplaceGreekLetterAbbr(const std::string&);
 #if 0
 // Disabled buffer-based overload (compiled out).
 unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
 #endif
 // Table of Greek letter names and abbreviations. Construction and
 // destruction are private; the shared object is obtained through
 // getInstance().
 class Greek
 {
 public:
    // Letters numbered in alphabet order: Alpha = 1 through Omega = 24.
    enum Letter
    {
        Alpha = 1,
        Beta,
        Gamma,
        Delta,
        Epsilon,
        Zeta,
        Eta,
        Theta,
        Iota,
        Kappa,
        Lambda,
        Mu,
        Nu,
        Xi,
        Omicron,
        Pi,
        Rho,
        Sigma,
        Tau,
        Upsilon,
        Phi,
        Chi,
        Psi,
        Omega,
    };

    // Returns the canonical abbreviation for the given letter text; callers
    // in this file treat an empty result as "no match".
    static const std::string& canonicalAbbreviation(const std::string&);

    // Accessor for the shared instance.
    static Greek* getInstance();

    // Number of letters, and the name / abbreviation tables.
    // NOTE(review): names and abbrevs presumably hold nLetters entries
    // each - confirm in the constructor.
    int nLetters;
    std::string* names;
    std::string* abbrevs;

 private:
    Greek();
    ~Greek();

    static Greek* m_instance;
 };
 // Returns the Greek-letter completion candidates for the given string;
 // the list is empty when the string is empty or nothing matches.
 std::vector<std::string> getGreekCompletion(const std::string &);
 enum class UTF8Status
 {
    Ok,
@ -164,9 +84,6 @@ enum class UTF8Status
 class UTF8Validator
 {
 public:
    UTF8Validator() = default;
    ~UTF8Validator() = default;
    UTF8Status check(char c);
    UTF8Status check(unsigned char c);
@ -185,3 +102,9 @@ private:
    State state{ State::Initial };
 };
 // Convenience overload: reinterprets the char as an unsigned byte and
 // forwards it to check(unsigned char).
 inline UTF8Status
 UTF8Validator::check(char c)
 {
    const auto byte = static_cast<unsigned char>(c);
    return check(byte);
 }
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@ -1,4 +1,5 @@
 test_case(charconv_compat)
 test_case(greek)
 test_case(hash)
 test_case(logger)
 test_case(stellarclass)
--- a/test/unit/greek_test.cpp
+++ b/test/unit/greek_test.cpp
@ -0,0 +1,42 @@
 #include <celutil/greek.h>
 #include <catch.hpp>
 // Unit tests for the Greek-letter helpers.
 // Octal-escaped literals used below:
 //   "\316\276" = U+03BE (Greek small letter xi)
 //   "\316\261" = U+03B1 (Greek small letter alpha)
 //   "\302\271" = U+00B9 (superscript one)
 //   "\302\262" = U+00B2 (superscript two)
 TEST_CASE("Greek", "[Greek]")
 {
    // Expectations: only the upper-case abbreviation is replaced (with a
    // digit suffix turned into superscripts); lower-case input and the full
    // letter name come back unchanged.
    SECTION("ReplaceGreekLetterAbbr")
    {
        REQUIRE(ReplaceGreekLetterAbbr("XI") == "\316\276");
        REQUIRE(ReplaceGreekLetterAbbr("XI12") == "\316\276\302\271\302\262");
        REQUIRE(ReplaceGreekLetterAbbr("XI Foo") == "\316\276 Foo");
        REQUIRE(ReplaceGreekLetterAbbr("XI12 Bar") == "\316\276\302\271\302\262 Bar");
        REQUIRE(ReplaceGreekLetterAbbr("xi") == "xi");
        REQUIRE(ReplaceGreekLetterAbbr("xi12") == "xi12");
        REQUIRE(ReplaceGreekLetterAbbr("xi Foo") == "xi Foo");
        REQUIRE(ReplaceGreekLetterAbbr("xi12 Bar") == "xi12 Bar");
        REQUIRE(ReplaceGreekLetterAbbr("alpha") == "alpha");
    }
    // Expectations: same inputs, but lower-case abbreviations and the full
    // letter name are replaced as well.
    SECTION("ReplaceGreekLetter")
    {
        REQUIRE(ReplaceGreekLetter("XI") == "\316\276");
        REQUIRE(ReplaceGreekLetter("XI12") == "\316\276\302\271\302\262");
        REQUIRE(ReplaceGreekLetter("XI Foo") == "\316\276 Foo");
        REQUIRE(ReplaceGreekLetter("XI12 Bar") == "\316\276\302\271\302\262 Bar");
        REQUIRE(ReplaceGreekLetter("xi") == "\316\276");
        REQUIRE(ReplaceGreekLetter("xi12") == "\316\276\302\271\302\262");
        REQUIRE(ReplaceGreekLetter("xi Foo") == "\316\276 Foo");
        REQUIRE(ReplaceGreekLetter("xi12 Bar") == "\316\276\302\271\302\262 Bar");
        REQUIRE(ReplaceGreekLetter("alpha") == "\316\261");
    }
    // Expectations: a lower-case abbreviation or a full name maps to the
    // upper-case abbreviation.
    SECTION("GetCanonicalGreekAbbreviation")
    {
        REQUIRE(GetCanonicalGreekAbbreviation("xi") == "XI");
        REQUIRE(GetCanonicalGreekAbbreviation("alpha") == "ALF");
    }
 }