Split greek-letters handling from utf8 and refactor a bit

2021-12-24 18:23:08 +02:00 · 2021-12-24 18:23:08 +02:00 · c251dac856
parent eaae852f8f
commit c251dac856
23 changed files with 427 additions and 648 deletions
--- a/src/celengine/asterism.cpp
+++ b/src/celengine/asterism.cpp
@ -8,6 +8,7 @@
 // of the License, or (at your option) any later version.

 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include <celutil/logger.h>
 #include <celutil/tokenizer.h>
 #include "stardb.h"
--- a/src/celengine/console.cpp
+++ b/src/celengine/console.cpp
@ -179,7 +179,7 @@ void Console::print(char* s)
    while (i < length && validChar)
    {
        wchar_t ch = 0;
-        validChar = UTF8Decode(s, i, length, ch);
+        validChar = UTF8Decode(string_view(s, length), i, ch);
        i += UTF8EncodedSize(ch);
        print(ch);
    }
--- a/src/celengine/name.cpp
+++ b/src/celengine/name.cpp
@ -1,5 +1,6 @@
 #include <celutil/logger.h>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include "name.h"

 uint32_t NameDatabase::getNameCount() const
@ -97,41 +98,25 @@ NameDatabase::NumberIndex::const_iterator NameDatabase::getFinalNameIter() const
    return numberIndex.end();
 }

-std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n, bool greek) const
+std::vector<std::string> NameDatabase::getCompletion(const std::string& name, bool i18n) const
 {
-    if (greek)
-    {
-        auto compList = getGreekCompletion(name);
-        compList.push_back(name);
-        return getCompletion(compList, i18n);
-    }
+    std::string name2 = ReplaceGreekLetter(name);

    std::vector<std::string> completion;
-    int name_length = UTF8Length(name);
+    const int name_length = UTF8Length(name2);

-    for (NameIndex::const_iterator iter = nameIndex.begin(); iter != nameIndex.end(); ++iter)
+    for (const auto &[n, _] : nameIndex)
    {
-        if (!UTF8StringCompare(iter->first, name, name_length, true))
-            completion.push_back(iter->first);
+        if (!UTF8StringCompare(n, name2, name_length, true))
+            completion.push_back(n);
    }
    if (i18n)
    {
-        for (NameIndex::const_iterator iter = localizedNameIndex.begin(); iter != localizedNameIndex.end(); ++iter)
+        for (const auto &[n, _] : localizedNameIndex)
        {
-            if (!UTF8StringCompare(iter->first, name, name_length, true))
-                completion.push_back(iter->first);
+            if (!UTF8StringCompare(n, name2, name_length, true))
+                completion.push_back(n);
        }
    }
    return completion;
 }
-
-std::vector<std::string> NameDatabase::getCompletion(const std::vector<std::string> &list, bool i18n) const
-{
-    std::vector<std::string> completion;
-    for (const auto &n : list)
-    {
-        for (const auto &nn : getCompletion(n, i18n, false))
-            completion.emplace_back(nn);
-    }
-    return completion;
-}
--- a/src/celengine/name.h
+++ b/src/celengine/name.h
@ -45,8 +45,7 @@ class NameDatabase
    NumberIndex::const_iterator getFirstNameIter(const AstroCatalog::IndexNumber catalogNumber) const;
    NumberIndex::const_iterator getFinalNameIter() const;

-    std::vector<std::string> getCompletion(const std::string& name, bool i18n, bool greek = true) const;
-    std::vector<std::string> getCompletion(const std::vector<std::string> &list, bool i18n) const;
+    std::vector<std::string> getCompletion(const std::string& name, bool i18n) const;

 protected:
    NameIndex   nameIndex;
--- a/src/celengine/overlay.cpp
+++ b/src/celengine/overlay.cpp
@ -159,7 +159,7 @@ void Overlay::print_impl(const std::string& s)
    while (i < length && validChar)
    {
        wchar_t ch = 0;
-        validChar = UTF8Decode(s.c_str(), i, length, ch);
+        validChar = UTF8Decode(s, i, ch);
        i += UTF8EncodedSize(ch);
        print(ch);
    }
--- a/src/celengine/starname.cpp
+++ b/src/celengine/starname.cpp
@ -10,8 +10,10 @@
 //
 //

+#include <fmt/format.h>
 #include <celengine/constellation.h>
 #include <celengine/starname.h>
+#include <celutil/greek.h>

 using namespace std;

@ -61,26 +63,26 @@ uint32_t StarNameDatabase::findCatalogNumberByName(const string& name, bool i18n
            // We have a valid constellation as the last part
            // of the name.  Next, we see if the first part of
            // the name is a greek letter.
-            const string& letter = Greek::canonicalAbbreviation(string(prefix, 0, len));
+            std::string_view letter = GetCanonicalGreekAbbreviation(std::string_view(prefix).substr(0, len));
            if (!letter.empty())
            {
                // Matched . . . this is a Bayer designation
                if (digit == ' ')
                {
-                    priName  = letter + ' ' + con->getAbbreviation();
+                    priName  = fmt::format("{} {}", letter, con->getAbbreviation());
                    // If 'let con' doesn't match, try using
                    // 'let1 con' instead.
-                    altName  = letter + '1' + ' ' + con->getAbbreviation();
+                    altName  = fmt::format("{}1 {}", letter, con->getAbbreviation());
                }
                else
                {
-                    priName = letter + digit + ' ' + con->getAbbreviation();
+                    priName = fmt::format("{}{} {}", letter, digit, con->getAbbreviation());
                }
            }
            else
            {
                // Something other than a Bayer designation
-                priName = prefix + ' ' + con->getAbbreviation();
+                priName = fmt::format("{} {}", prefix, con->getAbbreviation());
            }

            if (isOrbitingStar)
--- a/src/celengine/universe.cpp
+++ b/src/celengine/universe.cpp
@ -22,7 +22,7 @@
 #include <celmath/mathlib.h>
 #include <celmath/intersect.h>
 #include <celmath/ray.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include <cassert>

 static const double ANGULAR_RES = 3.5e-6;
--- a/src/celestia/celestiacore.cpp
+++ b/src/celestia/celestiacore.cpp
@ -970,7 +970,7 @@ void CelestiaCore::charEntered(const char *c_p, int modifiers)
    if (textEnterMode & KbAutoComplete)
    {
        wchar_t wc = 0; // Null wide character
-        UTF8Decode(c_p, 0, strlen(c_p), wc);
+        UTF8Decode(c_p, 0, wc);
 #ifdef __APPLE__
        if ( wc && (!iscntrl(wc)) )
 #else
--- a/src/celestia/gtk/dialog-star.cpp
+++ b/src/celestia/gtk/dialog-star.cpp
@ -19,7 +19,7 @@
 #include <celengine/starbrowser.h>
 #include <celengine/stardb.h>
 #include <celengine/univcoord.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>

 #include "dialog-star.h"
 #include "actions.h"
--- a/src/celestia/gtk/menu-context.cpp
+++ b/src/celestia/gtk/menu-context.cpp
@ -16,7 +16,7 @@
 #include <celengine/simulation.h>
 #include <celestia/celestiacore.h>
 #include <celestia/helper.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>

 #include "menu-context.h"
 #include "actions.h"
--- a/src/celestia/qt/qtappwin.cpp
+++ b/src/celestia/qt/qtappwin.cpp
@ -41,6 +41,7 @@
 #include <vector>
 #include <string>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include <celutil/tzutil.h>
 #include "qtappwin.h"
 #include "qtglwidget.h"
--- a/src/celestia/qt/qtcelestialbrowser.cpp
+++ b/src/celestia/qt/qtcelestialbrowser.cpp
@ -12,6 +12,7 @@

 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include "qtcelestialbrowser.h"
 #include "qtcolorswatchwidget.h"
 #include "qtinfopanel.h"
--- a/src/celestia/qt/qtdeepskybrowser.cpp
+++ b/src/celestia/qt/qtdeepskybrowser.cpp
@ -12,6 +12,7 @@

 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include "qtdeepskybrowser.h"
 #include "qtcolorswatchwidget.h"
 #include "qtinfopanel.h"
--- a/src/celestia/qt/qtinfopanel.cpp
+++ b/src/celestia/qt/qtinfopanel.cpp
@ -15,7 +15,7 @@
 #include <celengine/astro.h>
 #include <celutil/gettext.h>
 #include <celutil/logger.h>
-#include <celutil/utf8.h>
+#include <celutil/greek.h>
 #include <celengine/universe.h>
 #include <QTextBrowser>
 #include <QIODevice>
--- a/src/celestia/qt/qtselectionpopup.cpp
+++ b/src/celestia/qt/qtselectionpopup.cpp
@ -17,6 +17,7 @@
 #include <celengine/axisarrow.h>
 #include <celengine/planetgrid.h>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include <fmt/printf.h>
 #include "qtselectionpopup.h"
 #include "qtappwin.h"
--- a/src/celestia/qt/qtsolarsystembrowser.cpp
+++ b/src/celestia/qt/qtsolarsystembrowser.cpp
@ -12,6 +12,7 @@

 #include <celestia/celestiacore.h>
 #include <celutil/gettext.h>
+#include <celutil/greek.h>
 #include "qtsolarsystembrowser.h"
 #include "qtinfopanel.h"
 #include "qtcolorswatchwidget.h"
--- a/src/celutil/CMakeLists.txt
+++ b/src/celutil/CMakeLists.txt
@ -13,6 +13,8 @@ set(CELUTIL_SOURCES
  formatnum.h
  fsutils.cpp
  fsutils.h
+  greek.cpp
+  greek.h
  logger.cpp
  logger.h
  reshandle.h
--- a/src/celutil/greek.cpp
+++ b/src/celutil/greek.cpp
@ -0,0 +1,270 @@
+// greek.cpp
+//
+// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
+//               2018-present, Celestia Development Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+
+#include "greek.h"
+
+#include "stringutils.h"
+#include "utf8.h"
+
+#include <algorithm>
+#include <array>
+#include <cctype>
+
+using namespace std::string_view_literals;
+
+namespace
+{
+// Number of entries in each of the letter tables below.
+constexpr int nLetters = 24;
+
+// UTF-8 encodings (as octal escapes) of the superscript digits 0-9.
+// Superscripts 1, 2 and 3 are two-byte sequences; the others are three bytes.
+constexpr std::string_view UTF8_SUPERSCRIPT_0 = "\342\201\260"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_1 = "\302\271"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_2 = "\302\262"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_3 = "\302\263"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_4 = "\342\201\264"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_5 = "\342\201\265"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_6 = "\342\201\266"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_7 = "\342\201\267"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_8 = "\342\201\270"sv;
+constexpr std::string_view UTF8_SUPERSCRIPT_9 = "\342\201\271"sv;
+
+// The three tables below are parallel: entry i of each one describes the
+// same letter, so an index found in one table is used to read the others.
+
+// clang-format off
+// Latin-script names of the letters.
+const std::array<std::string_view, nLetters> greekAlphabet =
+{
+    "Alpha"sv,
+    "Beta"sv,
+    "Gamma"sv,
+    "Delta"sv,
+    "Epsilon"sv,
+    "Zeta"sv,
+    "Eta"sv,
+    "Theta"sv,
+    "Iota"sv,
+    "Kappa"sv,
+    "Lambda"sv,
+    "Mu"sv,
+    "Nu"sv,
+    "Xi"sv,
+    "Omicron"sv,
+    "Pi"sv,
+    "Rho"sv,
+    "Sigma"sv,
+    "Tau"sv,
+    "Upsilon"sv,
+    "Phi"sv,
+    "Chi"sv,
+    "Psi"sv,
+    "Omega"sv
+};
+
+// The letters themselves, UTF-8 encoded as octal escapes; every entry is two
+// bytes long. The sequence \317\202 (final sigma, U+03C2) is deliberately
+// absent, which is why the table steps from \317\201 straight to \317\203.
+const std::array<std::string_view, nLetters> greekAlphabetUTF8 = {
+    "\316\261"sv, // ALF
+    "\316\262"sv, // BET
+    "\316\263"sv, // GAM
+    "\316\264"sv, // DEL
+    "\316\265"sv, // EPS
+    "\316\266"sv, // ZET
+    "\316\267"sv, // ETA
+    "\316\270"sv, // TET
+    "\316\271"sv, // IOT
+    "\316\272"sv, // KAP
+    "\316\273"sv, // LAM
+    "\316\274"sv, // MU
+    "\316\275"sv, // NU
+    "\316\276"sv, // XI
+    "\316\277"sv, // OMI
+    "\317\200"sv, // PI
+    "\317\201"sv, // RHO
+    "\317\203"sv, // SIG
+    "\317\204"sv, // TAU
+    "\317\205"sv, // UPS
+    "\317\206"sv, // PHI
+    "\317\207"sv, // CHI
+    "\317\210"sv, // PSI
+    "\317\211"sv, // OME
+};
+
+// Upper-case two- or three-letter abbreviations; these are the values
+// returned by GetCanonicalGreekAbbreviation().
+const std::array<std::string_view, nLetters> canonicalAbbrevs =
+{
+    "ALF"sv,
+    "BET"sv,
+    "GAM"sv,
+    "DEL"sv,
+    "EPS"sv,
+    "ZET"sv,
+    "ETA"sv,
+    "TET"sv,
+    "IOT"sv,
+    "KAP"sv,
+    "LAM"sv,
+    "MU"sv,
+    "NU"sv,
+    "XI"sv,
+    "OMI"sv,
+    "PI"sv,
+    "RHO"sv,
+    "SIG"sv,
+    "TAU"sv,
+    "UPS"sv,
+    "PHI"sv,
+    "CHI"sv,
+    "PSI"sv,
+    "OME"sv,
+};
+// clang-format on
+
+/**
+ * Returns the length in bytes of the leading word of str: everything up
+ * to the first space (or the whole string when it contains no space),
+ * minus any run of ASCII digits at the end of that word.
+ */
+std::string_view::size_type
+getFirstWordLength(std::string_view str)
+{
+    auto sp = str.find(' ');
+    if (sp == std::string_view::npos)
+        sp = str.length();
+
+    // Skip trailing digits. The cast to unsigned char is required: the input
+    // is UTF-8, so bytes >= 0x80 are negative where char is signed, and
+    // passing a negative value to std::isdigit is undefined behavior.
+    while (sp > 0 && std::isdigit(static_cast<unsigned char>(str[sp - 1])) != 0)
+        sp--;
+
+    return sp;
+}
+
+/**
+ * Maps an ASCII digit to the UTF-8 encoding of the matching superscript
+ * digit. Any other character yields an empty view.
+ */
+std::string_view
+toSuperscript(char c)
+{
+    // Indexed by digit value; '0'..'9' are guaranteed to be contiguous.
+    static constexpr std::array<std::string_view, 10> superscripts =
+    {
+        UTF8_SUPERSCRIPT_0,
+        UTF8_SUPERSCRIPT_1,
+        UTF8_SUPERSCRIPT_2,
+        UTF8_SUPERSCRIPT_3,
+        UTF8_SUPERSCRIPT_4,
+        UTF8_SUPERSCRIPT_5,
+        UTF8_SUPERSCRIPT_6,
+        UTF8_SUPERSCRIPT_7,
+        UTF8_SUPERSCRIPT_8,
+        UTF8_SUPERSCRIPT_9,
+    };
+
+    if (c < '0' || c > '9')
+        return {};
+
+    return superscripts[static_cast<std::size_t>(c - '0')];
+}
+
+} // namespace
+
+/**
+ * Replaces the Greek letter abbreviation at the beginning
+ * of a string by the UTF-8 representation of that letter.
+ * Also, replaces digits following Greek letters with UTF-8
+ * superscripts.
+ *
+ * Replacement is attempted only when the string starts with an upper-case
+ * ASCII letter. The leading word is then matched, ignoring case, against the
+ * canonical abbreviation and the full name of each letter. When nothing
+ * matches, a copy of the input is returned unchanged.
+ */
+std::string
+ReplaceGreekLetterAbbr(std::string_view str)
+{
+    if (str.empty())
+        return {};
+
+    if (auto len = getFirstWordLength(str); len > 0 && str[0] >= 'A' && str[0] <= 'Z')
+    {
+        // Linear search through all letter abbreviations
+        for (int i = 0; i < nLetters; i++)
+        {
+            auto prefix = canonicalAbbrevs[i];
+            if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
+            {
+                prefix = greekAlphabet[i];
+                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
+                    continue;
+            }
+
+            std::string ret(greekAlphabetUTF8[i]);
+            // Convert the digits that directly follow the letter. The cast
+            // keeps std::isdigit well-defined for non-ASCII (negative) bytes.
+            for (; str.length() > len && std::isdigit(static_cast<unsigned char>(str[len])); len++)
+                ret.append(toSuperscript(str[len]));
+            // Everything after the digits is copied through untouched.
+            ret.append(str.substr(len));
+
+            return ret;
+        }
+    }
+
+    return std::string(str);
+}
+
+/**
+ * Looks up the canonical abbreviation of a Greek letter.
+ * The argument may be the Latin name of the letter, its canonical
+ * abbreviation (both compared ignoring case), or the letter itself
+ * encoded in UTF-8. An empty view is returned when nothing matches.
+ */
+std::string_view
+GetCanonicalGreekAbbreviation(std::string_view letter)
+{
+    // Names and abbreviations are checked together, letter by letter.
+    for (std::size_t idx = 0; idx < canonicalAbbrevs.size(); ++idx)
+    {
+        const bool isName = compareIgnoringCase(letter, greekAlphabet[idx]) == 0;
+        if (isName || compareIgnoringCase(letter, canonicalAbbrevs[idx]) == 0)
+            return canonicalAbbrevs[idx];
+    }
+
+    // Only a two-byte argument can be one of the UTF-8 encoded letters.
+    if (letter.length() != 2)
+        return {};
+
+    const auto match = std::find(greekAlphabetUTF8.begin(), greekAlphabetUTF8.end(), letter);
+    if (match == greekAlphabetUTF8.end())
+        return {};
+
+    return canonicalAbbrevs[static_cast<std::size_t>(match - greekAlphabetUTF8.begin())];
+}
+
+/**
+ * Replaces the Greek letter or abbreviation at the beginning
+ * of a string by the UTF-8 representation of that letter.
+ * Also, replaces digits following Greek letters with UTF-8
+ * superscripts.
+ *
+ * The leading word may be the UTF-8 letter itself, or (ignoring case) the
+ * canonical abbreviation or the full name of the letter. When nothing
+ * matches, a copy of the input is returned unchanged.
+ */
+std::string
+ReplaceGreekLetter(std::string_view str)
+{
+    if (str.empty()) return {};
+
+    if (auto len = getFirstWordLength(str); len > 0)
+    {
+        // Compare only the leading word with the UTF-8 letters. Comparing
+        // the whole string would match nothing but a bare letter, so the
+        // digits in e.g. "<alpha>2 Cen" would never become superscripts.
+        const auto firstWord = str.substr(0, len);
+
+        // Linear search through all letter abbreviations
+        for (int i = 0; i < nLetters; i++)
+        {
+            if (firstWord != greekAlphabetUTF8[i])
+            {
+                auto prefix = canonicalAbbrevs[i];
+                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
+                {
+                    prefix = greekAlphabet[i];
+                    if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
+                        continue;
+                }
+            }
+
+            std::string ret(greekAlphabetUTF8[i]);
+            // Convert the digits that directly follow the letter. The cast
+            // keeps std::isdigit well-defined for non-ASCII (negative) bytes.
+            for (; str.length() > len && std::isdigit(static_cast<unsigned char>(str[len])); len++)
+                ret.append(toSuperscript(str[len]));
+            // Everything after the digits is copied through untouched.
+            ret.append(str.substr(len));
+
+            return ret;
+        }
+    }
+
+    return std::string(str);
+}
--- a/src/celutil/greek.h
+++ b/src/celutil/greek.h
@ -0,0 +1,19 @@
+// greek.h
+//
+// Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
+//               2018-present, Celestia Development Team
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+
+#pragma once
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+std::string      ReplaceGreekLetterAbbr(std::string_view str);
+std::string      ReplaceGreekLetter(std::string_view str);
+std::string_view GetCanonicalGreekAbbreviation(std::string_view letter);
--- a/src/celutil/utf8.cpp
+++ b/src/celutil/utf8.cpp
@ -1,20 +1,21 @@
 // utf8.cpp
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
+//               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.

-#include <cctype>
-#include <cstring>
 #include <wchar.h>
-#include <climits>
-#include <fmt/printf.h>
-#include "stringutils.h"
 #include "utf8.h"

+namespace
+{
+
+// clang-format off
+
 uint16_t WGL4_Normalization_00[256] = {
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
@ -300,11 +301,45 @@ uint16_t* WGL4NormalizationTables[256] = {
    nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
 };

+// clang-format on
+
+//! Returns the length in bytes of a UTF-8 sequence, judged from its first
+//! byte alone. Values below 0x80 and values that match no multi-byte lead
+//! pattern are both reported as length 1.
+inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
+{
+    if (ch < 0x80)
+        return 1;
+
+    // Lead-byte patterns for 2- to 6-byte sequences, tested in this order.
+    struct LeadPattern
+    {
+        unsigned int mask;
+        unsigned int bits;
+        int          size;
+    };
+    constexpr LeadPattern patterns[] =
+    {
+        { 0xe0, 0xc0, 2 },
+        { 0xf0, 0xe0, 3 },
+        { 0xf8, 0xf0, 4 },
+        { 0xfc, 0xf8, 5 },
+        { 0xfe, 0xfc, 6 },
+    };
+
+    for (const auto& pattern : patterns)
+    {
+        if ((ch & pattern.mask) == pattern.bits)
+            return pattern.size;
+    }
+
+    return 1;
+}
+
+//! Maps a character through the WGL4 normalization tables. The table is
+//! chosen by the character's page (code >> 8) and indexed by its low byte.
+//! A character whose page is 256 or above, or whose page has no table, is
+//! returned unchanged.
+inline wchar_t UTF8Normalize(wchar_t ch)
+{
+    const auto code = static_cast<unsigned int>(ch);
+    const unsigned int page = code >> 8;
+
+    if (page < 256)
+    {
+        if (const uint16_t* table = WGL4NormalizationTables[page]; table != nullptr)
+            return static_cast<wchar_t>(table[code & 0xff]);
+    }
+
+    return ch;
+}
+
+} // namespace

 //! Decode the UTF-8 characters in string str beginning at position pos.
 //! The decoded character is returned in ch; the return value of the function
 //! is true if a valid UTF-8 sequence was successfully decoded.
-bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
+bool UTF8Decode(std::string_view str, int pos, wchar_t& ch)
 {
    auto c0 = (unsigned int) str[pos];
    int charlen = UTF8EncodedSizeFromFirstByte(c0);
@ -362,69 +397,6 @@ bool UTF8Decode(const std::string& str, int pos, wchar_t& ch)
    }
 }

-
-//! Decode the UTF-8 characters in string str beginning at position pos.
-//! The decoded character is returned in ch; the return value of the function
-//! is true if a valid UTF-8 sequence was successfully decoded.
-bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch)
-{
-    auto c0 = (unsigned int) str[pos];
-    int charlen = UTF8EncodedSizeFromFirstByte(c0);
-
-    // Bad UTF-8 character that extends past end of string
-    if (pos + charlen > length)
-        return false;
-
-    // TODO: Should check that the bytes of characters after the first are all
-    // of the form 01xxxxxx
-    // TODO: Need to reject overlong encoding sequences
-
-    switch (charlen)
-    {
-    case 1:
-        ch = c0;
-        return true;
-
-    case 2:
-        ch = ((c0 & 0x1f) << 6) | ((unsigned int) str[pos + 1] & 0x3f);
-        return true;
-
-    case 3:
-        ch = ((c0 & 0x0f) << 12) |
-            (((unsigned int) str[pos + 1] & 0x3f) << 6) |
-            ((unsigned int)  str[pos + 2] & 0x3f);
-        return true;
-
-    case 4:
-        ch = ((c0 & 0x07) << 18) |
-            (((unsigned int) str[pos + 1] & 0x3f) << 12) |
-            (((unsigned int) str[pos + 2] & 0x3f) << 6)  |
-            ((unsigned int)  str[pos + 3] & 0x3f);
-        return true;
-
-    case 5:
-        ch = ((c0 & 0x03) << 24) |
-            (((unsigned int) str[pos + 1] & 0x3f) << 18) |
-            (((unsigned int) str[pos + 2] & 0x3f) << 12) |
-            (((unsigned int) str[pos + 3] & 0x3f) << 6)  |
-            ((unsigned int)  str[pos + 4] & 0x3f);
-        return true;
-
-    case 6:
-        ch = ((c0 & 0x01) << 30) |
-            (((unsigned int) str[pos + 1] & 0x3f) << 24) |
-            (((unsigned int) str[pos + 2] & 0x3f) << 18) |
-            (((unsigned int) str[pos + 3] & 0x3f) << 12) |
-            (((unsigned int) str[pos + 4] & 0x3f) << 6)  |
-            ((unsigned int)  str[pos + 5] & 0x3f);
-        return true;
-
-    default:
-        return false;
-    }
-}
-
-
 //! Appends the UTF-8 encoded version of the code point ch to the
 //! destination string
 void UTF8Encode(std::uint32_t ch, std::string& dest)
@ -469,9 +441,8 @@ void UTF8Encode(std::uint32_t ch, std::string& dest)
    }
 }

-
 //! Return the number of characters encoded by a UTF-8 string
-int UTF8Length(const std::string& s)
+int UTF8Length(std::string_view s)
 {
    int len = s.length();
    int count = 0;
@ -485,25 +456,10 @@ int UTF8Length(const std::string& s)
    return count;
 }

-
-inline wchar_t UTF8Normalize(wchar_t ch)
-{
-    auto page = (unsigned int) ch >> 8;
-    if (page >= 256)
-        return ch;
-
-    uint16_t* normTable = WGL4NormalizationTables[page];
-    if (normTable == nullptr)
-        return ch;
-
-    return (wchar_t) normTable[(unsigned int) ch & 0xff];
-}
-
-
 //! Perform a normalized comparison of two UTF-8 strings.  The normalization
 //! only works for characters in the WGL-4 subset, and no multicharacter
 //! translations are performed.
-int UTF8StringCompare(const std::string& s0, const std::string& s1)
+int UTF8StringCompare(std::string_view s0, std::string_view s1)
 {
    int len0 = s0.length();
    int len1 = s1.length();
@ -542,7 +498,7 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1)
        return 0;
 }

-int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase)
+int UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase)
 {
    int len0 = s0.length();
    int len1 = s1.length();
@ -588,432 +544,6 @@ int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bo
        return 0;
 }

-
-#if 0
-//! Currently incomplete, but could be a helpful class for dealing with
-//! UTF-8 streams
-class UTF8StringIterator
-{
-public:
-    UTF8StringIterator(const std::string& _str) : str(_str) {};
-    UTF8StringIterator(const UTF8StringIterator& iter) = default;
-
-    UTF8StringIterator& operator++();
-    UTF8StringIterator& operator++(int);
-
-private:
-    const std::string& str;
-    int position{ 0 };
-};
-
-
-UTF8StringIterator& UTF8StringIterator::operator++()
-{
-    return *this;
-}
-
-
-UTF8StringIterator& UTF8StringIterator::operator++(int)
-{
-    return *this;
-}
-#endif
-
-
-static const char *greekAlphabet[] =
-{
-    "Alpha",
-    "Beta",
-    "Gamma",
-    "Delta",
-    "Epsilon",
-    "Zeta",
-    "Eta",
-    "Theta",
-    "Iota",
-    "Kappa",
-    "Lambda",
-    "Mu",
-    "Nu",
-    "Xi",
-    "Omicron",
-    "Pi",
-    "Rho",
-    "Sigma",
-    "Tau",
-    "Upsilon",
-    "Phi",
-    "Chi",
-    "Psi",
-    "Omega"
-};
-
-static const char* greekAlphabetUTF8[] =
-{
-    "\316\261",
-    "\316\262",
-    "\316\263",
-    "\316\264",
-    "\316\265",
-    "\316\266",
-    "\316\267",
-    "\316\270",
-    "\316\271",
-    "\316\272",
-    "\316\273",
-    "\316\274",
-    "\316\275",
-    "\316\276",
-    "\316\277",
-    "\317\200",
-    "\317\201",
-    "\317\203",
-    "\317\204",
-    "\317\205",
-    "\317\206",
-    "\317\207",
-    "\317\210",
-    "\317\211",
-};
-
-static const char* canonicalAbbrevs[] =
-{
-    "ALF", "BET", "GAM", "DEL", "EPS", "ZET", "ETA", "TET",
-    "IOT", "KAP", "LAM", "MU" , "NU" , "XI" , "OMI", "PI" ,
-    "RHO", "SIG", "TAU", "UPS", "PHI", "CHI", "PSI", "OME",
-};
-
-static std::string noAbbrev;
-
-// Greek alphabet crud . . . should probably moved to it's own module.
-
-static size_t greekChunkLength(const std::string&);
-
-Greek* Greek::m_instance = nullptr;
-
-Greek* Greek::getInstance()
-{
-    if (m_instance == nullptr)
-        m_instance = new Greek();
-    return m_instance;
-}
-
-Greek::Greek()
-{
-    nLetters = sizeof(greekAlphabet) / sizeof(greekAlphabet[0]);
-    names = new std::string[nLetters];
-    abbrevs = new std::string[nLetters];
-
-    for (int i = 0; i < nLetters; i++)
-    {
-        names[i] = std::string(greekAlphabet[i]);
-        abbrevs[i] = std::string(canonicalAbbrevs[i]);
-    }
-}
-
-Greek::~Greek()
-{
-    delete[] names;
-    delete[] abbrevs;
-}
-
-const std::string& Greek::canonicalAbbreviation(const std::string& letter)
-{
-    Greek *instance = Greek::getInstance();
-    int i;
-    for (i = 0; i < instance->nLetters; i++)
-    {
-        if (compareIgnoringCase(letter, instance->names[i]) == 0)
-            return instance->abbrevs[i];
-    }
-
-    for (i = 0; i < instance->nLetters; i++)
-    {
-        if (compareIgnoringCase(letter, instance->abbrevs[i]) == 0)
-            return instance->abbrevs[i];
-    }
-
-    if (letter.length() == 2)
-    {
-        for (i = 0; i < instance->nLetters; i++)
-        {
-            if (letter[0] == greekAlphabetUTF8[i][0] &&
-                letter[1] == greekAlphabetUTF8[i][1])
-            {
-                return instance->abbrevs[i];
-            }
-        }
-    }
-
-    return noAbbrev;
-}
-
-static const char* toSuperscript(char c)
-{
-    switch (c)
-    {
-    case '0':
-        return UTF8_SUPERSCRIPT_0;
-    case '1':
-        return UTF8_SUPERSCRIPT_1;
-    case '2':
-        return UTF8_SUPERSCRIPT_2;
-    case '3':
-        return UTF8_SUPERSCRIPT_3;
-    case '4':
-        return UTF8_SUPERSCRIPT_4;
-    case '5':
-        return UTF8_SUPERSCRIPT_5;
-    case '6':
-        return UTF8_SUPERSCRIPT_6;
-    case '7':
-        return UTF8_SUPERSCRIPT_7;
-    case '8':
-        return UTF8_SUPERSCRIPT_8;
-    case '9':
-        return UTF8_SUPERSCRIPT_9;
-    default:
-        return nullptr;
-    }
-}
-
-//! Replaces the Greek letter abbreviation at the beginning
-//! of a string by the UTF-8 representation of that letter.
-//! Also, replace digits following Greek letters with UTF-8
-//! superscripts.
-std::string ReplaceGreekLetterAbbr(const std::string& str)
-{
-    Greek *instance = Greek::getInstance();
-    size_t len = greekChunkLength(str);
-
-    if (str[0] >= 'A' && str[0] <= 'Z')
-    {
-        // Linear search through all letter abbreviations
-        for (int i = 0; i < instance->nLetters; i++)
-        {
-            std::string prefix = instance->abbrevs[i];
-            if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
-            {
-                prefix = instance->names[i];
-                if (len != prefix.length() || UTF8StringCompare(str, prefix, len, true) != 0)
-                    continue;
-            }
-
-            std::string ret = greekAlphabetUTF8[i];
-            auto len = prefix.length();
-            for (; str.length() > len && isdigit(str[len]); len++)
-                ret += toSuperscript(str[len]);
-            ret += str.substr(len);
-            return ret;
-        }
-    }
-
-    return str;
-}
-
-//! Replaces the Greek letter abbreviation at the beginning
-//! of a string by the UTF-8 representation of that letter.
-//! Also, replace digits following Greek letters with UTF-8
-//! superscripts. Operates on char* instead of strings--less
-//! convenient, but more efficient. Return the number of
-//! characters copied to the destination string, not
-//! including the zero terminator.
-#if 0
-unsigned int
-ReplaceGreekLetterAbbr(char *dst, unsigned int dstSize, const char* src, unsigned int srcLength)
-{
-    Greek *instance = Greek::getInstance();
-    if (src[0] >= 'A' && src[0] <= 'Z' &&
-        src[1] >= 'A' && src[1] <= 'Z')
-    {
-        // Linear search through all letter abbreviations
-        for (unsigned int i = 0; i < (unsigned int) instance->nLetters; i++)
-        {
-            const char* abbr = canonicalAbbrevs[i];
-            unsigned int j = 0;
-            while (abbr[j] == src[j] && abbr[j] != '\0' && src[j] != '\0')
-                j++;
-
-            // It's a match if we reached the end of the abbreviation string
-            if (abbr[j] == '\0')
-            {
-                unsigned int abbrevLength = j;
-                unsigned int srcIndex = j;
-                const char *superscript = toSuperscript(src[abbrevLength]);
-
-                const char* utfGreek = greekAlphabetUTF8[i];
-                unsigned int utfGreekLength = strlen(utfGreek);
-
-                unsigned int requiredLength = srcLength;
-                if (utfGreekLength > abbrevLength)
-                    requiredLength += utfGreekLength - abbrevLength;
-                if (superscript != nullptr)
-                {
-                    requiredLength += strlen(superscript) - 1;
-                    srcIndex++;
-                }
-
-                // If there's not enough room, give up translating and just copy as much as possible
-                if (requiredLength + 1 > dstSize)
-                    break;
-
-                unsigned int dstIndex = 0;
-                j = 0;
-                while (utfGreek[j] != 0)
-                {
-                    dst[dstIndex++] = utfGreek[j];
-                    j++;
-                }
-
-                if (superscript != nullptr)
-                {
-                    j = 0;
-                    while (superscript[j] != 0)
-                    {
-                        dst[dstIndex++] = superscript[j];
-                        j++;
-                    }
-                }
-
-                while (src[srcIndex] != 0)
-                {
-                    dst[dstIndex++] = src[srcIndex++];
-                }
-                dst[dstIndex] = '\0';
-
-                return dstIndex;
-            }
-        }
-    }
-
-    strncpy(dst, src, dstSize);
-    if (dstSize > srcLength)
-        return srcLength;
-
-
-    if (dstSize > 0)
-    {
-        dst[dstSize - 1] = '\0';
-        return dstSize - 1;
-    }
-
-    return 0;
-}
-#endif
-
-static int findGreekNameIndexBySubstr(const std::string &, int = 0, unsigned int = UINT_MAX);
-#if 0
-static std::string firstGreekAbbrCompletion(const std::string &);
-#endif
-
-bool inline isSubstringIgnoringCase(const std::string &s0, const std::string &s1, size_t n)
-{
-    return UTF8StringCompare(s0, s1, n, true) == 0;
-}
-
-static int findGreekNameIndexBySubstr(const std::string &s, int start, unsigned int n)
-{
-    Greek *instance = Greek::getInstance();
-
-    if (s.empty())
-        return -1;
-
-    for (int i = start; i < instance->nLetters; i++)
-    {
-        if (isSubstringIgnoringCase(instance->names[i], s, n))
-            return i;
-    }
-
-    for (int i = start; i < instance->nLetters; i++)
-    {
-        if (isSubstringIgnoringCase(instance->abbrevs[i], s, n))
-            return i;
-    }
-
-    return -1;
-}
-
-static size_t greekChunkLength(const std::string& str)
-{
-    bool npos = false;
-    size_t sp = str.find_first_of(' ');
-    if (sp == std::string::npos)
-    {
-        sp = str.length();
-        npos = true;
-    }
-
-    if (sp != 0 && isdigit(static_cast<unsigned char>(str[sp - 1])))
-        while(sp != 0 && isdigit(static_cast<unsigned char>(str[sp - 1]))) sp--;
-    else if (npos)
-        sp = std::string::npos;
-    return sp;
-}
-
-#if 0
-static std::string firstGreekAbbrCompletion(const std::string &s)
-{
-    std::string ret;
-    size_t sp = greekChunkLength(s);
-    if (sp == std::string::npos)
-    {
-        int i = findGreekNameIndexBySubstr(s);
-        return (i >= 0) ? Greek::getInstance()->abbrevs[i] : s;
-    }
-    else
-    {
-        std::string prefix = s.substr(0, sp);
-        ret = Greek::canonicalAbbreviation(prefix);
-        return ret.empty() ? s : prefix + s.substr(sp);
-    }
-
-    return ret;
-}
-#endif
-
-std::vector<std::string> getGreekCompletion(const std::string &s)
-{
-    std::vector<std::string> ret;
-    if (s.empty())
-        return ret;
-
-    size_t sp = greekChunkLength(s);
-    if (sp == std::string::npos)
-    {
-        sp = UTF8Length(s);
-        for(int i = 0; i >= 0;)
-        {
-            std::string rets;
-            i = findGreekNameIndexBySubstr(s, i, sp);
-            if (i >= 0)
-            {
-                rets = Greek::getInstance()->abbrevs[i];
-                rets += " ";
-                ret.emplace_back(ReplaceGreekLetterAbbr(rets));
-                i++;
-            }
-        }
-    }
-    else
-    {
-        std::string prefix = s.substr(0, sp);
-        std::string rets = Greek::canonicalAbbreviation(prefix);
-        if (!rets.empty())
-        {
-            rets += s.substr(sp);
-            ret.emplace_back(ReplaceGreekLetterAbbr(rets));
-        }
-    }
-
-    return ret;
-}
-
-UTF8Status
-UTF8Validator::check(char c)
-{
-    return check(static_cast<unsigned char>(c));
-}
-
 UTF8Status
 UTF8Validator::check(unsigned char c)
 {
--- a/src/celutil/utf8.h
+++ b/src/celutil/utf8.h
@ -1,6 +1,7 @@
 // utf8.h
 //
 // Copyright (C) 2004, Chris Laurel <claurel@shatters.net>
+//               2018-present, Celestia Development Team
 //
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
@ -11,42 +12,30 @@

 #include <cstdint>
 #include <string>
-#include <vector>
+#include <string_view>

 #define UTF8_DEGREE_SIGN         "\302\260"
 #define UTF8_MULTIPLICATION_SIGN "\303\227"
-#define UTF8_SUPERSCRIPT_0       "\342\201\260"
-#define UTF8_SUPERSCRIPT_1       "\302\271"
-#define UTF8_SUPERSCRIPT_2       "\302\262"
-#define UTF8_SUPERSCRIPT_3       "\302\263"
-#define UTF8_SUPERSCRIPT_4       "\342\201\264"
-#define UTF8_SUPERSCRIPT_5       "\342\201\265"
-#define UTF8_SUPERSCRIPT_6       "\342\201\266"
-#define UTF8_SUPERSCRIPT_7       "\342\201\267"
-#define UTF8_SUPERSCRIPT_8       "\342\201\270"
-#define UTF8_SUPERSCRIPT_9       "\342\201\271"
 #define UTF8_REPLACEMENT_CHAR    "\357\277\275"

-
-bool UTF8Decode(const std::string& str, int pos, wchar_t& ch);
-bool UTF8Decode(const char* str, int pos, int length, wchar_t& ch);
-void UTF8Encode(std::uint32_t ch, std::string& dest);
-int UTF8StringCompare(const std::string& s0, const std::string& s1);
-int UTF8StringCompare(const std::string& s0, const std::string& s1, size_t n, bool ignoreCase = false);
+bool UTF8Decode(std::string_view str, int pos, wchar_t &ch);
+void UTF8Encode(std::uint32_t ch, std::string &dest);
+int  UTF8StringCompare(std::string_view s0, std::string_view s1);
+int  UTF8StringCompare(std::string_view s0, std::string_view s1, size_t n, bool ignoreCase = false);

 class UTF8StringOrderingPredicate
 {
 public:
-    bool operator()(const std::string& s0, const std::string& s1) const
+    bool operator()(std::string_view s0, std::string_view s1) const // true only when UTF8StringCompare(s0, s1) yields exactly -1 (presumably "s0 sorts before s1"); NOTE(review): confirm the comparator never returns any other negative value
    {
        return UTF8StringCompare(s0, s1) == -1;
    }
 };

+int UTF8Length(std::string_view s);

-int UTF8Length(const std::string& s);
-
-inline int UTF8EncodedSize(wchar_t ch)
+constexpr int
+UTF8EncodedSize(wchar_t ch)
 {
    if (ch < 0x80)
        return 1;
@ -66,7 +55,8 @@ inline int UTF8EncodedSize(wchar_t ch)
 #endif
 }

-constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
+constexpr int
+UTF8EncodedSizeChecked(std::uint32_t ch)
 {
    if (ch < 0x80)
        return 1;
@ -84,76 +74,6 @@ constexpr inline int UTF8EncodedSizeChecked(std::uint32_t ch)
 #endif
 }

-inline int UTF8EncodedSizeFromFirstByte(unsigned int ch)
-{
-    if (ch < 0x80)
-        return 1;
-    if ((ch & 0xe0) == 0xc0)
-        return 2;
-    if ((ch & 0xf0) == 0xe0)
-        return 3;
-    if ((ch & 0xf8) == 0xf0)
-        return 4;
-    if ((ch & 0xfc) == 0xf8)
-        return 5;
-    if ((ch & 0xfe) == 0xfc)
-        return 6;
-    else
-        return 1;
-}
-
-std::string ReplaceGreekLetterAbbr(const std::string&);
-#if 0
-unsigned int ReplaceGreekLetterAbbr(char* dst, unsigned int dstSize, const char* src, unsigned int srcLength);
-#endif
-
-class Greek
-{
- private:
-    Greek();
-    ~Greek();
-
- public:
-    enum Letter
-    {
-        Alpha     =  1,
-        Beta      =  2,
-        Gamma     =  3,
-        Delta     =  4,
-        Epsilon   =  5,
-        Zeta      =  6,
-        Eta       =  7,
-        Theta     =  8,
-        Iota      =  9,
-        Kappa     = 10,
-        Lambda    = 11,
-        Mu        = 12,
-        Nu        = 13,
-        Xi        = 14,
-        Omicron   = 15,
-        Pi        = 16,
-        Rho       = 17,
-        Sigma     = 18,
-        Tau       = 19,
-        Upsilon   = 20,
-        Phi       = 21,
-        Chi       = 22,
-        Psi       = 23,
-        Omega     = 24,
-    };
-
-    static const std::string& canonicalAbbreviation(const std::string&);
- private:
-    static Greek* m_instance;
- public:
-    static Greek* getInstance();
-    int nLetters;
-    std::string* names;
-    std::string* abbrevs;
-};
-
-std::vector<std::string> getGreekCompletion(const std::string &);
-
 enum class UTF8Status
 {
    Ok,
@ -164,9 +84,6 @@ enum class UTF8Status
 class UTF8Validator
 {
 public:
-    UTF8Validator() = default;
-    ~UTF8Validator() = default;
-
    UTF8Status check(char c);
    UTF8Status check(unsigned char c);

@ -185,3 +102,9 @@ private:

    State state{ State::Initial };
 };
+
+inline UTF8Status
+UTF8Validator::check(char c) // plain-char convenience overload, defined inline in the header
+{
+    return check(static_cast<unsigned char>(c)); // the cast makes the call resolve to check(unsigned char) rather than back to this overload
+}
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@ -1,4 +1,5 @@
 test_case(charconv_compat)
+test_case(greek)
 test_case(hash)
 test_case(logger)
 test_case(stellarclass)
--- a/test/unit/greek_test.cpp
+++ b/test/unit/greek_test.cpp
@ -0,0 +1,42 @@
+#include <celutil/greek.h>
+
+#include <catch.hpp>
+
+TEST_CASE("Greek", "[Greek]") // Catch test case covering three Greek-letter helper functions
+{
+    SECTION("ReplaceGreekLetterAbbr") // expectation pinned here: only a leading upper-case abbreviation is converted
+    {
+        REQUIRE(ReplaceGreekLetterAbbr("XI") == "\316\276"); // "\316\276" is the UTF-8 encoding of U+03BE (Greek small letter xi)
+        REQUIRE(ReplaceGreekLetterAbbr("XI12") == "\316\276\302\271\302\262"); // digits after the letter are expected as superscripts: "\302\271" = U+00B9, "\302\262" = U+00B2
+        REQUIRE(ReplaceGreekLetterAbbr("XI Foo") == "\316\276 Foo"); // text after the space is expected to pass through unchanged
+        REQUIRE(ReplaceGreekLetterAbbr("XI12 Bar") == "\316\276\302\271\302\262 Bar");
+
+        REQUIRE(ReplaceGreekLetterAbbr("xi") == "xi"); // lower-case input is expected to come back unchanged
+        REQUIRE(ReplaceGreekLetterAbbr("xi12") == "xi12");
+        REQUIRE(ReplaceGreekLetterAbbr("xi Foo") == "xi Foo");
+        REQUIRE(ReplaceGreekLetterAbbr("xi12 Bar") == "xi12 Bar");
+
+        REQUIRE(ReplaceGreekLetterAbbr("alpha") == "alpha"); // a lower-case full letter name is expected to come back unchanged as well
+    }
+
+    SECTION("ReplaceGreekLetter") // expectation pinned here: conversion for both upper- and lower-case input, and for a full letter name
+    {
+        REQUIRE(ReplaceGreekLetter("XI") == "\316\276");
+        REQUIRE(ReplaceGreekLetter("XI12") == "\316\276\302\271\302\262");
+        REQUIRE(ReplaceGreekLetter("XI Foo") == "\316\276 Foo");
+        REQUIRE(ReplaceGreekLetter("XI12 Bar") == "\316\276\302\271\302\262 Bar");
+
+        REQUIRE(ReplaceGreekLetter("xi") == "\316\276"); // same expected output as the upper-case group above
+        REQUIRE(ReplaceGreekLetter("xi12") == "\316\276\302\271\302\262");
+        REQUIRE(ReplaceGreekLetter("xi Foo") == "\316\276 Foo");
+        REQUIRE(ReplaceGreekLetter("xi12 Bar") == "\316\276\302\271\302\262 Bar");
+
+        REQUIRE(ReplaceGreekLetter("alpha") == "\316\261"); // "\316\261" is the UTF-8 encoding of U+03B1 (Greek small letter alpha)
+    }
+
+    SECTION("GetCanonicalGreekAbbreviation") // expectation pinned here: the result is an upper-case abbreviation
+    {
+        REQUIRE(GetCanonicalGreekAbbreviation("xi") == "XI"); // lower-case abbreviation maps to its upper-case form
+        REQUIRE(GetCanonicalGreekAbbreviation("alpha") == "ALF"); // full letter name maps to a three-letter abbreviation
+    }
+}