wetrocks/scripts/translate.py

119 lines
3.4 KiB
Python
Executable File

#!/usr/bin/env python3
# Jeff Moe with Parrot and Phind-CodeLlama-34B-v2_q8.gguf
import requests
import json
from gettext import gettext as _
import polib
import re
import argparse
from pycountry import languages
# URL of the llama server's completion endpoint; main() POSTs every prompt
# here. Set to llama server IP and port.
API = "http://192.168.109.223:8080/completion"
def update_po_file(filename, msgid, new_translation):
    """Store a new translation for one entry of a gettext .po file.

    The file is loaded with polib, the first entry whose ``msgid`` equals
    ``msgid`` exactly gets ``new_translation`` as its ``msgstr``, and the
    file is written back in place.

    Args:
        filename: Path of the .po file to modify.
        msgid: Source string identifying the entry to update.
        new_translation: Text to store as that entry's ``msgstr``.

    Returns:
        None. The file is left untouched when ``msgid`` or
        ``new_translation`` is ``None`` or when no entry matches.
    """
    # A None field means the caller could not parse a model reply; None is
    # not a valid msgstr, so refuse before touching the file.
    if msgid is None or new_translation is None:
        return
    po = polib.pofile(filename)
    for entry in po:
        if entry.msgid == msgid:
            entry.msgstr = new_translation
            # Save only after an actual change so an unmatched msgid does
            # not trigger a pointless rewrite of the whole catalog.
            po.save()
            return
def process_response(json_data):
    """Pull the ``msgid`` / ``msgstr`` pair out of a completion response.

    The text under ``json_data["content"]`` is cleaned up (literal
    backslash-n becomes a newline, code fences are removed) and scanned line
    by line for ``msgid:`` and ``msgstr:`` labels.

    Args:
        json_data: Decoded JSON response; must contain a ``"content"`` string.

    Returns:
        A ``(msgid, msgstr)`` tuple. Each item is the text following its
        label on the last line carrying that label, or ``None`` when no such
        line exists. Only the labelled line itself is captured, so a value
        spanning several lines is truncated to its first line.

    Raises:
        KeyError: If ``json_data`` has no ``"content"`` key.
    """
    # NOTE(review): the "\\u" -> "\\" replacement is kept from the original;
    # confirm that collapsing literal backslash-u sequences is intended.
    content = json_data["content"].replace("\\n", "\n").replace("\\u", "\\")
    content = content.replace("```", "")

    def _labelled_value(line, label):
        """Return the text after ``label:`` on *line*, or None if absent."""
        prefix = label + ":"
        if not line.startswith(prefix):
            return None
        value = line[len(prefix):]
        # Drop just the single separator space; anything else is payload.
        return value[1:] if value.startswith(" ") else value

    formatted_msgid, formatted_msgstr = None, None
    for line in content.split("\n"):
        # Strip the label only at the start of the line: a blanket
        # str.replace would also delete "msgid: " / "msgstr: " occurring
        # inside the translated text itself.
        value = _labelled_value(line, "msgid")
        if value is not None:
            formatted_msgid = value
            continue
        value = _labelled_value(line, "msgstr")
        if value is not None:
            formatted_msgstr = value
    return formatted_msgid, formatted_msgstr
def get_lang_name(iso_code):
    """Look up a language name from its ISO 639 code via pycountry.

    Two-character codes are looked up as ``alpha_2`` and three-character
    codes as ``alpha_3``; any other length is treated as unknown.

    Args:
        iso_code: Language code string, e.g. ``"en"``.

    Returns:
        The ``name`` attribute of the matching pycountry language, or
        ``None`` (after printing a message) when the code is unknown or is
        not a sized value.
    """
    try:
        if len(iso_code) == 2:
            lang = languages.get(alpha_2=iso_code)
        elif len(iso_code) == 3:
            lang = languages.get(alpha_3=iso_code)
        else:
            lang = None
    except (LookupError, TypeError):
        # Some pycountry versions raise for a failed lookup; len() raises
        # TypeError for values such as None.
        lang = None
    # Other pycountry versions return None instead of raising, which used to
    # surface as an uncaught AttributeError on ``lang.name``.
    if lang is None:
        print(f"Unknown language code: {iso_code}")
        return None
    return lang.name
def main():
    """Translate every entry of a .po file with the llama completion server.

    Command-line options:
        --file  Path of the .po file to translate (updated in place).
        --lang  ISO 639 code of the target language (e.g. "en").

    For each entry the current ``msgid`` / ``msgstr`` pair is embedded in an
    instruction prompt and POSTed to ``API``; the reply is parsed with
    ``process_response`` and written back with ``update_po_file``.

    Exits through ``parser.error`` when the language code is unknown.

    Raises:
        requests.RequestException: If the server cannot be reached, does not
            answer in time, or answers with an HTTP error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help=".po file", required=True)
    parser.add_argument(
        "--lang", help='Language ISO 639 code (e.g. "en")', required=True
    )
    args = parser.parse_args()
    po_path = args.file
    dest_iso = args.lang

    # required=True still lets an empty string through (--file "").
    if not po_path:
        print("Please provide a POFILE using --file option")
        return

    dest_lang = get_lang_name(dest_iso)
    if dest_lang is None:
        # Without a language name the prompt cannot be built; previously this
        # crashed with a TypeError during string concatenation.
        parser.error(f"unknown language code: {dest_iso}")

    # The instruction text depends only on the target language, so build it
    # once instead of once per entry.
    instructions = (
        """### System Prompt
You are an expert translator and programmer and write code on the first attempt without any errors or fillers.
### User Message:
Rewrite the code to satisfy this request: "This is a gettext .po file.
The English "msgid" needs to be translated into
"""
        + dest_lang
        + "("
        + dest_iso
        + ")"
        + """
The translated string goes into "msgstr". Keep the English "msgid", dont change the English.
Professional translation. Do not change any instructions. ONLY TRANSLATE text.
DO NOT TRANSLATE URLs between "<" and ">" symbols.
DO NOT change the "msgid".
DO NOT translate the following proper names in the "msgstr": "Parrot".
ONLY write "Here is the translated code:" and display the code.
"""
    )
    headers = {"Content-Type": "application/json"}
    # Generous limit: the reply only arrives once generation has finished,
    # but a dead server should not hang the script forever.
    request_timeout = 600

    pofile = polib.pofile(po_path)
    for entry in pofile:
        po_snippet = f"msgid: {entry.msgid}\nmsgstr: {entry.msgstr}\n"
        # NOTE(review): the prompt is still passed through gettext as in the
        # original; confirm whether a catalog lookup is wanted here at all.
        prompt = _(instructions + "```" + po_snippet + "```")
        payload = {"prompt": prompt, "n_predict": 128}
        response = requests.post(
            API, headers=headers, json=payload, timeout=request_timeout
        )
        # Fail with the HTTP error rather than a KeyError on a body that has
        # no "content" field.
        response.raise_for_status()
        formatted_msgid, formatted_msgstr = process_response(response.json())
        print(f"msgid: {formatted_msgid}")
        print(f"msgstr: {formatted_msgstr}")
        if formatted_msgid is None or formatted_msgstr is None:
            # The reply did not contain both fields; leave the entry alone
            # rather than writing None into the catalog.
            print("Could not parse the response; entry left unchanged")
            continue
        update_po_file(po_path, formatted_msgid, formatted_msgstr)


# Run the translation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()