Compare commits

...

4 Commits

Author SHA1 Message Date
Jeff Moe 679b22d96c translate unified script to-be 2023-12-02 14:45:01 -07:00
Jeff Moe 9adcdbb652 mv translate scripts 2023-12-02 14:44:35 -07:00
Jeff Moe 92cc3ca3e3 mv translate scripts 2023-12-02 14:44:17 -07:00
Jeff Moe 2ff441d524 Split up translate jobs 2023-12-02 14:43:24 -07:00
9 changed files with 392 additions and 5 deletions

View File

@ -1,11 +1,11 @@
#!/bin/bash
SOURCEDIR="/home/jebba/devel/deepcrayon/parrot-wtf/docs/source/locale/"
PYSCRIPT="//home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu0.py"
PYSCRIPT="/home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu0.py"
cd ${SOURCEDIR}
# for LANGTR in am ar bn de el eo es eu fil fr he hi id it ja ko lkt mr ms nl pl pt ru see ta te th tr ur vi zh
for LANGTR in am ar bn de el eo es eu fil fr he hi id it ja
for LANGTR in am ar bn de el eo es
do echo "Translating language ${LANGTR}..."
#for PO in `ls -1 ${LANGTR}/LC_MESSAGES/about.po`
for PO in `find ${LANGTR}/LC_MESSAGES/ -name "*.po"`

View File

@ -1,11 +1,11 @@
#!/bin/bash
SOURCEDIR="/home/jebba/devel/deepcrayon/parrot-wtf/docs/source/locale/"
PYSCRIPT="//home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu1.py"
PYSCRIPT="/home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu1.py"
cd ${SOURCEDIR}
# for LANGTR in am ar bn de el eo es eu fil fr he hi id it ja ko lkt mr ms nl pl pt ru see ta te th tr ur vi zh
for LANGTR in ko lkt mr ms nl pl pt ru see ta te th tr ur vi zh
for LANGTR in eu fil fr he hi id it ja
do echo "Translating language ${LANGTR}..."
#for PO in `ls -1 ${LANGTR}/LC_MESSAGES/about.po`
for PO in `find ${LANGTR}/LC_MESSAGES/ -name "*.po"`

View File

@ -0,0 +1,16 @@
#!/bin/bash
# Run the translation script once per .po file for each language in this
# job's list.
SOURCEDIR="/home/jebba/devel/deepcrayon/parrot-wtf/docs/source/locale/"
PYSCRIPT="/home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu2.py"
# The find paths below are relative to SOURCEDIR, so stop if it is missing.
cd "${SOURCEDIR}" || exit 1
# Full language list, for reference (this job's loop covers only part of it):
# am ar bn de el eo es eu fil fr he hi id it ja ko lkt mr ms nl pl pt ru see ta te th tr ur vi zh
for LANGTR in ko lkt mr ms nl pl pt ru
do
    echo "Translating language ${LANGTR}..."
    # To translate a single file, narrow the find pattern (e.g. -name "about.po").
    # NUL-delimited reading keeps file names with spaces intact.
    while IFS= read -r -d '' PO
    do
        echo "Translating file: ${PO}"
        # stdin is redirected so the command cannot consume the file list.
        time "${PYSCRIPT}" --lang "${LANGTR}" --file "${PO}" < /dev/null
    done < <(find "${LANGTR}/LC_MESSAGES/" -name "*.po" -print0)
done

View File

@ -0,0 +1,16 @@
#!/bin/bash
# Run the translation script once per .po file for each language in this
# job's list.
SOURCEDIR="/home/jebba/devel/deepcrayon/parrot-wtf/docs/source/locale/"
PYSCRIPT="/home/jebba/devel/deepcrayon/parrot-wtf/translate-headless-gpu3.py"
# The find paths below are relative to SOURCEDIR, so stop if it is missing.
cd "${SOURCEDIR}" || exit 1
# Full language list, for reference (this job's loop covers only part of it):
# am ar bn de el eo es eu fil fr he hi id it ja ko lkt mr ms nl pl pt ru see ta te th tr ur vi zh
for LANGTR in see ta te th tr ur vi zh
do
    echo "Translating language ${LANGTR}..."
    # To translate a single file, narrow the find pattern (e.g. -name "about.po").
    # NUL-delimited reading keeps file names with spaces intact.
    while IFS= read -r -d '' PO
    do
        echo "Translating file: ${PO}"
        # stdin is redirected so the command cannot consume the file list.
        time "${PYSCRIPT}" --lang "${LANGTR}" --file "${PO}" < /dev/null
    done < <(find "${LANGTR}/LC_MESSAGES/" -name "*.po" -print0)
done

View File

@ -0,0 +1,118 @@
#!/usr/bin/env python3
# Jeff Moe with Parrot and Phind-CodeLlama-34B-v2_q8.gguf
import requests
import json
from gettext import gettext as _
import polib
import re
import argparse
from pycountry import languages
API = "http://192.168.109.223:8080/completion"
def update_po_file(filename, msgid, new_translation):
    """Store a translation for one entry of a gettext .po file.

    Loads ``filename`` with polib, sets ``msgstr`` on the first entry whose
    ``msgid`` equals ``msgid`` exactly, and saves the file in place.

    Args:
        filename: Path of the .po file to update.
        msgid: Source string identifying the entry to update.
        new_translation: Text to store as that entry's ``msgstr``.

    Returns:
        None. The file is left untouched when ``msgid`` or
        ``new_translation`` is ``None`` or when no entry matches.
    """
    if msgid is None or new_translation is None:
        # Nothing usable to store; never write None into an entry.
        return
    po = polib.pofile(filename)
    for entry in po:
        if entry.msgid == msgid:
            entry.msgstr = new_translation
            # Save only when an entry was actually changed.
            po.save()
            return
def process_response(json_data):
    """Extract the ``msgid`` and ``msgstr`` lines from a completion response.

    Args:
        json_data: Decoded JSON response; its ``"content"`` value holds the
            generated text.

    Returns:
        A ``(msgid, msgstr)`` tuple. Each element is the text of the last
        line starting with that label, with a leading ``"msgid: "`` /
        ``"msgstr: "`` removed, or ``None`` if no such line exists.

    Raises:
        KeyError: If ``json_data`` has no ``"content"`` key.
    """

    def strip_label(line, label):
        # Remove the label only where it leads the line; the same text
        # appearing later in the line belongs to the message itself.
        return line[len(label):] if line.startswith(label) else line

    # Turn literal "\n" sequences into real newlines.
    # NOTE(review): replacing "\u" with "\" alters Unicode escape sequences;
    # kept unchanged here -- confirm this is intended.
    content = json_data["content"].replace("\\n", "\n").replace("\\u", "\\")
    # Drop the code fences wrapped around the .po snippet.
    content = content.replace("```", "")
    formatted_msgid, formatted_msgstr = None, None
    for line in content.split("\n"):
        if line.startswith("msgid"):
            formatted_msgid = strip_label(line, "msgid: ")
        elif line.startswith("msgstr"):
            formatted_msgstr = strip_label(line, "msgstr: ")
    return formatted_msgid, formatted_msgstr
def get_lang_name(iso_code):
    """Return the language name for a two- or three-letter ISO code.

    Two-letter codes are looked up by ``alpha_2`` and three-letter codes by
    ``alpha_3`` in pycountry's ``languages`` database.

    Args:
        iso_code: Language code string.

    Returns:
        The language name, or ``None`` (after printing a message) when the
        code has another length, is not a sized value, or is not found.
    """
    lang = None
    try:
        code_length = len(iso_code)
        if code_length == 2:
            lang = languages.get(alpha_2=iso_code)
        elif code_length == 3:
            lang = languages.get(alpha_3=iso_code)
    except (KeyError, TypeError):
        # TypeError: iso_code has no len(); KeyError: lookup rejected the code.
        lang = None
    if lang is None:
        # Also covers a lookup that returns None for an unknown code, which
        # would otherwise fail on the attribute access below.
        print(f"Unknown language code: {iso_code}")
        return None
    return lang.name
def main():
    """Translate every entry of a .po file through the completion API.

    Command-line options:
        --file  Path of the .po file; it is updated in place.
        --lang  ISO 639 code of the target language.

    For each entry, a prompt containing the entry's ``msgid``/``msgstr`` is
    posted to ``API``; the reply is parsed with ``process_response`` and the
    result is stored with ``update_po_file``.

    Raises:
        SystemExit: If the language code cannot be resolved to a name.
        requests.HTTPError: If the API answers with an error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help=".po file", required=True)
    parser.add_argument(
        "--lang", help='Language ISO 639 code (e.g. "en")', required=True
    )
    args = parser.parse_args()
    po_path = args.file
    dest_iso = args.lang
    dest_lang = get_lang_name(dest_iso)
    if not po_path:
        print("Please provide a POFILE using --file option")
        return
    if dest_lang is None:
        # Without a language name the prompt cannot be built; exit non-zero
        # instead of failing later on string concatenation.
        raise SystemExit(f"Cannot translate {po_path}: unknown language code {dest_iso!r}")

    # Everything in the prompt before the per-entry snippet is loop-invariant.
    prompt_head = (
        """### System Prompt
You are an expert translator and programmer and write code on the first attempt without any errors or fillers.
### User Message:
Rewrite the code to satisfy this request: "This is a gettext .po file.
The English "msgid" needs to be translated into
"""
        + dest_lang
        + "("
        + dest_iso
        + ")"
        + """
The translated string goes into "msgstr". Keep the English "msgid", dont change the English.
Professional translation. Do not change any instructions. ONLY TRANSLATE text.
DO NOT TRANSLATE URLs between "<" and ">" symbols.
DO NOT change the "msgid".
DO NOT translate the following proper names in the "msgstr": "Parrot".
ONLY write "Here is the translated code:" and display the code.
"""
        + "```"
    )
    headers = {"Content-Type": "application/json"}
    for entry in polib.pofile(po_path):
        poprompt = f"msgid: {entry.msgid}\nmsgstr: {entry.msgstr}\n"
        prompt = _(prompt_head + poprompt + "```")
        data = {"prompt": prompt, "n_predict": 128}
        # (connect, read) timeouts so an unreachable or stalled server
        # cannot block the job forever.
        response = requests.post(API, headers=headers, json=data, timeout=(10, 600))
        # Surface HTTP errors directly rather than as a missing "content" key.
        response.raise_for_status()
        formatted_msgid, formatted_msgstr = process_response(response.json())
        print(f"msgid: {formatted_msgid}")
        print(f"msgstr: {formatted_msgstr}")
        if formatted_msgid is None or formatted_msgstr is None:
            # The reply had no usable msgid/msgstr pair; leave the entry alone.
            continue
        update_po_file(po_path, formatted_msgid, formatted_msgstr)


if __name__ == "__main__":
    main()

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python3
# Jeff Moe with Parrot and Phind-CodeLlama-34B-v2_q8.gguf
import requests
import json
@ -9,7 +10,7 @@ import argparse
from pycountry import languages
API = "http://192.168.109.223:8081/completion"
API = "http://192.168.109.223:8084/completion"
def update_po_file(filename, msgid, new_translation):

View File

@ -0,0 +1,118 @@
#!/usr/bin/env python3
# Jeff Moe with Parrot and Phind-CodeLlama-34B-v2_q8.gguf
import requests
import json
from gettext import gettext as _
import polib
import re
import argparse
from pycountry import languages
API = "http://192.168.109.223:8088/completion"
def update_po_file(filename, msgid, new_translation):
    """Store a translation for one entry of a gettext .po file.

    Loads ``filename`` with polib, sets ``msgstr`` on the first entry whose
    ``msgid`` equals ``msgid`` exactly, and saves the file in place.

    Args:
        filename: Path of the .po file to update.
        msgid: Source string identifying the entry to update.
        new_translation: Text to store as that entry's ``msgstr``.

    Returns:
        None. The file is left untouched when ``msgid`` or
        ``new_translation`` is ``None`` or when no entry matches.
    """
    if msgid is None or new_translation is None:
        # Nothing usable to store; never write None into an entry.
        return
    po = polib.pofile(filename)
    for entry in po:
        if entry.msgid == msgid:
            entry.msgstr = new_translation
            # Save only when an entry was actually changed.
            po.save()
            return
def process_response(json_data):
    """Extract the ``msgid`` and ``msgstr`` lines from a completion response.

    Args:
        json_data: Decoded JSON response; its ``"content"`` value holds the
            generated text.

    Returns:
        A ``(msgid, msgstr)`` tuple. Each element is the text of the last
        line starting with that label, with a leading ``"msgid: "`` /
        ``"msgstr: "`` removed, or ``None`` if no such line exists.

    Raises:
        KeyError: If ``json_data`` has no ``"content"`` key.
    """

    def strip_label(line, label):
        # Remove the label only where it leads the line; the same text
        # appearing later in the line belongs to the message itself.
        return line[len(label):] if line.startswith(label) else line

    # Turn literal "\n" sequences into real newlines.
    # NOTE(review): replacing "\u" with "\" alters Unicode escape sequences;
    # kept unchanged here -- confirm this is intended.
    content = json_data["content"].replace("\\n", "\n").replace("\\u", "\\")
    # Drop the code fences wrapped around the .po snippet.
    content = content.replace("```", "")
    formatted_msgid, formatted_msgstr = None, None
    for line in content.split("\n"):
        if line.startswith("msgid"):
            formatted_msgid = strip_label(line, "msgid: ")
        elif line.startswith("msgstr"):
            formatted_msgstr = strip_label(line, "msgstr: ")
    return formatted_msgid, formatted_msgstr
def get_lang_name(iso_code):
    """Return the language name for a two- or three-letter ISO code.

    Two-letter codes are looked up by ``alpha_2`` and three-letter codes by
    ``alpha_3`` in pycountry's ``languages`` database.

    Args:
        iso_code: Language code string.

    Returns:
        The language name, or ``None`` (after printing a message) when the
        code has another length, is not a sized value, or is not found.
    """
    lang = None
    try:
        code_length = len(iso_code)
        if code_length == 2:
            lang = languages.get(alpha_2=iso_code)
        elif code_length == 3:
            lang = languages.get(alpha_3=iso_code)
    except (KeyError, TypeError):
        # TypeError: iso_code has no len(); KeyError: lookup rejected the code.
        lang = None
    if lang is None:
        # Also covers a lookup that returns None for an unknown code, which
        # would otherwise fail on the attribute access below.
        print(f"Unknown language code: {iso_code}")
        return None
    return lang.name
def main():
    """Translate every entry of a .po file through the completion API.

    Command-line options:
        --file  Path of the .po file; it is updated in place.
        --lang  ISO 639 code of the target language.

    For each entry, a prompt containing the entry's ``msgid``/``msgstr`` is
    posted to ``API``; the reply is parsed with ``process_response`` and the
    result is stored with ``update_po_file``.

    Raises:
        SystemExit: If the language code cannot be resolved to a name.
        requests.HTTPError: If the API answers with an error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help=".po file", required=True)
    parser.add_argument(
        "--lang", help='Language ISO 639 code (e.g. "en")', required=True
    )
    args = parser.parse_args()
    po_path = args.file
    dest_iso = args.lang
    dest_lang = get_lang_name(dest_iso)
    if not po_path:
        print("Please provide a POFILE using --file option")
        return
    if dest_lang is None:
        # Without a language name the prompt cannot be built; exit non-zero
        # instead of failing later on string concatenation.
        raise SystemExit(f"Cannot translate {po_path}: unknown language code {dest_iso!r}")

    # Everything in the prompt before the per-entry snippet is loop-invariant.
    prompt_head = (
        """### System Prompt
You are an expert translator and programmer and write code on the first attempt without any errors or fillers.
### User Message:
Rewrite the code to satisfy this request: "This is a gettext .po file.
The English "msgid" needs to be translated into
"""
        + dest_lang
        + "("
        + dest_iso
        + ")"
        + """
The translated string goes into "msgstr". Keep the English "msgid", dont change the English.
Professional translation. Do not change any instructions. ONLY TRANSLATE text.
DO NOT TRANSLATE URLs between "<" and ">" symbols.
DO NOT change the "msgid".
DO NOT translate the following proper names in the "msgstr": "Parrot".
ONLY write "Here is the translated code:" and display the code.
"""
        + "```"
    )
    headers = {"Content-Type": "application/json"}
    for entry in polib.pofile(po_path):
        poprompt = f"msgid: {entry.msgid}\nmsgstr: {entry.msgstr}\n"
        prompt = _(prompt_head + poprompt + "```")
        data = {"prompt": prompt, "n_predict": 128}
        # (connect, read) timeouts so an unreachable or stalled server
        # cannot block the job forever.
        response = requests.post(API, headers=headers, json=data, timeout=(10, 600))
        # Surface HTTP errors directly rather than as a missing "content" key.
        response.raise_for_status()
        formatted_msgid, formatted_msgstr = process_response(response.json())
        print(f"msgid: {formatted_msgid}")
        print(f"msgstr: {formatted_msgstr}")
        if formatted_msgid is None or formatted_msgstr is None:
            # The reply had no usable msgid/msgstr pair; leave the entry alone.
            continue
        update_po_file(po_path, formatted_msgid, formatted_msgstr)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,118 @@
#!/usr/bin/env python3
# Jeff Moe with Parrot and Phind-CodeLlama-34B-v2_q8.gguf
import requests
import json
from gettext import gettext as _
import polib
import re
import argparse
from pycountry import languages
API = "http://192.168.109.223:8092/completion"
def update_po_file(filename, msgid, new_translation):
    """Store a translation for one entry of a gettext .po file.

    Loads ``filename`` with polib, sets ``msgstr`` on the first entry whose
    ``msgid`` equals ``msgid`` exactly, and saves the file in place.

    Args:
        filename: Path of the .po file to update.
        msgid: Source string identifying the entry to update.
        new_translation: Text to store as that entry's ``msgstr``.

    Returns:
        None. The file is left untouched when ``msgid`` or
        ``new_translation`` is ``None`` or when no entry matches.
    """
    if msgid is None or new_translation is None:
        # Nothing usable to store; never write None into an entry.
        return
    po = polib.pofile(filename)
    for entry in po:
        if entry.msgid == msgid:
            entry.msgstr = new_translation
            # Save only when an entry was actually changed.
            po.save()
            return
def process_response(json_data):
    """Extract the ``msgid`` and ``msgstr`` lines from a completion response.

    Args:
        json_data: Decoded JSON response; its ``"content"`` value holds the
            generated text.

    Returns:
        A ``(msgid, msgstr)`` tuple. Each element is the text of the last
        line starting with that label, with a leading ``"msgid: "`` /
        ``"msgstr: "`` removed, or ``None`` if no such line exists.

    Raises:
        KeyError: If ``json_data`` has no ``"content"`` key.
    """

    def strip_label(line, label):
        # Remove the label only where it leads the line; the same text
        # appearing later in the line belongs to the message itself.
        return line[len(label):] if line.startswith(label) else line

    # Turn literal "\n" sequences into real newlines.
    # NOTE(review): replacing "\u" with "\" alters Unicode escape sequences;
    # kept unchanged here -- confirm this is intended.
    content = json_data["content"].replace("\\n", "\n").replace("\\u", "\\")
    # Drop the code fences wrapped around the .po snippet.
    content = content.replace("```", "")
    formatted_msgid, formatted_msgstr = None, None
    for line in content.split("\n"):
        if line.startswith("msgid"):
            formatted_msgid = strip_label(line, "msgid: ")
        elif line.startswith("msgstr"):
            formatted_msgstr = strip_label(line, "msgstr: ")
    return formatted_msgid, formatted_msgstr
def get_lang_name(iso_code):
    """Return the language name for a two- or three-letter ISO code.

    Two-letter codes are looked up by ``alpha_2`` and three-letter codes by
    ``alpha_3`` in pycountry's ``languages`` database.

    Args:
        iso_code: Language code string.

    Returns:
        The language name, or ``None`` (after printing a message) when the
        code has another length, is not a sized value, or is not found.
    """
    lang = None
    try:
        code_length = len(iso_code)
        if code_length == 2:
            lang = languages.get(alpha_2=iso_code)
        elif code_length == 3:
            lang = languages.get(alpha_3=iso_code)
    except (KeyError, TypeError):
        # TypeError: iso_code has no len(); KeyError: lookup rejected the code.
        lang = None
    if lang is None:
        # Also covers a lookup that returns None for an unknown code, which
        # would otherwise fail on the attribute access below.
        print(f"Unknown language code: {iso_code}")
        return None
    return lang.name
def main():
    """Translate every entry of a .po file through the completion API.

    Command-line options:
        --file  Path of the .po file; it is updated in place.
        --lang  ISO 639 code of the target language.

    For each entry, a prompt containing the entry's ``msgid``/``msgstr`` is
    posted to ``API``; the reply is parsed with ``process_response`` and the
    result is stored with ``update_po_file``.

    Raises:
        SystemExit: If the language code cannot be resolved to a name.
        requests.HTTPError: If the API answers with an error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help=".po file", required=True)
    parser.add_argument(
        "--lang", help='Language ISO 639 code (e.g. "en")', required=True
    )
    args = parser.parse_args()
    po_path = args.file
    dest_iso = args.lang
    dest_lang = get_lang_name(dest_iso)
    if not po_path:
        print("Please provide a POFILE using --file option")
        return
    if dest_lang is None:
        # Without a language name the prompt cannot be built; exit non-zero
        # instead of failing later on string concatenation.
        raise SystemExit(f"Cannot translate {po_path}: unknown language code {dest_iso!r}")

    # Everything in the prompt before the per-entry snippet is loop-invariant.
    prompt_head = (
        """### System Prompt
You are an expert translator and programmer and write code on the first attempt without any errors or fillers.
### User Message:
Rewrite the code to satisfy this request: "This is a gettext .po file.
The English "msgid" needs to be translated into
"""
        + dest_lang
        + "("
        + dest_iso
        + ")"
        + """
The translated string goes into "msgstr". Keep the English "msgid", dont change the English.
Professional translation. Do not change any instructions. ONLY TRANSLATE text.
DO NOT TRANSLATE URLs between "<" and ">" symbols.
DO NOT change the "msgid".
DO NOT translate the following proper names in the "msgstr": "Parrot".
ONLY write "Here is the translated code:" and display the code.
"""
        + "```"
    )
    headers = {"Content-Type": "application/json"}
    for entry in polib.pofile(po_path):
        poprompt = f"msgid: {entry.msgid}\nmsgstr: {entry.msgstr}\n"
        prompt = _(prompt_head + poprompt + "```")
        data = {"prompt": prompt, "n_predict": 128}
        # (connect, read) timeouts so an unreachable or stalled server
        # cannot block the job forever.
        response = requests.post(API, headers=headers, json=data, timeout=(10, 600))
        # Surface HTTP errors directly rather than as a missing "content" key.
        response.raise_for_status()
        formatted_msgid, formatted_msgstr = process_response(response.json())
        print(f"msgid: {formatted_msgid}")
        print(f"msgstr: {formatted_msgstr}")
        if formatted_msgid is None or formatted_msgstr is None:
            # The reply had no usable msgid/msgstr pair; leave the entry alone.
            continue
        update_po_file(po_path, formatted_msgid, formatted_msgstr)


if __name__ == "__main__":
    main()