Module:zle-ort-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module transliterates Old Ruthenian text according to WT:ZLE-ORT TR. It should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}; from within a module, use Module:languages#Language:transliterate.

For testcases, see Module:zle-ort-translit/testcases.

Functions

tr(text, lang, sc)
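Transliterates a given piece of text written in the script specified by the code sc, in the language specified by the code lang. If sc is omitted, the script is detected from the text.
Note: as currently implemented, the function has no path that returns nil; text in a script other than Cyrs is returned with only the kamora and titlo replaced.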

-- Module table; the public entry point is attached to it and it is returned
-- at the end of the file.
local export = {}

-- Replacement tables, each keyed by script code (only "Cyrs" is filled in):
--   letters  : one source character -> its transliteration
--   digraphs : ustring pattern      -> replacement for multi-character rules
local letters, digraphs = {}, {}

-- The character U+030F, appended to the transliterations of Ѷ and ѷ.
local double_grave = mw.ustring.char(0x30F)

-- Single-character transliteration table for the "Cyrs" script.
-- export.tr passes this table as the replacement argument of string.gsub, so
-- every non-ASCII character that appears as a key is replaced by its value and
-- any character that is not a key is left unchanged.
-- An empty-string value (Ъ, ъ, ⸯ) removes the character from the output.
-- The entries for Ѣ and ѣ are reassigned on every call to export.tr according
-- to the language code; the values written here are the non-"zle-ouk" ones.
letters["Cyrs"] = {
	-- main letters
	["А"]='A', ["Б"]='B', ["В"]='V', ["Г"]='H', ["Д"]='D', ["Е"]='E', ["Ж"]='Ž', ["З"]='Z', ["Ї"]='I', ["И"]='I', ["Й"]='J',
	["К"]='K', ["Л"]='L', ["М"]='M', ["Н"]='N', ["О"]='O', ["П"]='P', ["Р"]='R', ["С"]='S', ["Т"]='T', ["У"]='U', ["Ф"]='F',
	["Х"]='X', ["Ѡ"]='O', ["Ѿ"]='Ot', ["Ц"]='C', ["Ч"]='Č', ["Ш"]='Š', ["Щ"]='Šč', ["Ъ"]='', ["Ы"]='Y', ["Ь"]='ʹ',
	["Ѣ"]='Jě', ["Є"]='Je', ["Ю"]='Ju', ["Ꙗ"]='Ja', ["Ѧ"]='Ja',
	["а"]='a', ["б"]='b', ["в"]='v', ["г"]='h', ["д"]='d', ["е"]='e', ["ж"]='ž', ["з"]='z', ["ї"]='i', ["и"]='i', ["й"]='j',
	["к"]='k', ["л"]='l', ["м"]='m', ["н"]='n', ["о"]='o', ["п"]='p', ["р"]='r', ["с"]='s', ["т"]='t', ["у"]='u', ["ф"]='f',
	["х"]='x', ["ѡ"]='o', ["ѿ"]='ot', ["ц"]='c', ["ч"]='č', ["ш"]='š', ["щ"]='šč', ["ъ"]='', ["ы"]='y', ["ь"]='ʹ',
	["ѣ"]='jě', ["є"]='je', ["ю"]='ju', ["ꙗ"]='ja', ["ѧ"]='ja',
	-- extended letters
	["Ґ"]='G', ["Ѕ"]='Z', ["І"]='I', ["Ꙋ"]='U', ["Ѫ"]='U', ["Ѯ"]='Ks', ["Ѱ"]='Ps', ["Ѳ"]='F', ["Ѵ"]='I', ["Я"]='Ja',
	["ґ"]='g', ["ѕ"]='z', ["і"]='i', ["ꙋ"]='u', ["ѫ"]='u', ["ѯ"]='ks', ["ѱ"]='ps', ["ѳ"]='f', ["ѵ"]='i', ["я"]='ja',
	-- archaic letters & other
	-- (Ѷ/ѷ become I/i followed by the double_grave character)
	["Э"]='E', ["Ꙁ"]='Z', ["Ѻ"]='O', ["Ꙍ"]='O', ["Ѽ"]='O', ["Ꙑ"]='Y', ["Ꙓ"]='Jě', ["Ѥ"]='Je', ["Ѩ"]='Ja', ["Ѭ"]='Ju', ["Ѷ"]='I' .. double_grave,
	["э"]='e', ["ꙁ"]='z', ["ѻ"]='o', ["ꙍ"]='o', ["ѽ"]='o', ["ꙑ"]='y', ["ꙓ"]='jě', ["ѥ"]='je', ["ѩ"]='ja', ["ѭ"]='ju', ["ѷ"]='i' .. double_grave, ["ⸯ"]='',
}

-- Capture group matching one consonant letter (lower or upper case) or a
-- hyphen; the trailing "-" inside the set is a literal hyphen.
-- The upper-case run previously held a Latin "B" where Cyrillic "В" belongs,
-- so capital В never triggered the post-consonant rules for ѡ and ѣ.
local consonant = "([бвгджзйклмнпрстфхцчшщґѕѯѱѳꙁБВГДЖЗЙКЛМНПРСТФХЦЧШЩҐЅѮѰѲꙀ-])"

-- Key of the "ѣ after a consonant" rule. export.tr overwrites the replacement
-- stored under this key on every call, so the key is built once here instead
-- of repeating the long literal (which must match exactly to hit the same entry).
local yat_after_consonant = consonant .. "ѣ"

-- Context-sensitive rules for the "Cyrs" script: ustring pattern -> replacement.
-- They are applied with mw.ustring.gsub before the single-letter table, so a
-- replacement may contain Cyrillic captures (%1) that are transliterated later.
-- NOTE(review): export.tr walks this table with pairs(), whose order is
-- unspecified, yet several rules overlap (e.g. "бѣлоу" vs. "о[уѵ]" and the
-- ѣ-after-consonant rule). Confirm the intended precedence and consider an
-- ordered list of rules.
digraphs["Cyrs"] = {
	-- whole-word exceptions
	["бѣлоу"]="bělou", ["бѣлоу́"]="běloú",
	-- true digraphs
	["О[УѴуѵ]"]="U", ["о[уѵ]"]="u", ["К[Гг]"]="G", ["кг"]="g", ["іа"]="ja",
	-- ъ directly before one of the listed letters is kept as ʺ
	["ъ([аеїиоуѡъыьѣєюꙗѧіꙋѫѵяэѻꙍѽꙑꙓѥѩѭѷ])"]="ʺ%1",
	-- е after one of the listed letters becomes je
	["([аеїиоуѡъыьѣєюꙗѧіꙋѫѵяэѻꙍѽꙑꙓѥѩѭѷАЕЇИОУѠЪЫЬѢЄЮꙖѦІꙊѪѴЯЭѺꙌѼꙐꙒѤѨѬѶ])е"]="%1je",
	["([ъыіѵꙑ])и"]="%1j",
	["([аеїиоуѡъыьѣєюꙗѧіꙋѫѵяэѻꙍѽꙑꙓѥѩѭѷАЕЇИОУѠЪЫЬѢЄЮꙖѦІꙊѪѴЯЭѺꙌѼꙐꙒѤѨѬѶ])ⸯ"]="%1j",
	-- ѡ and ѣ directly after a consonant (or hyphen)
	[consonant .. "ѡ"]="%1ô",
	[yat_after_consonant]="%1ě",
	["([АаЯяꙖꙗѦѧѨѩ])ѵ"]="%1v", ["([АаЯяꙖꙗѦѧѨѩ])Ѵ"]="%1V",
	["([оь])и"]="%1ʲi", ["цъ"]="cʹ",
	["ї([оѡ])"]="j%1", ["Ї([оѡ])"]="J%1", ["и([оѡ])"]="j%1", ["И([оѡ])"]="J%1",
}

--- Transliterate a piece of text.
-- @param text  string to transliterate
-- @param lang  language code; "zle-ouk" selects the Old Ukrainian values for ѣ,
--              any other value selects the default ones
-- @param sc    script code; when nil/false it is obtained from
--              Module:languages via findBestScript(text)
-- @return the transliterated string. Only the "Cyrs" script is processed by
--         the rule tables; for any other script the text is returned with only
--         the kamora and titlo replaced.
function export.tr(text, lang, sc)
	if not sc then
		sc = require("Module:languages").getByCode(lang):findBestScript(text):getCode()
	end

	-- Ѣ was pronounced differently in Old Ukrainian.
	-- The shared tables are rewritten on every call (both branches assign all
	-- three entries), so a previous call with the other language cannot leak.
	local cyrs_letters, cyrs_digraphs = letters["Cyrs"], digraphs["Cyrs"]
	if lang == "zle-ouk" then
		cyrs_letters["Ѣ"] = 'Ji'
		cyrs_letters["ѣ"] = 'ji'
		cyrs_digraphs[yat_after_consonant] = '%1i'
	else
		cyrs_letters["Ѣ"] = 'Jě'
		cyrs_letters["ѣ"] = 'jě'
		cyrs_digraphs[yat_after_consonant] = '%1ě'
	end

	-- Transliterate the kamora as prime ("\210\132" is the UTF-8 encoding of U+0484)
	text = string.gsub(text, "\210\132", "ʹ")
	-- Transliterate the titlo as colon
	text = string.gsub(text, mw.ustring.char(0x0483), ":")

	if sc == "Cyrs" then
		-- Context rules first; they need code-point-aware matching, hence mw.ustring.
		for key, repl in pairs(digraphs[sc]) do
			text = mw.ustring.gsub(text, key, repl)
		end

		-- Pattern for one non-ASCII character: a UTF-8 lead byte followed by its
		-- continuation bytes. With a table as replacement, characters that are
		-- not keys of letters[sc] are left untouched.
		text = string.gsub(text, '[\194-\244][\128-\191]+', letters[sc])
	end

	return text
end

return export