Module:pa-Arab-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module is in beta stage.
The module has been updated. It should work as expected for the majority of words, but it may have problems with diphthongs and with a minority of Arabic loanwords. The module needs verification from other native Punjabi speakers who understand Shahmukhi lemmas with diacritics, because the rules are quite difficult to get your head around. Check the documentation for further listed problems.
This module will transliterate text in the Shahmukhi script. It is also used to transliterate Old Punjabi, Pahari-Potwari, and Saraiki.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:pa-Arab-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

Introduction

....

Exceptions

The following words do not need any diacritics:

Notes

  • نْ should be written as ن٘ when it is from an inherited nasal vowel (i.e. most cases)
  • Shahmukhi distinguishes the Sukoon/Jazm diacritic (ءْ) from the absence of a diacritic (ء):
    • Sukoon/Jazm should be used where there is a true consonant cluster, such as in تْرے (trai), بْھرا (bhrā), گْراں (grāṉ) etc.
    • The diacritic should be absent where there is a weak or semi-deleted schwa ([ə̆] or [ᵊ], rarely also [ɪ̆] and [ʊ̆]), most commonly at the end of words such as in مَجّھ (majjhă), ناࣇ (nāḷĭ) etc. but also medially as in چَھڈّݨا (chaḍḍăṇā), دَسّݨا (dassăṇā) etc.
  • Final ہ (h) should be written double if it joins to the previous letter, such as in مُون٘ہہ (mūṉh), ایہہ (eh) but not اوہ (oh). This does not apply when final ہ (h) is used as a vowel, e.g. in چُوچَہ (cūcā) or بارَہ (bārhā̃).

To do list

  • Sort out any problems with diphthongs
    • make sure all diphthongs are represented correctly
    • change īū -> iyū
  • Arabic loanwords
    • al and non al words
    • archaic ha murtaba tah
  • Diacritics detector
  • Distinguish ـہ used as a schwa from ـہ in loanwords: no diacritics are needed when it is a schwa ('..a'), but they are needed when it is a loanword ('..ah')

Working Template Examples

Headword-line templates

When replacing the header:
{{pa-noun|gur=ਮਸੀਤ|g=f|head=مَسِیت}}

Check: مسیت (example can't be shown on module documentation)


When the header does not need replacing:
{{pa-noun|g=f|gur=ਅਲਮਾਰੀ}}

Check: الماری (example can't be shown on module documentation)

Other templates

{{ux|pa|مَیں کَلّھ لَندَن جا رہا واں۔|I am going to London tomorrow.}}:

مَیں کَلّھ لَندَن جا رِہا واں۔
maiṉ kallh landan jā rihā vāṉ.
I am going to London tomorrow.

Status

Last updated: 23/08/2021


-- String helpers shared by this module. `U` is called with code points
-- (e.g. U(0x64E)) to build single characters; `gsub` performs every
-- substitution in export.tr.
local m_str_utils = require("Module:string utilities")

local U = m_str_utils.char
local gsub = m_str_utils.gsub

-- Table returned at the end of the module; export.tr is attached to it.
local export = {}

-- Short-vowel and related combining marks. `mapping` romanises zabar, zer and
-- pesh as "a", "i" and "u", and jazm as the empty string.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ"
local he = "ہ"

-- Individual letters that the contextual rules in export.tr refer to by name.
local ain = 'ع'
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ئ'
local ye3 = "ے"
local vao = "و"
local aspirate = 'ھ'
local nasal = 'ں'

-- Character sets. Each string is interpolated between "[" and "]" in the gsub
-- patterns of export.tr, so it acts as "any one of these characters".
-- The three consonant sets are not identical:
--   * `consonants` includes و and ں.
--   * `consonantS` has neither و nor ں.
--   * `consonantS2` starts with ی, includes و, and has no ں.
-- NOTE(review): `consonants` lists ب and ڄ twice, where the other two sets
-- have ڃ before ڄ. A repeated character is harmless inside a character class,
-- but ڃ may have been intended here - confirm.
local consonants = "بٻبپتثجڄڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنںݨوہھٹڈڑ"
local consonantS = "بٻبپتثجڃڄچحخدݙذرزژسشصضطظعغفقکگڳلࣇمنݨہھٹڈڑ"
local consonantS2 = "یٻببپتثجڃڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنݨوہھٹڈڑ" 
local vowels = "اآیئےۓوؤ"
-- NOTE(review): `hes` and `diacritics` are not referenced anywhere else in the
-- code of this module as shown.
local hes = "ہح"
local diacritics = "َُِّْٰ"
-- ZZP = zabar, zer, pesh: the three short-vowel marks as one character set.
local ZZP = "َُِ"

-- Per-character romanisation table. export.tr passes it to
-- gsub(text, '.', mapping) after the context-sensitive rules have run, so it
-- only decides the output for characters those rules left untouched.
local mapping = {
	-- letters
	["آ"] = "ā",
	["ب"] = "b",
	["ٻ"] = "ḇ",
	["پ"] = "p",
	["ت"] = "t",
	["ٹ"] = "ṭ",
	["ث"] = "s̱",
	["ج"] = "j",
	["ڄ"] = "ǰ",
	["چ"] = "c",
	["ح"] = "ḥ",
	["خ"] = "x",
	["د"] = "d",
	["ڈ"] = "ḍ",
	["ݙ"] = "ḏ",
	["ذ"] = "ẕ",
	["ر"] = "r",
	["ڑ"] = "ṛ",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "ś",
	["ص"] = "ṣ",
	["ض"] = "ẓ",
	["ط"] = "t̤",
	["ظ"] = "z̤",
	["ع"] = "ʻ",
	["غ"] = "ġ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ڳ"] = "g̈",
	["ݨ"] = "ṇ",
	["ࣇ"] = "ḷ",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "v",
	["ہ"] = "h",
	["ی"] = "y",
	["ں"] = "ṉ",
	["ھ"] = "h",
	["ؤ"] = "'o",

	-- letters that carry an extra vowel
	["ۓ"] = "-ye",
	["ۀ"] = "h-e", -- he ye (in izafat)

	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[jazm] = "", -- also sukun - no vowel

	-- joining controls
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	["ـ"] = "-", -- kashida, no sound

	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",

	-- numerals
	["۰"] = "0",
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",

	-- punctuation
	["۔"] = ".", -- full stop
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- quotation mark
	["»"] = "”", -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
}

--- Transliterates Shahmukhi text into Latin script.
-- The text is rewritten by a fixed sequence of gsub substitutions. The order
-- matters: every rule sees the output of all rules before it, so moving a rule
-- can change the result. Once the context-sensitive rules have run, each
-- remaining character is looked up in `mapping`, and a final group of rules
-- tidies the resulting Latin text.
-- @param text the string to transliterate
-- @param lang language code; part of the tr(text, lang, sc) signature, not read here
-- @param sc script code; part of the tr(text, lang, sc) signature, not read here
-- @return the transliterated string (there is no failure path in this
--         function, so it always returns a string)
function export.tr(text, lang, sc)

	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere

	-- A bare ye (no vowel mark before it) directly after a consonant is read as "e".
	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao .. nasal, "%1eyoṉ") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao, "%1eyo") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])' , "%1e%2") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. alif, "%1eyā") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(ی)" , "hai") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(و)" , "hau") -- needs to be fixed
	text = gsub(text, "ئے", "'e")
	text = gsub(text, "َے", "ai")
	-- NOTE(review): this replaces every remaining ے, so the later rules that
	-- mention ے (or ye3) are not expected to match any more.
	text = gsub(text, "ے", "e")
	text = gsub(text, "ہہ", "h")
	text = gsub(text, "اے", "e")
	text = gsub(text, "اَے", "ai")
	text = gsub(text, "ن٘", "ṉ")
	text = gsub(text, "اللہ", "allāh")
	text = gsub(text, "ؤ" .. pesh, "ū") -- needs to be fixed

	--text = gsub(text, "ُھوِیں", "vīṉ")
	text = gsub(text, "([" .. pesh .. aspirate  .. "])" .. "وِیں", "%1vīṉ")

	-- diacritics
	text = gsub(text, "([" .. consonants  .. "])" .. zer .. ye .. alif, "%1īyā")
	text = gsub(text, pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. vao, "%1au")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. ye, "%1ai")
	text = gsub(text, "([" .. alif .. consonants  .. "])" .. zabar .. ye3, "%1ai")
	text = gsub(text, "([" .. consonants  .. "])" .. zer .. ye, "%1ī")
	text = gsub(text, jazm .. alif, "ā")

	text = gsub(text, ye2 .. zer .. ye, "'ī")
	text = gsub(text, "" .. alif .. ye2 .. "([" .. consonants  .. "])", "ā'i%1") -- needs to be fixed

	-- Initial alif
	-- NOTE(review): alif + zer is emitted as long "ī"; confirm that a short "i"
	-- is not what is wanted here.
	text = gsub(text, alif .. zer, "ī")
	text = gsub(text, alif .. zabar .. '([' .. consonantS .. '])', "a%1")

	text = gsub(text, alif .. zabar .. vao .. jazm .. "", "au")
	text = gsub(text, alif .. vao .. jazm .. "", "o")
	text = gsub(text, alif .. zabar .. ye .. jazm .. "", "ai")
	text = gsub(text, alif .. ye .. jazm .. "", "e")

	text = gsub(text, alif .. pesh .. vao, "ū")
	-- NOTE(review): the rule above already rewrites alif + pesh + vao, so this
	-- longer pattern is not expected to match; a trailing jazm is removed later
	-- through `mapping` instead.
	text = gsub(text, alif .. pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, alif .. pesh, "u")

	-- do-chashme-he zabar, zer, pesh / no need to mess about
	--- works for short vowels
	-- Moves the "h" in front of the short vowel: vowel mark + ھ becomes "h" + vowel mark.
	text = gsub(text, "([" .. ZZP  .. "])" .. aspirate, "h%1")
	-- NOTE(review): after the rule above no short-vowel mark is followed by ھ,
	-- so the next two patterns are not expected to match. The second one also
	-- emits "ai" for a pattern containing vao - confirm the intent.
	text = gsub(text, pesh .. aspirate .. vao .. jazm .. "", "ū")
	text = gsub(text, zabar .. aspirate .. vao .. jazm .. "", "ai")
	text = gsub(text, '([' .. consonants .. '])' .. aspirate .. ye .. jazm .. "", "%1he")

	-- Tashdeed: the marked consonant is written twice.
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid, "%1%1")
	-- NOTE(review): the rule above consumes every consonantS2 + tashdid pair,
	-- so this pattern is not expected to match.
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. '([' .. ZZP .. '])', "%1%1%2")
	text = gsub(text, '([' .. ZZP .. '])' .. ye .. '([' .. ZZP .. '])' .. tashdid, "%1yy%2")
	text = gsub(text, '([' .. ZZP .. '])' .. vao .. '([' .. ZZP .. '])' .. tashdid, "%1vv%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZP .. '])' .. tashdid, "%1%1%2")

	-- tanween diacritic / no need to mess about
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1an")
	text = gsub(text, alif .. 'ً', "an")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1an")

	-- khari zabar -- / no need to mess about
	text = gsub(text, '([' .. vowels .. '])' .. 'ٰ', "á")
	text = gsub(text, '([' .. consonants .. '])' .. 'ٰ' .. '([' .. vowels .. '])', "%1á")

	-- ʻain
	-- Every rule in this group emits U+02BB (ʻ), the same character that
	-- `mapping` uses for ع, so ʻain is romanised identically whichever rule fires.
	text = gsub(text, alif .. ain , "āʻ")
	text = gsub(text, ain .. alif  .. '([' .. consonants .. '])', "ʻā%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1ʻa")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2ʻ")
	text = gsub(text, ain .. zabar .. vao .. '([' .. consonants .. '])', "ʻau%1")
	text = gsub(text, ain .. zabar .. ye .. '([' .. consonants .. '])', "ʻai%1")
	text = gsub(text, ain .. zer  .. '([' .. consonants .. '])', "ʻi%1")
	text = gsub(text, ain .. pesh  .. '([' .. consonants .. '])', "ʻu%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ʻī%1")
	text = gsub(text, ain .. pesh .. vao .. '([' .. consonantS .. '])', "ʻū%1")

	---  alif
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1%1ā")

	-- Vao
	text = gsub(text, vao .. '([' .. ZZP .. '])', "v%1")
	text = gsub(text, 'ُو', "ū")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. vao .. alif, "%1avā")

	-- medial/final consonants

	--- (e) -- works
	text = gsub(text, '([' .. consonants .. '])' .. ye .. jazm .. '([' .. consonants .. '])', "%1e%2")
	text = gsub(text, '([' .. consonants .. '])' .. ye3, "%1e")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. ye3, "%1ai")
	--- izafat
	-- A zer directly before a space is written as "-e".
	text = gsub(text, '([' .. consonants .. '])' .. zer .. " ", "%1-e ")

	--- he
	text = gsub(text, zabar .. he .. zer .. ye, "ahī")
	text = gsub(text, zabar .. he .. alif, "ahā")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ah%1")

	--- vao
	-- A vao still directly after a consonant at this point is read as "o".
	text = gsub(text, '([' .. consonants .. '])' .. vao, "%1o")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. vao, "%1%1o")

	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. zer .. ye .. jazm .. alif, "%1%1īyā")
	text = gsub(text, zer .. ye .. jazm .. alif, "īyā")
	text = gsub(text, zer .. ye .. alif, "iyā")

	--- ye
	text = gsub(text, ye .. zabar .. alif, "yā")
	text = gsub(text, '([' .. consonants .. zer .. '])' .. ye, "%1ī")

	text = gsub(text, "ۂ", "a-e")

	-- Romanise whatever is left, one character at a time. Characters without
	-- an entry in `mapping` are presumably left unchanged (standard gsub
	-- table-replacement behaviour; confirm for Module:string utilities).
	text = gsub(text, '.', mapping)

	-- Clean-up on the Latin output: ī directly before a vowel is written "iy",
	-- the usual spelling for ی in that position.
	text = gsub(text, 'īā', "iyā")
	text = gsub(text, 'īa', "iya")

	text = gsub(text, 'aa', "ā")
	--

	text = gsub(text, 'ئy', "'ī")
	text = gsub(text, "" .. 'ئے', "'ye")
	text = gsub(text, "īے", "iye")
	text = gsub(text, "iīe", "iye")
	text = gsub(text, "īe", "iye")
	text = gsub(text, "iīv", "iyo")
	text = gsub(text, "ئiy", "'ī")

	return text
end
return export