模組:Ine-common

維基詞典,自由的多語言詞典


-- Table of public functions; it is the value this module returns at the end
-- of the file.
local export = {}

-- Per-character lookup used by export.lengthen: every short vowel, plain or
-- carrying an acute accent, maps to its long counterpart. Each row pairs the
-- unaccented vowel with its accented form.
local short_to_long = {
	a = "ā", ["á"] = "ā́",
	e = "ē", ["é"] = "ḗ",
	i = "ī", ["í"] = "ī́",
	o = "ō", ["ó"] = "ṓ",
	u = "ū", ["ú"] = "ū́",
}

-- Per-character lookup used by export.destress: every character that carries
-- an acute accent maps to the same character without it.
local acute_to_none = {
	["á"] = "a",
	["é"] = "e",
	["í"] = "i",
	["ó"] = "o",
	["ú"] = "u",

	["ḗ"] = "ē",
	["ṓ"] = "ō",

	-- Precomposed acute sonorants. In NFC-normalised text an accented syllabic
	-- sonorant is one of these letters followed by a combining ring below, so
	-- the bare combining-acute entry further down never sees the accent.
	-- export.add_ending already counts these letters as stress marks when it
	-- inspects the stem; without these entries a destressed ending could keep
	-- its accent.
	["ĺ"] = "l",
	["ḿ"] = "m",
	["ń"] = "n",
	["ŕ"] = "r",

	-- A free-standing combining acute (e.g. the one following ā, ī or ū, which
	-- have no precomposed accented form) is simply dropped.
	["́"] = "",
}

-- Vowel colouring used by export.laryngeal_color, keyed first by the
-- laryngeal and then by the vowel standing next to it: h₂ gives a-colour,
-- h₃ gives o-colour, and an acute accent on the vowel is carried over.
local colors = {
	["h₂"] = { e = "a", ["é"] = "á" },
	["h₃"] = { e = "o", ["é"] = "ó" },
}

-- Returns `text` with every short vowel listed in short_to_long replaced by
-- its long counterpart; all other characters pass through unchanged.
function export.lengthen(text)
	-- The parentheses keep only the substituted string and discard the
	-- replacement count that gsub returns as a second value.
	return (mw.ustring.gsub(text, ".", short_to_long))
end

-- Returns `text` with every character listed in acute_to_none replaced by
-- its unaccented form; all other characters pass through unchanged.
function export.destress(text)
	-- The parentheses keep only the substituted string and discard the
	-- replacement count that gsub returns as a second value.
	return (mw.ustring.gsub(text, ".", acute_to_none))
end

-- Recolours e/é standing directly next to h₂ or h₃, using the `colors` table,
-- and returns the resulting string.
function export.laryngeal_color(text)
	local laryngeal_pat = "(h[₂₃])"
	local vowel_pat = "([eé])"

	-- Vowel directly after the laryngeal: keep the laryngeal, recolour the vowel.
	local function color_following(lar, v)
		return lar .. colors[lar][v]
	end

	-- Vowel directly before the laryngeal: recolour the vowel, keep the laryngeal.
	local function color_preceding(v, lar)
		return colors[lar][v] .. lar
	end

	-- The laryngeal-then-vowel pass runs first, so in a sequence such as h₃eh₂
	-- the vowel takes the colour of the preceding laryngeal (o) and the second
	-- pass finds no e left to recolour. Whether that precedence is the
	-- linguistically correct one was left as an open question by the original
	-- author.
	local after_first_pass = mw.ustring.gsub(text, laryngeal_pat .. vowel_pat, color_following)

	return (mw.ustring.gsub(after_first_pass, vowel_pat .. laryngeal_pat, color_preceding))
end

-- Turns glides and sonorants into their syllabic forms: w becomes u, y becomes
-- i, and each of l, r, m, n gets a combining ring below appended.
local function syllabify(text)
	local ugsub = mw.ustring.gsub

	local vocalic = ugsub(text, "w", "u")
	vocalic = ugsub(vocalic, "y", "i")

	-- Parentheses drop gsub's replacement count.
	return (ugsub(vocalic, "([lrmn])", "%1̥"))
end

-- Turns syllabic forms back into glides and plain sonorants: u becomes w, i
-- becomes y, and every combining ring below is removed.
local function desyllabify(text)
	local ugsub = mw.ustring.gsub

	local consonantal = ugsub(text, "u", "w")
	consonantal = ugsub(consonantal, "i", "y")

	-- Parentheses drop gsub's replacement count.
	return (ugsub(consonantal, "̥", ""))
end

-- Joins `stem` and `ending` into one word and then applies a fixed sequence
-- of string substitutions to the result (the steps are labelled below).
--
-- Parameters:
--   stem          - the stem string.
--   ending        - the ending string to attach.
--   ending_unstr  - optional; when the stem contains an acute-marked character
--                   and this is truthy, it is used in place of `ending`.
--                   Otherwise `ending` is passed through export.destress.
--   do_szemerenyi - when truthy, the two extra word-final "s" rules in the
--                   Szemerényi section are applied as well.
--
-- Returns the resulting word string.
--
-- Note on the gsub calls that pass export.lengthen or export.laryngeal_color
-- as the replacement: the callback receives the first capture (or the whole
-- match if the pattern has no captures) and its return value replaces the
-- WHOLE match. Patterns with a capture therefore drop whatever part of the
-- match lies outside the capture.
function export.add_ending(stem, ending, ending_unstr, do_szemerenyi)
	-- Destress the ending if necessary: the stem counts as stressed when it
	-- contains a spacing acute, a precomposed acute letter, or a combining acute.
	if mw.ustring.find(stem, "[´áéíĺḿńóŕúḗṓ́]") then
		if ending_unstr then
			ending = ending_unstr
		else
			ending = export.destress(ending)
		end
	end
	
	-- Desyllabify sonorants next to vowels.
	-- Applies when the ending begins with a vowel or with a sonorant followed
	-- by a ring below, and the stem ends in l/m/n/r/w/y/i/u or a ring below.
	if (mw.ustring.find(ending, "^[aeiouāēīōūáéíóúḗṓ]") or mw.ustring.find(ending, "^[lrmn]̥")) and mw.ustring.find(stem, "[lmnrwyiu̥]$") then
		-- Split the stem into the trailing run of such characters and what
		-- precedes it.
		local rest, sonorants = mw.ustring.match(stem, "^(.-)([lmnrwyiu̥]+)$")
		
		-- Reduce the whole run to non-syllabic form, then split it on the empty
		-- pattern (per Scribunto's mw.text.split: into individual characters).
		sonorants = desyllabify(sonorants)
		sonorants = mw.text.split(sonorants, "", true)
		
		-- Walking backwards from the second-to-last element, make every other
		-- one syllabic; the last element is never touched by this loop.
		for i = #sonorants - 1, 1, -2 do
			sonorants[i] = syllabify(sonorants[i])
		end
		
		-- If the part before the run ends in a vowel (or combining acute), the
		-- first element of the run is forced back to non-syllabic form.
		if mw.ustring.find(rest, "[aeiouāēīōūáéíóúḗṓ́]$") then
			sonorants[1] = desyllabify(sonorants[1])
		end
		
		stem = rest .. table.concat(sonorants)
	end
	
	-- If the (possibly rewritten) stem ends in a vowel or combining acute, an
	-- ending-initial syllabic sonorant loses its ring, and ending-initial i/u
	-- become y/w.
	if mw.ustring.find(stem, "[aeiouāēīōūáéíóúḗṓ́]$") then
		ending = mw.ustring.gsub(ending, "^([lrmn])̥", "%1")
		ending = mw.ustring.gsub(ending, "^i", "y")
		ending = mw.ustring.gsub(ending, "^u", "w")
	end
	
	local word = stem .. ending
	
	-- Delabialization: ʷ is dropped directly before u/w, and directly after
	-- u/w followed by g or k.
	word = mw.ustring.gsub(word, "ʷ([uw])", "%1")
	word = mw.ustring.gsub(word, "([uw][gk])ʷ", "%1")
	
	-- Stang's law
	-- Word-final short a/e/o + m/w/y + syllabic m: first lengthen the vowel
	-- (no captures, so the whole match goes through export.lengthen), then
	-- replace the consonant + syllabic m by plain m.
	word = mw.ustring.gsub(word, "[aeoáéó][mwy]m̥$", export.lengthen)
	word = mw.ustring.gsub(word, "([āēōḗṓ́])[mwy]m̥$", "%1m")
	
	-- Same treatment before word-final h₂ + syllabic m: colour e/é first,
	-- lengthen the vowel, then replace h₂ + syllabic m by plain m.
	word = mw.ustring.gsub(word, "[eé]h₂m̥$", export.laryngeal_color)
	word = mw.ustring.gsub(word, "[aeoáéó]h₂m̥$", export.lengthen)
	word = mw.ustring.gsub(word, "([āēōḗṓ́])h₂m̥$", "%1m")
	
	-- Word-final vowel + y + i: only the captured "vowel + y" is lengthened
	-- and kept, so the final i is dropped.
	word = mw.ustring.gsub(word, "([aeiouāēīōūáéíóúḗṓ́]y)i$", export.lengthen)
	
	-- Szemerényi's law
	-- In each rule only the capture is lengthened and kept; the rest of the
	-- match (nh₂, h₂, or d respectively) is dropped from the end of the word.
	word = mw.ustring.gsub(word, "([oó])nh₂$", export.lengthen)
	word = mw.ustring.gsub(word, "([aeiouāēīōūáéíóúḗṓ́][lmnrs])h₂$", export.lengthen)
	word = mw.ustring.gsub(word, "([aeiouāēīōūáéíóúḗṓ́]r)d$", export.lengthen)
	
	-- Optional: the same two shapes with word-final s instead of h₂.
	if do_szemerenyi then
		word = mw.ustring.gsub(word, "([oó])ns$", export.lengthen)
		word = mw.ustring.gsub(word, "([aeiouāēīōūáéíóúḗṓ́][lmnrs])s$", export.lengthen)
	end
	
	-- Laryngeal deletion
	-- After an o-vowel + l/m/n/r/w/y, a laryngeal (h₁/h₂/h₃ or the cover
	-- symbol H) is removed when one of the listed consonants follows.
	word = mw.ustring.gsub(word, "([oōóṓ][lmnrwy])h[₁₂₃]([ptkḱbdgǵshHwy])", "%1%2")
	word = mw.ustring.gsub(word, "([oōóṓ][lmnrwy])H([ptkḱbdgǵshHwy])", "%1%2")
	
	-- Same deletion before l/m/n/r, but only when that sonorant is followed by
	-- some character other than a ring below. A following character is
	-- required, so a word-final sonorant does not match.
	word = mw.ustring.gsub(word, "([oōóṓ][lmnrwy])h[₁₂₃]([lmnr][^̥])", "%1%2")
	word = mw.ustring.gsub(word, "([oōóṓ][lmnrwy])H([lmnr][^̥])", "%1%2")
	
	-- Degemination
	word = mw.ustring.gsub(word, "ss", "s")
	
	-- iy / íy followed by a character that is not one of the listed vowels
	-- becomes long ī / ī́. A following character is required, so word-final iy
	-- is left alone.
	word = mw.ustring.gsub(word, "iy([^aeiouāēīōūáéíóúḗṓ])", "ī%1")
	word = mw.ustring.gsub(word, "íy([^aeiouāēīōūáéíóúḗṓ])", "ī́%1")
	
	-- Likewise uw / úw followed by such a character becomes long ū / ū́.
	word = mw.ustring.gsub(word, "uw([^aeiouāēīōūáéíóúḗṓ])", "ū%1")
	word = mw.ustring.gsub(word, "úw([^aeiouāēīōūáéíóúḗṓ])", "ū́%1")
	
	return word
end

-- Hand the table of public functions to whoever loads this module.
return export