模組:Utilities

下列說明文檔位於Module:Utilities/doc。^[編輯]

該模塊導出各種通用功能，可供其他模塊使用。

pattern_escape

pattern_escape(text)

跳脫匹配模式（Lua的正則表達式版本）中使用的魔法字符。比如，"^$()%.[]*+-?" 變成 "%^%$%(%)%%%.%[%]%*%+%-%?"。（注意：本頁下方的模塊代碼中並未定義此函數。）

format_categories

format_categories(categories, lang, sort_key, sort_base, force_output, sc)

將一個分類名稱的列表（表格）格式化。輸出是一個由所有類別組成的字符串，每個類別都使用了[[Category:...]]，並添加了給定的排序鍵。如果當前頁面不在主命名空間、附錄（Appendix）、同義詞庫（Thesaurus）、引文（Citations）或重建（Reconstruction）命名空間，也不是Wiktionary:Sandbox，輸出將是一個空字符串，除非給出force_output。如果沒有給定排序鍵：

默認的排序鍵是通過使用sort_base（如果有給定的話）或當前的子頁面名稱，以及去除開頭的連字符（以便後綴可以在沒有鍵的情況下進行排序）而生成的。
如果給定的語言有排序鍵，則其將會被用來創建一個遵循該語言規則的排序鍵。

template_categorize

{{#invoke:utilities|template_categorize}}

這個函數使用於 {{categorize}}、{{catlangname}} 和 {{catlangcode}} 等模板。

catfix

這個函數添加了一個「catfix」，它被用於特定語言的分類頁面，為所有條目名稱添加語言屬性，通常是文字類（script classes）。語言屬性和文字類的添加使條目名稱顯示得更好（使用MediaWiki:Common.css中指定的特定語言或腳本樣式），這對於在瀏覽器中沒有一致字體支持的非英語語言尤其重要。

語言屬性是為所有語言添加的，但文字類只為在其數據文件中僅列出一種文字的語言添加，或者為在Module:utilities/data的catfix_scripts列表中列出一個默認文字的語言添加。有些語言顯然有一個默認文字，但在他們的數據文件中仍有其他文字，因此需要指定他們的默認文字。其他語言則沒有默認文字。

塞爾維亞-克羅地亞語經常以拉丁字母和西里爾字母兩種文字書寫。因為使用兩種文字，所以塞爾維亞-克羅地亞語不能在其分類頁面的條目中使用文字類別，因為一次只能指定一種文字類別。

俄語通常用西里爾文字（Cyrl）書寫，但盲文（Brai）也列於其數據文件中。因此，俄語需要在catfix_scripts列表中加入一個條目，這樣Cyrl（西里爾字母）文字類就會被應用於其分類頁面中的條目。

要查看一種語言所列出的文字，請見Module:languages，並使用搜索框找到該語言的數據文件。要想知道一個文字代碼的含義，請在Module:scripts/data中搜索該文字代碼。

-- Table of functions exported by this module; returned at the end of the file.
local export = {}

-- Names of the other modules this module depends on. They are only passed to `require` or `mw.loadData` from
-- inside the loader functions below, so nothing is loaded at this point.
local headword_data_module = "Module:headword/data"
local languages_module = "Module:languages"
local links_module = "Module:links"
local script_utilities_module = "Module:script utilities"
local scripts_module = "Module:scripts"
local string_utilities_module = "Module:string utilities"
local utilities_data_module = "Module:utilities/data"

-- Local references to the global library tables used below.
local mw = mw
local package = package
local table = table

-- Local aliases for the global and library functions used in this module.
local anchor_encode = mw.uri.anchorEncode
local concat = table.concat
local format = string.format
local ipairs = ipairs
local load_data = mw.loadData
local require = require
local tonumber = tonumber
local type = type
local unstrip = mw.text.unstrip

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
	-- Lazy loader: on first use, fetches `decode_entities` from the string utilities module, rebinds this local to
	-- it, and forwards the call.
	local function decode_entities(...)
		local fn = require(string_utilities_module).decode_entities
		decode_entities = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `getByCode` from the languages module, rebinds this local to it, and
	-- forwards the call.
	local function get_lang(...)
		local fn = require(languages_module).getByCode
		get_lang = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `getByCode` from the scripts module, rebinds this local to it, and
	-- forwards the call.
	local function get_script(...)
		local fn = require(scripts_module).getByCode
		get_script = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `remove_links` from the links module, rebinds this local to it, and
	-- forwards the call.
	local function remove_links(...)
		local fn = require(links_module).remove_links
		remove_links = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `tag_text` from the script utilities module, rebinds this local to it,
	-- and forwards the call.
	local function tag_text(...)
		local fn = require(script_utilities_module).tag_text
		tag_text = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `trim` from the string utilities module, rebinds this local to it, and
	-- forwards the call.
	local function trim(...)
		local fn = require(string_utilities_module).trim
		trim = fn
		return fn(...)
	end
	
	-- Lazy loader: on first use, fetches `upper` from the string utilities module, rebinds this local to it, and
	-- forwards the call.
	local function uupper(...)
		local fn = require(string_utilities_module).upper
		uupper = fn
		return fn(...)
	end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
	-- Holds the `catfix_scripts` table from the utilities data module once it has been loaded.
	local catfix_scripts
	-- One-shot loader: stores the table in `catfix_scripts`, clears itself, and returns the table.
	local function get_catfix_scripts()
		local value = load_data(utilities_data_module).catfix_scripts
		catfix_scripts = value
		get_catfix_scripts = nil
		return value
	end
	
	-- Holds the result of mw.title.getCurrentTitle() once it has been fetched.
	local current_title
	-- One-shot loader: stores the title object in `current_title`, clears itself, and returns the object.
	local function get_current_title()
		local value = mw.title.getCurrentTitle()
		current_title = value
		get_current_title = nil
		return value
	end
	
	-- Holds `page.pagename_defaultsort` from the headword data module once it has been loaded.
	local defaultsort
	-- One-shot loader: stores the value in `defaultsort`, clears itself, and returns the value.
	local function get_defaultsort()
		local value = load_data(headword_data_module).page.pagename_defaultsort
		defaultsort = value
		get_defaultsort = nil
		return value
	end
	
	-- Holds `page.encoded_pagename` from the headword data module once it has been loaded.
	local pagename
	-- One-shot loader: stores the value in `pagename`, clears itself, and returns the value.
	local function get_pagename()
		local value = load_data(headword_data_module).page.encoded_pagename
		pagename = value
		get_pagename = nil
		return value
	end

do
	local loaded = package.loaded
	local loader = package.loaders[2]
	-- Set of module names already found not to exist (name -> true). Created lazily on the first miss.
	local not_found

	--[==[
	Like require, but returns nil if a module does not exist, instead of throwing an error.
	Outputs are cached, which is faster for all module types, but much faster for nonexistent modules since require will attempt to use the full loader each time (since they don't get cached in {package.loaded}).

	* `modname` is the name of the module to load.
	Returns the loaded module, or {nil} if the loader cannot find it. Errors raised while running an existing module still propagate, because such modules are loaded through `require`.]==]
	function export.safe_require(modname)
		local mod = loaded[modname]
		if mod ~= nil then
			return mod
		elseif not (not_found and not_found[modname]) then
			-- The loader returns a function if the module exists, or nil if it doesn't, and checking this is faster than using pcall with require. If found, we still use require instead of loading and caching directly, because we still want the safety check against infinite loading loops and in-module errors to be thrown.
			if loader(modname) then
				return require(modname)
			end
			-- Record the miss under the actual module name. Note that the table constructor `{modname = true}`
			-- would create the literal string key "modname" rather than using the variable's value.
			if not not_found then
				not_found = {}
			end
			not_found[modname] = true
		end
		return nil
	end
end

--[==[
Convert decimal to hexadecimal.

Note: About three times as fast as the hex library.]==]
function export.dec_to_hex(dec)
	-- Accept anything `tonumber` can convert; reject non-numbers and values with a fractional part.
	local num = tonumber(dec)
	if not num or num % 1 ~= 0 then
		error("Input should be a decimal integer.")
	end
	-- "%X" yields uppercase hexadecimal digits.
	return format("%X", num)
end

do
	-- Receives the inside of a single-bracketed external link (the URL, optionally followed by display text).
	-- Returns the text that follows the first whitespace character after the URL, or an empty string if the
	-- link has no display text.
	local function handle_url(capture)
		return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
	end
	
	--[==[
	A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.

	* `text` is the wikitext string to convert.
	Returns the text with strip markers, HTML tags, internal and external link syntax, file/image links, bold and italic markup and soft hyphens removed, HTML entities decoded, and the result trimmed.]==]
	function export.get_plaintext(text)
		-- Stand in the control characters \1 and \2 for "[[" and "]]", so that the single-character classes
		-- in the patterns below can refer to double brackets.
		text = text
			:gsub("%[%[", "\1")
			:gsub("%]%]", "\2")

		-- Remove strip markers and HTML tags.
		text = unstrip(text):gsub("<[^<>\1\2]+>", "")

		-- Parse internal links for the display text, and remove categories.
		text = remove_links(text)

		-- Remove files.
		text = text:gsub("\1[Ff][Ii][Ll][Ee]:[^\1\2]+\2", "")
			:gsub("\1[Ii][Mm][Aa][Gg][Ee]:[^\1\2]+\2", "")

		-- Parse external links for the display text.
		text = text:gsub("%[(https?://[^%[%]]+)%]", handle_url)
			-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
			:gsub("\1", "&#91;&#91;")
			:gsub("\2", "&#93;&#93;")
			:gsub("%[", "&#91;")
			:gsub("]", "&#93;")
			-- Strip bold and italics.
			:gsub("('*)'''(.-'*)'''", "%1%2")
			:gsub("('*)''(.-'*)''", "%1%2")
			-- Strip soft hyphens. U+00AD is written as its UTF-8 byte sequence, because a literal soft hyphen
			-- is invisible in the source and easily lost, which would leave an empty pattern that does nothing.
			:gsub("\194\173", "")
		
		-- Get any HTML entities and trim.
		-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
		return trim(decode_entities(text))
	end
end

--[==[
Format the categories with the appropriate sort key.
* `categories` is a list of categories. Each entry in the list can be either a string (the full category, minus
  the {"Category:"} prefix) or an object. In the latter case, the object should have fields
  ** `cat`: the full category, minus the {"Category:"} prefix (required);
  ** `lang`: optional language object to override the overall `lang`;
  ** `sort_key`: optional sort key to override the overall `sort_key`;
  ** `sort_base`: optional sort base to override the overall `sort_base`;
  ** `sc`: optional script object to override the overall `sc`.
* `lang` is an object encapsulating a language; if {nil}, the object for language code {"und"} (undetermined) will
  be used. `lang` is used when computing the sort key (either from the subpage name or sort base).
* `sort_key` is placed in the category invocation, and indicates how the page will sort in the respective category.
  Normally '''do not use this'''. Instead, leave it {nil}, and if you need to control the sort order, use
  {sort_base}, so that language-specific normalization is applied on top of the specified sort base. If neither
  {sort_key} nor {sort_base} is specified, the default is to apply language-specific normalization to the subpage
  name; see below.
* `sort_base` lets you override the default sort key while still maintaining appropriate language-specific
  normalization. If {nil} is specified, this defaults to the subpage name, which is the portion of the full pagename
  after subtracting the namespace prefix (and, in certain namespaces such as {User:}, but notably not in the
  mainspace, after subtracting anything up through the final slash). The actual sort key is derived from the sort
  base approximately by lowercasing, applying language-specific normalization and then uppercasing; note that the
  same process is applied in deriving the sort key when no sort base is specified. For example, for French, Spanish,
  etc. the normalization process maps accented letters to their unaccented equivalents, so that e.g. in French,
  {{m|fr|ça}} sorts after {{m|fr|ca}} (instead of after the default Wikimedia sort order, which is approximately
  based on Unicode sort order and places ç after z) and {{m|fr|côté}} sorts after {{m|fr|coté}} (instead of between
  c and d). Similarly, in Russian the normalization process converts Cyrillic ё to a string consisting of Cyrillic е
  followed by U+10FFFF, so that effectively ё sorts after е instead of the default Wikimedia sort, which (I think)
  puts ё after я, the last letter of the Cyrillic alphabet.
* `force_output` forces normal output in all namespaces. Normally, nothing is output if the page isn't in the main,
  Appendix:, Thesaurus:, Reconstruction: or Citations: namespaces.
* `sc` is a script object; if nil, the default will be derived from the sort base (or its default value, the
  subpage name) by calling {lang:findBestScript()}. The value of `sc` is used during the sort base normalization
  process; for example, languages with multiple scripts will often have script-specific normalization processes.]==]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories should be a language object.")
	end

	-- Unless output is forced, produce nothing outside the permitted pages.
	if not force_output then
		local title = current_title or get_current_title()
		local ns = title.namespace
		-- Allowed namespaces: (main), Appendix, Thesaurus, Citations, Reconstruction; plus the sandbox page.
		local permitted = ns == 0 or ns == 100 or ns == 110 or ns == 114 or ns == 118 or
			title.prefixedText == "Wiktionary:Sandbox"
		if not permitted then
			return ""
		end
	end

	-- `output` collects the formatted category links and `count` is its length.
	-- `default_key` caches the sort key shared by all plain (non-table) entries.
	-- `extra` is a second list that is walked after `categories`, if it ever gets created.
	local output, count = {}, 0
	local default_key, extra
	local list, idx = categories, 0

	while true do
		idx = idx + 1
		local cat = list[idx]
		if cat == nil then
			-- End of the current list: move on to `extra` if we were still on `categories` and `extra` exists;
			-- otherwise we are finished.
			if list ~= categories or not extra then
				return concat(output)
			end
			list, idx = extra, 0
		else
			local is_table = type(cat) == "table"
			local key
			if not is_table and default_key then
				-- Plain entry, and the shared sort key has already been worked out.
				key = default_key
			else
				local c_lang, c_key, c_base, c_sc
				if is_table then
					-- Table entries may override any of the overall options.
					c_lang = cat.lang or lang
					c_key = cat.sort_key or sort_key
					c_base = cat.sort_base or sort_base
					c_sc = cat.sc or sc
					cat = cat.cat
				else
					c_lang, c_key, c_base, c_sc = lang, sort_key, sort_base, sc
				end
				-- A sort key of "-" means: treat the language as undetermined and discard the manual key. This
				-- is desirable when categorising (e.g.) translation requests, as the pages to be categorised are
				-- always in English/Translingual. A missing language also falls back to undetermined.
				if c_key == "-" then
					c_lang, c_key = get_lang("und"), nil
				elseif not c_lang then
					c_lang = get_lang("und")
				end
				-- Automatic sort key, with the page defaultsort as a fallback when it comes out empty.
				local auto = c_lang:makeSortKey(c_base or pagename or get_pagename(), c_sc)
				if not auto or auto == "" then
					auto = defaultsort or get_defaultsort()
				end
				if not c_key or c_key == "" then
					-- No manual sort key: use the automatic one.
					c_key = auto
				-- Otherwise, if the language is not "und", categorize the manual sort key as either redundant or non-redundant.
				-- FIXME: we should do this for "und" as well, but "Undetermined terms..." does not make sense for translations etc.
				elseif c_lang:getCode() ~= "und" then
					if not extra then
						extra = {}
					end
					-- Currently disabled, so `extra` stays empty:
					-- extra[#extra + 1] = c_lang:getFullName() .. " terms with " .. (
					-- 	uupper(c_key) == auto and "redundant" or
					-- 	"non-redundant non-automated"
					-- ) .. " sortkeys"
				end
				-- Only plain entries feed the shared default.
				if not is_table then
					default_key = c_key
				end
				key = c_key
			end
			count = count + 1
			output[count] = "[[Category:" .. cat .. "|" .. key .. "]]"
		end
	end
end

--[==[
Add a "catfix", which is used on language-specific category pages to add language attributes and often script
classes to all entry names. The addition of language attributes and script classes makes the entry names display
better (using the language- or script-specific styles specified in [[MediaWiki:Common.css]]), which is particularly
important for non-English languages that do not have consistent font support in browsers.

Language attributes are added for all languages, but script classes are only added for languages with one script
listed in their data file, or for languages that have a default script listed in the {catfix_scripts} list in
[[Module:utilities/data]]. Some languages clearly have a default script, but still have other scripts listed in
their data file and therefore need their default script to be specified. Others do not have a default script.

* Serbo-Croatian is regularly written in both the Latin and Cyrillic scripts. Because it uses two scripts,
  Serbo-Croatian cannot have a script class applied to entries in its category pages, as only one script class
  can be specified at a time.
* Russian is usually written in the Cyrillic script ({{cd|Cyrl}}), but Braille ({{cd|Brai}}) is also listed in
  its data file. So Russian needs an entry in the {catfix_scripts} list, so that the {{cd|Cyrl}} (Cyrillic) script
  class will be applied to entries in its category pages.

To find the scripts listed for a language, go to [[Module:languages]] and use the search box to find the data file
for the language. To find out what a script code means, search the script code in [[Module:scripts/data]].]==]
function export.catfix(lang, sc)
	-- Validate both arguments by checking for a method each kind of object is expected to carry.
	if not (lang and lang.getCanonicalName) then
		error('The first argument to the function "catfix" should be a language object from [[Module:languages]] or [[Module:etymology languages]].')
	elseif sc and not sc.getCode then
		error('The second argument to the function "catfix" should be a script object from [[Module:scripts]].')
	end
	local canonical_name = lang:getCanonicalName()
	local full_name = lang:getFullName()

	-- To add script classes to links on pages created by category boilerplate templates: when no script was
	-- passed in, look for a default under the language's code, then under its full code.
	if not sc then
		local defaults = catfix_scripts or get_catfix_scripts()
		local code = defaults[lang:getCode()] or defaults[lang:getFullCode()]
		if code then
			sc = get_script(code)
		end
	end

	-- One class for the canonical name, plus one for the full name when the two differ.
	local classes = anchor_encode("CATFIX-" .. canonical_name)
	if full_name ~= canonical_name then
		classes = classes .. " " .. anchor_encode("CATFIX-" .. full_name)
	end

	local inner = tag_text("&nbsp;", lang, sc, nil)
	return '<span id="catfix" style="display:none;" class="' .. classes .. '">' .. inner .. "</span>"
end

--[==[
Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language,
family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require
one of these kinds of object.

If `noErr` is set, the function returns false instead of throwing an error, which allows customised error handling to
be done in the calling function.]==]
function export.check_object(typ, noErr, ...)
	-- Reject the call if the first object is missing.
	if ... == nil then
		if noErr then
			return false
		end
		error("Must provide at least one object to check.", 2)
	end
	local objects = {...}
	local idx, obj = 1, objects[1]
	-- Walk the objects in order, stopping at the first nil.
	while obj ~= nil do
		if type(obj) == "table" and type(obj.hasType) == "function" then
			-- The type "object" accepts anything that has a hasType method.
			if typ ~= "object" and not obj:hasType(typ) then
				-- Find out what kind of object was passed instead, for the error message.
				local actual
				for _, candidate in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
					if obj:hasType(candidate) then
						actual = candidate
						break
					end
				end
				if noErr then
					return false
				end
				if actual then
					error("Function expected a " .. typ .. " object, but received a " .. actual .. " object instead.", 2)
				end
				error("Function expected a " .. typ .. " object, but received another type of object instead.", 2)
			end
		else
			-- Not a table with a hasType method at all.
			if noErr then
				return false
			end
			error("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.", 2)
		end
		idx = idx + 1
		obj = objects[idx]
	end
	return true
end

return export