Moduli:IPA: Dallime mes rishikimesh

← Redaktim më i vjetër Ndryshimi më pas →

Versioni i datës 16 gusht 2025 15:32

Udhëzuesi për këtë modul mund të krijohet te Moduli:IPA/doc.

require('strict')
local p = {}

-- Runs mw.ustring.find over s once per pattern, each time starting at init,
-- and reports the match that begins earliest in the string.
-- When two patterns match at the same position, the one listed first in
-- `patterns` is kept. Returns the start and end indices of the chosen match,
-- or nil for both when no pattern matches.
local function multiFind(s, patterns, init)
	local bestStart, bestEnd = mw.ustring.find(s, patterns[1], init)
	local n = 2
	while n <= #patterns do
		local from, to = mw.ustring.find(s, patterns[n], init)
		if from then
			-- Only a strictly earlier start replaces the current best
			if bestStart == nil or from < bestStart then
				bestStart, bestEnd = from, to
			end
		end
		n = n + 1
	end
	return bestStart, bestEnd
end

-- Encloses every run of whitespace in s in a <span class="wrap"> element.
-- Passes through both results of mw.ustring.gsub: the rewritten string and
-- the number of substitutions made.
local function wrapAtSpaces(s)
	local wrapper = '<span class="wrap">%1</span>'
	return mw.ustring.gsub(s, '(%s+)', wrapper)
end

-- Applies wrapAtSpaces to s while leaving untouched any whitespace that sits
-- inside the target part of a piped wikilink or inside an HTML tag.
-- Unpiped links containing whitespace are first rewritten as piped links so
-- that their target is covered by the piped-link pattern.
-- Returns the reassembled string.
local function wrapAtSpacesSafely(s)
	local protected = {
		'%[%[[^%]|]-%s[^%]|]-|', -- Piped links
		'</?[A-Za-z][^>]-%s[^>]->' -- HTML tags
	}
	-- Pipe all links
	s = mw.ustring.gsub(s, '%[%[([^%]|]-%s[^%]|]-)%]%]', '[[%1|%1]]')
	local pieces = {}
	local cursor
	local from, to = multiFind(s, protected, cursor)
	while from do
		-- Text before the protected span; assigned to a local first so that
		-- only the first result of wrapAtSpaces is kept
		local before = wrapAtSpaces(mw.ustring.sub(s, cursor, from - 1))
		pieces[#pieces + 1] = before
		-- The protected span itself is copied verbatim
		pieces[#pieces + 1] = mw.ustring.sub(s, from, to)
		cursor = to + 1
		from, to = multiFind(s, protected, cursor)
	end
	-- Whatever remains after the last protected span
	local tail = wrapAtSpaces(mw.ustring.sub(s, cursor))
	pieces[#pieces + 1] = tail
	return table.concat(pieces)
end

-- Decides whether categories should be emitted.
-- A truthy isDebug is returned as-is without consulting anything else;
-- otherwise the answer is the first result of Module:Category handler's
-- main function called with { true }.
local function checkNamespace(isDebug)
	if isDebug then
		return isDebug
	end
	local categoryHandler = require('Module:Category handler')
	-- Parenthesised so that exactly one value is returned
	return (categoryHandler.main({ true }))
end

-- Turns a list of category names into category wikitext.
-- Returns '' when the list is empty or when checkNamespace(isDebug) is falsy.
-- With a truthy isDebug each link gets a leading colon
-- ("[[:Category:...]]"); otherwise it is a plain "[[Category:...]]".
local function renderCats(cats, isDebug)
	if not cats[1] then
		return ''
	end
	if not checkNamespace(isDebug) then
		return ''
	end
	local prefix = isDebug and ':' or ''
	local links = {}
	for index, name in ipairs(cats) do
		links[index] = string.format('[[%sCategory:%s]]', prefix, name)
	end
	return table.concat(links)
end

-- Looks s up in the table loaded from 'Module:Lang/ISO 639 synonyms' and
-- returns the mapped value when there is one, otherwise s unchanged.
local function resolveSynonym(s)
	local synonyms = mw.loadData('Module:Lang/ISO 639 synonyms')
	local canonical = synonyms[s]
	if canonical then
		return canonical
	end
	return s
end

-- Splits a language tag at its first hyphen.
-- Returns three values:
--   1. the part before the hyphen, lowercased and passed through
--      resolveSynonym;
--   2. everything after the first hyphen, or nil if there is no hyphen or
--      nothing follows it;
--   3. whether value 2 begins with 'x-' (nil when value 2 is nil).
local function splitTag(s)
	local hyphen = s:find('-', 1, true)
	local primary, regionCode
	if hyphen then
		primary = s:sub(1, hyphen - 1)
		-- A trailing hyphen with nothing after it yields no region code
		if hyphen < #s then
			regionCode = s:sub(hyphen + 1)
		end
	else
		primary = s
	end
	local langCode = resolveSynonym(primary:lower())
	local isPrivate = regionCode
	if regionCode then
		isPrivate = regionCode:sub(1, 2) == 'x-'
	end
	return langCode, regionCode, isPrivate
end

-- Asks Module:Lang's _name_from_tag for the name belonging to a language
-- code, forwarding the link and raw options unchanged, and returns whatever
-- that function returns.
local function getLangName(code, link, raw)
	local langModule = require('Module:Lang')
	local params = {
		code,
		link = link,
		raw = raw,
		-- Without linking, "{{IPA}}" gets expanded in some contexts
		template = '[[Template:IPA|IPA]]'
	}
	return langModule._name_from_tag(params)
end

-- Returns name as a piped wikilink when link is exactly 'yes', and name
-- unchanged otherwise. The link points to target if one is given, else to
-- name followed by ' language'.
local function linkLang(name, target, link)
	if link ~= 'yes' then
		return name
	end
	local destination = target or name .. ' language'
	return string.format('[[%s|%s]]', destination, name)
end

-- Builds the wikitext for an IPA transcription from a table of arguments.
--
-- Two modes, selected by whether args[2] is a non-empty string:
--   * Guide-linking mode: args[1] is a language tag and args[2] is the
--     transcription. The transcription is put through a bracket format,
--     linked to a key page, and optionally preceded by a label.
--   * Basic mode: args[1] is the transcription; args.lang may supply a
--     language tag.
--
-- Arguments read: positional 1-4 and the named arguments debug, generic,
-- link, cat, label, small, class, wrap, tooltip, audio and lang.
--
-- Returns a single string made of, in order: the optional label (with a
-- templatestyles tag in front when the small style is used), the
-- transcription span, an optional audio button, and the category links.
-- In guide-linking mode, returns early with the looked-up name plus an
-- error category when that name starts with '<span'.
function p._main(args)
	local ret, cats = {}, {}
	local isDebug = args.debug == 'yes'
	local s, langCode, regionCode, isPrivate
	
	-- Guide-linking mode
	if args[2] and args[2] ~= '' then
		local data = mw.loadData('Module:IPA/data')
		local isGeneric = args.generic == 'yes'
		s = args[2]
		langCode, regionCode, isPrivate = splitTag(args[1])
		-- Languages absent from the data table get an empty record
		local langData = data.langs[langCode] or {}
		if regionCode then
			-- Region codes are uppercased for lookup unless they start with 'x-'
			if not isPrivate then
				regionCode = regionCode:upper()
			end
			if langData.dialects and langData.dialects[regionCode] then
				-- Overwrite language data with the dialect's
				local newLangData = {}
				for k, v in pairs(langData) do
					if k ~= 'dialects' then
						newLangData[k] = v
					end
				end
				local dialectData = langData.dialects[regionCode]
				if dialectData.aliasOf then
					-- Use the canonical region code
					regionCode = dialectData.aliasOf
					isPrivate = regionCode:sub(1, 2) == 'x-'
					dialectData = langData.dialects[regionCode]
				end
				-- Lowercase IANA variant
				if dialectData.isVariant then
					regionCode = regionCode:lower()
				end
				for k, v in pairs(dialectData) do
					newLangData[k] = v
				end
				langData = newLangData
			else
				-- No dialect entry for this region: the default bracket format
				-- and default key are used further down
				isGeneric = true
			end
		end
		
		-- Language name: taken from the data table when present, otherwise
		-- looked up through Module:Lang using the full tag
		local fullLangCode = regionCode and langCode .. '-' .. regionCode
			or langCode
		local langName = langData.name
			and linkLang(langData.name, langData.link, args.link)
			or getLangName(fullLangCode, args.link)
		if langName:sub(1, 5) == '<span' then
			-- Module:Lang has returned an error
			return langName .. renderCats({ 'IPA template errors' }, isDebug)
		end
		if args.cat ~= 'no' then
			local catLangName = langData.name
				or getLangName(fullLangCode, nil, 'yes')
			if catLangName:sub(1, 5) == '<span' then
				-- Module:Lang has returned an error, but it's not fatal
				table.insert(cats, 'IPA template errors')
				mw.addWarning(catLangName)
			else
				table.insert(cats, string.format('Pages with %s IPA', catLangName))
			end
		end
		
		-- Label
		local label = args.label
		if not label then
			-- args[3], lowercased, takes precedence over the language's default
			-- label code; an empty label code produces an empty label
			local labelCode = args[3] and args[3]:lower()
				or langData.defaultLabelCode
			if labelCode == '' then
				label = ''
			else
				local langText
				if langData.text then
					-- Reuse the link target at the start of langName, if it has one
					langText = linkLang(
						langData.text,
						mw.ustring.match(langName, '^%[%[([^|%]]+)'),
						args.link
					)
				else
					-- Turn "[[X languages]]" into "[[X languages|X]]" ...
					langText = mw.ustring.gsub(
						langName,
						'^%[%[(([^|]+) languages)%]%]$',
						'[[%1|%2]]'
					)
					-- ... and drop a trailing " languages" from the displayed text
					langText = mw.ustring.gsub(
						langText,
						' languages(%]?%]?)$',
						'%1'
					)
				end
				if labelCode and data.labels[labelCode] then
					label = data.labels[labelCode]:format(langText)
				else
					label = data.defaultLabel:format(langText)
				end
			end
		end
		if label and label ~= '' then
			local span = mw.html.create('span')
				:addClass('IPA-label')
				:wikitext(label)
			-- The stylesheet tag is emitted only together with the small class
			if args.small ~= 'no' then
				span:addClass('IPA-label-small')
				table.insert(ret, mw.getCurrentFrame():extensionTag({
					name = 'templatestyles',
					args = { src = 'Module:IPA/styles.css' }
				}))
			end
			table.insert(ret, tostring(span) .. ' ')
		end
		
		-- Brackets
		s = (not isGeneric and langData.format or '&#91;%s&#93;'):format(s)
		
		-- Link to key
		local key = not isGeneric and langData.key or data.defaultKey
		s = string.format('[[%s|%s]]', key, s)
	else
		-- Basic mode
		s = args[1]
		if args.lang and args.lang ~= '' then
			langCode, regionCode, isPrivate = splitTag(args.lang)
		end
		if args.cat ~= 'no' then
			table.insert(cats, 'Pages with plain IPA')
		end
	end
	
	-- Transcription
	do
		-- lang attribute: <code or 'und'>-Latn[-<region>]-fonipa; the region
		-- is left out when it is flagged as private
		local lang = (langCode or 'und') .. '-Latn'
		if not isPrivate and regionCode then
			lang = lang .. '-' .. regionCode
		end
		lang = lang .. '-fonipa'
		local span = mw.html.create('span')
			:addClass('IPA')
			:addClass(args.class)
			:attr('lang', lang)
		-- wrap=all: Do nothing
		-- wrap=none: Never break
		-- Otherwise: Break at spaces only
		if args.wrap ~= 'all' then
			span:addClass('nowrap')
			if args.wrap ~= 'none' then
				s = wrapAtSpacesSafely(s)
			end
		end
		-- A title attribute is set only in basic mode, and only when the
		-- tooltip argument is not an explicit empty string
		if (not args[2] or args[2] == '') and args.tooltip ~= '' then
			local tooltip = args.tooltip or
				'Representation in the International Phonetic Alphabet (IPA)'
			span:attr('title', tooltip)
		end
		s = tostring(span:wikitext(s))
		table.insert(ret, s)
	end
	
	-- Audio
	-- args.audio wins when non-empty; otherwise a non-empty args[4] is used
	local audio = args.audio ~= '' and args.audio or args[4] ~= '' and args[4]
	if audio then
		local button = mw.getCurrentFrame():expandTemplate({
			title = 'Audio',
			args = { audio, '' }
		})
		table.insert(ret, ' ' .. button)
		table.insert(cats, 'Pages including recorded pronunciations')
	end
	
	-- Categories
	table.insert(ret, renderCats(cats, isDebug))
	
	return table.concat(ret)
end

-- Template entry point. Reads the arguments of the parent frame, trims
-- whitespace from each positional argument, and hands the table to p._main.
-- Returns '' without doing anything when the first positional argument is
-- absent.
function p.main(frame)
	local parentArgs = frame:getParent().args
	if parentArgs[1] then
		for index, value in ipairs(parentArgs) do
			parentArgs[index] = mw.text.trim(value)
		end
		return p._main(parentArgs)
	end
	return ''
end

return p

@@ Rreshti 1: / Rreshti 1: @@
-local export = {}
+require('strict')
+local p = {}
-local force_cat = false -- for testing
+local function multiFind(s, patterns, init)
+	local i, j = mw.ustring.find(s, patterns[1], init)
-local pages_module = "Module:pages"
+	for n = 2, #patterns do
-local pron_qualifier_module = "Module:pron qualifier"
+		local i2, j2 = mw.ustring.find(s, patterns[n], init)
-local qualifier_module = "Module:qualifier"
+		if i2 and (not i or i2 < i) then
-local references_module = "Module:references"
+			i, j = i2, j2
-local string_utilities_module = "Module:string utilities"
-local syllables_module = "Module:syllables"
-local utilities_module = "Module:utilities"
-local m_data = mw.loadData("Module:IPA/data")
-local m_str_utils = require(string_utilities_module)
-local m_syllables -- [[Module:syllables]]; loaded below if needed
-local m_symbols = mw.loadData("Module:IPA/data/symbols")
-local concat = table.concat
-local decode_entities = m_str_utils.decode_entities
-local find = string.find
-local gmatch = m_str_utils.gmatch
-local gsub = string.gsub
-local insert = table.insert
-local is_preview = require(pages_module).is_preview
-local len = m_str_utils.len
-local listToText = mw.text.listToText
-local match = string.match
-local pattern_escape = m_str_utils.pattern_escape
-local sub = string.sub
-local u = m_str_utils.char
-local ugsub = m_str_utils.gsub
-local umatch = m_str_utils.match
-local usub = m_str_utils.sub
-local namespace = mw.title.getCurrentTitle().namespace
-local is_content_page = namespace == 0 or namespace == 118
-local function track(page)
-	require("Module:debug/track")("IPA/" .. page)
-	return true
-end
-local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)
-	if split_output ~= "raw" then
-		if categories[1] then
-			categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat)
-		else
-			categories = ""
 		end
 	end
-	if split_output then -- for use of IPA in links, etc.
+	return i, j
-		if errtext then
-			return prontext, categories, errtext
-		else
-			return prontext, categories
-		end
-	else
-		return prontext .. (errtext or "") .. categories
-	end
 end
---[==[
+local function wrapAtSpaces(s)
-Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
+	return mw.ustring.gsub(s, '(%s+)', '<span class="wrap">%1</span>')
-the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
+end
-` ``lang`` terms with IPA pronunciation`. Other than the extra preceding text and category, this is identical
-to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
-single parameter `data`, an object with the following fields:
-* `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
-   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
-   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
-   ` ``lang`` terms with IPA pronunciation`; and for determining the proper sort keys for categories. Unlike
-   for {format_IPA_multiple()}, `lang` may not be {nil}.
-* `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
-* `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
-* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
-  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
-  `items`.
-* `sort_key`: Explicit sort key used for categories.
-* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
-  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
-  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
-  or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
-  them.
-* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
-  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
-  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
-  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
-  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
-* `include_langname`: If specified, prefix the result with the language name, followed by a colon.
-* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
-  pronunciations and preceding {"IPA:"}.
-* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
-* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
-  pronunciations and preceding {"IPA:"}.
-* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
-]==]
-function export.format_IPA_full(data)
-	if type(data) ~= "table" or data.getCode then
-		error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
-	end
-	local lang = data.lang
-	local items = data.items
-	local err = data.err
-	local separator = data.separator
-	local sort_key = data.sort_key
-	local no_count = data.no_count
-	local split_output = data.split_output
-	local q = data.q
-	local qq = data.qq
-	local a = data.a
-	local aa = data.aa
-	local include_langname = data.include_langname
-	local hasKey = m_data.langs_with_infopages
+local function wrapAtSpacesSafely(s)
+	local patterns = {
-	if not lang or not lang.getCode then
+		'%[%[[^%]|]-%s[^%]|]-|', -- Piped links
-		error("Must specify language to format_IPA_full()")
+		'</?[A-Za-z][^>]-%s[^>]->' -- HTML tags
-	end
+	}
-	local langname = lang:getCanonicalName()
+	s = mw.ustring.gsub(s, '%[%[([^%]|]-%s[^%]|]-)%]%]', '[[%1|%1]]') -- Pipe all links
+	local t = {}
-	local prefix_text
+	local init
-	if err then
+	while true do
-		prefix_text = '<span class="error">' .. err .. '</span>'
+		local i, j = multiFind(s, patterns, init)
-	else
+		if not i then
-		if hasKey[lang:getCode()] then
+			break
-			prefix_text = "Appendix:" .. langname .. " pronunciation"
-		else
-			prefix_text = "wikipedia:" .. langname .. " phonology"
 		end
-		prefix_text = "[[" .. prefix_text .. "|key]]"
+		local pre = wrapAtSpaces(mw.ustring.sub(s, init, i - 1)) -- What precedes the match
+		table.insert(t, pre)
+		table.insert(t, mw.ustring.sub(s, i, j)) -- The match
+		init = j + 1
 	end
+	local post = wrapAtSpaces(mw.ustring.sub(s, init)) -- What follows the last match
+	table.insert(t, post)
+	return table.concat(t)
+end
-	local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>:&#32;"
+local function checkNamespace(isDebug)
+	return isDebug or require('Module:Category handler').main({ true })
+end
-	local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")
+local function renderCats(cats, isDebug)
+	if not cats[1] or not checkNamespace(isDebug) then
-	if is_content_page then
+		return ''
-		insert(categories, {
-			cat = langname .. " terms with IPA pronunciation",
-			sort_key = sort_key
-		})
 	end
+	local t = {}
-	local prontext = prefix .. IPAs
+	for _, v in ipairs(cats) do
-	if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then
+		table.insert(t, string.format(
-		prontext = require(pron_qualifier_module).format_qualifiers {
+			'[[%sCategory:%s]]',
-			lang = lang,
+			isDebug and ':' or '',
-			text = prontext,
+			v
-			q = q,
+		))
-			qq = qq,
-			a = a,
-			aa = aa,
-		}
 	end
-	if include_langname then
+	return table.concat(t)
-		prontext = langname .. ": " .. prontext
-	end
-	return process_maybe_split_categories(split_output, categories, prontext, lang)
 end
-local function split_phonemic_phonetic(pron)
+local function resolveSynonym(s)
-	local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$")
+	return mw.loadData('Module:Lang/ISO 639 synonyms')[s] or s
-	if reconstructed then
-		return reconstructed .. phonemic, reconstructed .. phonetic
-	else
-		return pron, nil
-	end
 end
-local function determine_repr(pron)
+local function splitTag(s)
-	local reconstructed
+	local langCode = s:gsub('%-.*', ''):lower()
+	langCode = resolveSynonym(langCode)
-	-- Temporarily remove any initial asterisk before representation marks,
+	local regionCode = s:match('%-(.+)')
-	-- which avoids having to account for it in the data, but set the
+	local isPrivate = regionCode and regionCode:sub(1, 2) == 'x-'
-	-- `reconstructed` flag.
+	return langCode, regionCode, isPrivate
-	if sub(pron, 1, 1) == "*" then
+end
-		reconstructed = true
-		pron = sub(pron, 2)
-	end
-	-- Some representation types have aliases for convenience (e.g. "// //" is
-	-- an alias for "⫽ ⫽"). and these need to be substituted in before checking
-	-- for other data.
-	local opening, n = match(pron, "^.[\128-\191]*")
-	local subs_data = m_data.representation_subs[opening]
-	if subs_data then
-		pron, n = ugsub(pron, subs_data[1], subs_data[2])
-		-- If the substitution was made, `opening` needs to be changed to the
-		-- new opening character.
-		if n ~= 0 then
-			opening = subs_data[3]
-		end
-	end
-	-- Get the type data based on the opening character (if any), and set the
-	-- representation type if the closing character matches.
-	local type_data, repr, closing = m_data.representation_types[opening]
-	if type_data then
-		closing = type_data[2]
-		if type_data and match(pron, pattern_escape(closing) .. "$", #opening + 1) then
-			repr = type_data[1]
-		end
-	end
-	-- Default to the empty string.
-	if not repr then
-		opening, closing = "", ""
-	end
-	-- Reattach the asterisk if reconstructed.
+local function getLangName(code, link, raw)
-	if reconstructed then
+	return require('Module:Lang')._name_from_tag({
-		pron = "*" .. pron
+		code,
-	end
+		link = link,
+		raw = raw,
-	return pron, repr, opening, closing, reconstructed
+		-- Without linking, "{{IPA}}" gets expanded in some contexts
+		template = '[[Template:IPA|IPA]]'
+	})
 end
-local function hasInvalidSeparators(transcription)
+local function linkLang(name, target, link)
-	-- Escape certain characters as well as pauses, which have the format "(...)" (with any number of dots), to avoid false-positives.
+	return link == 'yes' and string.format(
-	transcription = transcription:gsub(".[\128-\191]*", m_symbols.separator_escapes)
+		'[[%s|%s]]',
-		:gsub("%(%.+%)", "\3")
+		target or name .. ' language',
-		:gsub("[()]+", "")
+		name
-	return (
+	) or name
-		transcription:find("..", nil, true) or
-		transcription:match("%.%f[%z \1\2\3,:;]") or
-		transcription:match("\1%f[%z \2\3,:;]") or
-		transcription:match("\2%f[%z \1\3,:;]") or
-		transcription:match("\3[:;]") or
-		transcription:match("%f[^%z \1\2\3,]%.")
-	) and true or false
 end
---[==[
+function p._main(args)
-Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
+	local ret, cats = {}, {}
-category ` ``lang`` terms with IPA pronunciation`). Individual pronunciations are formatted using
+	local isDebug = args.debug == 'yes'
-{format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.
+	local s, langCode, regionCode, isPrivate
-Parameters accepted are:
-* `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
+	-- Guide-linking mode
-   pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
+	if args[2] and args[2] ~= '' then
-   add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
+		local data = mw.loadData('Module:IPA/data')
-   proper sort keys for categories. `lang` may be {nil}.
+		local isGeneric = args.generic == 'yes'
-* `items` is a list of pronunciations, each of which is an object with the following properties:
+		s = args[2]
-** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
+		langCode, regionCode, isPrivate = splitTag(args[1])
-     (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
+		local langData = data.langs[langCode] or {}
-	 (beginning with a hyphen);
+		if regionCode then
-** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
+			if not isPrivate then
-     qualifiers;
+				regionCode = regionCode:upper()
-** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
-     qualifiers;
-** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
-     pronunciation; note that `qualifiers` is deprecated;
-** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
-** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
-** `aa`: {nil} or a list of right accent qualifiers to after before the formatted pronunciation;
-** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
-     qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
-	 citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
-	 (the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
-	 or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
-	 {{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
-	 {{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
-	 appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
-	 {{cd|<nowiki><references /></nowiki>}} section;
-** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
-** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
-** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
-   qualifiers and pre-text; defaults to the outer `separator` parameter.
-* `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
-  first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
-  `items`.
-* `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
-  only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
-  given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
-  categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
-* `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
-  categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
-  the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
-  strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
-  `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
-]==]
-function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
-	local categories = {}
-	separator = separator or ", "
-	if not lang then
-		track("format-multiple-nolang")
-	end
-	-- Format
-	if not items[1] then
-		if namespace == 10 then -- Template
-			insert(items, {pron = "/aɪ piː ˈeɪ/"})
-		else
-			insert(categories, "Pronunciation templates without a pronunciation")
-		end
-	end
-	local bits = {}
-	for i, item in ipairs(items) do
-		local bit
-		-- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
-		-- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
-		-- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).
-		if item.pron == "" then
-			bit = ""
-		else
-			local item_categories, errtext
-			bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
-			bit = bit .. errtext
-			for _, cat in ipairs(item_categories) do
-				insert(categories, cat)
 			end
-		end
+			if langData.dialects and langData.dialects[regionCode] then
+				-- Overwrite language data with the dialect's
-		if item.pretext then
+				local newLangData = {}
-			bit = item.pretext .. bit
+				for k, v in pairs(langData) do
-		end
+					if k ~= 'dialects' then
+						newLangData[k] = v
-		if item.posttext then
+					end
-			bit = bit .. item.posttext
+				end
-		end
+				local dialectData = langData.dialects[regionCode]
+				if dialectData.aliasOf then
-		local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]
+					-- Use the canonical region code
-			or item.a and item.a[1] or item.aa and item.aa[1]
+					regionCode = dialectData.aliasOf
-		local has_gloss_or_pos = item.gloss or item.pos
+					isPrivate = regionCode:sub(1, 2) == 'x-'
-		if has_qualifiers or has_gloss_or_pos then
+					dialectData = langData.dialects[regionCode]
-			-- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.
+				end
-			-- Should we do something different?
+				-- Lowercase IANA variant
-			local q = item.q
+				if dialectData.isVariant then
-			if has_gloss_or_pos then
+					regionCode = regionCode:lower()
-				q = mw.clone(item.q) or {}
-				if item.gloss then
-					local m_qualifier = require(qualifier_module)
-					insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
-						m_qualifier.wrap_qualifier_css("”", "quote"))
 				end
-				if item.pos then
+				for k, v in pairs(dialectData) do
-					-- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar.
+					newLangData[k] = v
-					insert(q, item.pos)
 				end
+				langData = newLangData
+			else
+				isGeneric = true
 			end
-			bit = require("Module:pron qualifier").format_qualifiers {
-				lang = lang,
-				text = bit,
-				q = q,
-				qq = item.qq,
-				qualifiers = item.qualifiers,
-				a = item.a,
-				aa = item.aa,
-			}
 		end
-		if item.note then
+		local fullLangCode = regionCode and langCode .. '-' .. regionCode
-			-- Support removed on 2024-06-15.
+			or langCode
-			error("Support for `.note` has been removed; switch to `.refs` (which must be a list)")
+		local langName = langData.name
+			and linkLang(langData.name, langData.link, args.link)
+			or getLangName(fullLangCode, args.link)
+		if langName:sub(1, 5) == '<span' then
+			-- Module:Lang has returned an error
+			return langName .. renderCats({ 'IPA template errors' }, isDebug)
 		end
-		if item.refs then
+		if args.cat ~= 'no' then
-			local refspecs = item.refs
+			local catLangName = langData.name
-			if #refspecs > 0 then
+				or getLangName(fullLangCode, nil, 'yes')
-				bit = bit .. require(references_module).format_references(refspecs)
+			if catLangName:sub(1, 5) == '<span' then
+				-- Module:Lang has returned an error, but it's not fatal
+				table.insert(cats, 'IPA template errors')
+				mw.addWarning(catLangName)
+			else
+				table.insert(cats, string.format('Pages with %s IPA', catLangName))
 			end
 		end
-		bit = (item.separator or (i == 1 and "" or separator)) .. bit
+		-- Label
+		local label = args.label
-		insert(bits, bit)
+		if not label then
+			local labelCode = args[3] and args[3]:lower()
-		--[=[	[[Special:WhatLinksHere/Wiktionary:Tracking/IPA/syntax-error]]
+				or langData.defaultLabelCode
-				The length or gemination symbol should not appear after a syllable break or stress symbol.	]=]
+			if labelCode == '' then
+				label = ''
-		-- The nature of the following pattern match is such that we don't have to split a combined '/.../ [...]' spec
+			else
-		-- into its parts in order to process.
+				local langText
-		if match(item.pron, "[.\203][\136\140]?\203[\144\145]") then -- [.ˈˌ][ːˑ]
+				if langData.text then
-			track("syntax-error")
+					langText = linkLang(
-		end
+						langData.text,
+						mw.ustring.match(langName, '^%[%[([^|%]]+)'),
-		if lang then
+						args.link
-			-- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
+					)
-			-- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.
+				else
-			if not no_count and namespace == 0 then
+					langText = mw.ustring.gsub(
-				m_syllables = m_syllables or require(syllables_module)
+						langName,
-				local langcode = lang:getCode()
+						'^%[%[(([^|]+) languages)%]%]$',
-				if m_data.langs_to_generate_syllable_count_categories[langcode] then
+						'[[%1|%2]]'
-					local raw_phonemic, phonetic, use_it = split_phonemic_phonetic(item.pron)
+					)
-					local phonemic, repr = determine_repr(raw_phonemic)
+					langText = mw.ustring.gsub(
-					if not phonetic then -- not a '/.../ [...]' combined pronunciation
+						langText,
-						if m_data.langs_to_use_phonetic_notation[langcode] then
+						' languages(%]?%]?)$',
-							use_it = repr == "phonetic" and phonemic or nil
+						'%1'
-						else
+					)
-							use_it = repr == "phonemic" and phonemic or nil
+				end
-						end
+				if labelCode and data.labels[labelCode] then
-					elseif repr == "phonetic" then
+					label = data.labels[labelCode]:format(langText)
-						use_it = phonetic
+				else
-					elseif repr == "phonemic" then
+					label = data.defaultLabel:format(langText)
-						use_it = phonemic
-					end
-					-- Note: two uses of find with plain patterns is much faster than umatch with [ ‿].
-					if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
-						local syllable_count = m_syllables.getVowels(use_it, lang)
-						if syllable_count then
-							insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
-								"-syllable words")
-						end
-					end
 				end
 			end
 		end
-	end
+		if label and label ~= '' then
+			local span = mw.html.create('span')
-	return process_maybe_split_categories(split_output, categories, concat(bits), lang)
+				:addClass('IPA-label')
-end
+				:wikitext(label)
+			if args.small ~= 'no' then
---[=[
+				span:addClass('IPA-label-small')
-Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
+				table.insert(ret, mw.getCurrentFrame():extensionTag({
-{format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
+					name = 'templatestyles',
-pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
+					args = { src = 'Module:IPA/styles.css' }
-categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
+				}))
-may have HTML added surrounding invalid characters so they appear in red.
-]=]
-local function format_one_IPA(lang, raw_pron, err, categories)
-	-- Disallow wikilinks.
-	if match(raw_pron, "%[%[.-%]%]") then
-		error("IPA input must not contain wikilinks.")
-	end
-	raw_pron = decode_entities(raw_pron)
-	-- Detect the type of transcription.
-	local pron, repr, opening, closing, reconstructed = determine_repr(raw_pron)
-	-- Strip any reconstruction asterisk and representation marks.
-	pron = sub(pron, #opening + 1 + (reconstructed and 1 or 0), -#closing - 1)
-	if not repr then
-		insert(categories, "IPA pronunciations with invalid representation marks")
-		-- insert(err, "invalid representation marks")
-		-- Removed because it's annoying when previewing pronunciation pages.
-	end
-	if repr ~= "orthographic" and lang and lang:getCode() == "en" and hasInvalidSeparators(pron) then
-		insert(categories, "English IPA pronunciations with invalid separators")
-	end
-	if pron == "" then
-		insert(categories, "IPA pronunciations with no pronunciation present")
-	end
-	-- Check for obsolete and nonstandard symbols
-	for _, symbol in ipairs(m_data.nonstandard) do
-		local result
-		for nonstandard in gmatch(pron, symbol) do
-			if not result then
-				result = {}
 			end
-			insert(result, nonstandard)
+			table.insert(ret, tostring(span) .. ' ')
-			insert(categories,
-				{cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}
-			)
 		end
-		if result then
+		-- Brackets
-			insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
+		s = (not isGeneric and langData.format or '&#91;%s&#93;'):format(s)
-			break
+		-- Link to key
+		local key = not isGeneric and langData.key or data.defaultKey
+		s = string.format('[[%s|%s]]', key, s)
+	else
+		-- Basic mode
+		s = args[1]
+		if args.lang and args.lang ~= '' then
+			langCode, regionCode, isPrivate = splitTag(args.lang)
 		end
-	end
+		if args.cat ~= 'no' then
+			table.insert(cats, 'Pages with plain IPA')
-	--[[ Check for invalid symbols after removing the following:
-. wikilinks (handled above)
-. paired HTML tags
-. bolding
-. italics
-. asterisk at beginning of transcription
-. comma followed by spacing characters
-. superscripts enclosed in superscript parentheses		]]
-	local found_HTML
-	local result = gsub(pron, "<(%a+)[^>]*>([^<]+)</%1>",
-		function(tagName, content)
-			found_HTML = true
-			return content
-		end)
-	result = gsub(result, "'''([^']*)'''", "%1")
-	result = gsub(result, "''([^']*)''", "%1")
-	result = gsub(result, "^%*", "")
-	result = ugsub(result, ",%s+", "")
-	-- VS15
-	local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
-	if umatch(pron, vs15_class) then
-		local vs15 = u(0xFE0E)
-		if find(result, vs15) then
-			result = gsub(result, vs15, "")
-			pron = gsub(pron, vs15, "")
 		end
-		pron = ugsub(pron, vs15_class, "%0" .. vs15)
 	end
-	if result ~= "" then
+	-- Transcription
-		if lang then
+	do
-			-- Get the per_lang_valid data, and convert any per-language valid sequences to spaces.
+		local lang = (langCode or 'und') .. '-Latn'
-			local per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
+		if not isPrivate and regionCode then
-			if per_lang_valid then
+			lang = lang .. '-' .. regionCode
-				if type(per_lang_valid) == "table" then
-					for _, pattern in pairs(per_lang_valid) do
-						result = ugsub(result, pattern, " ")
-					end
-				else -- Should be a string.
-					result = ugsub(result, per_lang_valid, " ")
-				end
-			end
-		end
-		local suggestions
-		-- Check for any invalid sequences, excluding anything in the per-language lookup table.
-		for k, v in pairs(m_symbols.invalid) do
-			if find(result, k, nil, true) then
-				if not suggestions then
-					suggestions = {}
-				end
-				insert(suggestions, k .. " with " .. v)
-			end
 		end
-		if suggestions and suggestions[1] then
+		lang = lang .. '-fonipa'
-			suggestions = listToText(suggestions)
+		local span = mw.html.create('span')
-			if is_content_page then
+			:addClass('IPA')
-				error("Invalid IPA: replace " .. suggestions)
+			:addClass(args.class)
+			:attr('lang', lang)
+		-- wrap=all: Do nothing
+		-- wrap=none: Never break
+		-- Otherwise: Break at spaces only
+		if args.wrap ~= 'all' then
+			span:addClass('nowrap')
+			if args.wrap ~= 'none' then
+				s = wrapAtSpacesSafely(s)
 			end
-			insert(err, "replace " .. suggestions)
-		end
-		-- Convert any valid character sequences to spaces
-		for _, pattern in pairs(m_symbols.valid) do
-			result = ugsub(result, pattern, " ")
 		end
-		if not match(result, "^ *$") then
+		if (not args[2] or args[2] == '') and args.tooltip ~= '' then
-			local category = "IPA pronunciations with invalid IPA characters"
+			local tooltip = args.tooltip or
-			if not is_content_page then
+				'Representation in the International Phonetic Alphabet (IPA)'
-				category = category .. "/non_mainspace"
+			span:attr('title', tooltip)
-			end
-			insert(categories, category)
-			insert(err, "invalid IPA characters (" .. result .. ")")
 		end
+		s = tostring(span:wikitext(s))
+		table.insert(ret, s)
 	end
-	if found_HTML then
+	-- Audio
-		insert(categories, "IPA pronunciations with paired HTML tags")
+	local audio = args.audio ~= '' and args.audio or args[4] ~= '' and args[4]
+	if audio then
+		local button = mw.getCurrentFrame():expandTemplate({
+			title = 'Audio',
+			args = { audio, '' }
+		})
+		table.insert(ret, ' ' .. button)
+		table.insert(cats, 'Pages including recorded pronunciations')
 	end
-	if (repr == "phonemic" or repr == "rhyme") and lang and m_data.phonemes[lang:getCode()] then
+	-- Categories
-		local valid_phonemes = m_data.phonemes[lang:getCode()]
+	table.insert(ret, renderCats(cats, isDebug))
-		local rest = pron
-		local phonemes = {}
+	return table.concat(ret)
-		while #rest > 0 do
-			local longestmatch, longestmatch_len = "", 0
-			local rest_init = sub(rest, 1, 1)
-			if rest_init == "(" or rest_init == ")" then
-				longestmatch = rest_init
-				longestmatch_len = 1
-			else
-				for _, phoneme in ipairs(valid_phonemes) do
-					local phoneme_len = len(phoneme)
-					if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
-						longestmatch = phoneme
-						longestmatch_len = len(longestmatch)
-					end
-				end
-			end
-			if longestmatch_len > 0 then
-				insert(phonemes, longestmatch)
-				rest = usub(rest, longestmatch_len + 1)
-			else
-				local phoneme = usub(rest, 1, 1)
-				insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
-				rest = usub(rest, 2)
-				insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())
-				track("invalid phonemes/" .. phoneme)
-			end
-		end
-		pron = concat(phonemes)
-	end
-	return (reconstructed and "*" or "") .. opening .. pron .. closing
 end
---[==[
+function p.main(frame)
-Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
+	local args = frame:getParent().args
-error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
+	if not args[1] then
-(surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
+		return ''
-phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
-If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
-will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that {lang}
-also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
-a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
-are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
-is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
-category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
-If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
-string.
-]==]
-function export.format_IPA(lang, pron, split_output)
-	local err = {}
-	local categories = {}
-	-- `pron` shouldn't contain ref tags.
-	if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then
-		error("<ref> tags found inside pronunciation parameter.")
 	end
+	for i, v in ipairs(args) do
-	if not lang then
+		args[i] = mw.text.trim(v)
-		track("format-nolang")
 	end
+	return p._main(args)
-	local phonemic, phonetic = split_phonemic_phonetic(pron)
-	pron = format_one_IPA(lang, phonemic, err, categories)
-	if phonetic then
-		track("phonemic-phonetic") -- There's no benefit to supporting the "/.../ [...]" format within one parameter.
-		phonetic = format_one_IPA(lang, phonetic, err, categories)
-		pron = pron .. " " .. phonetic
-	end
-	if err[1] and is_preview() then
-		err = '<span class="error" style="font-size: small;>&#32;' .. concat(err, ", ") .. "</span>"
-	else
-		err = ""
-	end
-	return process_maybe_split_categories(split_output, categories, '<span class="IPA">' .. pron .. "</span>", lang,
-		err)
-end
---[==[
-Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
-[[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
-pronunciations are formatted by wrapping them in the `AHD` and `enPR` CSS classes and adding any left and
-right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
-and accent qualifiers. There is a single parameter `data`, an object with the following fields:
-* `items` is a list of enPR pronunciations, each of which is an object with the following properties:
-** `pron`: the enPR pronunciation;
-** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
-** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
-** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
-** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
-* `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
-  pronunciations and preceding {"enPR:"}.
-* `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
-* `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
-  pronunciations and preceding {"enPR:"}.
-* `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
-]==]
-function export.format_enPR_full(data)
-	local prefix = "[[Appendix:English pronunciation|enPR]]: "
-	local lang = require("Module:languages").getByCode("en")
-	local parts = {}
-	for _, item in ipairs(data.items) do
-		local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
-		if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then
-			part = require("Module:pron qualifier").format_qualifiers {
-				lang = lang,
-				text = part,
-				q = item.q,
-				qq = item.qq,
-				a = item.a,
-				aa = item.aa,
-			}
-		end
-		insert(parts, part)
-	end
-	local prontext = prefix .. concat(parts, ", ")
-	if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
-		prontext = require(pron_qualifier_module).format_qualifiers {
-			lang = lang,
-			text = prontext,
-			q = data.q,
-			qq = data.qq,
-			a = data.a,
-			aa = data.aa,
-		}
-	end
-	return prontext
 end
-return export
+return p

Moduli:IPA: Dallime mes rishikimesh

Versioni i datës 16 gusht 2025 15:32

Menu lëvizjesh

Kërko