|
|
Rreshti 1: |
Rreshti 1: |
| local export = {} | | require('strict') |
| | local p = {} |
|
| |
|
| local force_cat = false -- for testing | | local function multiFind(s, patterns, init) |
| | | local i, j = mw.ustring.find(s, patterns[1], init) |
| local pages_module = "Module:pages"
| | for n = 2, #patterns do |
| local pron_qualifier_module = "Module:pron qualifier"
| | local i2, j2 = mw.ustring.find(s, patterns[n], init) |
| local qualifier_module = "Module:qualifier"
| | if i2 and (not i or i2 < i) then |
| local references_module = "Module:references"
| | i, j = i2, j2 |
| local string_utilities_module = "Module:string utilities"
| |
| local syllables_module = "Module:syllables"
| |
| local utilities_module = "Module:utilities"
| |
| | |
| local m_data = mw.loadData("Module:IPA/data")
| |
| local m_str_utils = require(string_utilities_module) | |
| local m_syllables -- [[Module:syllables]]; loaded below if needed
| |
| local m_symbols = mw.loadData("Module:IPA/data/symbols")
| |
| | |
| local concat = table.concat
| |
| local decode_entities = m_str_utils.decode_entities
| |
| local find = string.find
| |
| local gmatch = m_str_utils.gmatch
| |
| local gsub = string.gsub
| |
| local insert = table.insert
| |
| local is_preview = require(pages_module).is_preview
| |
| local len = m_str_utils.len
| |
| local listToText = mw.text.listToText | |
| local match = string.match
| |
| local pattern_escape = m_str_utils.pattern_escape
| |
| local sub = string.sub
| |
| local u = m_str_utils.char
| |
| local ugsub = m_str_utils.gsub
| |
| local umatch = m_str_utils.match
| |
| local usub = m_str_utils.sub
| |
| | |
| local namespace = mw.title.getCurrentTitle().namespace
| |
| local is_content_page = namespace == 0 or namespace == 118
| |
| | |
| local function track(page)
| |
| require("Module:debug/track")("IPA/" .. page)
| |
| return true
| |
| end
| |
| | |
| local function process_maybe_split_categories(split_output, categories, prontext, lang, errtext)
| |
| if split_output ~= "raw" then
| |
| if categories[1] then | |
| categories = require(utilities_module).format_categories(categories, lang, nil, nil, force_cat) | |
| else
| |
| categories = ""
| |
| end | | end |
| end | | end |
| if split_output then -- for use of IPA in links, etc. | | return i, j |
| if errtext then
| |
| return prontext, categories, errtext
| |
| else
| |
| return prontext, categories
| |
| end
| |
| else
| |
| return prontext .. (errtext or "") .. categories
| |
| end
| |
| end | | end |
|
| |
|
| --[==[
| | local function wrapAtSpaces(s) |
| Format a line of one or more IPA pronunciations as {{tl|IPA}} would do it, i.e. with a preceding {"IPA:"} followed by
| | return mw.ustring.gsub(s, '(%s+)', '<span class="wrap">%1</span>') |
| the word {"key"} linking to an Appendix page describing the language's phonology, and with an added category
| | end |
| ` ``lang`` terms with IPA pronunciation`. Other than the extra preceding text and category, this is identical
| |
| to {format_IPA_multiple()}, and the considerations described there in the documentation apply here as well. There is a
| |
| single parameter `data`, an object with the following fields:
| |
| * `lang`: Object representing the language of the pronunciations, which is used when adding cleanup categories for
| |
| pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
| |
| add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); for adding a category
| |
| ` ``lang`` terms with IPA pronunciation`; and for determining the proper sort keys for categories. Unlike
| |
| for {format_IPA_multiple()}, `lang` may not be {nil}.
| |
| * `items`: List of pronunciations, in exactly the same format as for {format_IPA_multiple()}.
| |
| * `err`: If not {nil}, a string containing an error message to use in place of the link to the language's phonology.
| |
| * `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
| |
| first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
| |
| `items`.
| |
| * `sort_key`: Explicit sort key used for categories.
| |
| * `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
| |
| only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
| |
| given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
| |
| or other categories. If you need them suppressed, use `split_output` to return the categories separately and ignore
| |
| them.
| |
| * `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
| |
| categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
| |
| the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
| |
| strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
| |
| `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
| |
| * `include_langname`: If specified, prefix the result with the language name, followed by a colon.
| |
| * `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"IPA:"}.
| |
| * `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
| |
| * `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"IPA:"}.
| |
| * `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
| |
| ]==]
| |
| function export.format_IPA_full(data)
| |
| if type(data) ~= "table" or data.getCode then
| |
| error("Must now supply a table of arguments to format_IPA_full(); first argument should be that table, not a language object")
| |
| end
| |
| local lang = data.lang
| |
| local items = data.items
| |
| local err = data.err
| |
| local separator = data.separator
| |
| local sort_key = data.sort_key
| |
| local no_count = data.no_count
| |
| local split_output = data.split_output
| |
| local q = data.q
| |
| local qq = data.qq
| |
| local a = data.a
| |
| local aa = data.aa
| |
| local include_langname = data.include_langname
| |
|
| |
|
| local hasKey = m_data.langs_with_infopages | | local function wrapAtSpacesSafely(s) |
| | | local patterns = { |
| if not lang or not lang.getCode then | | '%[%[[^%]|]-%s[^%]|]-|', -- Piped links |
| error("Must specify language to format_IPA_full()")
| | '</?[A-Za-z][^>]-%s[^>]->' -- HTML tags |
| end
| | } |
| local langname = lang:getCanonicalName() | | s = mw.ustring.gsub(s, '%[%[([^%]|]-%s[^%]|]-)%]%]', '[[%1|%1]]') -- Pipe all links |
| | | local t = {} |
| local prefix_text | | local init |
| if err then | | while true do |
| prefix_text = '<span class="error">' .. err .. '</span>' | | local i, j = multiFind(s, patterns, init) |
| else
| | if not i then |
| if hasKey[lang:getCode()] then | | break |
| prefix_text = "Appendix:" .. langname .. " pronunciation" | |
| else
| |
| prefix_text = "wikipedia:" .. langname .. " phonology"
| |
| end | | end |
| prefix_text = "[[" .. prefix_text .. "|key]]" | | local pre = wrapAtSpaces(mw.ustring.sub(s, init, i - 1)) -- What precedes the match |
| | table.insert(t, pre) |
| | table.insert(t, mw.ustring.sub(s, i, j)) -- The match |
| | init = j + 1 |
| end | | end |
| | local post = wrapAtSpaces(mw.ustring.sub(s, init)) -- What follows the last match |
| | table.insert(t, post) |
| | return table.concat(t) |
| | end |
|
| |
|
| local prefix = "[[Wiktionary:International Phonetic Alphabet|IPA]]<sup>(" .. prefix_text .. ")</sup>: " | | local function checkNamespace(isDebug) |
| | return isDebug or require('Module:Category handler').main({ true }) |
| | end |
|
| |
|
| local IPAs, categories = export.format_IPA_multiple(lang, items, separator, no_count, "raw")
| | local function renderCats(cats, isDebug) |
| | | if not cats[1] or not checkNamespace(isDebug) then |
| if is_content_page then | | return '' |
| insert(categories, { | |
| cat = langname .. " terms with IPA pronunciation",
| |
| sort_key = sort_key
| |
| })
| |
| end | | end |
| | | local t = {} |
| local prontext = prefix .. IPAs | | for _, v in ipairs(cats) do |
| if q and q[1] or qq and qq[1] or a and a[1] or aa and aa[1] then | | table.insert(t, string.format( |
| prontext = require(pron_qualifier_module).format_qualifiers { | | '[[%sCategory:%s]]', |
| lang = lang, | | isDebug and ':' or '', |
| text = prontext, | | v |
| q = q, | | )) |
| qq = qq,
| |
| a = a,
| |
| aa = aa,
| |
| } | |
| end | | end |
| if include_langname then | | return table.concat(t) |
| prontext = langname .. ": " .. prontext
| |
| end
| |
| return process_maybe_split_categories(split_output, categories, prontext, lang)
| |
| end | | end |
|
| |
|
| local function split_phonemic_phonetic(pron) | | local function resolveSynonym(s) |
| local reconstructed, phonemic, phonetic = match(pron, "^(%*?)(/.-/)%s+(%[.-%])$") | | return mw.loadData('Module:Lang/ISO 639 synonyms')[s] or s |
| if reconstructed then
| |
| return reconstructed .. phonemic, reconstructed .. phonetic
| |
| else
| |
| return pron, nil
| |
| end
| |
| end | | end |
|
| |
|
| local function determine_repr(pron) | | local function splitTag(s) |
| local reconstructed | | local langCode = s:gsub('%-.*', ''):lower() |
| | | langCode = resolveSynonym(langCode) |
| -- Temporarily remove any initial asterisk before representation marks,
| | local regionCode = s:match('%-(.+)') |
| -- which avoids having to account for it in the data, but set the
| | local isPrivate = regionCode and regionCode:sub(1, 2) == 'x-' |
| -- `reconstructed` flag.
| | return langCode, regionCode, isPrivate |
| if sub(pron, 1, 1) == "*" then
| | end |
| reconstructed = true
| |
| pron = sub(pron, 2)
| |
| end | |
| | |
| -- Some representation types have aliases for convenience (e.g. "// //" is
| |
| -- an alias for "⫽ ⫽"), and these need to be substituted in before checking
| |
| -- for other data.
| |
| local opening, n = match(pron, "^.[\128-\191]*") | |
| local subs_data = m_data.representation_subs[opening] | |
| if subs_data then
| |
| pron, n = ugsub(pron, subs_data[1], subs_data[2])
| |
| -- If the substitution was made, `opening` needs to be changed to the
| |
| -- new opening character.
| |
| if n ~= 0 then
| |
| opening = subs_data[3]
| |
| end
| |
| end
| |
| | |
| -- Get the type data based on the opening character (if any), and set the
| |
| -- representation type if the closing character matches.
| |
| local type_data, repr, closing = m_data.representation_types[opening]
| |
| if type_data then
| |
| closing = type_data[2]
| |
| if type_data and match(pron, pattern_escape(closing) .. "$", #opening + 1) then
| |
| repr = type_data[1]
| |
| end
| |
| end
| |
| | |
| -- Default to the empty string.
| |
| if not repr then | |
| opening, closing = "", ""
| |
| end
| |
|
| |
|
| -- Reattach the asterisk if reconstructed. | | local function getLangName(code, link, raw) |
| if reconstructed then
| | return require('Module:Lang')._name_from_tag({ |
| pron = "*" .. pron | | code, |
| end
| | link = link, |
| | | raw = raw, |
| return pron, repr, opening, closing, reconstructed | | -- Without linking, "{{IPA}}" gets expanded in some contexts |
| | template = '[[Template:IPA|IPA]]' |
| | }) |
| end | | end |
|
| |
|
| local function hasInvalidSeparators(transcription) | | local function linkLang(name, target, link) |
| -- Escape certain characters as well as pauses, which have the format "(...)" (with any number of dots), to avoid false-positives.
| | return link == 'yes' and string.format( |
| transcription = transcription:gsub(".[\128-\191]*", m_symbols.separator_escapes) | | '[[%s|%s]]', |
| :gsub("%(%.+%)", "\3")
| | target or name .. ' language', |
| :gsub("[()]+", "") | | name |
| return (
| | ) or name |
| transcription:find("..", nil, true) or
| |
| transcription:match("%.%f[%z \1\2\3,:;]") or
| |
| transcription:match("\1%f[%z \2\3,:;]") or
| |
| transcription:match("\2%f[%z \1\3,:;]") or
| |
| transcription:match("\3[:;]") or | |
| transcription:match("%f[^%z \1\2\3,]%.") | |
| ) and true or false | |
| end | | end |
|
| |
|
| --[==[
| | function p._main(args) |
| Format a line of one or more bare IPA pronunciations (i.e. without any preceding {"IPA:"} and without adding to a
| | local ret, cats = {}, {} |
| category ` ``lang`` terms with IPA pronunciation`). Individual pronunciations are formatted using
| | local isDebug = args.debug == 'yes' |
| {format_IPA()} and are combined with separators, qualifiers, pre-text, post-text, etc. to form a line of pronunciations.
| | local s, langCode, regionCode, isPrivate |
| Parameters accepted are:
| | |
| * `lang` is an object representing the language of the pronunciations, which is used when adding cleanup categories for
| | -- Guide-linking mode |
| pronunciations with invalid phonemes; for determining how many syllables the pronunciations have in them, in order to
| | if args[2] and args[2] ~= '' then |
| add a category such as [[:Category:Italian 2-syllable words]] (for certain languages only); and for computing the
| | local data = mw.loadData('Module:IPA/data') |
| proper sort keys for categories. `lang` may be {nil}.
| | local isGeneric = args.generic == 'yes' |
| * `items` is a list of pronunciations, each of which is an object with the following properties:
| | s = args[2] |
| ** `pron`: the pronunciation, in the same format as is accepted by {format_IPA()}, i.e. it should be either phonemic
| | langCode, regionCode, isPrivate = splitTag(args[1]) |
| (surrounded by {/.../}), phonetic (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}) or a rhyme
| | local langData = data.langs[langCode] or {} |
| (beginning with a hyphen);
| | if regionCode then |
| ** `pretext`: text to display directly before the formatted pronunciation, inside of any qualifiers or accent
| | if not isPrivate then |
| qualifiers;
| | regionCode = regionCode:upper() |
| ** `posttext`: text to display directly after the formatted pronunciation, inside of any qualifiers or accent
| |
| qualifiers;
| |
| ** `q` or `qualifiers`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted
| |
| pronunciation; note that `qualifiers` is deprecated;
| |
| ** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
| |
| ** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
| |
| ** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation;
| |
| ** `refs`: {nil} or a list of references or reference specs to add after the pronunciation and any posttext and
| |
| qualifiers; the value of a list item is either a string containing the reference text (typically a call to a
| |
| citation template such as {{tl|cite-book}}, or a template wrapping such a call), or an object with fields `text`
| |
| (the reference text), `name` (the name of the reference, as in {{cd|<nowiki><ref name="foo">...</ref></nowiki>}}
| |
| or {{cd|<nowiki><ref name="foo" /></nowiki>}}) and/or `group` (the group of the reference, as in
| |
| {{cd|<nowiki><ref name="foo" group="bar">...</ref></nowiki>}} or
| |
| {{cd|<nowiki><ref name="foo" group="bar"/></nowiki>}}); this uses a parser function to format the reference
| |
| appropriately and insert a footnote number that hyperlinks to the actual reference, located in the
| |
| {{cd|<nowiki><references /></nowiki>}} section;
| |
| ** `gloss`: {nil} or a gloss (definition) for this item, if different definitions have different pronunciations;
| |
| ** `pos`: {nil} or a part of speech for this item, if different parts of speech have different pronunciations;
| |
| ** `separator`: the separator text to insert directly before the formatted pronunciation and all qualifiers, accent
| |
| qualifiers and pre-text; defaults to the outer `separator` parameter.
| |
| * `separator`: The default separator to use when separating formatted items. Defaults to {", "}. Does not apply to the
| |
| first item, where the default separator is always the empty string. Overridden by the per-item `separator` field in
| |
| `items`.
| |
| * `no_count`: Suppress adding a {#-syllable words} category such as [[:Category:Italian 2-syllable words]]. Note that
| |
| only certain languages add such categories to begin with, because it depends on knowing how to count syllables in a
| |
| given language, which depends on the phonology of the language. Also, this does not suppress the addition of cleanup
| |
| categories. If you need them suppressed, use `split_output` to return the categories separately and ignore them.
| |
| * `split_output`: If not given, the return value is a concatenation of the formatted pronunciation and formatted
| |
| categories. Otherwise, two values are returned: the formatted pronunciation and the categories. If `split_output` is
| |
| the value {"raw"}, the categories are returned in list form, where the list elements are a combination of category
| |
| strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]]. If
| |
| `split_output` is any other value besides {nil}, the categories are returned as a pre-formatted concatenated string.
| |
| ]==]
| |
| function export.format_IPA_multiple(lang, items, separator, no_count, split_output)
| |
| local categories = {} | |
| separator = separator or ", " | |
| | |
| if not lang then
| |
| track("format-multiple-nolang")
| |
| end
| |
| | |
| -- Format
| |
| if not items[1] then | |
| if namespace == 10 then -- Template
| |
| insert(items, {pron = "/aɪ piː ˈeɪ/"})
| |
| else | |
| insert(categories, "Pronunciation templates without a pronunciation")
| |
| end
| |
| end
| |
| | |
| local bits = {}
| |
| | |
| for i, item in ipairs(items) do
| |
| local bit
| |
| | |
| -- If the pronunciation is entirely empty, allow this and don't do anything, so that e.g. the pretext and/or
| |
| -- posttext can be specified to force something like ''unknown'' to appear in place of the pronunciation
| |
| -- (as happens e.g. when ? is used as a respelling in [[Module:ca-IPA]]; see [[guèiser]] for an example).
| |
| if item.pron == "" then | |
| bit = ""
| |
| else | |
| local item_categories, errtext
| |
| bit, item_categories, errtext = export.format_IPA(lang, item.pron, "raw")
| |
| bit = bit .. errtext
| |
| for _, cat in ipairs(item_categories) do | |
| insert(categories, cat) | |
| end | | end |
| end
| | if langData.dialects and langData.dialects[regionCode] then |
| | | -- Overwrite language data with the dialect's |
| if item.pretext then
| | local newLangData = {} |
| bit = item.pretext .. bit
| | for k, v in pairs(langData) do |
| end
| | if k ~= 'dialects' then |
| | | newLangData[k] = v |
| if item.posttext then
| | end |
| bit = bit .. item.posttext
| | end |
| end
| | local dialectData = langData.dialects[regionCode] |
| | | if dialectData.aliasOf then |
| local has_qualifiers = item.q and item.q[1] or item.qq and item.qq[1] or item.qualifiers and item.qualifiers[1]
| | -- Use the canonical region code |
| or item.a and item.a[1] or item.aa and item.aa[1]
| | regionCode = dialectData.aliasOf |
| local has_gloss_or_pos = item.gloss or item.pos
| | isPrivate = regionCode:sub(1, 2) == 'x-' |
| if has_qualifiers or has_gloss_or_pos then
| | dialectData = langData.dialects[regionCode] |
| -- FIXME: Currently we tack the gloss and POS (in that order) onto the end of the regular left qualifiers.
| | end |
| -- Should we do something different?
| | -- Lowercase IANA variant |
| local q = item.q
| | if dialectData.isVariant then |
| if has_gloss_or_pos then
| | regionCode = regionCode:lower() |
| q = mw.clone(item.q) or {} | |
| if item.gloss then | |
| local m_qualifier = require(qualifier_module) | |
| insert(q, m_qualifier.wrap_qualifier_css("“", "quote") .. item.gloss ..
| |
| m_qualifier.wrap_qualifier_css("”", "quote"))
| |
| end | | end |
| if item.pos then | | for k, v in pairs(dialectData) do |
| -- FIXME: Consider expanding aliases as found in [[Module:headword/data]] or similar. | | newLangData[k] = v |
| insert(q, item.pos)
| |
| end | | end |
| | langData = newLangData |
| | else |
| | isGeneric = true |
| end | | end |
|
| |
| bit = require("Module:pron qualifier").format_qualifiers {
| |
| lang = lang,
| |
| text = bit,
| |
| q = q,
| |
| qq = item.qq,
| |
| qualifiers = item.qualifiers,
| |
| a = item.a,
| |
| aa = item.aa,
| |
| }
| |
| end | | end |
| | | |
| if item.note then | | local fullLangCode = regionCode and langCode .. '-' .. regionCode |
| -- Support removed on 2024-06-15. | | or langCode |
| error("Support for `.note` has been removed; switch to `.refs` (which must be a list)") | | local langName = langData.name |
| | and linkLang(langData.name, langData.link, args.link) |
| | or getLangName(fullLangCode, args.link) |
| | if langName:sub(1, 5) == '<span' then |
| | -- Module:Lang has returned an error |
| | return langName .. renderCats({ 'IPA template errors' }, isDebug) |
| end | | end |
| if item.refs then | | if args.cat ~= 'no' then |
| local refspecs = item.refs | | local catLangName = langData.name |
| if #refspecs > 0 then | | or getLangName(fullLangCode, nil, 'yes') |
| bit = bit .. require(references_module).format_references(refspecs) | | if catLangName:sub(1, 5) == '<span' then |
| | -- Module:Lang has returned an error, but it's not fatal |
| | table.insert(cats, 'IPA template errors') |
| | mw.addWarning(catLangName) |
| | else |
| | table.insert(cats, string.format('Pages with %s IPA', catLangName)) |
| end | | end |
| end | | end |
| | | |
| bit = (item.separator or (i == 1 and "" or separator)) .. bit | | -- Label |
| | | local label = args.label |
| insert(bits, bit)
| | if not label then |
| | | local labelCode = args[3] and args[3]:lower() |
| --[=[ [[Special:WhatLinksHere/Wiktionary:Tracking/IPA/syntax-error]] | | or langData.defaultLabelCode |
| The length or gemination symbol should not appear after a syllable break or stress symbol. ]=]
| | if labelCode == '' then |
| | | label = '' |
| -- The nature of the following pattern match is such that we don't have to split a combined '/.../ [...]' spec
| | else |
| -- into its parts in order to process.
| | local langText |
| if match(item.pron, "[.\203][\136\140]?\203[\144\145]") then -- [.ˈˌ][ːˑ] | | if langData.text then |
| track("syntax-error")
| | langText = linkLang( |
| end
| | langData.text, |
| | | mw.ustring.match(langName, '^%[%[([^|%]]+)'), |
| if lang then
| | args.link |
| -- Add syllable count if the language's diphthongs are listed in [[Module:syllables]].
| | ) |
| -- Don't do this if the term has spaces, a liaison mark (‿) or isn't in mainspace.
| | else |
| if not no_count and namespace == 0 then | | langText = mw.ustring.gsub( |
| m_syllables = m_syllables or require(syllables_module) | | langName, |
| local langcode = lang:getCode() | | '^%[%[(([^|]+) languages)%]%]$', |
| if m_data.langs_to_generate_syllable_count_categories[langcode] then | | '[[%1|%2]]' |
| local raw_phonemic, phonetic, use_it = split_phonemic_phonetic(item.pron) | | ) |
| local phonemic, repr = determine_repr(raw_phonemic)
| | langText = mw.ustring.gsub( |
| if not phonetic then -- not a '/.../ [...]' combined pronunciation
| | langText, |
| if m_data.langs_to_use_phonetic_notation[langcode] then | | ' languages(%]?%]?)$', |
| use_it = repr == "phonetic" and phonemic or nil
| | '%1' |
| else
| | ) |
| use_it = repr == "phonemic" and phonemic or nil
| | end |
| end | | if labelCode and data.labels[labelCode] then |
| elseif repr == "phonetic" then
| | label = data.labels[labelCode]:format(langText) |
| use_it = phonetic | | else |
| elseif repr == "phonemic" then
| | label = data.defaultLabel:format(langText) |
| use_it = phonemic | |
| end | |
| -- Note: two uses of find with plain patterns are much faster than umatch with [ ‿]. | |
| if use_it and not (find(use_it, " ") or find(use_it, "‿")) then
| |
| local syllable_count = m_syllables.getVowels(use_it, lang) | |
| if syllable_count then
| |
| insert(categories, lang:getCanonicalName() .. " " .. syllable_count ..
| |
| "-syllable words")
| |
| end
| |
| end | |
| end | | end |
| end | | end |
| end | | end |
| end
| | if label and label ~= '' then |
| | | local span = mw.html.create('span') |
| return process_maybe_split_categories(split_output, categories, concat(bits), lang)
| | :addClass('IPA-label') |
| end
| | :wikitext(label) |
| | | if args.small ~= 'no' then |
| --[=[
| | span:addClass('IPA-label-small') |
| Format a single IPA pronunciation, which cannot be a combined spec (such as {/.../ [...]}). This has been extracted from
| | table.insert(ret, mw.getCurrentFrame():extensionTag({ |
| {format_IPA()} to allow the latter to handle such combined specs. This works like {format_IPA()} but requires that
| | name = 'templatestyles', |
| pre-created {err} (for error messages) and {categories} lists be passed in, and adds any generated error messages and
| | args = { src = 'Module:IPA/styles.css' } |
| categories to those lists. A single value is returned, the pronunciation, which is usually the same as passed in, but
| | })) |
| may have HTML added surrounding invalid characters so they appear in red.
| |
| ]=]
| |
| local function format_one_IPA(lang, raw_pron, err, categories)
| |
| -- Disallow wikilinks.
| |
| if match(raw_pron, "%[%[.-%]%]") then
| |
| error("IPA input must not contain wikilinks.")
| |
| end
| |
|
| |
| raw_pron = decode_entities(raw_pron)
| |
| | |
| -- Detect the type of transcription.
| |
| local pron, repr, opening, closing, reconstructed = determine_repr(raw_pron)
| |
|
| |
| -- Strip any reconstruction asterisk and representation marks.
| |
| pron = sub(pron, #opening + 1 + (reconstructed and 1 or 0), -#closing - 1)
| |
|
| |
| if not repr then
| |
| insert(categories, "IPA pronunciations with invalid representation marks")
| |
| -- insert(err, "invalid representation marks")
| |
| -- Removed because it's annoying when previewing pronunciation pages.
| |
| end
| |
| if repr ~= "orthographic" and lang and lang:getCode() == "en" and hasInvalidSeparators(pron) then
| |
| insert(categories, "English IPA pronunciations with invalid separators")
| |
| end
| |
| | |
| if pron == "" then
| |
| insert(categories, "IPA pronunciations with no pronunciation present")
| |
| end
| |
| | |
| -- Check for obsolete and nonstandard symbols
| |
| for _, symbol in ipairs(m_data.nonstandard) do
| |
| local result
| |
| for nonstandard in gmatch(pron, symbol) do
| |
| if not result then
| |
| result = {} | |
| end | | end |
| insert(result, nonstandard) | | table.insert(ret, tostring(span) .. ' ') |
| insert(categories,
| |
| {cat = "IPA pronunciations with obsolete or nonstandard characters", sort_key = nonstandard}
| |
| )
| |
| end | | end |
| | | |
| if result then | | -- Brackets |
| insert(err, "obsolete or nonstandard characters (" .. concat(result) .. ")")
| | s = (not isGeneric and langData.format or '[%s]'):format(s) |
| break | | |
| | -- Link to key |
| | local key = not isGeneric and langData.key or data.defaultKey |
| | s = string.format('[[%s|%s]]', key, s) |
| | else |
| | -- Basic mode |
| | s = args[1] |
| | if args.lang and args.lang ~= '' then |
| | langCode, regionCode, isPrivate = splitTag(args.lang) |
| end | | end |
| end
| | if args.cat ~= 'no' then |
| | | table.insert(cats, 'Pages with plain IPA') |
| --[[ Check for invalid symbols after removing the following:
| |
| 1. wikilinks (handled above)
| |
| 2. paired HTML tags
| |
| 3. bolding
| |
| 4. italics
| |
| 5. asterisk at beginning of transcription
| |
| 6. comma followed by spacing characters
| |
| 7. superscripts enclosed in superscript parentheses ]]
| |
| local found_HTML
| |
| local result = gsub(pron, "<(%a+)[^>]*>([^<]+)</%1>",
| |
| function(tagName, content)
| |
| found_HTML = true
| |
| return content
| |
| end)
| |
| result = gsub(result, "'''([^']*)'''", "%1")
| |
| result = gsub(result, "''([^']*)''", "%1")
| |
| result = gsub(result, "^%*", "")
| |
| result = ugsub(result, ",%s+", "")
| |
| | |
| -- VS15
| |
| local vs15_class = "[" .. m_symbols.add_vs15 .. "]"
| |
| if umatch(pron, vs15_class) then
| |
| local vs15 = u(0xFE0E)
| |
| if find(result, vs15) then
| |
| result = gsub(result, vs15, "")
| |
| pron = gsub(pron, vs15, "")
| |
| end | | end |
| pron = ugsub(pron, vs15_class, "%0" .. vs15)
| |
| end | | end |
| | | |
| if result ~= "" then | | -- Transcription |
| if lang then
| | do |
| -- Get the per_lang_valid data, and convert any per-language valid sequences to spaces.
| | local lang = (langCode or 'und') .. '-Latn' |
| local per_lang_valid = m_symbols.per_lang_valid[lang:getCode()]
| | if not isPrivate and regionCode then |
| if per_lang_valid then
| | lang = lang .. '-' .. regionCode |
| if type(per_lang_valid) == "table" then
| |
| for _, pattern in pairs(per_lang_valid) do
| |
| result = ugsub(result, pattern, " ")
| |
| end
| |
| else -- Should be a string.
| |
| result = ugsub(result, per_lang_valid, " ")
| |
| end
| |
| end
| |
| end
| |
| local suggestions
| |
| -- Check for any invalid sequences, excluding anything in the per-language lookup table.
| |
| for k, v in pairs(m_symbols.invalid) do | |
| if find(result, k, nil, true) then
| |
| if not suggestions then
| |
| suggestions = {}
| |
| end
| |
| insert(suggestions, k .. " with " .. v)
| |
| end
| |
| end | | end |
| if suggestions and suggestions[1] then | | lang = lang .. '-fonipa' |
| suggestions = listToText(suggestions) | | local span = mw.html.create('span') |
| if is_content_page then | | :addClass('IPA') |
| error("Invalid IPA: replace " .. suggestions) | | :addClass(args.class) |
| | :attr('lang', lang) |
| | -- wrap=all: Do nothing |
| | -- wrap=none: Never break |
| | -- Otherwise: Break at spaces only |
| | if args.wrap ~= 'all' then |
| | span:addClass('nowrap') |
| | if args.wrap ~= 'none' then |
| | s = wrapAtSpacesSafely(s) |
| end | | end |
| insert(err, "replace " .. suggestions)
| |
| end
| |
| -- Convert any valid character sequences to spaces
| |
| for _, pattern in pairs(m_symbols.valid) do
| |
| result = ugsub(result, pattern, " ")
| |
| end | | end |
| if not match(result, "^ *$") then | | if (not args[2] or args[2] == '') and args.tooltip ~= '' then |
| local category = "IPA pronunciations with invalid IPA characters" | | local tooltip = args.tooltip or |
| if not is_content_page then
| | 'Representation in the International Phonetic Alphabet (IPA)' |
| category = category .. "/non_mainspace" | | span:attr('title', tooltip) |
| end
| |
| insert(categories, category)
| |
| insert(err, "invalid IPA characters (" .. result .. ")") | |
| end | | end |
| | s = tostring(span:wikitext(s)) |
| | table.insert(ret, s) |
| end | | end |
| | | |
| if found_HTML then | | -- Audio |
| insert(categories, "IPA pronunciations with paired HTML tags") | | local audio = args.audio ~= '' and args.audio or args[4] ~= '' and args[4] |
| | if audio then |
| | local button = mw.getCurrentFrame():expandTemplate({ |
| | title = 'Audio', |
| | args = { audio, '' } |
| | }) |
| | table.insert(ret, ' ' .. button) |
| | table.insert(cats, 'Pages including recorded pronunciations') |
| end | | end |
| | | |
| if (repr == "phonemic" or repr == "rhyme") and lang and m_data.phonemes[lang:getCode()] then | | -- Categories |
| local valid_phonemes = m_data.phonemes[lang:getCode()]
| | table.insert(ret, renderCats(cats, isDebug)) |
| local rest = pron
| | |
| local phonemes = {}
| | return table.concat(ret) |
| | |
| while #rest > 0 do
| |
| local longestmatch, longestmatch_len = "", 0
| |
| | |
| local rest_init = sub(rest, 1, 1)
| |
| if rest_init == "(" or rest_init == ")" then
| |
| longestmatch = rest_init
| |
| longestmatch_len = 1
| |
| else
| |
| for _, phoneme in ipairs(valid_phonemes) do
| |
| local phoneme_len = len(phoneme)
| |
| if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then
| |
| longestmatch = phoneme
| |
| longestmatch_len = len(longestmatch)
| |
| end
| |
| end
| |
| end
| |
| | |
| if longestmatch_len > 0 then
| |
| insert(phonemes, longestmatch)
| |
| rest = usub(rest, longestmatch_len + 1)
| |
| else
| |
| local phoneme = usub(rest, 1, 1)
| |
| insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>")
| |
| rest = usub(rest, 2)
| |
| insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode())
| |
| track("invalid phonemes/" .. phoneme)
| |
| end
| |
| end
| |
| | |
| pron = concat(phonemes)
| |
| end | |
| | |
| return (reconstructed and "*" or "") .. opening .. pron .. closing | |
| end | | end |
|
| |
|
| --[==[
| | function p.main(frame) |
| Format an IPA pronunciation. This wraps the pronunciation in appropriate CSS classes and adds cleanup categories and
| | local args = frame:getParent().args |
| error messages as needed. The pronunciation `pron` should be either phonemic (surrounded by {/.../}), phonetic
| | if not args[1] then |
| (surrounded by {[...]}), orthographic (surrounded by {⟨...⟩}), a rhyme (beginning with a hyphen) or a combined
| | return '' |
| phonemic/phonetic spec (of the form {/.../ [...]}). `lang` indicates the language of the pronunciation and can be {nil}.
| |
| If not {nil}, and the specified language has data in [[Module:IPA/data]] indicating the allowed phonemes, then the page
| |
| will be added to a cleanup category and an error message displayed next to the outputted pronunciation. Note that `lang`
| |
| also determines sort key processing in the added cleanup categories. If `split_output` is not given, the return value is
| |
| a concatenation of the formatted pronunciation, error messages and formatted cleanup categories. Otherwise, three values
| |
| are returned: the formatted pronunciation, the cleanup categories and the concatenated error messages. If `split_output`
| |
| is the value {"raw"}, the cleanup categories are returned in list form, where the list elements are a combination of
| |
| category strings and category objects of the form suitable for passing to {format_categories()} in [[Module:utilities]].
| |
| If `split_output` is any other value besides {nil}, the cleanup categories are returned as a pre-formatted concatenated
| |
| string.
| |
| ]==]
| |
| function export.format_IPA(lang, pron, split_output) | |
| local err = {} | |
| local categories = {}
| |
| | |
| -- `pron` shouldn't contain ref tags.
| |
| if match(pron, "\127'\"`UNIQ%-%-ref%-[%dA-F]+%-QINU`\"'\127") then | |
| error("<ref> tags found inside pronunciation parameter.") | |
| end | | end |
| | | for i, v in ipairs(args) do |
| if not lang then | | args[i] = mw.text.trim(v) |
| track("format-nolang") | |
| end | | end |
| | | return p._main(args) |
| local phonemic, phonetic = split_phonemic_phonetic(pron)
| |
| pron = format_one_IPA(lang, phonemic, err, categories)
| |
| if phonetic then
| |
| track("phonemic-phonetic") -- There's no benefit to supporting the "/.../ [...]" format within one parameter.
| |
| phonetic = format_one_IPA(lang, phonetic, err, categories)
| |
| pron = pron .. " " .. phonetic
| |
| end
| |
| | |
| if err[1] and is_preview() then
| |
| err = '<span class="error" style="font-size: small;> ' .. concat(err, ", ") .. "</span>"
| |
| else
| |
| err = ""
| |
| end
| |
| | |
| return process_maybe_split_categories(split_output, categories, '<span class="IPA">' .. pron .. "</span>", lang, | |
| err)
| |
| end
| |
| | |
| --[==[
| |
| Format a line of one or more enPR pronunciations as {{tl|enPR}} would do it, i.e. with a preceding {"enPR:"} (linked to
| |
| [[Appendix:English pronunciation]]) followed by one or more formatted, comma-separated enPR pronunciations. The
| |
| pronunciations are formatted by wrapping them in the `AHD` and `enPR` CSS classes and adding any left and
| |
| right regular and accent qualifiers. In addition, the overall result is wrapped in any overall left and right regular
| |
| and accent qualifiers. There is a single parameter `data`, an object with the following fields:
| |
| * `items` is a list of enPR pronunciations, each of which is an object with the following properties:
| |
| ** `pron`: the enPR pronunciation;
| |
| ** `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display before the formatted pronunciation;
| |
| ** `qq`: {nil} or a list of right qualifiers to display after the formatted pronunciation;
| |
| ** `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display before the formatted pronunciation;
| |
| ** `aa`: {nil} or a list of right accent qualifiers to display after the formatted pronunciation.
| |
| * `q`: {nil} or a list of left qualifiers (as in {{tl|q}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"enPR:"}.
| |
| * `qq`: {nil} or a list of right qualifiers to display after all formatted pronunciations.
| |
| * `a`: {nil} or a list of left accent qualifiers (as in {{tl|a}}) to display at the beginning, before the formatted
| |
| pronunciations and preceding {"enPR:"}.
| |
| * `aa`: {nil} or a list of right accent qualifiers to display after all formatted pronunciations.
| |
| ]==]
| |
| function export.format_enPR_full(data)
| |
| local prefix = "[[Appendix:English pronunciation|enPR]]: "
| |
| local lang = require("Module:languages").getByCode("en")
| |
| local parts = {}
| |
| | |
| for _, item in ipairs(data.items) do
| |
| local part = '<span class="AHD enPR">' .. item.pron .. "</span>"
| |
| | |
| if item.q and item.q[1] or item.qq and item.qq[1] or item.a and item.a[1] or item.aa and item.aa[1] then
| |
| part = require("Module:pron qualifier").format_qualifiers {
| |
| lang = lang,
| |
| text = part,
| |
| q = item.q,
| |
| qq = item.qq,
| |
| a = item.a,
| |
| aa = item.aa,
| |
| }
| |
| end
| |
| insert(parts, part)
| |
| end
| |
| | |
| local prontext = prefix .. concat(parts, ", ")
| |
| if data.q and data.q[1] or data.qq and data.qq[1] or data.a and data.a[1] or data.aa and data.aa[1] then
| |
| prontext = require(pron_qualifier_module).format_qualifiers {
| |
| lang = lang,
| |
| text = prontext,
| |
| q = data.q,
| |
| qq = data.qq,
| |
| a = data.a,
| |
| aa = data.aa,
| |
| }
| |
| end
| |
| | |
| return prontext
| |
| end | | end |
|
| |
|
| return export | | return p |