Difference between revisions of "Module:Template translation"

Module:Template translation (view source)

Revision as of 00:21, 2 February 2014

1,415 bytes added , 00:21, 2 February 2014

m

there are other chars that generate exceptions in mw.language.isKnownLanguageTag, and some language codes still not validated correctly; check this correctly

Anonymous user

>Verdy p

@@ Line 1: / Line 1: @@
 local this = {}
-function this.checkLanguage(subpage, default)
+function this.checkLanguage(subpage)
-     --[[Check first if there's an apostrophe, because they break the isKnownLanguageTag
+     --[[Check first if there's any invalid character that would cause the
-         function. This test does not work with regexps, use plain search instead (no need
+        mw.language.isKnownLanguageTag() function to throw an exception:
-         to use Unicode parser, apostrophes can only appear isolated as one byte in UTF-8).
+        - all ASCII controls in [\000-\031\127],
-         ]]
+        - double quote ("), sharp sign (#), ampersand (&), apostrophe ('),
-     if (string.find(subpage, "'", 1, true) == nil)
+        - slash (/), colon (:), semicolon (;), lower than (<), greater than (>),
+        - brackets and braces ([, ], {, }), pipe (|), backslash (\\)
+        All other characters are accepted, including space and all non-ASCII
+        characters (including \192, which is invalid in UTF-8).
+    --]]
+    if mw.language.isValidCode(subpage) and mw.language.isKnownLanguageTag(subpage)
+    --[[However "SupportedLanguages" are too restrictive, as they discard many
+         valid BCP47 script variants (only because MediaWiki still does not
+        define automatic transliterators for them, e.g. "en-dsrt" or
+        "fr-brai" for French transliteration in Braille), and country variants,
+        (useful in localized data, even if they are no longer used for
+         translations, such as zh-cn, also useful for legacy codes).
+        We want to avoid matching subpagenames containing any uppercase letter,
+        (even if they are considered valid in BCP 47, in which they are
+        case-insensitive; they are not "SupportedLanguages" for MediaWiki, so
+        they are not "KnownLanguageTags" for MediaWiki).
+         To be more restrictive, we exclude any character that is not an
+        ASCII lowercase letter, hyphen-minus, or digit, and any code that
+        does not start with a letter or does not end with a letter or digit,
+        or that has more than 8 characters between hyphens, or has two
+        consecutive hyphens.
+    --]]
+     or  string.find(subpage, "^[%l][%-%d%l]*[%d%l]$") ~= nil
+    and string.find(subpage, "[%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l]") == nil
+    and string.find(subpage, "%-%-") == nil
      then
-         -- Return the subpage only if it is a valid language code.
+         return subpage
-        if (mw.language.isKnownLanguageTag(subpage))
-        then
-            return subpage
-        end
      end
      -- Otherwise there's currently no known language subpage